Browse Source

Merge branch 'xianyi:develop' into travispytorch

pull/3759/head
Martin Kroeker GitHub 3 years ago
parent
commit
db1c6a0b0f
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 506 additions and 68 deletions
  1. +114
    -0
      .github/workflows/mips64.yml
  2. +2
    -2
      cmake/system.cmake
  3. +4
    -0
      common.h
  4. +5
    -0
      ctest.c
  5. +1
    -1
      ctest/c_sblat1c.c
  6. +2
    -0
      driver/others/blas_server_omp.c
  7. +3
    -1
      kernel/Makefile
  8. +151
    -0
      kernel/mips/sdot_msa.c
  9. +160
    -0
      kernel/mips64/KERNEL.MIPS64_GENERIC
  10. +1
    -1
      kernel/mips64/dnrm2.S
  11. +12
    -12
      kernel/x86_64/dgemm_ncopy_8_skylakex.c
  12. +1
    -1
      kernel/x86_64/omatcopy_rt.c
  13. +2
    -2
      lapack-netlib/SRC/claed0.c
  14. +2
    -2
      lapack-netlib/SRC/claed7.c
  15. +3
    -3
      lapack-netlib/SRC/clalsa.c
  16. +2
    -2
      lapack-netlib/SRC/cstedc.c
  17. +2
    -2
      lapack-netlib/SRC/dlaed0.c
  18. +2
    -2
      lapack-netlib/SRC/dlaed7.c
  19. +4
    -4
      lapack-netlib/SRC/dlaeda.c
  20. +3
    -3
      lapack-netlib/SRC/dlalsa.c
  21. +1
    -1
      lapack-netlib/SRC/dlasd0.c
  22. +2
    -2
      lapack-netlib/SRC/dlasda.c
  23. +2
    -2
      lapack-netlib/SRC/dstedc.c
  24. +2
    -2
      lapack-netlib/SRC/slaed0.c
  25. +2
    -2
      lapack-netlib/SRC/slaed7.c
  26. +4
    -4
      lapack-netlib/SRC/slaeda.c
  27. +3
    -3
      lapack-netlib/SRC/slalsa.c
  28. +1
    -1
      lapack-netlib/SRC/slasd0.c
  29. +2
    -2
      lapack-netlib/SRC/slasda.c
  30. +2
    -2
      lapack-netlib/SRC/sstedc.c
  31. +2
    -2
      lapack-netlib/SRC/zlaed0.c
  32. +2
    -2
      lapack-netlib/SRC/zlaed7.c
  33. +3
    -3
      lapack-netlib/SRC/zlalsa.c
  34. +2
    -2
      lapack-netlib/SRC/zstedc.c

+ 114
- 0
.github/workflows/mips64.yml View File

@@ -0,0 +1,114 @@
# Cross-compiles OpenBLAS for several MIPS64 targets and runs the resulting
# static test binaries under qemu user-mode emulation (qemu-mips64el).
name: mips64 qemu test

on: [push, pull_request]

jobs:
  TEST:
    runs-on: ubuntu-latest
    strategy:
      # Keep the remaining targets running when one of them fails.
      fail-fast: false
      matrix:
        include:
          # triple selects the cross toolchain packages and compiler prefix,
          # opts is passed verbatim to the OpenBLAS make invocation.
          - target: MIPS64_GENERIC
            triple: mips64el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
          - target: SICORTEX
            triple: mips64el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=SICORTEX
          - target: I6400
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=I6400
          - target: P6600
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=P6600
          - target: I6500
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=I6500

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: install build deps
        # -y keeps the install non-interactive even where apt is not
        # preconfigured to assume "yes".
        run: |
          sudo apt-get update
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \
          gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross

      - name: checkout qemu
        uses: actions/checkout@v3
        with:
          repository: qemu/qemu
          path: qemu
          # Pinned qemu commit so the emulator does not change between runs.
          ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2

      - name: build qemu
        # Only the mips64el user-mode emulator is built; system emulation is disabled.
        run: |
          cd qemu
          ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system
          make -j$(nproc)
          make install

      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          # Exact key is per commit; restore-keys fall back to the same ref,
          # then to any cache for the same OS/target.
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      - name: build OpenBLAS
        # Static linking so the binaries can run under qemu user-mode emulation.
        run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

      - name: test
        # Runs utest, the ctest level 1-3 drivers and the test/ level 1-3
        # drivers; the test/ drivers run once single-threaded and once with
        # two threads. Stale *.SUMM files are removed between passes.
        run: |
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
          qemu-mips64el ./utest/openblas_utest
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1
          rm -f ./test/?BLAT2.SUMM
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
          rm -f ./test/?BLAT2.SUMM
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
          rm -f ./test/?BLAT3.SUMM
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
          rm -f ./test/?BLAT3.SUMM
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat

+ 2
- 2
cmake/system.cmake View File

@@ -197,14 +197,14 @@ if (DEFINED TARGET)
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
if ((${TARGET} STREQUAL HASWELL OR ${TARGET} STREQUAL ZEN) AND NOT NO_AVX2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2 -mfma")
endif()
endif()
if (DEFINED HAVE_AVX)


+ 4
- 0
common.h View File

@@ -387,6 +387,10 @@ typedef int blasint;
#endif
*/

/* Emscripten builds: define YIELDING as an empty token sequence, so the
   default below is skipped and no sched_yield() call is emitted. */
#ifdef __EMSCRIPTEN__
#define YIELDING
#endif

/* Default yield primitive, used only when nothing earlier defined YIELDING. */
#ifndef YIELDING
#define YIELDING sched_yield()
#endif


+ 5
- 0
ctest.c View File

@@ -173,3 +173,8 @@ HAVE_C11
ARCH_E2K
#endif

/* Emscripten toolchain: emit the ARCH_RISCV64 and OS_WINDOWS marker tokens.
   NOTE(review): presumably these stand in for a dedicated WebAssembly
   arch/OS marker - confirm against whatever consumes this file's output. */
#if defined(__EMSCRIPTEN__)
ARCH_RISCV64
OS_WINDOWS
#endif


+ 1
- 1
ctest/c_sblat1c.c View File

@@ -969,7 +969,7 @@ real *sfac;
1.17 };

/* Local variables */
extern /* Subroutine */ srottest_();
extern /* Subroutine */ void srottest_();
static integer i__, k, ksize;
extern /* Subroutine */ int stest_(), srotmtest_();
static integer ki, kn;


+ 2
- 0
driver/others/blas_server_omp.c View File

@@ -69,6 +69,8 @@

int blas_server_avail = 0;

extern int openblas_omp_adaptive_env();

static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
#ifdef HAVE_C11
static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];


+ 3
- 1
kernel/Makefile View File

@@ -23,7 +23,7 @@ ifeq ($(C_COMPILER), CLANG)
# Any clang posing as gcc 4.2 should be new enough (3.4 or later)
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
AVX2OPT = -mavx2
AVX2OPT = -mavx2 -mfma
endif
endif
ifdef NO_AVX2
@@ -73,6 +73,8 @@ else ifeq ($(TARGET_CORE), SKYLAKEX)
endif
else ifeq ($(TARGET_CORE), HASWELL)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
else ifeq ($(TARGET_CORE), ZEN)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
else ifeq ($(TARGET_CORE), LOONGSON3R4)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
else


+ 151
- 0
kernel/mips/sdot_msa.c View File

@@ -39,10 +39,19 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
FLOAT x0, x1, x2, x3, y0, y1, y2, y3;
v4f32 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;
v4f32 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;
#if defined(DSDOT)
v2f64 dvx0, dvx1, dvx2, dvx3, dvx4, dvx5, dvx6, dvx7;
v2f64 dvy0, dvy1, dvy2, dvy3, dvy4, dvy5, dvy6, dvy7;
v2f64 dot0 = {0, 0};
v2f64 dot1 = {0, 0};
v2f64 dot2 = {0, 0};
v2f64 dot3 = {0, 0};
#else
v4f32 dot0 = {0, 0, 0, 0};
v4f32 dot1 = {0, 0, 0, 0};
v4f32 dot2 = {0, 0, 0, 0};
v4f32 dot3 = {0, 0, 0, 0};
#endif

if (n < 1) return (dot);

@@ -83,6 +92,61 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
x_pref += 32;
y_pref += 32;

#if defined(DSDOT)
/* Extend single precision to double precision */
dvy0 = __msa_fexupr_d(vy0);
dvy1 = __msa_fexupr_d(vy1);
dvy2 = __msa_fexupr_d(vy2);
dvy3 = __msa_fexupr_d(vy3);
dvy4 = __msa_fexupr_d(vy4);
dvy5 = __msa_fexupr_d(vy5);
dvy6 = __msa_fexupr_d(vy6);
dvy7 = __msa_fexupr_d(vy7);

vy0 = (v4f32)__msa_fexupl_d(vy0);
vy1 = (v4f32)__msa_fexupl_d(vy1);
vy2 = (v4f32)__msa_fexupl_d(vy2);
vy3 = (v4f32)__msa_fexupl_d(vy3);
vy4 = (v4f32)__msa_fexupl_d(vy4);
vy5 = (v4f32)__msa_fexupl_d(vy5);
vy6 = (v4f32)__msa_fexupl_d(vy6);
vy7 = (v4f32)__msa_fexupl_d(vy7);

dvx0 = __msa_fexupr_d(vx0);
dvx1 = __msa_fexupr_d(vx1);
dvx2 = __msa_fexupr_d(vx2);
dvx3 = __msa_fexupr_d(vx3);
dvx4 = __msa_fexupr_d(vx4);
dvx5 = __msa_fexupr_d(vx5);
dvx6 = __msa_fexupr_d(vx6);
dvx7 = __msa_fexupr_d(vx7);

vx0 = (v4f32)__msa_fexupl_d(vx0);
vx1 = (v4f32)__msa_fexupl_d(vx1);
vx2 = (v4f32)__msa_fexupl_d(vx2);
vx3 = (v4f32)__msa_fexupl_d(vx3);
vx4 = (v4f32)__msa_fexupl_d(vx4);
vx5 = (v4f32)__msa_fexupl_d(vx5);
vx6 = (v4f32)__msa_fexupl_d(vx6);
vx7 = (v4f32)__msa_fexupl_d(vx7);

dot0 += (dvy0 * dvx0);
dot1 += (dvy1 * dvx1);
dot2 += (dvy2 * dvx2);
dot3 += (dvy3 * dvx3);
dot0 += (dvy4 * dvx4);
dot1 += (dvy5 * dvx5);
dot2 += (dvy6 * dvx6);
dot3 += (dvy7 * dvx7);
dot0 += ((v2f64)vy0 * (v2f64)vx0);
dot1 += ((v2f64)vy1 * (v2f64)vx1);
dot2 += ((v2f64)vy2 * (v2f64)vx2);
dot3 += ((v2f64)vy3 * (v2f64)vx3);
dot0 += ((v2f64)vy4 * (v2f64)vx4);
dot1 += ((v2f64)vy5 * (v2f64)vx5);
dot2 += ((v2f64)vy6 * (v2f64)vx6);
dot3 += ((v2f64)vy7 * (v2f64)vx7);
#else
dot0 += (vy0 * vx0);
dot1 += (vy1 * vx1);
dot2 += (vy2 * vx2);
@@ -91,6 +155,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
dot1 += (vy5 * vx5);
dot2 += (vy6 * vx6);
dot3 += (vy7 * vx7);
#endif
}

if (n & 31)
@@ -100,10 +165,41 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_SP4_INC(x, 4, vx0, vx1, vx2, vx3);
LD_SP4_INC(y, 4, vy0, vy1, vy2, vy3);

#if defined(DSDOT)
dvy0 = __msa_fexupr_d(vy0);
dvy1 = __msa_fexupr_d(vy1);
dvy2 = __msa_fexupr_d(vy2);
dvy3 = __msa_fexupr_d(vy3);

vy0 = (v4f32)__msa_fexupl_d(vy0);
vy1 = (v4f32)__msa_fexupl_d(vy1);
vy2 = (v4f32)__msa_fexupl_d(vy2);
vy3 = (v4f32)__msa_fexupl_d(vy3);

dvx0 = __msa_fexupr_d(vx0);
dvx1 = __msa_fexupr_d(vx1);
dvx2 = __msa_fexupr_d(vx2);
dvx3 = __msa_fexupr_d(vx3);

vx0 = (v4f32)__msa_fexupl_d(vx0);
vx1 = (v4f32)__msa_fexupl_d(vx1);
vx2 = (v4f32)__msa_fexupl_d(vx2);
vx3 = (v4f32)__msa_fexupl_d(vx3);

dot0 += (dvy0 * dvx0);
dot1 += (dvy1 * dvx1);
dot2 += (dvy2 * dvx2);
dot3 += (dvy3 * dvx3);
dot0 += ((v2f64)vy0 * (v2f64)vx0);
dot1 += ((v2f64)vy1 * (v2f64)vx1);
dot2 += ((v2f64)vy2 * (v2f64)vx2);
dot3 += ((v2f64)vy3 * (v2f64)vx3);
#else
dot0 += (vy0 * vx0);
dot1 += (vy1 * vx1);
dot2 += (vy2 * vx2);
dot3 += (vy3 * vx3);
#endif
}

if (n & 8)
@@ -111,8 +207,27 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_SP2_INC(x, 4, vx0, vx1);
LD_SP2_INC(y, 4, vy0, vy1);

#if defined(DSDOT)
dvy0 = __msa_fexupr_d(vy0);
dvy1 = __msa_fexupr_d(vy1);

vy0 = (v4f32)__msa_fexupl_d(vy0);
vy1 = (v4f32)__msa_fexupl_d(vy1);

dvx0 = __msa_fexupr_d(vx0);
dvx1 = __msa_fexupr_d(vx1);

vx0 = (v4f32)__msa_fexupl_d(vx0);
vx1 = (v4f32)__msa_fexupl_d(vx1);

dot0 += (dvy0 * dvx0);
dot1 += (dvy1 * dvx1);
dot0 += ((v2f64)vy0 * (v2f64)vx0);
dot1 += ((v2f64)vy1 * (v2f64)vx1);
#else
dot0 += (vy0 * vx0);
dot1 += (vy1 * vx1);
#endif
}

if (n & 4)
@@ -120,7 +235,16 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
vx0 = LD_SP(x); x += 4;
vy0 = LD_SP(y); y += 4;

#if defined(DSDOT)
dvy0 = __msa_fexupr_d(vy0);
vy0 = (v4f32)__msa_fexupl_d(vy0);
dvx0 = __msa_fexupr_d(vx0);
vx0 = (v4f32)__msa_fexupl_d(vx0);
dot0 += (dvy0 * dvx0);
dot0 += ((v2f64)vy0 * (v2f64)vx0);
#else
dot0 += (vy0 * vx0);
#endif
}

if (n & 2)
@@ -128,8 +252,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_GP2_INC(x, 1, x0, x1);
LD_GP2_INC(y, 1, y0, y1);

#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
dot += ((double)y1 * (double)x1);
#else
dot += (y0 * x0);
dot += (y1 * x1);
#endif
}

if (n & 1)
@@ -137,7 +266,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
x0 = *x;
y0 = *y;

#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
#else
dot += (y0 * x0);
#endif
}
}

@@ -145,8 +278,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)

dot += dot0[0];
dot += dot0[1];
#if !defined(DSDOT)
dot += dot0[2];
dot += dot0[3];
#endif
}
else
{
@@ -155,10 +290,17 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_GP4_INC(x, inc_x, x0, x1, x2, x3);
LD_GP4_INC(y, inc_y, y0, y1, y2, y3);

#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
dot += ((double)y1 * (double)x1);
dot += ((double)y2 * (double)x2);
dot += ((double)y3 * (double)x3);
#else
dot += (y0 * x0);
dot += (y1 * x1);
dot += (y2 * x2);
dot += (y3 * x3);
#endif
}

if (n & 2)
@@ -166,8 +308,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_GP2_INC(x, inc_x, x0, x1);
LD_GP2_INC(y, inc_y, y0, y1);

#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
dot += ((double)y1 * (double)x1);
#else
dot += (y0 * x0);
dot += (y1 * x1);
#endif
}

if (n & 1)
@@ -175,7 +322,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
x0 = *x;
y0 = *y;

#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
#else
dot += (y0 * x0);
#endif
}
}



+ 160
- 0
kernel/mips64/KERNEL.MIPS64_GENERIC View File

@@ -0,0 +1,160 @@
# Kernel source selection for the MIPS64_GENERIC target.
# Every entry points at a C source under ../generic or ../mips; no assembly
# (.S) kernels are referenced in this file.
# Comments are kept on their own lines: a trailing comment on an assignment
# line would leave trailing whitespace in the variable's value.

# GEMM beta kernels: S and D share gemm_beta.c, C and Z share zgemm_beta.c.
SGEMM_BETA = ../generic/gemm_beta.c
DGEMM_BETA = ../generic/gemm_beta.c
CGEMM_BETA = ../generic/zgemm_beta.c
ZGEMM_BETA = ../generic/zgemm_beta.c

# TRMM kernels: generic 2x2 sources.
STRMMKERNEL = ../generic/trmmkernel_2x2.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c

# GEMM kernels plus their ncopy/tcopy routines (generic 2x2 kernel with the
# matching *_2 copy sources). The *OBJ variables name the copy object files.
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o

DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o

CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o

ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o

# TRSM kernels (LN/LT/RN/RT variants). All four prefixes, including C and Z,
# use the same non-z-prefixed generic sources.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

# Remaining kernels: C sources from ../mips (original note: "Pure C for other
# kernels"). S and D entries share one file; C and Z entries share the
# z-prefixed counterpart.
SAMAXKERNEL = ../mips/amax.c
DAMAXKERNEL = ../mips/amax.c
CAMAXKERNEL = ../mips/zamax.c
ZAMAXKERNEL = ../mips/zamax.c

SAMINKERNEL = ../mips/amin.c
DAMINKERNEL = ../mips/amin.c
CAMINKERNEL = ../mips/zamin.c
ZAMINKERNEL = ../mips/zamin.c

# MAX/MIN are set for the S and D prefixes only.
SMAXKERNEL = ../mips/max.c
DMAXKERNEL = ../mips/max.c

SMINKERNEL = ../mips/min.c
DMINKERNEL = ../mips/min.c

# I-prefixed AMAX/AMIN/MAX/MIN kernels.
ISAMAXKERNEL = ../mips/iamax.c
IDAMAXKERNEL = ../mips/iamax.c
ICAMAXKERNEL = ../mips/izamax.c
IZAMAXKERNEL = ../mips/izamax.c

ISAMINKERNEL = ../mips/iamin.c
IDAMINKERNEL = ../mips/iamin.c
ICAMINKERNEL = ../mips/izamin.c
IZAMINKERNEL = ../mips/izamin.c

ISMAXKERNEL = ../mips/imax.c
IDMAXKERNEL = ../mips/imax.c

ISMINKERNEL = ../mips/imin.c
IDMINKERNEL = ../mips/imin.c

# ASUM / SUM kernels.
SASUMKERNEL = ../mips/asum.c
DASUMKERNEL = ../mips/asum.c
CASUMKERNEL = ../mips/zasum.c
ZASUMKERNEL = ../mips/zasum.c

SSUMKERNEL = ../mips/sum.c
DSUMKERNEL = ../mips/sum.c
CSUMKERNEL = ../mips/zsum.c
ZSUMKERNEL = ../mips/zsum.c

# AXPY, COPY, DOT, NRM2, ROT, SCAL and SWAP kernels.
SAXPYKERNEL = ../mips/axpy.c
DAXPYKERNEL = ../mips/axpy.c
CAXPYKERNEL = ../mips/zaxpy.c
ZAXPYKERNEL = ../mips/zaxpy.c

SCOPYKERNEL = ../mips/copy.c
DCOPYKERNEL = ../mips/copy.c
CCOPYKERNEL = ../mips/zcopy.c
ZCOPYKERNEL = ../mips/zcopy.c

SDOTKERNEL = ../mips/dot.c
DDOTKERNEL = ../mips/dot.c
CDOTKERNEL = ../mips/zdot.c
ZDOTKERNEL = ../mips/zdot.c

SNRM2KERNEL = ../mips/nrm2.c
DNRM2KERNEL = ../mips/nrm2.c
CNRM2KERNEL = ../mips/znrm2.c
ZNRM2KERNEL = ../mips/znrm2.c

SROTKERNEL = ../mips/rot.c
DROTKERNEL = ../mips/rot.c
CROTKERNEL = ../mips/zrot.c
ZROTKERNEL = ../mips/zrot.c

SSCALKERNEL = ../mips/scal.c
DSCALKERNEL = ../mips/scal.c
CSCALKERNEL = ../mips/zscal.c
ZSCALKERNEL = ../mips/zscal.c

SSWAPKERNEL = ../mips/swap.c
DSWAPKERNEL = ../mips/swap.c
CSWAPKERNEL = ../mips/zswap.c
ZSWAPKERNEL = ../mips/zswap.c

# GEMV kernels (N and T variants).
SGEMVNKERNEL = ../mips/gemv_n.c
DGEMVNKERNEL = ../mips/gemv_n.c
CGEMVNKERNEL = ../mips/zgemv_n.c
ZGEMVNKERNEL = ../mips/zgemv_n.c

SGEMVTKERNEL = ../mips/gemv_t.c
DGEMVTKERNEL = ../mips/gemv_t.c
CGEMVTKERNEL = ../mips/zgemv_t.c
ZGEMVTKERNEL = ../mips/zgemv_t.c

# SYMV kernels (U and L variants) from ../generic: S/D/Q use symv_k.c,
# C/Z/X use zsymv_k.c.
SSYMV_U_KERNEL = ../generic/symv_k.c
SSYMV_L_KERNEL = ../generic/symv_k.c
DSYMV_U_KERNEL = ../generic/symv_k.c
DSYMV_L_KERNEL = ../generic/symv_k.c
QSYMV_U_KERNEL = ../generic/symv_k.c
QSYMV_L_KERNEL = ../generic/symv_k.c
CSYMV_U_KERNEL = ../generic/zsymv_k.c
CSYMV_L_KERNEL = ../generic/zsymv_k.c
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
XSYMV_U_KERNEL = ../generic/zsymv_k.c
XSYMV_L_KERNEL = ../generic/zsymv_k.c

# HEMV kernels: only the Z entries are set here.
# NOTE(review): no CHEMV_* entries appear in this file - presumably a default
# from elsewhere in the build applies; confirm.
ZHEMV_U_KERNEL = ../generic/zhemv_k.c
ZHEMV_L_KERNEL = ../generic/zhemv_k.c

# GEMM3M kernels for the C and Z prefixes.
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c

+ 1
- 1
kernel/mips64/dnrm2.S View File

@@ -90,7 +90,7 @@
//Init INF
lui TEMP, 0x7FF0
dsll TEMP, TEMP, 32
MTC1 TEMP, INF
MTC TEMP, INF

LD a1, 0 * SIZE(X)
daddiu N, N, -1


+ 12
- 12
kernel/x86_64/dgemm_ncopy_8_skylakex.c View File

@@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ;
FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24;
FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ;
FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32;
FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ;
FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40;
FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ;
FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48;
FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ;
FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56;
FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ;
FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64;


aoffset = a;


+ 1
- 1
kernel/x86_64/omatcopy_rt.c View File

@@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\
}
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){
float *src, *dst, *dst_tmp, *src_base, *dst_base;
float *src, *dst, *dst_tmp=0, *src_base, *dst_base;
uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0;
BLASLONG cols_left, rows_done; float ALPHA = alpha;
if(ALPHA==0.0){


+ 2
- 2
lapack-netlib/SRC/claed0.c View File

@@ -796,10 +796,10 @@ L10:

temp = log((real) (*n)) / log(2.f);
lgn = (integer) temp;
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;


+ 2
- 2
lapack-netlib/SRC/claed7.c View File

@@ -864,11 +864,11 @@ f"> */
/* Form the z-vector which consists of the last row of Q_1 and the */
/* first row of Q_2. */

ptr = pow_ii(&c__2, tlvls) + 1;
ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
ptr += pow_ii(&c__2, &i__2);
ptr += pow_ii(c__2, i__2);
/* L10: */
}
curr = ptr + *curpbm;


+ 3
- 3
lapack-netlib/SRC/clalsa.c View File

@@ -1051,7 +1051,7 @@ f"> */
/* Finally go through the left singular vector matrices of all */
/* the other subproblems bottom-up on the tree. */

j = pow_ii(&c__2, &nlvl);
j = pow_ii(c__2, nlvl);
sqre = 0;

for (lvl = nlvl; lvl >= 1; --lvl) {
@@ -1065,7 +1065,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
lf = pow_ii(&c__2, &i__1);
lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
@@ -1110,7 +1110,7 @@ L170:
ll = 1;
} else {
i__2 = lvl - 1;
lf = pow_ii(&c__2, &i__2);
lf = pow_ii(c__2, i__2);
ll = (lf << 1) - 1;
}
i__2 = lf;


+ 2
- 2
lapack-netlib/SRC/cstedc.c View File

@@ -836,10 +836,10 @@ f"> */
lrwmin = *n - 1 << 1;
} else if (icompz == 1) {
lgn = (integer) (log((real) (*n)) / log(2.f));
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
lwmin = *n * *n;


+ 2
- 2
lapack-netlib/SRC/dlaed0.c View File

@@ -827,10 +827,10 @@ L10:

temp = log((doublereal) (*n)) / log(2.);
lgn = (integer) temp;
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;


+ 2
- 2
lapack-netlib/SRC/dlaed7.c View File

@@ -885,11 +885,11 @@ f"> */
/* Form the z-vector which consists of the last row of Q_1 and the */
/* first row of Q_2. */

ptr = pow_ii(&c__2, tlvls) + 1;
ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
ptr += pow_ii(&c__2, &i__2);
ptr += pow_ii(c__2, i__2);
/* L10: */
}
curr = ptr + *curpbm;


+ 4
- 4
lapack-netlib/SRC/dlaeda.c View File

@@ -754,7 +754,7 @@ f"> */
/* scheme */

i__1 = *curlvl - 1;
curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;

/* Determine size of these matrices. We add HALF to the value of */
/* the SQRT in case the machine underestimates one of these square */
@@ -781,12 +781,12 @@ f"> */
/* rotations and permutation and then multiplying the center matrices */
/* against the current Z. */

ptr = pow_ii(&c__2, tlvls) + 1;
ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (k = 1; k <= i__1; ++k) {
i__2 = *curlvl - k;
i__3 = *curlvl - k - 1;
curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
1;
psiz1 = prmptr[curr + 1] - prmptr[curr];
psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
@@ -847,7 +847,7 @@ f"> */
c__1);

i__2 = *tlvls - k;
ptr += pow_ii(&c__2, &i__2);
ptr += pow_ii(c__2, i__2);
/* L70: */
}



+ 3
- 3
lapack-netlib/SRC/dlalsa.c View File

@@ -951,7 +951,7 @@ f"> */
/* Finally go through the left singular vector matrices of all */
/* the other subproblems bottom-up on the tree. */

j = pow_ii(&c__2, &nlvl);
j = pow_ii(c__2, nlvl);
sqre = 0;

for (lvl = nlvl; lvl >= 1; --lvl) {
@@ -965,7 +965,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
lf = pow_ii(&c__2, &i__1);
lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
@@ -1010,7 +1010,7 @@ L50:
ll = 1;
} else {
i__2 = lvl - 1;
lf = pow_ii(&c__2, &i__2);
lf = pow_ii(c__2, i__2);
ll = (lf << 1) - 1;
}
i__2 = lf;


+ 1
- 1
lapack-netlib/SRC/dlasd0.c View File

@@ -824,7 +824,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
lf = pow_ii(&c__2, &i__1);
lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;


+ 2
- 2
lapack-netlib/SRC/dlasda.c View File

@@ -1027,7 +1027,7 @@ f"> */

/* Now conquer each subproblem bottom-up. */

j = pow_ii(&c__2, &nlvl);
j = pow_ii(c__2, nlvl);
for (lvl = nlvl; lvl >= 1; --lvl) {
lvl2 = (lvl << 1) - 1;

@@ -1039,7 +1039,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
lf = pow_ii(&c__2, &i__1);
lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;


+ 2
- 2
lapack-netlib/SRC/dstedc.c View File

@@ -806,10 +806,10 @@ f"> */
lwmin = *n - 1 << 1;
} else {
lgn = (integer) (log((doublereal) (*n)) / log(2.));
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (icompz == 1) {


+ 2
- 2
lapack-netlib/SRC/slaed0.c View File

@@ -823,10 +823,10 @@ L10:

temp = log((real) (*n)) / log(2.f);
lgn = (integer) temp;
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;


+ 2
- 2
lapack-netlib/SRC/slaed7.c View File

@@ -883,11 +883,11 @@ f"> */
/* Form the z-vector which consists of the last row of Q_1 and the */
/* first row of Q_2. */

ptr = pow_ii(&c__2, tlvls) + 1;
ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
ptr += pow_ii(&c__2, &i__2);
ptr += pow_ii(c__2, i__2);
/* L10: */
}
curr = ptr + *curpbm;


+ 4
- 4
lapack-netlib/SRC/slaeda.c View File

@@ -753,7 +753,7 @@ f"> */
/* scheme */

i__1 = *curlvl - 1;
curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;

/* Determine size of these matrices. We add HALF to the value of */
/* the SQRT in case the machine underestimates one of these square */
@@ -779,12 +779,12 @@ f"> */
/* rotations and permutation and then multiplying the center matrices */
/* against the current Z. */

ptr = pow_ii(&c__2, tlvls) + 1;
ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (k = 1; k <= i__1; ++k) {
i__2 = *curlvl - k;
i__3 = *curlvl - k - 1;
curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
1;
psiz1 = prmptr[curr + 1] - prmptr[curr];
psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
@@ -844,7 +844,7 @@ f"> */
c__1);

i__2 = *tlvls - k;
ptr += pow_ii(&c__2, &i__2);
ptr += pow_ii(c__2, i__2);
/* L70: */
}



+ 3
- 3
lapack-netlib/SRC/slalsa.c View File

@@ -946,7 +946,7 @@ f"> */
/* Finally go through the left singular vector matrices of all */
/* the other subproblems bottom-up on the tree. */

j = pow_ii(&c__2, &nlvl);
j = pow_ii(c__2, nlvl);
sqre = 0;

for (lvl = nlvl; lvl >= 1; --lvl) {
@@ -960,7 +960,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
lf = pow_ii(&c__2, &i__1);
lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
@@ -1005,7 +1005,7 @@ L50:
ll = 1;
} else {
i__2 = lvl - 1;
lf = pow_ii(&c__2, &i__2);
lf = pow_ii(c__2, i__2);
ll = (lf << 1) - 1;
}
i__2 = lf;


+ 1
- 1
lapack-netlib/SRC/slasd0.c View File

@@ -821,7 +821,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
lf = pow_ii(&c__2, &i__1);
lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;


+ 2
- 2
lapack-netlib/SRC/slasda.c View File

@@ -1023,7 +1023,7 @@ f"> */

/* Now conquer each subproblem bottom-up. */

j = pow_ii(&c__2, &nlvl);
j = pow_ii(c__2, nlvl);
for (lvl = nlvl; lvl >= 1; --lvl) {
lvl2 = (lvl << 1) - 1;

@@ -1035,7 +1035,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
lf = pow_ii(&c__2, &i__1);
lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;


+ 2
- 2
lapack-netlib/SRC/sstedc.c View File

@@ -804,10 +804,10 @@ f"> */
lwmin = *n - 1 << 1;
} else {
lgn = (integer) (log((real) (*n)) / log(2.f));
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (icompz == 1) {


+ 2
- 2
lapack-netlib/SRC/zlaed0.c View File

@@ -793,10 +793,10 @@ L10:

temp = log((doublereal) (*n)) / log(2.);
lgn = (integer) temp;
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;


+ 2
- 2
lapack-netlib/SRC/zlaed7.c View File

@@ -864,11 +864,11 @@ f"> */
/* Form the z-vector which consists of the last row of Q_1 and the */
/* first row of Q_2. */

ptr = pow_ii(&c__2, tlvls) + 1;
ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
ptr += pow_ii(&c__2, &i__2);
ptr += pow_ii(c__2, i__2);
/* L10: */
}
curr = ptr + *curpbm;


+ 3
- 3
lapack-netlib/SRC/zlalsa.c View File

@@ -1051,7 +1051,7 @@ f"> */
/* Finally go through the left singular vector matrices of all */
/* the other subproblems bottom-up on the tree. */

j = pow_ii(&c__2, &nlvl);
j = pow_ii(c__2, nlvl);
sqre = 0;

for (lvl = nlvl; lvl >= 1; --lvl) {
@@ -1065,7 +1065,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
lf = pow_ii(&c__2, &i__1);
lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
@@ -1110,7 +1110,7 @@ L170:
ll = 1;
} else {
i__2 = lvl - 1;
lf = pow_ii(&c__2, &i__2);
lf = pow_ii(c__2, i__2);
ll = (lf << 1) - 1;
}
i__2 = lf;


+ 2
- 2
lapack-netlib/SRC/zstedc.c View File

@@ -836,10 +836,10 @@ f"> */
lrwmin = *n - 1 << 1;
} else if (icompz == 1) {
lgn = (integer) (log((doublereal) (*n)) / log(2.));
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (pow_ii(&c__2, &lgn) < *n) {
if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
lwmin = *n * *n;


Loading…
Cancel
Save