| @@ -0,0 +1,114 @@ | |||
| # Cross-compiles OpenBLAS as static binaries for each MIPS64 target in the matrix | |||
| # and runs the utest/ctest/test programs under user-mode QEMU. | |||
| name: mips64 qemu test | |||
| on: [push, pull_request] | |||
| jobs: | |||
| TEST: | |||
| runs-on: ubuntu-latest | |||
| strategy: | |||
| # Let the remaining targets finish even if one of them fails. | |||
| fail-fast: false | |||
| matrix: | |||
| # target: OpenBLAS TARGET name (also part of the ccache key) | |||
| # triple: prefix of the cross gcc/gfortran packages and binaries | |||
| # opts: extra arguments passed to the OpenBLAS make invocation | |||
| include: | |||
| - target: MIPS64_GENERIC | |||
| triple: mips64el-linux-gnuabi64 | |||
| opts: NO_SHARED=1 TARGET=MIPS64_GENERIC | |||
| - target: SICORTEX | |||
| triple: mips64el-linux-gnuabi64 | |||
| opts: NO_SHARED=1 TARGET=SICORTEX | |||
| - target: I6400 | |||
| triple: mipsisa64r6el-linux-gnuabi64 | |||
| opts: NO_SHARED=1 TARGET=I6400 | |||
| - target: P6600 | |||
| triple: mipsisa64r6el-linux-gnuabi64 | |||
| opts: NO_SHARED=1 TARGET=P6600 | |||
| - target: I6500 | |||
| triple: mipsisa64r6el-linux-gnuabi64 | |||
| opts: NO_SHARED=1 TARGET=I6500 | |||
| steps: | |||
| - name: Checkout repository | |||
| uses: actions/checkout@v3 | |||
| - name: install build deps | |||
| run: | | |||
| sudo apt-get update | |||
| # -y keeps the install non-interactive on runners without an assume-yes apt config. | |||
| sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \ | |||
| gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross | |||
| # QEMU is built from a pinned commit so every run uses the same emulator. | |||
| - name: checkout qemu | |||
| uses: actions/checkout@v3 | |||
| with: | |||
| repository: qemu/qemu | |||
| path: qemu | |||
| ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2 | |||
| - name: build qemu | |||
| run: | | |||
| cd qemu | |||
| # Only the mips64el user-mode emulator is needed; system emulation is disabled. | |||
| ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system | |||
| make -j$(nproc) | |||
| make install | |||
| - name: Compilation cache | |||
| uses: actions/cache@v3 | |||
| with: | |||
| path: ~/.ccache | |||
| # The key ends in the commit SHA; the restore keys fall back to the same ref, then the same target. | |||
| key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} | |||
| restore-keys: | | |||
| ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} | |||
| ccache-${{ runner.os }}-${{ matrix.target }} | |||
| - name: Configure ccache | |||
| run: | | |||
| test -d ~/.ccache || mkdir -p ~/.ccache | |||
| echo "max_size = 300M" > ~/.ccache/ccache.conf | |||
| echo "compression = true" >> ~/.ccache/ccache.conf | |||
| ccache -s | |||
| - name: build OpenBLAS | |||
| run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc) | |||
| - name: test | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH | |||
| qemu-mips64el ./utest/openblas_utest | |||
| # CBLAS level 1 tests read no input file. | |||
| for p in s d c z; do | |||
|   OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/x${p}cblat1 | |||
| done | |||
| # CBLAS level 2 and 3 tests read their parameters from ./ctest/<p>in<level>. | |||
| for lvl in 2 3; do | |||
|   for p in s d c z; do | |||
|     OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/x${p}cblat${lvl} < ./ctest/${p}in${lvl} | |||
|   done | |||
| done | |||
| # Fortran BLAS level 1 tests: first with one thread, then with two. | |||
| for p in s d c z; do | |||
|   OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/${p}blat1 | |||
| done | |||
| for p in s d c z; do | |||
|   OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/${p}blat1 | |||
| done | |||
| # Fortran BLAS level 2 and 3 tests: the ?BLAT<level>.SUMM files are removed before each pass. | |||
| for lvl in 2 3; do | |||
|   rm -f ./test/?BLAT${lvl}.SUMM | |||
|   for p in s d c z; do | |||
|     OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat | |||
|   done | |||
|   rm -f ./test/?BLAT${lvl}.SUMM | |||
|   for p in s d c z; do | |||
|     OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat | |||
|   done | |||
| done | |||
| @@ -197,14 +197,14 @@ if (DEFINED TARGET) | |||
| # SKYLAKEX kernels are compiled for AVX-512 unless NO_AVX512 is set. | |||
| if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) | |||
|   set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||
| endif() | |||
| # HASWELL and ZEN kernels are compiled with AVX2 unless NO_AVX2 is set. | |||
| if ((${TARGET} STREQUAL HASWELL OR ${TARGET} STREQUAL ZEN) AND NOT NO_AVX2) | |||
|   if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||
|     # gcc older than 4.7 does not get -mavx2. The version is stripped and quoted so that | |||
|     # an empty or newline-terminated -dumpversion result cannot break the comparison. | |||
|     execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) | |||
|     if ("${GCC_VERSION}" VERSION_GREATER 4.7 OR "${GCC_VERSION}" VERSION_EQUAL 4.7) | |||
|       set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||
|     endif() | |||
|   # CMake reports clang as "Clang" (or "AppleClang"), and STREQUAL is case-sensitive, so the | |||
|   # upper-case "CLANG" test alone never matched; it is kept only for backward compatibility. | |||
|   elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG" OR ${CMAKE_C_COMPILER_ID} MATCHES "Clang") | |||
|     set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2 -mfma") | |||
|   endif() | |||
| endif() | |||
| if (DEFINED HAVE_AVX) | |||
| @@ -387,6 +387,10 @@ typedef int blasint; | |||
| #endif | |||
| */ | |||
| #ifdef __EMSCRIPTEN__ | |||
| #define YIELDING | |||
| #endif | |||
| #ifndef YIELDING | |||
| #define YIELDING sched_yield() | |||
| #endif | |||
| @@ -173,3 +173,8 @@ HAVE_C11 | |||
| ARCH_E2K | |||
| #endif | |||
| #if defined(__EMSCRIPTEN__) | |||
| ARCH_RISCV64 | |||
| OS_WINDOWS | |||
| #endif | |||
| @@ -969,7 +969,7 @@ real *sfac; | |||
| 1.17 }; | |||
| /* Local variables */ | |||
| extern /* Subroutine */ srottest_(); | |||
| extern /* Subroutine */ void srottest_(); | |||
| static integer i__, k, ksize; | |||
| extern /* Subroutine */ int stest_(), srotmtest_(); | |||
| static integer ki, kn; | |||
| @@ -69,6 +69,8 @@ | |||
| int blas_server_avail = 0; | |||
| extern int openblas_omp_adaptive_env(); | |||
| static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; | |||
| #ifdef HAVE_C11 | |||
| static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; | |||
| @@ -23,7 +23,7 @@ ifeq ($(C_COMPILER), CLANG) | |||
| # Any clang posing as gcc 4.2 should be new enough (3.4 or later) | |||
| GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2) | |||
| ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) | |||
| AVX2OPT = -mavx2 | |||
| AVX2OPT = -mavx2 -mfma | |||
| endif | |||
| endif | |||
| ifdef NO_AVX2 | |||
| @@ -73,6 +73,8 @@ else ifeq ($(TARGET_CORE), SKYLAKEX) | |||
| endif | |||
| else ifeq ($(TARGET_CORE), HASWELL) | |||
| override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT) | |||
| else ifeq ($(TARGET_CORE), ZEN) | |||
| override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT) | |||
| else ifeq ($(TARGET_CORE), LOONGSON3R4) | |||
| override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS) | |||
| else | |||
| @@ -39,10 +39,19 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| FLOAT x0, x1, x2, x3, y0, y1, y2, y3; | |||
| v4f32 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; | |||
| v4f32 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; | |||
| #if defined(DSDOT) | |||
| v2f64 dvx0, dvx1, dvx2, dvx3, dvx4, dvx5, dvx6, dvx7; | |||
| v2f64 dvy0, dvy1, dvy2, dvy3, dvy4, dvy5, dvy6, dvy7; | |||
| v2f64 dot0 = {0, 0}; | |||
| v2f64 dot1 = {0, 0}; | |||
| v2f64 dot2 = {0, 0}; | |||
| v2f64 dot3 = {0, 0}; | |||
| #else | |||
| v4f32 dot0 = {0, 0, 0, 0}; | |||
| v4f32 dot1 = {0, 0, 0, 0}; | |||
| v4f32 dot2 = {0, 0, 0, 0}; | |||
| v4f32 dot3 = {0, 0, 0, 0}; | |||
| #endif | |||
| if (n < 1) return (dot); | |||
| @@ -83,6 +92,61 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| x_pref += 32; | |||
| y_pref += 32; | |||
| #if defined(DSDOT) | |||
| /* Extend single precision to double precision */ | |||
| dvy0 = __msa_fexupr_d(vy0); | |||
| dvy1 = __msa_fexupr_d(vy1); | |||
| dvy2 = __msa_fexupr_d(vy2); | |||
| dvy3 = __msa_fexupr_d(vy3); | |||
| dvy4 = __msa_fexupr_d(vy4); | |||
| dvy5 = __msa_fexupr_d(vy5); | |||
| dvy6 = __msa_fexupr_d(vy6); | |||
| dvy7 = __msa_fexupr_d(vy7); | |||
| vy0 = (v4f32)__msa_fexupl_d(vy0); | |||
| vy1 = (v4f32)__msa_fexupl_d(vy1); | |||
| vy2 = (v4f32)__msa_fexupl_d(vy2); | |||
| vy3 = (v4f32)__msa_fexupl_d(vy3); | |||
| vy4 = (v4f32)__msa_fexupl_d(vy4); | |||
| vy5 = (v4f32)__msa_fexupl_d(vy5); | |||
| vy6 = (v4f32)__msa_fexupl_d(vy6); | |||
| vy7 = (v4f32)__msa_fexupl_d(vy7); | |||
| dvx0 = __msa_fexupr_d(vx0); | |||
| dvx1 = __msa_fexupr_d(vx1); | |||
| dvx2 = __msa_fexupr_d(vx2); | |||
| dvx3 = __msa_fexupr_d(vx3); | |||
| dvx4 = __msa_fexupr_d(vx4); | |||
| dvx5 = __msa_fexupr_d(vx5); | |||
| dvx6 = __msa_fexupr_d(vx6); | |||
| dvx7 = __msa_fexupr_d(vx7); | |||
| vx0 = (v4f32)__msa_fexupl_d(vx0); | |||
| vx1 = (v4f32)__msa_fexupl_d(vx1); | |||
| vx2 = (v4f32)__msa_fexupl_d(vx2); | |||
| vx3 = (v4f32)__msa_fexupl_d(vx3); | |||
| vx4 = (v4f32)__msa_fexupl_d(vx4); | |||
| vx5 = (v4f32)__msa_fexupl_d(vx5); | |||
| vx6 = (v4f32)__msa_fexupl_d(vx6); | |||
| vx7 = (v4f32)__msa_fexupl_d(vx7); | |||
| dot0 += (dvy0 * dvx0); | |||
| dot1 += (dvy1 * dvx1); | |||
| dot2 += (dvy2 * dvx2); | |||
| dot3 += (dvy3 * dvx3); | |||
| dot0 += (dvy4 * dvx4); | |||
| dot1 += (dvy5 * dvx5); | |||
| dot2 += (dvy6 * dvx6); | |||
| dot3 += (dvy7 * dvx7); | |||
| dot0 += ((v2f64)vy0 * (v2f64)vx0); | |||
| dot1 += ((v2f64)vy1 * (v2f64)vx1); | |||
| dot2 += ((v2f64)vy2 * (v2f64)vx2); | |||
| dot3 += ((v2f64)vy3 * (v2f64)vx3); | |||
| dot0 += ((v2f64)vy4 * (v2f64)vx4); | |||
| dot1 += ((v2f64)vy5 * (v2f64)vx5); | |||
| dot2 += ((v2f64)vy6 * (v2f64)vx6); | |||
| dot3 += ((v2f64)vy7 * (v2f64)vx7); | |||
| #else | |||
| dot0 += (vy0 * vx0); | |||
| dot1 += (vy1 * vx1); | |||
| dot2 += (vy2 * vx2); | |||
| @@ -91,6 +155,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| dot1 += (vy5 * vx5); | |||
| dot2 += (vy6 * vx6); | |||
| dot3 += (vy7 * vx7); | |||
| #endif | |||
| } | |||
| if (n & 31) | |||
| @@ -100,10 +165,41 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| LD_SP4_INC(x, 4, vx0, vx1, vx2, vx3); | |||
| LD_SP4_INC(y, 4, vy0, vy1, vy2, vy3); | |||
| #if defined(DSDOT) | |||
| dvy0 = __msa_fexupr_d(vy0); | |||
| dvy1 = __msa_fexupr_d(vy1); | |||
| dvy2 = __msa_fexupr_d(vy2); | |||
| dvy3 = __msa_fexupr_d(vy3); | |||
| vy0 = (v4f32)__msa_fexupl_d(vy0); | |||
| vy1 = (v4f32)__msa_fexupl_d(vy1); | |||
| vy2 = (v4f32)__msa_fexupl_d(vy2); | |||
| vy3 = (v4f32)__msa_fexupl_d(vy3); | |||
| dvx0 = __msa_fexupr_d(vx0); | |||
| dvx1 = __msa_fexupr_d(vx1); | |||
| dvx2 = __msa_fexupr_d(vx2); | |||
| dvx3 = __msa_fexupr_d(vx3); | |||
| vx0 = (v4f32)__msa_fexupl_d(vx0); | |||
| vx1 = (v4f32)__msa_fexupl_d(vx1); | |||
| vx2 = (v4f32)__msa_fexupl_d(vx2); | |||
| vx3 = (v4f32)__msa_fexupl_d(vx3); | |||
| dot0 += (dvy0 * dvx0); | |||
| dot1 += (dvy1 * dvx1); | |||
| dot2 += (dvy2 * dvx2); | |||
| dot3 += (dvy3 * dvx3); | |||
| dot0 += ((v2f64)vy0 * (v2f64)vx0); | |||
| dot1 += ((v2f64)vy1 * (v2f64)vx1); | |||
| dot2 += ((v2f64)vy2 * (v2f64)vx2); | |||
| dot3 += ((v2f64)vy3 * (v2f64)vx3); | |||
| #else | |||
| dot0 += (vy0 * vx0); | |||
| dot1 += (vy1 * vx1); | |||
| dot2 += (vy2 * vx2); | |||
| dot3 += (vy3 * vx3); | |||
| #endif | |||
| } | |||
| if (n & 8) | |||
| @@ -111,8 +207,27 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| LD_SP2_INC(x, 4, vx0, vx1); | |||
| LD_SP2_INC(y, 4, vy0, vy1); | |||
| #if defined(DSDOT) | |||
| dvy0 = __msa_fexupr_d(vy0); | |||
| dvy1 = __msa_fexupr_d(vy1); | |||
| vy0 = (v4f32)__msa_fexupl_d(vy0); | |||
| vy1 = (v4f32)__msa_fexupl_d(vy1); | |||
| dvx0 = __msa_fexupr_d(vx0); | |||
| dvx1 = __msa_fexupr_d(vx1); | |||
| vx0 = (v4f32)__msa_fexupl_d(vx0); | |||
| vx1 = (v4f32)__msa_fexupl_d(vx1); | |||
| dot0 += (dvy0 * dvx0); | |||
| dot1 += (dvy1 * dvx1); | |||
| dot0 += ((v2f64)vy0 * (v2f64)vx0); | |||
| dot1 += ((v2f64)vy1 * (v2f64)vx1); | |||
| #else | |||
| dot0 += (vy0 * vx0); | |||
| dot1 += (vy1 * vx1); | |||
| #endif | |||
| } | |||
| if (n & 4) | |||
| @@ -120,7 +235,16 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| vx0 = LD_SP(x); x += 4; | |||
| vy0 = LD_SP(y); y += 4; | |||
| #if defined(DSDOT) | |||
| dvy0 = __msa_fexupr_d(vy0); | |||
| vy0 = (v4f32)__msa_fexupl_d(vy0); | |||
| dvx0 = __msa_fexupr_d(vx0); | |||
| vx0 = (v4f32)__msa_fexupl_d(vx0); | |||
| dot0 += (dvy0 * dvx0); | |||
| dot0 += ((v2f64)vy0 * (v2f64)vx0); | |||
| #else | |||
| dot0 += (vy0 * vx0); | |||
| #endif | |||
| } | |||
| if (n & 2) | |||
| @@ -128,8 +252,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| LD_GP2_INC(x, 1, x0, x1); | |||
| LD_GP2_INC(y, 1, y0, y1); | |||
| #if defined(DSDOT) | |||
| dot += ((double)y0 * (double)x0); | |||
| dot += ((double)y1 * (double)x1); | |||
| #else | |||
| dot += (y0 * x0); | |||
| dot += (y1 * x1); | |||
| #endif | |||
| } | |||
| if (n & 1) | |||
| @@ -137,7 +266,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| x0 = *x; | |||
| y0 = *y; | |||
| #if defined(DSDOT) | |||
| dot += ((double)y0 * (double)x0); | |||
| #else | |||
| dot += (y0 * x0); | |||
| #endif | |||
| } | |||
| } | |||
| @@ -145,8 +278,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| dot += dot0[0]; | |||
| dot += dot0[1]; | |||
| #if !defined(DSDOT) | |||
| dot += dot0[2]; | |||
| dot += dot0[3]; | |||
| #endif | |||
| } | |||
| else | |||
| { | |||
| @@ -155,10 +290,17 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| LD_GP4_INC(x, inc_x, x0, x1, x2, x3); | |||
| LD_GP4_INC(y, inc_y, y0, y1, y2, y3); | |||
| #if defined(DSDOT) | |||
| dot += ((double)y0 * (double)x0); | |||
| dot += ((double)y1 * (double)x1); | |||
| dot += ((double)y2 * (double)x2); | |||
| dot += ((double)y3 * (double)x3); | |||
| #else | |||
| dot += (y0 * x0); | |||
| dot += (y1 * x1); | |||
| dot += (y2 * x2); | |||
| dot += (y3 * x3); | |||
| #endif | |||
| } | |||
| if (n & 2) | |||
| @@ -166,8 +308,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| LD_GP2_INC(x, inc_x, x0, x1); | |||
| LD_GP2_INC(y, inc_y, y0, y1); | |||
| #if defined(DSDOT) | |||
| dot += ((double)y0 * (double)x0); | |||
| dot += ((double)y1 * (double)x1); | |||
| #else | |||
| dot += (y0 * x0); | |||
| dot += (y1 * x1); | |||
| #endif | |||
| } | |||
| if (n & 1) | |||
| @@ -175,7 +322,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| x0 = *x; | |||
| y0 = *y; | |||
| #if defined(DSDOT) | |||
| dot += ((double)y0 * (double)x0); | |||
| #else | |||
| dot += (y0 * x0); | |||
| #endif | |||
| } | |||
| } | |||
| @@ -0,0 +1,160 @@ | |||
| # Kernel selection table: every routine is mapped to a C source under ../mips or ../generic. | |||
| # The assignments are independent of one another; they are grouped here by BLAS level. | |||
| # | |||
| # ---- Level 1: maximum / minimum of absolute values ---- | |||
| SAMAXKERNEL = ../mips/amax.c | |||
| DAMAXKERNEL = ../mips/amax.c | |||
| CAMAXKERNEL = ../mips/zamax.c | |||
| ZAMAXKERNEL = ../mips/zamax.c | |||
| SAMINKERNEL = ../mips/amin.c | |||
| DAMINKERNEL = ../mips/amin.c | |||
| CAMINKERNEL = ../mips/zamin.c | |||
| ZAMINKERNEL = ../mips/zamin.c | |||
| # ---- Level 1: maximum / minimum (real types only) ---- | |||
| SMAXKERNEL = ../mips/max.c | |||
| DMAXKERNEL = ../mips/max.c | |||
| SMINKERNEL = ../mips/min.c | |||
| DMINKERNEL = ../mips/min.c | |||
| # ---- Level 1: index-returning variants of the above ---- | |||
| ISAMAXKERNEL = ../mips/iamax.c | |||
| IDAMAXKERNEL = ../mips/iamax.c | |||
| ICAMAXKERNEL = ../mips/izamax.c | |||
| IZAMAXKERNEL = ../mips/izamax.c | |||
| ISAMINKERNEL = ../mips/iamin.c | |||
| IDAMINKERNEL = ../mips/iamin.c | |||
| ICAMINKERNEL = ../mips/izamin.c | |||
| IZAMINKERNEL = ../mips/izamin.c | |||
| ISMAXKERNEL = ../mips/imax.c | |||
| IDMAXKERNEL = ../mips/imax.c | |||
| ISMINKERNEL = ../mips/imin.c | |||
| IDMINKERNEL = ../mips/imin.c | |||
| # ---- Level 1: sums and norms ---- | |||
| SASUMKERNEL = ../mips/asum.c | |||
| DASUMKERNEL = ../mips/asum.c | |||
| CASUMKERNEL = ../mips/zasum.c | |||
| ZASUMKERNEL = ../mips/zasum.c | |||
| SSUMKERNEL = ../mips/sum.c | |||
| DSUMKERNEL = ../mips/sum.c | |||
| CSUMKERNEL = ../mips/zsum.c | |||
| ZSUMKERNEL = ../mips/zsum.c | |||
| SNRM2KERNEL = ../mips/nrm2.c | |||
| DNRM2KERNEL = ../mips/nrm2.c | |||
| CNRM2KERNEL = ../mips/znrm2.c | |||
| ZNRM2KERNEL = ../mips/znrm2.c | |||
| # ---- Level 1: vector updates ---- | |||
| SAXPYKERNEL = ../mips/axpy.c | |||
| DAXPYKERNEL = ../mips/axpy.c | |||
| CAXPYKERNEL = ../mips/zaxpy.c | |||
| ZAXPYKERNEL = ../mips/zaxpy.c | |||
| SCOPYKERNEL = ../mips/copy.c | |||
| DCOPYKERNEL = ../mips/copy.c | |||
| CCOPYKERNEL = ../mips/zcopy.c | |||
| ZCOPYKERNEL = ../mips/zcopy.c | |||
| SDOTKERNEL = ../mips/dot.c | |||
| DDOTKERNEL = ../mips/dot.c | |||
| CDOTKERNEL = ../mips/zdot.c | |||
| ZDOTKERNEL = ../mips/zdot.c | |||
| SROTKERNEL = ../mips/rot.c | |||
| DROTKERNEL = ../mips/rot.c | |||
| CROTKERNEL = ../mips/zrot.c | |||
| ZROTKERNEL = ../mips/zrot.c | |||
| SSCALKERNEL = ../mips/scal.c | |||
| DSCALKERNEL = ../mips/scal.c | |||
| CSCALKERNEL = ../mips/zscal.c | |||
| ZSCALKERNEL = ../mips/zscal.c | |||
| SSWAPKERNEL = ../mips/swap.c | |||
| DSWAPKERNEL = ../mips/swap.c | |||
| CSWAPKERNEL = ../mips/zswap.c | |||
| ZSWAPKERNEL = ../mips/zswap.c | |||
| # ---- Level 2: GEMV (../mips sources) ---- | |||
| SGEMVNKERNEL = ../mips/gemv_n.c | |||
| DGEMVNKERNEL = ../mips/gemv_n.c | |||
| CGEMVNKERNEL = ../mips/zgemv_n.c | |||
| ZGEMVNKERNEL = ../mips/zgemv_n.c | |||
| SGEMVTKERNEL = ../mips/gemv_t.c | |||
| DGEMVTKERNEL = ../mips/gemv_t.c | |||
| CGEMVTKERNEL = ../mips/zgemv_t.c | |||
| ZGEMVTKERNEL = ../mips/zgemv_t.c | |||
| # ---- Level 2: SYMV / HEMV (../generic sources) ---- | |||
| SSYMV_U_KERNEL = ../generic/symv_k.c | |||
| SSYMV_L_KERNEL = ../generic/symv_k.c | |||
| DSYMV_U_KERNEL = ../generic/symv_k.c | |||
| DSYMV_L_KERNEL = ../generic/symv_k.c | |||
| QSYMV_U_KERNEL = ../generic/symv_k.c | |||
| QSYMV_L_KERNEL = ../generic/symv_k.c | |||
| CSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
| CSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
| ZSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
| ZSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
| XSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
| XSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
| ZHEMV_U_KERNEL = ../generic/zhemv_k.c | |||
| ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||
| # ---- Level 3: GEMM beta scaling ---- | |||
| SGEMM_BETA = ../generic/gemm_beta.c | |||
| DGEMM_BETA = ../generic/gemm_beta.c | |||
| CGEMM_BETA = ../generic/zgemm_beta.c | |||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||
| # ---- Level 3: GEMM 2x2 kernels with their copy routines and object names ---- | |||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| # ---- Level 3: GEMM3M ---- | |||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | |||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | |||
| # ---- Level 3: TRMM 2x2 kernels ---- | |||
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| # ---- Level 3: TRSM kernels (the same four generic sources for every precision) ---- | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| @@ -90,7 +90,7 @@ | |||
| //Init INF | |||
| lui TEMP, 0x7FF0 | |||
| dsll TEMP, TEMP, 32 | |||
| MTC1 TEMP, INF | |||
| MTC TEMP, INF | |||
| LD a1, 0 * SIZE(X) | |||
| daddiu N, N, -1 | |||
| @@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __ | |||
| FLOAT ctemp05, ctemp06, ctemp07, ctemp08; | |||
| FLOAT ctemp09, ctemp10, ctemp11, ctemp12; | |||
| FLOAT ctemp13, ctemp14, ctemp15, ctemp16; | |||
| FLOAT ctemp17, ctemp18, ctemp19, ctemp20; | |||
| FLOAT ctemp21, ctemp22, ctemp23, ctemp24; | |||
| FLOAT ctemp25, ctemp26, ctemp27, ctemp28; | |||
| FLOAT ctemp29, ctemp30, ctemp31, ctemp32; | |||
| FLOAT ctemp33, ctemp34, ctemp35, ctemp36; | |||
| FLOAT ctemp37, ctemp38, ctemp39, ctemp40; | |||
| FLOAT ctemp41, ctemp42, ctemp43, ctemp44; | |||
| FLOAT ctemp45, ctemp46, ctemp47, ctemp48; | |||
| FLOAT ctemp49, ctemp50, ctemp51, ctemp52; | |||
| FLOAT ctemp53, ctemp54, ctemp55, ctemp56; | |||
| FLOAT ctemp57, ctemp58, ctemp59, ctemp60; | |||
| FLOAT ctemp61, ctemp62, ctemp63, ctemp64; | |||
| FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ; | |||
| FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24; | |||
| FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ; | |||
| FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32; | |||
| FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ; | |||
| FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40; | |||
| FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ; | |||
| FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48; | |||
| FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ; | |||
| FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56; | |||
| FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ; | |||
| FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64; | |||
| aoffset = a; | |||
| @@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\ | |||
| } | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){ | |||
| float *src, *dst, *dst_tmp, *src_base, *dst_base; | |||
| float *src, *dst, *dst_tmp=0, *src_base, *dst_base; | |||
| uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0; | |||
| BLASLONG cols_left, rows_done; float ALPHA = alpha; | |||
| if(ALPHA==0.0){ | |||
| @@ -796,10 +796,10 @@ L10: | |||
| temp = log((real) (*n)) / log(2.f); | |||
| lgn = (integer) temp; | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| iprmpt = indxq + *n + 1; | |||
| @@ -864,11 +864,11 @@ f"> */ | |||
| /* Form the z-vector which consists of the last row of Q_1 and the */ | |||
| /* first row of Q_2. */ | |||
| ptr = pow_ii(&c__2, tlvls) + 1; | |||
| ptr = pow_ii(c__2, *tlvls) + 1; | |||
| i__1 = *curlvl - 1; | |||
| for (i__ = 1; i__ <= i__1; ++i__) { | |||
| i__2 = *tlvls - i__; | |||
| ptr += pow_ii(&c__2, &i__2); | |||
| ptr += pow_ii(c__2, i__2); | |||
| /* L10: */ | |||
| } | |||
| curr = ptr + *curpbm; | |||
| @@ -1051,7 +1051,7 @@ f"> */ | |||
| /* Finally go through the left singular vector matrices of all */ | |||
| /* the other subproblems bottom-up on the tree. */ | |||
| j = pow_ii(&c__2, &nlvl); | |||
| j = pow_ii(c__2, nlvl); | |||
| sqre = 0; | |||
| for (lvl = nlvl; lvl >= 1; --lvl) { | |||
| @@ -1065,7 +1065,7 @@ f"> */ | |||
| ll = 1; | |||
| } else { | |||
| i__1 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__1); | |||
| lf = pow_ii(c__2, i__1); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__1 = ll; | |||
| @@ -1110,7 +1110,7 @@ L170: | |||
| ll = 1; | |||
| } else { | |||
| i__2 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__2); | |||
| lf = pow_ii(c__2, i__2); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__2 = lf; | |||
| @@ -836,10 +836,10 @@ f"> */ | |||
| lrwmin = *n - 1 << 1; | |||
| } else if (icompz == 1) { | |||
| lgn = (integer) (log((real) (*n)) / log(2.f)); | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| lwmin = *n * *n; | |||
| @@ -827,10 +827,10 @@ L10: | |||
| temp = log((doublereal) (*n)) / log(2.); | |||
| lgn = (integer) temp; | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| iprmpt = indxq + *n + 1; | |||
| @@ -885,11 +885,11 @@ f"> */ | |||
| /* Form the z-vector which consists of the last row of Q_1 and the */ | |||
| /* first row of Q_2. */ | |||
| ptr = pow_ii(&c__2, tlvls) + 1; | |||
| ptr = pow_ii(c__2, *tlvls) + 1; | |||
| i__1 = *curlvl - 1; | |||
| for (i__ = 1; i__ <= i__1; ++i__) { | |||
| i__2 = *tlvls - i__; | |||
| ptr += pow_ii(&c__2, &i__2); | |||
| ptr += pow_ii(c__2, i__2); | |||
| /* L10: */ | |||
| } | |||
| curr = ptr + *curpbm; | |||
| @@ -754,7 +754,7 @@ f"> */ | |||
| /* scheme */ | |||
| i__1 = *curlvl - 1; | |||
| curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; | |||
| curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1; | |||
| /* Determine size of these matrices. We add HALF to the value of */ | |||
| /* the SQRT in case the machine underestimates one of these square */ | |||
| @@ -781,12 +781,12 @@ f"> */ | |||
| /* rotations and permutation and then multiplying the center matrices */ | |||
| /* against the current Z. */ | |||
| ptr = pow_ii(&c__2, tlvls) + 1; | |||
| ptr = pow_ii(c__2, *tlvls) + 1; | |||
| i__1 = *curlvl - 1; | |||
| for (k = 1; k <= i__1; ++k) { | |||
| i__2 = *curlvl - k; | |||
| i__3 = *curlvl - k - 1; | |||
| curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - | |||
| curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) - | |||
| 1; | |||
| psiz1 = prmptr[curr + 1] - prmptr[curr]; | |||
| psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; | |||
| @@ -847,7 +847,7 @@ f"> */ | |||
| c__1); | |||
| i__2 = *tlvls - k; | |||
| ptr += pow_ii(&c__2, &i__2); | |||
| ptr += pow_ii(c__2, i__2); | |||
| /* L70: */ | |||
| } | |||
| @@ -951,7 +951,7 @@ f"> */ | |||
| /* Finally go through the left singular vector matrices of all */ | |||
| /* the other subproblems bottom-up on the tree. */ | |||
| j = pow_ii(&c__2, &nlvl); | |||
| j = pow_ii(c__2, nlvl); | |||
| sqre = 0; | |||
| for (lvl = nlvl; lvl >= 1; --lvl) { | |||
| @@ -965,7 +965,7 @@ f"> */ | |||
| ll = 1; | |||
| } else { | |||
| i__1 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__1); | |||
| lf = pow_ii(c__2, i__1); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__1 = ll; | |||
| @@ -1010,7 +1010,7 @@ L50: | |||
| ll = 1; | |||
| } else { | |||
| i__2 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__2); | |||
| lf = pow_ii(c__2, i__2); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__2 = lf; | |||
| @@ -824,7 +824,7 @@ f"> */ | |||
| ll = 1; | |||
| } else { | |||
| i__1 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__1); | |||
| lf = pow_ii(c__2, i__1); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__1 = ll; | |||
| @@ -1027,7 +1027,7 @@ f"> */ | |||
| /* Now conquer each subproblem bottom-up. */ | |||
| j = pow_ii(&c__2, &nlvl); | |||
| j = pow_ii(c__2, nlvl); | |||
| for (lvl = nlvl; lvl >= 1; --lvl) { | |||
| lvl2 = (lvl << 1) - 1; | |||
| @@ -1039,7 +1039,7 @@ f"> */ | |||
| ll = 1; | |||
| } else { | |||
| i__1 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__1); | |||
| lf = pow_ii(c__2, i__1); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__1 = ll; | |||
| @@ -806,10 +806,10 @@ f"> */ | |||
| lwmin = *n - 1 << 1; | |||
| } else { | |||
| lgn = (integer) (log((doublereal) (*n)) / log(2.)); | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (icompz == 1) { | |||
| @@ -823,10 +823,10 @@ L10: | |||
| temp = log((real) (*n)) / log(2.f); | |||
| lgn = (integer) temp; | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| iprmpt = indxq + *n + 1; | |||
| @@ -883,11 +883,11 @@ f"> */ | |||
| /* Form the z-vector which consists of the last row of Q_1 and the */ | |||
| /* first row of Q_2. */ | |||
| ptr = pow_ii(&c__2, tlvls) + 1; | |||
| ptr = pow_ii(c__2, *tlvls) + 1; | |||
| i__1 = *curlvl - 1; | |||
| for (i__ = 1; i__ <= i__1; ++i__) { | |||
| i__2 = *tlvls - i__; | |||
| ptr += pow_ii(&c__2, &i__2); | |||
| ptr += pow_ii(c__2, i__2); | |||
| /* L10: */ | |||
| } | |||
| curr = ptr + *curpbm; | |||
| @@ -753,7 +753,7 @@ f"> */ | |||
| /* scheme */ | |||
| i__1 = *curlvl - 1; | |||
| curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; | |||
| curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1; | |||
| /* Determine size of these matrices. We add HALF to the value of */ | |||
| /* the SQRT in case the machine underestimates one of these square */ | |||
| @@ -779,12 +779,12 @@ f"> */ | |||
| /* rotations and permutation and then multiplying the center matrices */ | |||
| /* against the current Z. */ | |||
| ptr = pow_ii(&c__2, tlvls) + 1; | |||
| ptr = pow_ii(c__2, *tlvls) + 1; | |||
| i__1 = *curlvl - 1; | |||
| for (k = 1; k <= i__1; ++k) { | |||
| i__2 = *curlvl - k; | |||
| i__3 = *curlvl - k - 1; | |||
| curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - | |||
| curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) - | |||
| 1; | |||
| psiz1 = prmptr[curr + 1] - prmptr[curr]; | |||
| psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; | |||
| @@ -844,7 +844,7 @@ f"> */ | |||
| c__1); | |||
| i__2 = *tlvls - k; | |||
| ptr += pow_ii(&c__2, &i__2); | |||
| ptr += pow_ii(c__2, i__2); | |||
| /* L70: */ | |||
| } | |||
| @@ -946,7 +946,7 @@ f"> */ | |||
| /* Finally go through the left singular vector matrices of all */ | |||
| /* the other subproblems bottom-up on the tree. */ | |||
| j = pow_ii(&c__2, &nlvl); | |||
| j = pow_ii(c__2, nlvl); | |||
| sqre = 0; | |||
| for (lvl = nlvl; lvl >= 1; --lvl) { | |||
| @@ -960,7 +960,7 @@ f"> */ | |||
| ll = 1; | |||
| } else { | |||
| i__1 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__1); | |||
| lf = pow_ii(c__2, i__1); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__1 = ll; | |||
| @@ -1005,7 +1005,7 @@ L50: | |||
| ll = 1; | |||
| } else { | |||
| i__2 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__2); | |||
| lf = pow_ii(c__2, i__2); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__2 = lf; | |||
| @@ -821,7 +821,7 @@ f"> */ | |||
| ll = 1; | |||
| } else { | |||
| i__1 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__1); | |||
| lf = pow_ii(c__2, i__1); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__1 = ll; | |||
| @@ -1023,7 +1023,7 @@ f"> */ | |||
| /* Now conquer each subproblem bottom-up. */ | |||
| j = pow_ii(&c__2, &nlvl); | |||
| j = pow_ii(c__2, nlvl); | |||
| for (lvl = nlvl; lvl >= 1; --lvl) { | |||
| lvl2 = (lvl << 1) - 1; | |||
| @@ -1035,7 +1035,7 @@ f"> */ | |||
| ll = 1; | |||
| } else { | |||
| i__1 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__1); | |||
| lf = pow_ii(c__2, i__1); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__1 = ll; | |||
| @@ -804,10 +804,10 @@ f"> */ | |||
| lwmin = *n - 1 << 1; | |||
| } else { | |||
| lgn = (integer) (log((real) (*n)) / log(2.f)); | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (icompz == 1) { | |||
| @@ -793,10 +793,10 @@ L10: | |||
| temp = log((doublereal) (*n)) / log(2.); | |||
| lgn = (integer) temp; | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| iprmpt = indxq + *n + 1; | |||
| @@ -864,11 +864,11 @@ f"> */ | |||
| /* Form the z-vector which consists of the last row of Q_1 and the */ | |||
| /* first row of Q_2. */ | |||
| ptr = pow_ii(&c__2, tlvls) + 1; | |||
| ptr = pow_ii(c__2, *tlvls) + 1; | |||
| i__1 = *curlvl - 1; | |||
| for (i__ = 1; i__ <= i__1; ++i__) { | |||
| i__2 = *tlvls - i__; | |||
| ptr += pow_ii(&c__2, &i__2); | |||
| ptr += pow_ii(c__2, i__2); | |||
| /* L10: */ | |||
| } | |||
| curr = ptr + *curpbm; | |||
| @@ -1051,7 +1051,7 @@ f"> */ | |||
| /* Finally go through the left singular vector matrices of all */ | |||
| /* the other subproblems bottom-up on the tree. */ | |||
| j = pow_ii(&c__2, &nlvl); | |||
| j = pow_ii(c__2, nlvl); | |||
| sqre = 0; | |||
| for (lvl = nlvl; lvl >= 1; --lvl) { | |||
| @@ -1065,7 +1065,7 @@ f"> */ | |||
| ll = 1; | |||
| } else { | |||
| i__1 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__1); | |||
| lf = pow_ii(c__2, i__1); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__1 = ll; | |||
| @@ -1110,7 +1110,7 @@ L170: | |||
| ll = 1; | |||
| } else { | |||
| i__2 = lvl - 1; | |||
| lf = pow_ii(&c__2, &i__2); | |||
| lf = pow_ii(c__2, i__2); | |||
| ll = (lf << 1) - 1; | |||
| } | |||
| i__2 = lf; | |||
| @@ -836,10 +836,10 @@ f"> */ | |||
| lrwmin = *n - 1 << 1; | |||
| } else if (icompz == 1) { | |||
| lgn = (integer) (log((doublereal) (*n)) / log(2.)); | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| if (pow_ii(&c__2, &lgn) < *n) { | |||
| if (pow_ii(c__2, lgn) < *n) { | |||
| ++lgn; | |||
| } | |||
| lwmin = *n * *n; | |||