From 7c8ea130a35400ee15320b4747d52ff1c8349773 Mon Sep 17 00:00:00 2001 From: steppi Date: Tue, 18 Jul 2023 14:36:03 -0400 Subject: [PATCH 001/125] Set up cirun workflow for arm64 graviton --- .cirun.yml | 16 ++++ .github/workflows/arm64_graviton.yml | 126 +++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 .cirun.yml create mode 100644 .github/workflows/arm64_graviton.yml diff --git a/.cirun.yml b/.cirun.yml new file mode 100644 index 000000000..f0e0149d3 --- /dev/null +++ b/.cirun.yml @@ -0,0 +1,16 @@ +# Self-Hosted Github Action Runners on AWS via Cirun.io +# Reference: https://docs.cirun.io/Reference/yml.html +runners: + - name: "aws-runner-graviton" + # Cloud Provider: AWS + cloud: "aws" + region: "us-east-1" + # Cheapest VM on AWS + instance_type: "c7g.large" + # Ubuntu-22.04, ami image + machine_image: "ami-0a0c8eebcdd6dcbd0" + preemptible: false + # Add this label in the "runs-on" param in .github/workflows/.yml + # So that this runner is created for running the workflow + labels: + - "cirun-aws-runner-graviton" diff --git a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml new file mode 100644 index 000000000..bcb05047c --- /dev/null +++ b/.github/workflows/arm64_graviton.yml @@ -0,0 +1,126 @@ +name: arm64 graviton cirun + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +jobs: + build: + runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}" + + strategy: + fail-fast: false + matrix: + fortran: [gfortran] + build: [cmake, make] + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Print system information + run: | + if [ "$RUNNER_OS" == "Linux" ]; then + cat /proc/cpuinfo + else + echo "::error::$RUNNER_OS not supported" + exit 1 + fi + + - name: Install Dependencies + run: | + if [ "$RUNNER_OS" == "Linux" ]; then + sudo apt update + sudo apt-get install -y gfortran cmake ccache libtinfo5 + else + echo "::error::$RUNNER_OS 
not supported" + exit 1 + fi + + - name: Compilation cache + uses: actions/cache@v3 + with: + path: ~/.ccache + # We include the commit sha in the cache key, as new cache entries are + # only created if there is no existing entry for the key yet. + # GNU make and cmake call the compilers differently. It looks like + # that causes the cache to mismatch. Keep the ccache for both build + # tools separate to avoid polluting each other. + key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} + # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. + restore-keys: | + ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }} + ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }} + ccache-${{ runner.os }}-${{ matrix.build }} + + - name: Configure ccache + run: | + if [ "${{ matrix.build }}" = "make" ]; then + # Add ccache to path + if [ "$RUNNER_OS" = "Linux" ]; then + echo "/usr/lib/ccache" >> $GITHUB_PATH + else + echo "::error::$RUNNER_OS not supported" + exit 1 + fi + fi + # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). + test -d ~/.ccache || mkdir -p ~/.ccache + echo "max_size = 300M" > ~/.ccache/ccache.conf + echo "compression = true" >> ~/.ccache/ccache.conf + ccache -s + + - name: Build OpenBLAS + run: | + case "${{ matrix.build }}" in + "make") + make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" + ;; + "cmake") + mkdir build && cd build + cmake -DDYNAMIC_ARCH=1 \ + -DNOFORTRAN=0 \ + -DBUILD_WITHOUT_LAPACK=0 \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ + .. + cmake --build . 
+ ;; + *) + echo "::error::Configuration not supported" + exit 1 + ;; + esac + + - name: Show ccache status + continue-on-error: true + run: ccache -s + + - name: Run tests + timeout-minutes: 60 + run: | + case "${{ matrix.build }}" in + "make") + MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' + echo "::group::Tests in 'test' directory" + make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" + echo "::endgroup::" + echo "::group::Tests in 'ctest' directory" + make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" + echo "::endgroup::" + echo "::group::Tests in 'utest' directory" + make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" + echo "::endgroup::" + ;; + "cmake") + cd build && ctest + ;; + *) + echo "::error::Configuration not supported" + exit 1 + ;; + esac From 76aa6bac4df3014acaad26390e6c7e3085d25806 Mon Sep 17 00:00:00 2001 From: steppi Date: Wed, 26 Jul 2023 12:01:12 -0400 Subject: [PATCH 004/125] Fix cirun url [skip actions] --- .cirun.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirun.yml b/.cirun.yml index f0e0149d3..bfc6494d0 100644 --- a/.cirun.yml +++ b/.cirun.yml @@ -1,5 +1,5 @@ # Self-Hosted Github Action Runners on AWS via Cirun.io -# Reference: https://docs.cirun.io/Reference/yml.html +# Reference: https://docs.cirun.io/reference/yaml runners: - name: "aws-runner-graviton" # Cloud Provider: AWS From f2cf9293744c4a17e04cc1690a419ef641fa4c21 Mon Sep 17 00:00:00 2001 From: gxw Date: Thu, 31 Aug 2023 16:59:37 +0800 Subject: [PATCH 005/125] LoongArch64: Add sgemv kernel --- kernel/loongarch64/KERNEL.LOONGSON3R5 | 3 + kernel/loongarch64/sgemv_n_8_lasx.S | 463 ++++++++++++++++++++++++++ kernel/loongarch64/sgemv_t_8_lasx.S | 405 ++++++++++++++++++++++ 3 files changed, 871 insertions(+) create mode 100644 kernel/loongarch64/sgemv_n_8_lasx.S create mode 100644 kernel/loongarch64/sgemv_t_8_lasx.S diff --git a/kernel/loongarch64/KERNEL.LOONGSON3R5 b/kernel/loongarch64/KERNEL.LOONGSON3R5 index 67d1fd11c..c23c2fac5 
100644 --- a/kernel/loongarch64/KERNEL.LOONGSON3R5 +++ b/kernel/loongarch64/KERNEL.LOONGSON3R5 @@ -21,6 +21,9 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) + +SGEMVNKERNEL = sgemv_n_8_lasx.S +SGEMVTKERNEL = sgemv_t_8_lasx.S endif DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c diff --git a/kernel/loongarch64/sgemv_n_8_lasx.S b/kernel/loongarch64/sgemv_n_8_lasx.S new file mode 100644 index 000000000..da172ca50 --- /dev/null +++ b/kernel/loongarch64/sgemv_n_8_lasx.S @@ -0,0 +1,463 @@ +/******************************************************************************* +Copyright (c) 2023, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ +#define ASSEMBLER + +#include "common.h" +#include "loongarch64_asm.S" + +/********************************************************************* +* 2023/08/30 guxiwei +* UTEST : OK +* CTEST : OK +* TEST : OK +* +* +*********************************************************************/ + +/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, + * FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) + */ +#define M $r4 +#define N $r5 +#define ALPHA $f0 +#define A $r7 +#define LDA $r8 +#define X $r9 +#define INC_X $r10 +#define Y $r11 +#define INC_Y $r6 + +#define J $r12 +#define I $r13 +#define K $r14 +#define Y_ORG $r15 +#define OFFSET $r16 +#define K_LDA $r17 +#define M4 $r18 +#define T0 $r19 +#define PA0 $r20 +#define PA1 $r23 +#define PA2 $r24 +#define PA3 $r25 +#define PA4 $r26 +#define PA5 $r27 +#define PA6 $r28 +#define PA7 $r29 + +#define VALPHA $xr1 +#define X0 $xr2 +#define X1 $xr3 +#define X2 $xr4 +#define X3 $xr5 +#define X4 $xr6 +#define X5 $xr7 +#define X6 $xr8 +#define X7 $xr9 +#define Y0 $xr10 +#define A0 $xr11 +#define A1 $xr12 +#define A2 $xr13 +#define A3 $xr14 +#define A4 $xr15 +#define A5 $xr16 +#define A6 $xr17 +#define A7 $xr18 + +#define X0_F $f2 +#define X1_F $f3 +#define X2_F $f4 +#define X3_F $f5 +#define X4_F $f6 +#define X5_F $f7 +#define X6_F $f8 +#define X7_F $f9 +#define Y0_F $f10 +#define A0_F 
$f11 +#define A1_F $f12 +#define A2_F $f13 +#define A3_F $f14 +#define A4_F $f15 +#define A5_F $f16 +#define A6_F $f17 +#define A7_F $f18 + +.macro SLOAD_X_8 + GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04, X2, X, 0x08, X3, X, 0x0C, \ + X4, X, 0x10, X5, X, 0x14, X6, X, 0x18, X7, X, 0x1C + GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA, \ + X4, X4, VALPHA, X5, X5, VALPHA, X6, X6, VALPHA, X7, X7, VALPHA +.endm + +.macro SLOAD_X_8_GAP + xvldrepl.w X0, X, 0x00 + PTR_ADD T0, X, INC_X + xvldrepl.w X1, T0, 0x00 + PTR_ADD T0, T0, INC_X + xvldrepl.w X2, T0, 0x00 + PTR_ADD T0, T0, INC_X + xvldrepl.w X3, T0, 0x00 + PTR_ADD T0, T0, INC_X + xvldrepl.w X4, T0, 0x00 + PTR_ADD T0, T0, INC_X + xvldrepl.w X5, T0, 0x00 + PTR_ADD T0, T0, INC_X + xvldrepl.w X6, T0, 0x00 + PTR_ADD T0, T0, INC_X + xvldrepl.w X7, T0, 0x00 + GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA, \ + X4, X4, VALPHA, X5, X5, VALPHA, X6, X6, VALPHA, X7, X7, VALPHA +.endm + +.macro SLOAD_X_4 + GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04, X2, X, 0x08, X3, X, 0x0C + GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA +.endm + +.macro SLOAD_X_4_GAP + xvldrepl.w X0, X, 0x00 + PTR_ADD T0, X, INC_X + xvldrepl.w X1, T0, 0x00 + PTR_ADD T0, T0, INC_X + xvldrepl.w X2, T0, 0x00 + PTR_ADD T0, T0, INC_X + xvldrepl.w X3, T0, 0x00 + GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA +.endm + +.macro SLOAD_X_2 + GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04 + GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA +.endm + +.macro SLOAD_X_2_GAP + xvldrepl.w X0, X, 0x00 + PTR_ADD T0, X, INC_X + xvldrepl.w X1, T0, 0x00 + GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA +.endm + +.macro SLOAD_X_1 + GLDREPL xv, w, X0, X, 0x00 + GMUL xvf, s, X0, X0, VALPHA +.endm + +.macro SLOAD_Y_8 + GLD xv, , Y0, Y, 0 +.endm + +.macro SLOAD_Y_8_GAP + fld.s Y0_F, Y, 0 + fldx.s A0_F, Y, INC_Y + PTR_ALSL T0, INC_Y, Y, 1 + fld.s A1_F, T0, 0 + fldx.s A2_F, T0, INC_Y + PTR_ALSL T0, 
INC_Y, Y, 2 + fld.s A3_F, T0, 0 + fldx.s A4_F, T0, INC_Y + PTR_ADD T0, T0, INC_Y + PTR_ADD T0, T0, INC_Y + fld.s A5_F, T0, 0 + fldx.s A6_F, T0, INC_Y + GINSVE0 xv, w, Y0, A0, 1, Y0, A1, 2, Y0, A2, 3, Y0, A3, 4, \ + Y0, A4, 5, Y0, A5, 6, Y0, A6, 7 +.endm + +.macro SLOAD_Y_1 + GLD f, s, Y0_F, Y, 0 +.endm + +.macro SGEMV_N_8x8 + GLD_INC xv, , 0x20, \ + A0, PA0, 0, A1, PA1, 0, \ + A2, PA2, 0, A3, PA3, 0, \ + A4, PA4, 0, A5, PA5, 0, \ + A6, PA6, 0, A7, PA7, 0 + GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0, \ + Y0, A2, X2, Y0, Y0, A3, X3, Y0, \ + Y0, A4, X4, Y0, Y0, A5, X5, Y0, \ + Y0, A6, X6, Y0, Y0, A7, X7, Y0 +.endm + +.macro SGEMV_N_1x8 + GLD_INC f, s, 0x04, \ + A0_F, PA0, 0, A1_F, PA1, 0, \ + A2_F, PA2, 0, A3_F, PA3, 0, \ + A4_F, PA4, 0, A5_F, PA5, 0, \ + A6_F, PA6, 0, A7_F, PA7, 0 + GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F, \ + Y0_F, A2_F, X2_F, Y0_F, Y0_F, A3_F, X3_F, Y0_F, \ + Y0_F, A4_F, X4_F, Y0_F, Y0_F, A5_F, X5_F, Y0_F, \ + Y0_F, A6_F, X6_F, Y0_F, Y0_F, A7_F, X7_F, Y0_F +.endm + +.macro SGEMV_N_8x4 + GLD_INC xv, , 0x20, \ + A0, PA0, 0, A1, PA1, 0, \ + A2, PA2, 0, A3, PA3, 0 + GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0, \ + Y0, A2, X2, Y0, Y0, A3, X3, Y0 +.endm + +.macro SGEMV_N_1x4 + GLD_INC f, s, 0x04, \ + A0_F, PA0, 0, A1_F, PA1, 0, \ + A2_F, PA2, 0, A3_F, PA3, 0 + GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F, \ + Y0_F, A2_F, X2_F, Y0_F, Y0_F, A3_F, X3_F, Y0_F +.endm + +.macro SGEMV_N_8x2 + GLD_INC xv, , 0x20, \ + A0, PA0, 0, A1, PA1, 0 + GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0 +.endm + +.macro SGEMV_N_1x2 + GLD_INC f, s, 0x04, \ + A0_F, PA0, 0, A1_F, PA1, 0 + GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F +.endm + +.macro SGEMV_N_1x1 + GLD_INC f, s, 0x04, A0_F, PA0, 0 + GMADD f, s, Y0_F, A0_F, X0_F, Y0_F +.endm + +.macro SSTORE_Y_8 + GST xv, , Y0, Y, 0 +.endm + +.macro SSTORE_Y_8_GAP + xvstelm.w Y0, Y, 0, 0 + PTR_ADD T0, Y, INC_Y + xvstelm.w Y0, T0, 0, 1 + PTR_ADD T0, T0, INC_Y + xvstelm.w Y0, T0, 0, 2 + 
PTR_ADD T0, T0, INC_Y + xvstelm.w Y0, T0, 0, 3 + + PTR_ADD T0, T0, INC_Y + xvstelm.w Y0, T0, 0, 4 + PTR_ADD T0, T0, INC_Y + xvstelm.w Y0, T0, 0, 5 + PTR_ADD T0, T0, INC_Y + xvstelm.w Y0, T0, 0, 6 + PTR_ADD T0, T0, INC_Y + xvstelm.w Y0, T0, 0, 7 +.endm + +.macro SSTORE_Y_1 + GST f, s, Y0_F, Y, 0 +.endm + +.macro SGEMV_N XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req /* Accumulates A * (alpha * x) into y, single precision. XW is spliced into every local label so each expansion gets its own label set; X_8..X_1 and Y_8, Y_1 pick which SLOAD_ and SSTORE_ helper variants are expanded (Y_4 is accepted but never referenced in the body). Columns are taken 8 at a time (N >> 3), then as a 4-, 2- and 1-column tail (N & 4, N & 2, N & 1). The 8-, 4- and 2-column groups walk rows 8 at a time (M >> 3) and then singly (M & 7); the final single column walks all M rows singly. */ + PTR_SRLI J, N, 3 + beqz J, .L_\XW\()_N_7 + PTR_SLLI K_LDA, LDA, 3 + PTR_SUB K_LDA, K_LDA, M4 /* M4 = M * 4 bytes (set in the prologue); presumably undoes the row advance made by the GLD_INC loads - confirm against loongarch64_asm.S */ +.L_\XW\()_N_L8: + SLOAD_\X_8 + xor K, K, K + move Y, Y_ORG + PTR_SRLI I, M, 3 + beqz I, .L_\XW\()_M_7 +.align 5 +.L_\XW\()_M_L8: + SLOAD_\Y_8 + SGEMV_N_8x8 + SSTORE_\Y_8 + PTR_ADDI I, I, -1 + PTR_ALSL Y, INC_Y, Y, 3 + PTR_ADDI K, K, 8 + bnez I, .L_\XW\()_M_L8 +.L_\XW\()_M_7: + andi I, M, 7 + beqz I, .L_\XW\()_M_END +.align 5 +.L_\XW\()_M_L1: + SLOAD_\Y_1 + SGEMV_N_1x8 + SSTORE_\Y_1 + PTR_ADDI I, I, -1 + PTR_ADD Y, Y, INC_Y + PTR_ADDI K, K, 1 + bnez I, .L_\XW\()_M_L1 +.L_\XW\()_M_END: + PTR_ADDI J, J, -1 +#if __loongarch_grlen == 64 + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ + PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA +#elif __loongarch_grlen == 32 + GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ + PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA +#else + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ + PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA +#endif + PTR_ALSL X, INC_X, X, 3 + bnez J, .L_\XW\()_N_L8 +.L_\XW\()_N_7: /* 4-column tail, taken when N & 4 */ + andi J, N, 4 + beqz J, .L_\XW\()_N_3 + SLOAD_\X_4 + xor K, K, K + move Y, Y_ORG + + PTR_SRLI I, M, 3 + beqz I, .L_\XW\()_N_4_M_7 +.align 5 +.L_\XW\()_N_4_M_L8: + SLOAD_\Y_8 + SGEMV_N_8x4 + SSTORE_\Y_8 + PTR_ADDI I, I, -1 + PTR_ADDI K, K, 8 + PTR_ALSL Y, INC_Y, Y, 3 + bnez I, .L_\XW\()_N_4_M_L8 +.L_\XW\()_N_4_M_7: + andi I, M, 7 + beqz I, .L_\XW\()_N_4_M_END +.align 5 +.L_\XW\()_N_4_M_L1: + SLOAD_\Y_1 +
SGEMV_N_1x4 + SSTORE_\Y_1 + PTR_ADDI I, I, -1 + PTR_ADD Y, Y, INC_Y + PTR_ADDI K, K, 1 + bnez I, .L_\XW\()_N_4_M_L1 +.L_\XW\()_N_4_M_END: + PTR_SLLI K_LDA, LDA, 2 + PTR_SUB K_LDA, K_LDA, M4 +#if __loongarch_grlen == 64 + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA +#elif __loongarch_grlen == 32 /* same operand list as the 64-bit branch, with the w suffix */ + GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA +#else + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA +#endif + PTR_ALSL X, INC_X, X, 2 +.L_\XW\()_N_3: /* 2-column tail, taken when N & 2 */ + andi J, N, 2 + beqz J, .L_\XW\()_N_1 + SLOAD_\X_2 + xor K, K, K + move Y, Y_ORG + PTR_SRLI I, M, 3 + beqz I, .L_\XW\()_N_2_M_7 +.align 5 +.L_\XW\()_N_2_M_L8: + SLOAD_\Y_8 + SGEMV_N_8x2 + SSTORE_\Y_8 + PTR_ADDI I, I, -1 + PTR_ADDI K, K, 8 + PTR_ALSL Y, INC_Y, Y, 3 + bnez I, .L_\XW\()_N_2_M_L8 +.L_\XW\()_N_2_M_7: + andi I, M, 7 + beqz I, .L_\XW\()_N_2_M_END +.align 5 +.L_\XW\()_N_2_M_L1: + SLOAD_\Y_1 + SGEMV_N_1x2 + SSTORE_\Y_1 + PTR_ADDI I, I, -1 + PTR_ADD Y, Y, INC_Y + PTR_ADDI K, K, 1 + bnez I, .L_\XW\()_N_2_M_L1 +.L_\XW\()_N_2_M_END: + PTR_SLLI K_LDA, LDA, 1 + PTR_SUB K_LDA, K_LDA, M4 + PTR_ADD PA0, PA0, K_LDA + PTR_ADD PA1, PA1, K_LDA + PTR_ALSL X, INC_X, X, 1 +.L_\XW\()_N_1: /* last single column, taken when N & 1; all M rows are walked one at a time */ + andi J, N, 1 + beqz J, .L_END + SLOAD_\X_1 + xor K, K, K + move Y, Y_ORG + move I, M + beqz I, .L_END +.align 5 +.L_\XW\()_N_1_M_L1: + SLOAD_\Y_1 + SGEMV_N_1x1 + SSTORE_\Y_1 + PTR_ADDI I, I, -1 + PTR_ADD Y, Y, INC_Y + PTR_ADDI K, K, 1 + bnez I, .L_\XW\()_N_1_M_L1 + b .L_END +.endm + + PROLOGUE + PTR_LD INC_Y, $sp, 0 + push_if_used 17 + 7, 19 + PTR_ADDI K, $r0, 0x01 + PTR_SUB I, INC_X, K + PTR_SUB J, INC_Y, K + maskeqz I, K, I /* if(inc_x == 1) I = 0; else I = 1; */ + maskeqz J, K, J /* if(inc_y == 1) j = 0; else j = 1; */ + PTR_ALSL I, I, J, 1 + GSLLI , d, LDA, LDA, 2, INC_X, INC_X, 2, INC_Y, INC_Y, 2, M4, M, 2 + xvreplve0.w VALPHA, $xr0 + move Y_ORG, Y + move PA0, A +#if __loongarch_grlen == 64 + GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA,
PA4, PA3, LDA, \ + PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA +#elif __loongarch_grlen == 32 + GADD , w, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ + PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA +#else + GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ + PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA +#endif + la.local T0, .L_GAP_TABLE + PTR_ALSL I, I, T0, 1 + ld.h K, I, 0 + PTR_ADD T0, T0, K + jirl $r0, T0, 0 +.L_GAP_TABLE: + .hword .L_GAP_0_0 - .L_GAP_TABLE + .hword .L_GAP_0_1 - .L_GAP_TABLE + .hword .L_GAP_1_0 - .L_GAP_TABLE + .hword .L_GAP_1_1 - .L_GAP_TABLE +.L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */ + SGEMV_N GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1 +.L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */ + SGEMV_N GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1 +.L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */ + SGEMV_N GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1 +.L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */ + SGEMV_N GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1 +.L_END: + pop_if_used 17 + 7, 19 + jirl $r0, $r1, 0x0 + EPILOGUE diff --git a/kernel/loongarch64/sgemv_t_8_lasx.S b/kernel/loongarch64/sgemv_t_8_lasx.S new file mode 100644 index 000000000..dde3f4a30 --- /dev/null +++ b/kernel/loongarch64/sgemv_t_8_lasx.S @@ -0,0 +1,405 @@ +/******************************************************************************* +Copyright (c) 2023, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ +#define ASSEMBLER + +#include "common.h" +#include "loongarch64_asm.S" + +/********************************************************************* +* 2023/08/30 guxiwei +* UTEST : OK +* CTEST : OK +* TEST : OK +* +* +*********************************************************************/ + +/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, + * FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) + */ +#define M $r4 +#define N $r5 +#define ALPHA $f0 +#define A $r7 +#define LDA $r8 +#define X $r9 +#define INC_X $r10 +#define Y $r11 +#define INC_Y $r6 + +#define J $r12 +#define I $r13 +#define K $r14 +#define PY0 $r14 +#define X_ORG $r15 +#define PY1 $r16 +#define K_LDA $r17 +#define PY2 $r18 +#define T0 $r19 +#define PA0 $r20 +#define PA1 $r23 +#define PA2 $r24 +#define PA3 $r25 +#define PA4 $r26 +#define PA5 $r27 +#define PA6 $r28 +#define PA7 $r29 +#define M4 $r30 + +#define VALPHA $xr0 +#define 
X0 $xr1 +#define A0 $xr2 +#define A1 $xr3 +#define A2 $xr4 +#define A3 $xr5 +#define A4 $xr6 +#define A5 $xr7 +#define A6 $xr8 +#define A7 $xr9 +#define TP0 $xr10 +#define TP1 $xr11 +#define TP2 $xr12 +#define TP3 $xr13 +#define TP4 $xr14 +#define TP5 $xr15 +#define TP6 $xr16 +#define TP7 $xr17 +#define Y0 $xr2 +#define Y1 $xr3 +#define Y2 $xr4 +#define Y3 $xr5 +#define Y4 $xr6 +#define Y5 $xr7 +#define Y6 $xr8 +#define Y7 $xr9 + +.macro ZERO_Y8 + GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1, TP2, TP2, TP2, TP3, TP3, TP3, \ + TP4, TP4, TP4, TP5, TP5, TP5, TP6, TP6, TP6, TP7, TP7, TP7 +.endm + +.macro ZERO_Y4 + GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1, TP2, TP2, TP2, TP3, TP3, TP3 +.endm + +.macro ZERO_Y2 + GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1 +.endm + +.macro ZERO_Y1 + GXOR xv, v, TP0, TP0, TP0 +.endm + +.macro SLOAD_X8 + GLD xv, , X0, X, 0x00 +.endm + +.macro SLOAD_X8_GAP + fld.s $f1, X, 0x00 + fldx.s $f2, X, INC_X + PTR_ALSL T0, INC_X, X, 1 + fld.s $f3, T0, 0x00 + fldx.s $f4, T0, INC_X + GINSVE0 xv, w, X0, A0, 1, X0, A1, 2, X0, A2, 3 + PTR_ALSL T0, INC_X, X, 2 + fld.s $f2, T0, 0x00 + fldx.s $f3, T0, INC_X + PTR_ALSL T0, INC_X, T0, 1 + fld.s $f4, T0, 0x00 + fldx.s $f5, T0, INC_X + GINSVE0 xv, w, X0, A0, 4, X0, A1, 5, X0, A2, 6, X0, A3, 7 +.endm + +.macro SGEMV_T_8x8 + GLD_INC xv, , 0x20, \ + A0, PA0, 0, A1, PA1, 0, \ + A2, PA2, 0, A3, PA3, 0, \ + A4, PA4, 0, A5, PA5, 0, \ + A6, PA6, 0, A7, PA7, 0 + GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1, \ + TP2, A2, X0, TP2, TP3, A3, X0, TP3, \ + TP4, A4, X0, TP4, TP5, A5, X0, TP5, \ + TP6, A6, X0, TP6, TP7, A7, X0, TP7 +.endm + +.macro SGEMV_T_4x8 + GLD_INC xv, , 0x20, \ + A0, PA0, 0, A1, PA1, 0, \ + A2, PA2, 0, A3, PA3, 0 + GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1, \ + TP2, A2, X0, TP2, TP3, A3, X0, TP3 +.endm + +.macro SGEMV_T_2x8 + GLD_INC xv, , 0x20, \ + A0, PA0, 0, A1, PA1, 0 + GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1 +.endm + +.macro SGEMV_T XW:req X8:req, X4:req + PTR_SRLI J, N, 3 + beqz J, 
.L_\XW\()_N_7 + PTR_SLLI K_LDA, LDA, 3 + PTR_SUB K_LDA, K_LDA, M4 +.L_\XW\()_N_L8: + ZERO_Y8 + move X, X_ORG + PTR_SRLI I, M, 3 + beqz I, .L_\XW\()_M_7 +.align 5 +.L_\XW\()_M_L8: + SLOAD_\X8 + SGEMV_T_8x8 + PTR_ADDI I, I, -1 + PTR_ALSL X, INC_X, X, 3 + bnez I, .L_\XW\()_M_L8 +.L_\XW\()_M_7: + // Accumulated + GACC xvf, s, Y0, TP0, Y1, TP1, Y2, TP2, Y3, TP3, Y4, TP4, \ + Y5, TP5, Y6, TP6, Y7, TP7 + andi I, M, 7 + beqz I, .L_\XW\()_M_END +.align 5 +.L_\XW\()_M_L1: + fld.s $f1, X, 0x00 + fld.s $f10, PA0, 0x00 + fld.s $f11, PA1, 0x00 + fld.s $f12, PA2, 0x00 + fld.s $f13, PA3, 0x00 + fld.s $f14, PA4, 0x00 + fld.s $f15, PA5, 0x00 + fld.s $f16, PA6, 0x00 + fld.s $f17, PA7, 0x00 +#if __loongarch_grlen == 64 + GADDI , d, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \ + PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04 +#elif __loongarch_grlen == 32 + GADDI , w, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \ + PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04 +#else + GADDI , d, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \ + PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04 +#endif + GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3, $f4, $f12, $f1, $f4, $f5, $f13, $f1, $f5, \ + $f6, $f14, $f1, $f6, $f7, $f15, $f1, $f7, $f8, $f16, $f1, $f8, $f9, $f17, $f1, $f9, + PTR_ADDI I, I, -1 + PTR_ADD X, X, INC_X + bnez I, .L_\XW\()_M_L1 +.L_\XW\()_M_END: + fld.s $f10, Y, 0x00 + fldx.s $f11, Y, INC_Y + PTR_ALSL PY0, INC_Y, Y, 1 + fld.s $f12, PY0, 0x00 + fldx.s $f13, PY0, INC_Y + PTR_ALSL PY1, INC_Y, Y, 2 + fld.s $f14, PY1, 0x00 + fldx.s $f15, PY1, INC_Y + PTR_ALSL PY2, INC_Y, PY1, 1 + fld.s $f16, PY2, 0x00 + fldx.s $f17, PY2, INC_Y + + GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11, $f12, ALPHA, $f4, $f12, $f13, ALPHA, $f5, $f13, \ + $f14, ALPHA, $f6, $f14, $f15, ALPHA, $f7, $f15, $f16, ALPHA, $f8, $f16, $f17, ALPHA, $f9, $f17 + + PTR_ADDI J, J, -1 +#if __loongarch_grlen == 
64 + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ + PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA +#elif __loongarch_grlen == 32 + GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ + PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA +#else + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ + PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA +#endif + fst.s $f10, Y, 0x00 + fstx.s $f11, Y, INC_Y + fst.s $f12, PY0, 0x00 + fstx.s $f13, PY0, INC_Y + fst.s $f14, PY1, 0x00 + fstx.s $f15, PY1, INC_Y + fst.s $f16, PY2, 0x00 + fstx.s $f17, PY2, INC_Y + + PTR_ALSL Y, INC_Y, Y, 3 + bnez J, .L_\XW\()_N_L8 +.L_\XW\()_N_7: + andi J, N, 4 + beqz J, .L_\XW\()_N_3 + ZERO_Y4 + move X, X_ORG + PTR_SRLI I, M, 3 + beqz I, .L_\XW\()_N_4_M_7 +.align 5 +.L_\XW\()_N_4_M_L8: + SLOAD_\X8 + SGEMV_T_4x8 + PTR_ADDI I, I, -1 + PTR_ALSL X, INC_X, X, 3 + bnez I, .L_\XW\()_N_4_M_L8 +.L_\XW\()_N_4_M_7: + // Accumulated + GACC xvf, s, Y0, TP0, Y1, TP1, Y2, TP2, Y3, TP3 + andi I, M, 7 + beqz I, .L_\XW\()_N_4_M_END +.align 5 +.L_\XW\()_N_4_M_L1: + fld.s $f1, X, 0x00 + GLD_INC f, s, 0x04, $f10, PA0, 0x00, $f11, PA1, 0x00, $f12, PA2, 0x00, $f13, PA3, 0x00 + GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3, $f4, $f12, $f1, $f4, $f5, $f13, $f1, $f5 + PTR_ADDI I, I, -1 + PTR_ADD X, X, INC_X + bnez I, .L_\XW\()_N_4_M_L1 +.L_\XW\()_N_4_M_END: + fld.s $f10, Y, 0x00 + fldx.s $f11, Y, INC_Y + PTR_ALSL PY0, INC_Y, Y, 1 + fld.s $f12, PY0, 0x00 + fldx.s $f13, PY0, INC_Y + + GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11, $f12, ALPHA, $f4, $f12, $f13, ALPHA, $f5, $f13 + + PTR_SLLI K_LDA, LDA, 2 + PTR_SUB K_LDA, K_LDA, M4 + +#if __loongarch_grlen == 64 + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA +#elif __loongarch_grlen == 32 + GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA +#else + GADD , 
d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA +#endif + fst.s $f10, Y, 0x00 + fstx.s $f11, Y, INC_Y + fst.s $f12, PY0, 0x00 + fstx.s $f13, PY0, INC_Y + PTR_ALSL Y, INC_Y, Y, 2 +.L_\XW\()_N_3: + andi J, N, 2 + beqz J, .L_\XW\()_N_1 + ZERO_Y2 + move X, X_ORG + PTR_SRLI I, M, 3 + beqz I, .L_\XW\()_N_2_M_7 +.align 5 +.L_\XW\()_N_2_M_L8: + SLOAD_\X8 + SGEMV_T_2x8 + PTR_ADDI I, I, -1 + PTR_ALSL X, INC_X, X, 3 + bnez I, .L_\XW\()_N_2_M_L8 +.L_\XW\()_N_2_M_7: + // Accumulated + GACC xvf, s, Y0, TP0, Y1, TP1 + andi I, M, 7 + beqz I, .L_\XW\()_N_2_M_END +.align 5 +.L_\XW\()_N_2_M_L1: + fld.s $f1, X, 0x00 + GLD_INC f, s, 0x04, $f10, PA0, 0x00, $f11, PA1, 0x00 + GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3 + PTR_ADDI I, I, -1 + PTR_ADD X, X, INC_X + bnez I, .L_\XW\()_N_2_M_L1 +.L_\XW\()_N_2_M_END: + fld.s $f10, Y, 0x00 + fldx.s $f11, Y, INC_Y + + GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11 + + PTR_SLLI K_LDA, LDA, 1 + PTR_SUB K_LDA, K_LDA, M4 + +#if __loongarch_grlen == 64 + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA +#elif __loongarch_grlen == 32 + GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA +#else + GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA +#endif + fst.s $f10, Y, 0x00 + fstx.s $f11, Y, INC_Y + PTR_ALSL Y, INC_Y, Y, 1 +.L_\XW\()_N_1: + andi J, N, 1 + beqz J, .L_END + ZERO_Y1 + move X, X_ORG + move I, M + beqz I, .L_END +.align 5 +.L_\XW\()_N_1_M_L1: + fld.s $f2, PA0, 0x00 + fld.s $f1, X, 0x00 + fmadd.s $f10, $f2, $f1, $f10 + PTR_ADDI I, I, -1 + PTR_ADD X, X, INC_X + PTR_ADDI PA0, PA0, 0x04 + bnez I, .L_\XW\()_N_1_M_L1 + + fld.s $f2, Y, 0x00 + fmadd.s $f2, ALPHA, $f10, $f2 + fst.s $f2, Y, 0x00 + b .L_END +.endm + + PROLOGUE + PTR_LD INC_Y, $sp, 0 + push_if_used 17 + 8, 18 + PTR_ADDI K, $r0, 0x01 + PTR_SUB I, INC_X, K + maskeqz I, K, I /* if(inc_x == 1) I = 0; else I = 1; */ + GSLLI , d, LDA, LDA, 2, INC_X, INC_X, 2, INC_Y, INC_Y, 2, M4, M, 2 + xvreplve0.w VALPHA, $xr0 + move X_ORG, X + move PA0, A +#if __loongarch_grlen == 64 + 
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ + PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA +#elif __loongarch_grlen == 32 + GADD , w, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ + PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA +#else + GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ + PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA +#endif + la.local T0, .L_GAP_TABLE + PTR_ALSL I, I, T0, 1 + ld.h K, I, 0 + PTR_ADD T0, T0, K + jirl $r0, T0, 0 +.L_GAP_TABLE: + .hword .L_GAP_0 - .L_GAP_TABLE + .hword .L_GAP_1 - .L_GAP_TABLE +.L_GAP_0: /* if (incx == 1) */ + SGEMV_T GAP_0, X8, X4 +.L_GAP_1: /* if (incx != 1) */ + SGEMV_T GAP_1, X8_GAP, X4_GAP +.L_END: + pop_if_used 17 + 8, 18 + jirl $r0, $r1, 0x0 + EPILOGUE From 4867cf5dd78c47fff3cc18a49267575903651a1f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 4 Sep 2023 08:39:40 +0200 Subject: [PATCH 006/125] Update version to 0.3.24.dev --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 444baa114..35077f3c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 24) +set(OpenBLAS_PATCH_VERSION 24.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") From c3f2a3c0ca8d4a3700f81f176dd3f1bc08cff3dd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 4 Sep 2023 08:40:25 +0200 Subject: [PATCH 007/125] Update version to 0.3.24.dev --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index 80060a0fc..707924904 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.24 +VERSION = 0.3.24.dev # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. 
Meanwhile, the soname in shared library From 09911f077e62818109b3a393ca4c2fdf9a2112f7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 5 Sep 2023 16:33:40 +0200 Subject: [PATCH 008/125] Disable SVE targets for DYNAMIC_ARCH when compiling with (homebrew)gcc on macOS/arm64 --- Makefile.system | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile.system b/Makefile.system index b3968d739..5a4af9698 100644 --- a/Makefile.system +++ b/Makefile.system @@ -397,6 +397,9 @@ ifeq ($(OSNAME), Darwin) ifndef MACOSX_DEPLOYMENT_TARGET ifeq ($(ARCH), arm64) export MACOSX_DEPLOYMENT_TARGET=11.0 +ifeq ($(C_COMPILER), GCC) +export NO_SVE = 1 +endif else export MACOSX_DEPLOYMENT_TARGET=10.8 endif From 6a611db560089b11d2b786179cf26443171798fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Sun, 10 Sep 2023 08:44:07 +0200 Subject: [PATCH 009/125] memory: show correct number of max threads --- driver/others/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 4fceae754..fb7f36cdc 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -3012,7 +3012,7 @@ void *blas_memory_alloc(int procpos){ if (memory_overflowed) goto terminate; fprintf(stderr,"OpenBLAS warning: precompiled NUM_THREADS exceeded, adding auxiliary array for thread metadata.\n"); fprintf(stderr,"To avoid this warning, please rebuild your copy of OpenBLAS with a larger NUM_THREADS setting\n"); - fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", NUM_BUFFERS); + fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", MAX_CPU_NUMBER); memory_overflowed=1; new_release_info = (struct release_t*) malloc(512*sizeof(struct release_t)); newmemory = (struct newmemstruct*) malloc(512*sizeof(struct newmemstruct)); From fb97cc4d5e81d788ba2c99b016cb3b7628e64229 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 14 Sep 2023 10:46:23 +0200 
Subject: [PATCH 010/125] Add la_constants.o to SCLAUX/DZLAUX --- lapack-netlib/SRC/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index c75fd5f49..26314c4df 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -85,7 +85,7 @@ ALLAUX_O = ilaenv.o ilaenv2stage.o ieeeck.o lsamen.o xerbla.o xerbla_array.o \ ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" -SCLAUX = \ +SCLAUX = la_constants.o \ sbdsvdx.o sstevx.o sstein.o \ sbdsdc.o \ sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \ @@ -106,7 +106,7 @@ SCLAUX = \ endif ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" -DZLAUX = \ +DZLAUX = la_constants.o\ dcombssq.o \ dbdsvdx.o dstevx.o dstein.o \ dbdsdc.o \ From 7779bb6fb1938dad2961f819cc13cdb4316442da Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 14 Sep 2023 20:21:06 +0200 Subject: [PATCH 011/125] Make IWORK array larger to avoid overflow --- lapack-netlib/SRC/dtgex2.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack-netlib/SRC/dtgex2.f b/lapack-netlib/SRC/dtgex2.f index 00c23a833..1ccd92c8f 100644 --- a/lapack-netlib/SRC/dtgex2.f +++ b/lapack-netlib/SRC/dtgex2.f @@ -254,7 +254,7 @@ $ THRESHA, THRESHB * .. * .. Local Arrays .. 
- INTEGER IWORK( LDST ) + INTEGER IWORK( LDST + 2 ) DOUBLE PRECISION AI( 2 ), AR( 2 ), BE( 2 ), IR( LDST, LDST ), $ IRCOP( LDST, LDST ), LI( LDST, LDST ), $ LICOP( LDST, LDST ), S( LDST, LDST ), From 1285b53e398ec9cbab79368f762062f5154ed383 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 14 Sep 2023 20:22:11 +0200 Subject: [PATCH 012/125] Make IWORK array larger to avoid overflow --- lapack-netlib/SRC/stgex2.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack-netlib/SRC/stgex2.f b/lapack-netlib/SRC/stgex2.f index d61da2327..885739ab5 100644 --- a/lapack-netlib/SRC/stgex2.f +++ b/lapack-netlib/SRC/stgex2.f @@ -255,7 +255,7 @@ $ THRESHA, THRESHB * .. * .. Local Arrays .. - INTEGER IWORK( LDST ) + INTEGER IWORK( LDST + 2 ) REAL AI( 2 ), AR( 2 ), BE( 2 ), IR( LDST, LDST ), $ IRCOP( LDST, LDST ), LI( LDST, LDST ), $ LICOP( LDST, LDST ), S( LDST, LDST ), From 7e939fb8312d512c7e3948d7977d0af4ba660371 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 19 Sep 2023 23:33:39 +0200 Subject: [PATCH 013/125] Fix handling of additional buffer structures in case of overflow --- driver/others/memory.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index fb7f36cdc..b27fec431 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -73,6 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" +#define NEW_BUFFERS 512 #ifndef likely #ifdef __GNUC__ #define likely(x) __builtin_expect(!!(x), 1) @@ -2897,7 +2898,7 @@ void *blas_memory_alloc(int procpos){ #endif position ++; - } while (position < 512+NUM_BUFFERS); + } while (position < NEW_BUFFERS + NUM_BUFFERS); } #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); @@ -3014,9 +3015,10 @@ void *blas_memory_alloc(int procpos){ fprintf(stderr,"To avoid this warning, please rebuild your copy of OpenBLAS with a larger NUM_THREADS setting\n"); fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", MAX_CPU_NUMBER); memory_overflowed=1; - new_release_info = (struct release_t*) malloc(512*sizeof(struct release_t)); - newmemory = (struct newmemstruct*) malloc(512*sizeof(struct newmemstruct)); - for (i = 0; i < 512; i++) { + MB; + new_release_info = (struct release_t*) malloc(NEW_BUFFERS * sizeof(struct release_t)); + newmemory = (struct newmemstruct*) malloc(NEW_BUFFERS * sizeof(struct newmemstruct)); + for (i = 0; i < NEW_BUFFERS; i++) { newmemory[i].addr = (void *)0; #if defined(WHEREAMI) && !defined(USE_OPENMP) newmemory[i].pos = -1; @@ -3129,12 +3131,12 @@ void blas_memory_free(void *free_area){ printf(" Position : %d\n", position); #endif if (unlikely(memory_overflowed && position >= NUM_BUFFERS)) { - while ((position < NUM_BUFFERS+512) && (newmemory[position-NUM_BUFFERS].addr != free_area)) + while ((position < NUM_BUFFERS+NEW_BUFFERS) && (newmemory[position-NUM_BUFFERS].addr != free_area)) position++; // arm: ensure all writes are finished before other thread takes this memory WMB; - - newmemory[position].used = 0; +if (position - NUM_BUFFERS >= NEW_BUFFERS) goto error; + newmemory[position-NUM_BUFFERS].used = 0; #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif @@ -3213,7 +3215,7 @@ void blas_shutdown(void){ memory[pos].lock = 0; } if (memory_overflowed) - for 
(pos = 0; pos < 512; pos ++){ + for (pos = 0; pos < NEW_BUFFERS; pos ++){ newmemory[pos].addr = (void *)0; newmemory[pos].used = 0; #if defined(WHEREAMI) && !defined(USE_OPENMP) From 6876ae0c3b77b3fb08185e8619801343e4388868 Mon Sep 17 00:00:00 2001 From: Angelika Schwarz <17718454+angsch@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:10:08 +0200 Subject: [PATCH 014/125] Fix division by zero in zrotg The cases [ c s ] * [ 0 ] = [ |db_i| ] [-s c ] [ i*db_i ] [ 0 ] and [ c s ] * [ 0 ] = [ |db_r| ] [-s c ] [ db_r ] [ 0 ] computed s incorrectly. To flip the entries of vector, s should be conjg(db)/|db| and not conjg(db) / da, where da == 0.0. --- interface/zrotg.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/interface/zrotg.c b/interface/zrotg.c index af6f85c1c..4d2a9d510 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -61,16 +61,16 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { *(S1 + 0) = *(DB + 0); *(S1 + 1) = *(DB + 1) *-1; if (da_r == ZERO && da_i == ZERO) { - *C = ZERO; + *C = ZERO; if (db_r == ZERO) { (*DA) = fabsl(db_i); - *S = *S1 /da_r; - *(S+1) = *(S1+1) /da_r; + *S = *S1 /(*DA); + *(S+1) = *(S1+1) /(*DA); return; } else if ( db_i == ZERO) { *DA = fabsl(db_r); - *S = *S1 /da_r; - *(S+1) = *(S1+1) /da_r; + *S = *S1 /(*DA); + *(S+1) = *(S1+1) /(*DA); return; } else { long double g1 = MAX( fabsl(db_r), fabsl(db_i)); From db3a43c8edeb36ecc9e7cde10b1c06be3f2147fc Mon Sep 17 00:00:00 2001 From: Angelika Schwarz <17718454+angsch@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:42:13 +0200 Subject: [PATCH 015/125] Simplify rotg * The check da != ZERO is no longer necessary since there is a special case ada == ZERO, where ada = |da|. * Add the missing check c != ZERO before the division. Note that with these two changes the long double code follows the float/double version of the code. 
--- interface/rotg.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/interface/rotg.c b/interface/rotg.c index 8d40d9c53..423ebda21 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -66,13 +66,8 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ c = da / r; s = db / r; z = ONE; - if (da != ZERO) { - if (ada > adb){ - z = s; - } else { - z = ONE / c; - } - } + if (ada > adb) z = s; + if ((ada <= adb) && (c != ZERO)) z = ONE / c; *C = c; *S = s; From 44e6e5479b87f697b4d4fc92030c162f2451b384 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 21 Sep 2023 23:01:21 +0200 Subject: [PATCH 016/125] Use the C compiler for the C SBGEMM test source --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 46a7b1158..fa054f15b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -326,7 +326,7 @@ endif ifeq ($(BUILD_BFLOAT16),1) test_sbgemm : compare_sgemm_sbgemm.c ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) + $(CC) $(FLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) endif ifeq ($(BUILD_COMPLEX),1) From 2390e0bfbc203f5566b0fede523b1caf1c344deb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 21 Sep 2023 23:04:25 +0200 Subject: [PATCH 017/125] Quote the BU (underscore) option as it may not be set --- exports/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exports/Makefile b/exports/Makefile index d81735342..7682f851d 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -272,23 +272,23 @@ static : ../$(LIBNAME) rm -f goto.$(SUFFIX) osx.def : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) 
$(BUILD_COMPLEX16) > $(@F) + ./$(GENSYM) osx $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) aix.def : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) + ./$(GENSYM) aix $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) objcopy.def : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) + ./$(GENSYM) objcopy $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) objconv.def : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) + ./$(GENSYM) objconv $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) 
"$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) test : linktest.c $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. rm -f linktest linktest.c : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c + ./$(GENSYM) linktest $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c clean :: @rm -f *.def *.dylib __.SYMDEF* *.renamed From b926e70ebd879bb022d265a3859bfb5481b4d99f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 21 Sep 2023 23:07:32 +0200 Subject: [PATCH 018/125] Fix typo in build rule of "profiled" sbgemm --- interface/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/Makefile b/interface/Makefile index 2ac9663d6..78335357b 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -1301,7 +1301,7 @@ xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c ifeq ($(BUILD_BFLOAT16),1) sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h $(CC) -c $(CFLAGS) $< -o $(@F) -sbgemmt.$(SUFFIX) sbgemm.$(PSUFFIX) : gemmt.c ../param.h +sbgemmt.$(SUFFIX) sbgemmt.$(PSUFFIX) : gemmt.c ../param.h $(CC) -c $(CFLAGS) $< -o $(@F) endif From bb4718322294bd9f28b0343a643d0986e9046a2b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:13:47 +0200 Subject: [PATCH 019/125] Force -qextname for trailing underscore generation when IBM xlf is used with gcc --- Makefile.system | 4 ++++ 1 file changed, 4 insertions(+) 
diff --git a/Makefile.system b/Makefile.system index 5a4af9698..ae6db40b0 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1167,6 +1167,10 @@ endif ifeq ($(F_COMPILER), IBM) CCOMMON_OPT += -DF_INTERFACE_IBM +FEXTRALIB += -lxlf90 +ifeq ($(C_COMPILER), GCC) +FCOMMON_OPT += -qextname +endif # FCOMMON_OPT += -qarch=440 ifdef BINARY64 FCOMMON_OPT += -q64 From 8012afcabbc912e32961924a77858e334ee75356 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:15:12 +0200 Subject: [PATCH 020/125] Avoid using some gcc-specific flags with IBM xlf --- Makefile.power | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Makefile.power b/Makefile.power index 28a0bae08..33702c932 100644 --- a/Makefile.power +++ b/Makefile.power @@ -42,15 +42,14 @@ FCOMMON_OPT += -O2 -qrecur -qnosave else FCOMMON_OPT += -O2 -frecursive -fno-fast-math endif -ifeq ($(C_COMPILER), GCC) + +ifeq ($(F_COMPILER), GFORTRAN) ifneq ($(GCCVERSIONGT4), 1) $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) FCOMMON_OPT += -mcpu=power8 -mtune=power8 else FCOMMON_OPT += -mcpu=power9 -mtune=power9 endif -else -FCOMMON_OPT += -mcpu=power9 -mtune=power9 endif else FCOMMON_OPT += -O2 -Mrecursive @@ -84,12 +83,16 @@ CCOMMON_OPT += -DUSE_OPENMP -fopenmp else CCOMMON_OPT += -DUSE_OPENMP -mp endif +ifeq ($(F_COMPILER), IBM) +FCOMMON_OPT += -DUSE_OPENMP +else ifneq ($(F_COMPILER), PGI) FCOMMON_OPT += -DUSE_OPENMP -fopenmp else FCOMMON_OPT += -DUSE_OPENMP -mp endif endif +endif # workaround for C->FORTRAN ABI violation in LAPACKE ifeq ($(F_COMPILER), GFORTRAN) From 4de963dc17eb682e774a85a494a28001d6e6aa98 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:16:37 +0200 Subject: [PATCH 021/125] Enforce trailing underscores on symbols when IBM xlf is combined with gcc --- f_check | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/f_check b/f_check index 526c41dc6..f30231bc4 100755 --- a/f_check +++ 
b/f_check @@ -155,6 +155,10 @@ else *'IBM XL'*) vendor=IBM openmp='-openmp' + case "$CC" in *gcc*) + bu=_ + ;; + esac ;; *NAG*) vendor=NAG @@ -223,6 +227,10 @@ else *ppuf*|*xlf*) vendor=IBM openmp='-openmp' + case "$CC" in *gcc*) + bu=_ + ;; + esac ;; *open64*) vendor=OPEN64 From 7a96908d0cb0ee3cc5b49390a5ec0ca3a71fefdf Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:18:24 +0200 Subject: [PATCH 022/125] Add -lgomp when IBM xlf is combined with gcc in OPENMP builds --- ctest/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ctest/Makefile b/ctest/Makefile index 9e85d23b9..af5b34a36 100644 --- a/ctest/Makefile +++ b/ctest/Makefile @@ -214,6 +214,11 @@ endif ifeq ($(F_COMPILER), NAG) CEXTRALIB = -lgomp endif +ifeq ($(F_COMPILER), IBM) +ifeq ($(C_COMPILER), GCC) +CEXTRALIB += -lgomp +endif +endif endif ifeq ($(BUILD_SINGLE),1) From 2a9981a2442106f67d963ee68cb4ee3b1a7a0334 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:19:11 +0200 Subject: [PATCH 023/125] Add -lgomp when IBM xlf is combined with gcc in OPENMP builds --- test/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index fa054f15b..715842b4d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -271,6 +271,11 @@ endif ifeq ($(F_COMPILER), NAG) CEXTRALIB = -lgomp endif +ifeq ($(F_COMPILER), IBM) +ifeq ($(C_COMPILER), GCC) +CEXTRALIB += -lgomp +endif +endif endif ifeq ($(BUILD_SINGLE),1) @@ -326,7 +331,7 @@ endif ifeq ($(BUILD_BFLOAT16),1) test_sbgemm : compare_sgemm_sbgemm.c ../$(LIBNAME) - $(CC) $(FLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) + $(CC) $(CFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) endif ifeq ($(BUILD_COMPLEX),1) From 4670eb1462b73ffa82699a70bc383bfd41461a0f Mon Sep 17 00:00:00 2001 From: gxw Date: Wed, 20 Sep 2023 09:09:35 +0800 Subject: [PATCH 024/125] LoongArch64: Add dtrsm kernel --- 
kernel/loongarch64/KERNEL.LOONGSON3R5 | 10 +- .../loongarch64/dtrsm_kernel_LN_16x4_lasx.S | 1366 +++++++++++ .../loongarch64/dtrsm_kernel_LT_16x4_lasx.S | 959 ++++++++ .../loongarch64/dtrsm_kernel_RN_16x4_lasx.S | 882 +++++++ .../loongarch64/dtrsm_kernel_RT_16x4_lasx.S | 953 ++++++++ kernel/loongarch64/dtrsm_kernel_macro.S | 2147 +++++++++++++++++ 6 files changed, 6312 insertions(+), 5 deletions(-) create mode 100644 kernel/loongarch64/dtrsm_kernel_LN_16x4_lasx.S create mode 100644 kernel/loongarch64/dtrsm_kernel_LT_16x4_lasx.S create mode 100644 kernel/loongarch64/dtrsm_kernel_RN_16x4_lasx.S create mode 100644 kernel/loongarch64/dtrsm_kernel_RT_16x4_lasx.S create mode 100644 kernel/loongarch64/dtrsm_kernel_macro.S diff --git a/kernel/loongarch64/KERNEL.LOONGSON3R5 b/kernel/loongarch64/KERNEL.LOONGSON3R5 index c23c2fac5..011e8b89e 100644 --- a/kernel/loongarch64/KERNEL.LOONGSON3R5 +++ b/kernel/loongarch64/KERNEL.LOONGSON3R5 @@ -24,12 +24,12 @@ SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) SGEMVNKERNEL = sgemv_n_8_lasx.S SGEMVTKERNEL = sgemv_t_8_lasx.S -endif -DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +DTRSMKERNEL_LN = dtrsm_kernel_LN_16x4_lasx.S +DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_lasx.S +DTRSMKERNEL_RN = dtrsm_kernel_RN_16x4_lasx.S +DTRSMKERNEL_RT = dtrsm_kernel_RT_16x4_lasx.S +endif STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c diff --git a/kernel/loongarch64/dtrsm_kernel_LN_16x4_lasx.S b/kernel/loongarch64/dtrsm_kernel_LN_16x4_lasx.S new file mode 100644 index 000000000..3315daccb --- /dev/null +++ b/kernel/loongarch64/dtrsm_kernel_LN_16x4_lasx.S @@ -0,0 +1,1366 @@ +/******************************************************************************* +Copyright (c) 2023, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*******************************************************************************/ +#define ASSEMBLER + +#include "common.h" +#include "loongarch64_asm.S" + +/********************************************************************* +* 2023/07/26 guxiwei +* UTEST : OK +* CTEST : OK +* TEST : OK +* +* +*********************************************************************/ + +/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b, + * FLOAT *c, BLASLONG ldc, BLASLONG offset) + */ +#define M $r4 // param 1: bm +#define N $r5 // param 2: bn +#define K $r6 // param 3: bk +#define A $r7 // param 5: ba +#define B $r8 // param 6: bb +#define C $r9 // param 7: bc +#define LDC $r10 // param 8: ldc +#define OFFSET $r11 // param 9: offset + +/* Cycle control parameters */ +#define I $r13 +#define J $r14 +#define L $r15 +#define TL $r16 +/* Matrix address */ +#define A0 $r17 +#define B0 $r18 +#define C0 $r19 +#define C1 $r20 +#define C2 $r23 +#define C3 $r24 +#define T0 $r25 +#define T1 $r26 +#define T2 $r27 +#define KK $r28 +#define AA $r29 +#define CC $r30 +#undef ZERO +#define ZERO $r0 + +#define U0 $xr0 +#define U1 $xr1 +#define U2 $xr2 +#define U3 $xr3 +#define U4 $xr4 +#define U5 $xr5 +#define U6 $xr6 +#define U7 $xr7 +#define U8 $xr8 +#define U9 $xr9 +#define U10 $xr10 +#define U11 $xr11 +#define U12 $xr12 +#define U13 $xr13 +#define U14 $xr14 +#define U15 $xr15 +#define D0 $xr16 +#define D1 $xr17 +#define D2 $xr18 +#define D3 $xr19 +#define D4 $xr20 +#define D5 $xr21 +#define D6 $xr22 +#define D7 $xr23 +#define D8 $xr24 +#define D9 $xr25 +#define D10 $xr26 +#define D11 $xr27 +#define D12 $xr28 +#define D13 $xr29 +#define D14 $xr30 +#define D15 $xr31 + +/* Prefetch interval */ +#define A_PRE 0x400 +#define B_PRE 0x100 + +#include "dtrsm_kernel_macro.S" + +// By integrating the dgemm and dsolve processes, the following advantages can be obtained: +// 1. Avoid the overhead of function calls (by not invoking dgemm_kernel) +// 2. 
+ Reduce the storage and retrieval of C data +// 3. Vectorization of dsolve +// DGEMM_UNROLL_M x DGEMM_UNROLL_N is 16x4, which is a fairly large size. +// To achieve finer-grained optimization, 15 scenarios have been addressed: +// 16x4, 16x2, 16x1, 8x4, 8x2, 8x1, 4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1. + +.macro dsolve_16 N +// if N = 4 the data layout of C is as follows: +// U0 U1 U2 U3 +// U4 U5 U6 U7 +// U8 U9 U10 U11 +// U12 U13 U14 U15 +// if N = 2 the data layout of C is as follows: +// U0 U1 U2 U3 +// U4 U5 U6 U7 +// if N = 1 the data layout of C is as follows: +// U0 U1 U2 U3 +// The matrix A has dimensions of 16x16, and +// it will be divided into 4 segments for processing. + +#define G12 U3 +#define G13 U7 +#define G14 U11 +#define G15 U15 + GTRANSPOSE4x4_D U3, U7, U11, U15, G12, G13, G14, G15, D0, D1 + // A + // G12 G13 G14 G15 + // ----------------- + // 204 | D9 + // 220 221 | D8 D7 + // 236 237 238 | D6 D5 D4 + // 252 253 254 255 | D3 D2 D1 D0 + PTR_ADDI T0, A0, 252 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 236 * 8 + GLDREPL xv, d, D6, T0, 0, D5, T0, 1 * 8, D4, T0, 2 * 8 + PTR_ADDI T0, A0, 220 * 8 + GLDREPL xv, d, D8, T0, 0, D7, T0, 1 * 8 + PTR_ADDI T0, A0, 204 * 8 + GLDREPL xv, d, D9, T0, 0 + + xvfmul.d G15, G15, D0 + GNMSUB xvf, d, G14, G15, D1, G14 + xvfmul.d G14, G14, D4 + GNMSUB xvf, d, G13, G15, D2, G13, G13, G14, D5, G13 + xvfmul.d G13, G13, D7 + GNMSUB xvf, d, G12, G15, D3, G12, G12, G14, D6, G12, G12, G13, D8, G12 + xvfmul.d G12, G12, D9 + // Store B +.if \N == 4 + // x x x x ... x x x x + // x x x x ... x x x x + // x x x x ... x x x x + // b48 b49 b50 b51 ... b60 b61 b62 b63 + GST xv, , G12, B0, 48 * 8, G13, B0, 52 * 8, G14, B0, 56 * 8, G15, B0, 60 * 8 +.elseif \N == 2 + // x x x x ... x x x x + // x x x x ... x x x x + // x x x x ...
x x x x + // b24 b25 b26 b27 b28 b29 b30 b31 + GST v, , $vr3, B0, 24 * 8, $vr7, B0, 26 * 8, $vr11, B0, 28 * 8, $vr15, B0, 30 * 8 +.elseif \N == 1 + // x x x x + // x x x x + // x x x x + // b12 b13 b14 b15 + GST f, d, $f3, B0, 12 * 8, $f7, B0, 13 * 8, $f11, B0, 14 * 8, $f15, B0, 15 * 8 +.endif + // Transpose G15 G14 G13 G12 + GTRANSPOSE4x4_D G12, G13, G14, G15, D0, D1, D2, D3, D4, D5 + // Store C +.if \N == 4 + // x x x x ... c12 c13 c14 c15 + // x x x x ... c28 c29 c30 c31 + // x x x x ... c44 c45 c46 c47 + // x x x x ... c60 c61 c62 c63 + GST xv, , D0, C0, 12 * 8, D1, C1, 12 * 8, D2, C2, 12 * 8, D3, C3, 12 * 8 +.elseif \N == 2 + // x x x x ... c12 c13 c14 c15 + // x x x x ... c28 c29 c30 c31 + GST xv, , D0, C0, 12 * 8, D1, C1, 12 * 8 +.elseif \N == 1 + // Store C + // x x x x ... c12 c13 c14 c15 + GST xv, , D0, C0, 12 * 8 +.endif + +#define G8 U2 +#define G9 U6 +#define G10 U10 +#define G11 U14 + GTRANSPOSE4x4_D U2, U6, U10, U14, G8, G9, G10, G11, D0, D1 + // A + // G8 G9 G10 G11 + // ----------------- + // 136 | D9 + // 152 153 | D8 D7 + // 168 169 170 | D6 D5 D4 + // 184 185 186 187 | D3 D2 D1 D0 + // 200 201 202 203 | D15 D14 D13 D12 + // 216 217 218 219 | D11 D10 D9 D8 + // 232 233 234 235 | D7 D6 D5 D4 + // 248 249 250 251 | D3 D2 D1 D0 + PTR_ADDI T0, A0, 248 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 232 * 8 + GLDREPL xv, d, D7, T0, 0, D6, T0, 1 * 8, D5, T0, 2 * 8, D4, T0, 3 * 8 + PTR_ADDI T0, A0, 216 * 8 + GLDREPL xv, d, D11, T0, 0, D10, T0, 1 * 8, D9, T0, 2 * 8, D8, T0, 3 * 8 + PTR_ADDI T0, A0, 200 * 8 + GLDREPL xv, d, D15, T0, 0, D14, T0, 1 * 8, D13, T0, 2 * 8, D12, T0, 3 * 8 + GNMSUB xvf, d, G11, G15, D0, G11, G10, G15, D1, G10, G9, G15, D2, G9, G8, G15, D3, G8, \ + G11, G14, D4, G11, G10, G14, D5, G10, G9, G14, D6, G9, G8, G14, D7, G8, \ + G11, G13, D8, G11, G10, G13, D9, G10, G9, G13, D10, G9, G8, G13, D11, G8, \ + G11, G12, D12, G11, G10, G12, D13, G10, G9, G12, D14, G9, G8, G12, D15, G8 + PTR_ADDI 
T0, A0, 184 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 168 * 8 + GLDREPL xv, d, D6, T0, 0, D5, T0, 1 * 8, D4, T0, 2 * 8 + PTR_ADDI T0, A0, 152 * 8 + GLDREPL xv, d, D8, T0, 0, D7, T0, 1 * 8 + PTR_ADDI T0, A0, 136 * 8 + GLDREPL xv, d, D9, T0, 0 + + xvfmul.d G11, G11, D0 + GNMSUB xvf, d, G10, G11, D1, G10, G9, G11, D2, G9, G8, G11, D3, G8 + xvfmul.d G10, G10, D4 + GNMSUB xvf, d, G9, G10, D5, G9, G8, G10, D6, G8 + xvfmul.d G9, G9, D7 + GNMSUB xvf, d, G8, G9, D8, G8 + xvfmul.d G8, G8, D9 + // Store B +.if \N == 4 + // x x x x ... x x x x + // x x x x ... x x x x + // b32 b33 b34 b35 ... b44 b45 b46 b47 + // b48 b49 b50 b51 ... b60 b61 b62 b63 + GST xv, , G8, B0, 32 * 8, G9, B0, 36 * 8, G10, B0, 40 * 8, G11, B0, 44 * 8 +.elseif \N == 2 + // x x x x ... x x x x + // x x x x ... x x x x + // b16 b17 b18 b19 b20 b21 b22 b23 + // b24 b25 b26 b27 b28 b29 b30 b31 + GST v, , $vr2, B0, 16 * 8, $vr6, B0, 18 * 8, $vr10, B0, 20 * 8, $vr14, B0, 22 * 8 +.elseif \N == 1 + // x x x x + // x x x x + // b8 b9 b10 b11 + // b12 b13 b14 b15 + GST f, d, $f2, B0, 8 * 8, $f6, B0, 9 * 8, $f10, B0, 10 * 8, $f14, B0, 11 * 8 +.endif + // Transpose G11 G10 G9 G8 + GTRANSPOSE4x4_D G8, G9, G10, G11, D0, D1, D2, D3, D4, D5 + // Store C +.if \N == 4 + // x x x x ... c8 c9 c10 c11 c12 c13 c14 c15 + // x x x x ... c24 c25 c26 c27 c28 c29 c30 c31 + // x x x x ... c40 c41 c42 c43 c44 c45 c46 c47 + // x x x x ... c56 c57 c58 c59 c60 c61 c62 c63 + GST xv, , D0, C0, 8 * 8, D1, C1, 8 * 8, D2, C2, 8 * 8, D3, C3, 8 * 8 +.elseif \N == 2 + // x x x x ... c8 c9 c10 c11 c12 c13 c14 c15 + // x x x x ... c24 c25 c26 c27 c28 c29 c30 c31 + GST xv, , D0, C0, 8 * 8, D1, C1, 8 * 8 +.elseif \N == 1 + // x x x x ...
c8 c9 c10 c11 c12 c13 c14 c15 + GST xv, , D0, C0, 8 * 8 +.endif + +#define G4 U1 +#define G5 U5 +#define G6 U9 +#define G7 U13 + GTRANSPOSE4x4_D U1, U5, U9, U13, G4, G5, G6, G7, D0, D1 + // A + // G4 G5 G6 G7 + // ------------------ + // 68 | D9 + // 84 85 | D8 D7 + // 100 101 102 | D6 D5 D4 + // 116 117 118 119 | D3 D2 D1 D0 + // 132 133 134 135 | D15 D14 D13 D12 + // 148 149 150 151 | D11 D10 D9 D8 + // 164 165 166 167 | D7 D6 D5 D4 + // 180 181 182 183 | D3 D2 D1 D0 + // 196 197 198 199 | D15 D14 D13 D12 + // 212 213 214 215 | D11 D10 D9 D8 + // 228 229 230 231 | D7 D6 D5 D4 + // 244 245 246 247 | D3 D2 D1 D0 + PTR_ADDI T0, A0, 244 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 228 * 8 + GLDREPL xv, d, D7, T0, 0, D6, T0, 1 * 8, D5, T0, 2 * 8, D4, T0, 3 * 8 + PTR_ADDI T0, A0, 212 * 8 + GLDREPL xv, d, D11, T0, 0, D10, T0, 1 * 8, D9, T0, 2 * 8, D8, T0, 3 * 8 + PTR_ADDI T0, A0, 196 * 8 + GLDREPL xv, d, D15, T0, 0, D14, T0, 1 * 8, D13, T0, 2 * 8, D12, T0, 3 * 8 + GNMSUB xvf, d, G7, G15, D0, G7, G6, G15, D1, G6, G5, G15, D2, G5, G4, G15, D3, G4, \ + G7, G14, D4, G7, G6, G14, D5, G6, G5, G14, D6, G5, G4, G14, D7, G4, \ + G7, G13, D8, G7, G6, G13, D9, G6, G5, G13, D10, G5, G4, G13, D11, G4, \ + G7, G12, D12, G7, G6, G12, D13, G6, G5, G12, D14, G5, G4, G12, D15, G4 + PTR_ADDI T0, A0, 180 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 164 * 8 + GLDREPL xv, d, D7, T0, 0, D6, T0, 1 * 8, D5, T0, 2 * 8, D4, T0, 3 * 8 + PTR_ADDI T0, A0, 148 * 8 + GLDREPL xv, d, D11, T0, 0, D10, T0, 1 * 8, D9, T0, 2 * 8, D8, T0, 3 * 8 + PTR_ADDI T0, A0, 132 * 8 + GLDREPL xv, d, D15, T0, 0, D14, T0, 1 * 8, D13, T0, 2 * 8, D12, T0, 3 * 8 + GNMSUB xvf, d, G7, G11, D0, G7, G6, G11, D1, G6, G5, G11, D2, G5, G4, G11, D3, G4, \ + G7, G10, D4, G7, G6, G10, D5, G6, G5, G10, D6, G5, G4, G10, D7, G4, \ + G7, G9, D8, G7, G6, G9, D9, G6, G5, G9, D10, G5, G4, G9, D11, G4, \ + G7, G8, D12, G7, G6, G8, D13, G6, G5, 
G8, D14, G5, G4, G8, D15, G4 + PTR_ADDI T0, A0, 116 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 100 * 8 + GLDREPL xv, d, D6, T0, 0, D5, T0, 1 * 8, D4, T0, 2 * 8 + PTR_ADDI T0, A0, 84 * 8 + GLDREPL xv, d, D8, T0, 0, D7, T0, 1 * 8 + PTR_ADDI T0, A0, 68 * 8 + GLDREPL xv, d, D9, T0, 0 + xvfmul.d G7, G7, D0 + GNMSUB xvf, d, G6, G7, D1, G6, G5, G7, D2, G5, G4, G7, D3, G4 + xvfmul.d G6, G6, D4 + GNMSUB xvf, d, G5, G6, D5, G5, G4, G6, D6, G4 + xvfmul.d G5, G5, D7 + GNMSUB xvf, d, G4, G5, D8, G4 + xvfmul.d G4, G4, D9 + // Store B +.if \N == 4 + // x x x x ... x x x x + // b16 b17 b18 b19 ... b28 b29 b30 b31 + // b32 b33 b34 b35 ... b44 b45 b46 b47 + // b48 b49 b50 b51 ... b60 b61 b62 b63 + GST xv, , G4, B0, 16 * 8, G5, B0, 20 * 8, G6, B0, 24 * 8, G7, B0, 28 * 8 +.elseif \N == 2 + // x x x x ... x x x x + // b8 b9 b10 b11 b12 b13 b14 b15 + // b16 b17 b18 b19 b20 b21 b22 b23 + // b24 b25 b26 b27 b28 b29 b30 b31 + GST v, , $vr1, B0, 8 * 8, $vr5, B0, 10 * 8, $vr9, B0, 12 * 8, $vr13, B0, 14 * 8 +.elseif \N == 1 + // x x x x + // b4 b5 b6 b7 + // b8 b9 b10 b11 + // b12 b13 b14 b15 + GST f, d, $f1, B0, 4 * 8, $f5, B0, 5 * 8, $f9, B0, 6 * 8, $f13, B0, 7 * 8 +.endif + // Transpose G7 G6 G5 G4 + GTRANSPOSE4x4_D G4, G5, G6, G7, D0, D1, D2, D3, D4, D5 + // Store C +.if \N == 4 + // x x x x c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 + // x x x x c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 + // x x x x c36 c37 c38 c39 c40 c41 c42 c43 c44 c45 c46 c47 + // x x x x c52 c53 c54 c55 c56 c57 c58 c59 c60 c61 c62 c63 + GST xv, , D0, C0, 4 * 8, D1, C1, 4 * 8, D2, C2, 4 * 8, D3, C3, 4 * 8 +.elseif \N == 2 + // x x x x c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 + // x x x x c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 + GST xv, , D0, C0, 4 * 8, D1, C1, 4 * 8 +.elseif \N == 1 + // x x x x c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 + GST xv, , D0, C0, 4 * 8 +.endif + +#define G0 U0 +#define G1 U4 +#define G2 U8 +#define G3 U12 + GTRANSPOSE4x4_D
U0, U4, U8, U12, G0, G1, G2, G3, D0, D1 + // A + // G0 G1 G2 G3 + // ------------------ + // 0 | D9 + // 16 17 | D8 D7 + // 32 33 34 | D6 D5 D4 + // 48 49 50 51 | D3 D2 D1 D0 + // 64 65 66 67 | D15 D14 D13 D12 + // 80 81 82 83 | D11 D10 D9 D8 + // 96 97 98 99 | D7 D6 D5 D4 + // 112 113 114 115 | D3 D2 D1 D0 + // 128 129 130 131 | D15 D14 D13 D12 + // 144 145 146 147 | D11 D10 D9 D8 + // 160 161 162 163 | D7 D6 D5 D4 + // 176 177 178 179 | D3 D2 D1 D0 + // 192 193 194 195 | D15 D14 D13 D12 + // 208 209 210 211 | D11 D10 D9 D8 + // 224 225 226 227 | D7 D6 D5 D4 + // 240 241 242 243 | D3 D2 D1 D0 + PTR_ADDI T0, A0, 240 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 224 * 8 + GLDREPL xv, d, D7, T0, 0, D6, T0, 1 * 8, D5, T0, 2 * 8, D4, T0, 3 * 8 + PTR_ADDI T0, A0, 208 * 8 + GLDREPL xv, d, D11, T0, 0, D10, T0, 1 * 8, D9, T0, 2 * 8, D8, T0, 3 * 8 + PTR_ADDI T0, A0, 192 * 8 + GLDREPL xv, d, D15, T0, 0, D14, T0, 1 * 8, D13, T0, 2 * 8, D12, T0, 3 * 8 + GNMSUB xvf, d, G3, G15, D0, G3, G2, G15, D1, G2, G1, G15, D2, G1, G0, G15, D3, G0, \ + G3, G14, D4, G3, G2, G14, D5, G2, G1, G14, D6, G1, G0, G14, D7, G0, \ + G3, G13, D8, G3, G2, G13, D9, G2, G1, G13, D10, G1, G0, G13, D11, G0, \ + G3, G12, D12, G3, G2, G12, D13, G2, G1, G12, D14, G1, G0, G12, D15, G0 + PTR_ADDI T0, A0, 176 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 160 * 8 + GLDREPL xv, d, D7, T0, 0, D6, T0, 1 * 8, D5, T0, 2 * 8, D4, T0, 3 * 8 + PTR_ADDI T0, A0, 144 * 8 + GLDREPL xv, d, D11, T0, 0, D10, T0, 1 * 8, D9, T0, 2 * 8, D8, T0, 3 * 8 + PTR_ADDI T0, A0, 128 * 8 + GLDREPL xv, d, D15, T0, 0, D14, T0, 1 * 8, D13, T0, 2 * 8, D12, T0, 3 * 8 + GNMSUB xvf, d, G3, G11, D0, G3, G2, G11, D1, G2, G1, G11, D2, G1, G0, G11, D3, G0, \ + G3, G10, D4, G3, G2, G10, D5, G2, G1, G10, D6, G1, G0, G10, D7, G0, \ + G3, G9, D8, G3, G2, G9, D9, G2, G1, G9, D10, G1, G0, G9, D11, G0, \ + G3, G8, D12, G3, G2, G8, D13, G2, G1, G8, D14, G1, G0, 
G8, D15, G0 + PTR_ADDI T0, A0, 112 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 96 * 8 + GLDREPL xv, d, D7, T0, 0, D6, T0, 1 * 8, D5, T0, 2 * 8, D4, T0, 3 * 8 + PTR_ADDI T0, A0, 80 * 8 + GLDREPL xv, d, D11, T0, 0, D10, T0, 1 * 8, D9, T0, 2 * 8, D8, T0, 3 * 8 + PTR_ADDI T0, A0, 64 * 8 + GLDREPL xv, d, D15, T0, 0, D14, T0, 1 * 8, D13, T0, 2 * 8, D12, T0, 3 * 8 + GNMSUB xvf, d, G3, G7, D0, G3, G2, G7, D1, G2, G1, G7, D2, G1, G0, G7, D3, G0, \ + G3, G6, D4, G3, G2, G6, D5, G2, G1, G6, D6, G1, G0, G6, D7, G0, \ + G3, G5, D8, G3, G2, G5, D9, G2, G1, G5, D10, G1, G0, G5, D11, G0, \ + G3, G4, D12, G3, G2, G4, D13, G2, G1, G4, D14, G1, G0, G4, D15, G0 + PTR_ADDI T0, A0, 48 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 32 * 8 + GLDREPL xv, d, D6, T0, 0, D5, T0, 1 * 8, D4, T0, 2 * 8 + PTR_ADDI T0, A0, 16 * 8 + GLDREPL xv, d, D8, T0, 0, D7, T0, 1 * 8 + PTR_ADDI T0, A0, 0 * 8 + GLDREPL xv, d, D9, T0, 0 + + xvfmul.d G3, G3, D0 + GNMSUB xvf, d, G2, G3, D1, G2, G1, G3, D2, G1, G0, G3, D3, G0 + xvfmul.d G2, G2, D4 + GNMSUB xvf, d, G1, G2, D5, G1, G0, G2, D6, G0 + xvfmul.d G1, G1, D7 + GNMSUB xvf, d, G0, G1, D8, G0 + xvfmul.d G0, G0, D9 + // Store B +.if \N == 4 + // b0 b1 b2 b3 ... b12 b13 b14 b15 + // b16 b17 b18 b19 ... b28 b29 b30 b31 + // b32 b33 b34 b34 ... b44 b45 b46 b47 + // b48 b49 b50 b51 ... 
b60 b61 b62 b63 + GST xv, , G0, B0, 0, G1, B0, 4 * 8, G2, B0, 8 * 8, G3, B0, 12 * 8 +.elseif \N == 2 + // b0 b1 b2 b3 b4 b5 b6 b7 + // b8 b9 b10 b11 b12 b13 b14 b15 + // b16 b17 b18 b19 b20 b21 b22 b23 + // b24 b25 b26 b27 b28 b29 b30 b31 + GST v, , $vr0, B0, 0, $vr4, B0, 2 * 8, $vr8, B0, 4 * 8, $vr12, B0, 6 * 8 +.elseif \N == 1 + // b0 b1 b2 b3 + // b4 b5 b6 b7 + // b8 b9 b10 b11 + // b12 b13 b14 b15 + GST f, d, $f0, B0, 0, $f4, B0, 1 * 8, $f8, B0, 2 * 8, $f12, B0, 3 * 8 +.endif + // Transpose G3 G2 G1 G0 (into D0..D3, which feed the C stores below) + GTRANSPOSE4x4_D G0, G1, G2, G3, D0, D1, D2, D3, D4, D5 + // Store C +.if \N == 4 + // c0 c1 c2 c3 ... c12 c13 c14 c15 + // c16 c17 c18 c19 ... c28 c29 c30 c31 + // c32 c33 c34 c35 ... c44 c45 c46 c47 + // c48 c49 c50 c51 ... c60 c61 c62 c63 + GST xv, , D0, C0, 0, D1, C1, 0, D2, C2, 0, D3, C3, 0 +.elseif \N == 2 + // c0 c1 c2 c3 ... c12 c13 c14 c15 + // c16 c17 c18 c19 ... c28 c29 c30 c31 + GST xv, , D0, C0, 0, D1, C1, 0 +.elseif \N == 1 + // c0 c1 c2 c3 ... c12 c13 c14 c15 + GST xv, , D0, C0, 0 +.endif + +#undef G0 +#undef G1 +#undef G2 +#undef G3 +#undef G4 +#undef G5 +#undef G6 +#undef G7 +#undef G8 +#undef G9 +#undef G10 +#undef G11 +#undef G12 +#undef G13 +#undef G14 +#undef G15 +.endm + +.macro dsolve_8 N +// Solves an 8-wide tile of C (N = 4, 2 or 1 columns) against the 8x8 block of A addressed by A0. +// The solved values are written both to the packed buffer at B0 and back to C0..C3. +// if N = 4 the data layout of C is as follows: +// U0 U1 +// U2 U3 +// U4 U5 +// U6 U7 +// if N = 2 the data layout of C is as follows: +// U0 U1 +// U2 U3 +// if N = 1 the data layout of C is as follows: +// U0 U1 +// The matrix A has dimensions of 8x8, and +// it will be divided into 2 segments for processing.
+ +#define G4 U1 +#define G5 U3 +#define G6 U5 +#define G7 U7 + // Transpose U7 U5 U3 U1 + GTRANSPOSE4x4_D U1, U3, U5, U7, G4, G5, G6, G7, D0, D1 + // A + // G4 G5 G6 G7 + // --------------- + // 36 | D9 + // 44 45 | D8 D7 + // 52 53 54 | D6 D5 D4 + // 60 61 62 63 | D3 D2 D1 D0 + PTR_ADDI T0, A0, 60 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 52 * 8 + GLDREPL xv, d, D6, T0, 0, D5, T0, 1 * 8, D4, T0, 2 * 8 + PTR_ADDI T0, A0, 44 * 8 + GLDREPL xv, d, D8, T0, 0, D7, T0, 1 * 8 + PTR_ADDI T0, A0, 36 * 8 + GLDREPL xv, d, D9, T0, 0 + + xvfmul.d G7, G7, D0 + GNMSUB xvf, d, G6, G7, D1, G6, G5, G7, D2, G5, G4, G7, D3, G4 + xvfmul.d G6, G6, D4 + GNMSUB xvf, d, G5, G6, D5, G5, G4, G6, D6, G4 + xvfmul.d G5, G5, D7 + GNMSUB xvf, d, G4, G5, D8, G4 + xvfmul.d G4, G4, D9 + // Store B +.if \N == 4 + GST xv, , G4, B0, 16 * 8, G5, B0, 20 * 8, G6, B0, 24 * 8, G7, B0, 28 * 8 +.elseif \N == 2 + GST v, , $vr1, B0, 8 * 8, $vr3, B0, 10 * 8, $vr5, B0, 12 * 8, $vr7, B0, 14 * 8 +.elseif \N == 1 + GST f, d, $f1, B0, 4 * 8, $f3, B0, 5 * 8, $f5, B0, 6 * 8, $f7, B0, 7 * 8 +.endif + // Transpose + GTRANSPOSE4x4_D G4, G5, G6, G7, D4, D5, D6, D7, D8, D9 + // Store C +.if \N == 4 + GST xv, , D4, C0, 4 * 8, D5, C1, 4 * 8, D6, C2, 4 * 8, D7, C3, 4 * 8 +.elseif \N == 2 + GST xv, , D4, C0, 4 * 8, D5, C1, 4 * 8 +.elseif \N == 1 + GST xv, , D4, C0, 4 * 8 +.endif + +#define G0 U0 +#define G1 U2 +#define G2 U4 +#define G3 U6 + // Transpose U6 U4 U2 U0 + GTRANSPOSE4x4_D U0, U2, U4, U6, G0, G1, G2, G3, D0, D1 + // A + // G0 G1 G2 G3 + //----------------- + // 0 | D9 + // 8 9 | D8 D7 + // 16 17 18 | D6 D5 D4 + // 24 25 26 27 | D3 D2 D1 D0 + // 32 33 34 35 | D15 D14 D13 D12 + // 40 41 42 43 | D11 D10 D9 D8 + // 48 49 50 51 | D7 D6 D5 D4 + // 56 57 58 59 | D3 D2 D1 D0 + PTR_ADDI T0, A0, 56 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 48 * 8 + GLDREPL xv, d, D7, T0, 0, D6, T0, 1 * 8, D5, T0, 2 * 8, D4, T0, 3 * 8 + 
PTR_ADDI T0, A0, 40 * 8 + GLDREPL xv, d, D11, T0, 0, D10, T0, 1 * 8, D9, T0, 2 * 8, D8, T0, 3 * 8 + PTR_ADDI T0, A0, 32 * 8 + GLDREPL xv, d, D15, T0, 0, D14, T0, 1 * 8, D13, T0, 2 * 8, D12, T0, 3 * 8 + GNMSUB xvf, d, G3, G7, D0, G3, G2, G7, D1, G2, G1, G7, D2, G1, G0, G7, D3, G0, \ + G3, G6, D4, G3, G2, G6, D5, G2, G1, G6, D6, G1, G0, G6, D7, G0, \ + G3, G5, D8, G3, G2, G5, D9, G2, G1, G5, D10, G1, G0, G5, D11, G0, \ + G3, G4, D12, G3, G2, G4, D13, G2, G1, G4, D14, G1, G0, G4, D15, G0 + PTR_ADDI T0, A0, 24 * 8 + GLDREPL xv, d, D3, T0, 0, D2, T0, 1 * 8, D1, T0, 2 * 8, D0, T0, 3 * 8 + PTR_ADDI T0, A0, 16 * 8 + GLDREPL xv, d, D6, T0, 0, D5, T0, 1 * 8, D4, T0, 2 * 8 + PTR_ADDI T0, A0, 8 * 8 + GLDREPL xv, d, D8, T0, 0, D7, T0, 1 * 8 + PTR_ADDI T0, A0, 0 * 8 + GLDREPL xv, d, D9, T0, 0 + + // Back substitution over G3..G0: each column is first scaled by its diagonal entry of A + // (D0, D4, D7, D9 = elements 27, 18, 9, 0), then GNMSUB removes its contribution from the columns still to be solved. + // NOTE(review): the diagonal is multiplied rather than divided, so the packed A presumably stores reciprocals - confirm with the packing routine. + xvfmul.d G3, G3, D0 + GNMSUB xvf, d, G2, G3, D1, G2, G1, G3, D2, G1, G0, G3, D3, G0 + xvfmul.d G2, G2, D4 + GNMSUB xvf, d, G1, G2, D5, G1, G0, G2, D6, G0 + xvfmul.d G1, G1, D7 + GNMSUB xvf, d, G0, G1, D8, G0 + xvfmul.d G0, G0, D9 + // Store B +.if \N == 4 + GST xv, , G0, B0, 0, G1, B0, 4 * 8, G2, B0, 8 * 8, G3, B0, 12 * 8 +.elseif \N == 2 + GST v, , $vr0, B0, 0, $vr2, B0, 2 * 8, $vr4, B0, 4 * 8, $vr6, B0, 6 * 8 +.elseif \N == 1 + GST f, d, $f0, B0, 0, $f2, B0, 1 * 8, $f4, B0, 2 * 8, $f6, B0, 3 * 8 +.endif + // Transpose + GTRANSPOSE4x4_D G0, G1, G2, G3, D0, D1, D2, D3, D4, D5 + // Store C +.if \N == 4 + GST xv, , D0, C0, 0, D1, C1, 0, D2, C2, 0, D3, C3, 0 +.elseif \N == 2 + GST xv, , D0, C0, 0, D1, C1, 0 +.elseif \N == 1 + GST xv, , D0, C0, 0 +.endif + +#undef G0 +#undef G1 +#undef G2 +#undef G3 +#undef G4 +#undef G5 +#undef G6 +#undef G7 +.endm + +.macro dsolve_4 N +// if N = 4 the data layout of C is as follows: +// U0 +// U1 +// U2 +// U3 +// if N = 2 the data layout of C is as follows: +// U0 +// U1 +// if N = 1 the data layout of C is as follows: +// U0 +// The matrix A has dimensions of 4x4, and +// it will be processed as a single segment.
+ +#define G0 U0 +#define G1 U1 +#define G2 U2 +#define G3 U3 + // Transpose U3 U2 U1 U0 + GTRANSPOSE4x4_D U0, U1, U2, U3, G0, G1, G2, G3, D0, D1 + // A + // G0 G1 G2 G3 + //------------- + // 0 | D9 + // 4 5 | D8 D7 + // 8 9 10 | D6 D5 D4 + // 12 13 14 15 | D3 D2 D1 D0 + GLDREPL xv, d, D3, A0, 12 * 8, D2, A0, 13 * 8, D1, A0, 14 * 8, D0, A0, 15 * 8, \ + D6, A0, 8 * 8, D5, A0, 9 * 8, D4, A0, 10 * 8, \ + D8, A0, 4 * 8, D7, A0, 5 * 8, \ + D9, A0, 0 * 8 + xvfmul.d G3, G3, D0 + GNMSUB xvf, d, G2, G3, D1, G2, G1, G3, D2, G1, G0, G3, D3, G0 + xvfmul.d G2, G2, D4 + GNMSUB xvf, d, G1, G2, D5, G1, G0, G2, D6, G0 + xvfmul.d G1, G1, D7 + GNMSUB xvf, d, G0, G1, D8, G0 + xvfmul.d G0, G0, D9 + // Store B +.if \N == 4 + GST xv, , G0, B0, 0, G1, B0, 4 * 8, G2, B0, 8 * 8, G3, B0, 12 * 8 +.elseif \N == 2 + GST v, , $vr0, B0, 0, $vr1, B0, 2 * 8, $vr2, B0, 4 * 8, $vr3, B0, 6 * 8 +.elseif \N == 1 + GST f, d, $f0, B0, 0, $f1, B0, 1 * 8, $f2, B0, 2 * 8, $f3, B0, 3 * 8 +.endif + // Transpose + GTRANSPOSE4x4_D G0, G1, G2, G3, D0, D1, D2, D3, D4, D5 + // Store C +.if \N == 4 + GST xv, , D0, C0, 0, D1, C1, 0, D2, C2, 0, D3, C3, 0 +.elseif \N == 2 + GST xv, , D0, C0, 0, D1, C1, 0 +.elseif \N == 1 + GST xv, , D0, C0, 0 +.endif + +#undef G0 +#undef G1 +#undef G2 +#undef G3 +.endm + +.macro dsolve_2 N +#define G0 U2 +#define G1 U3 + // Transpose + GSBUTTERFLY xv, d, G0, G1, U1, U0 + // A + // G0 G1 + // ------ + // 0 | D2 + // 2 3 | D1 D0 + GLDREPL xv, d, D2, A0, 0, D1, A0, 2 * 8, D0, A0, 3 * 8 + xvfmul.d G1, G1, D0 + GNMSUB xvf, d, G0, G1, D1, G0 + xvfmul.d G0, G0, D2 + // Store B +.if \N == 4 + GST xv, , G0, B0, 0, G1, B0, 4 * 8 +.elseif \N == 2 + GST v, , $vr2, B0, 0, $vr3, B0, 2 * 8 +.elseif \N == 1 + GST f, d, $f2, B0, 0, $f3, B0, 8 +.endif + // Transpose + GSBUTTERFLY xv, d, D0, D1, G1, G0 + // Store C +.if \N == 4 + vst $vr16, C0, 0x00 + vst $vr17, C1, 0x00 + xvstelm.d D0, C2, 0x00, 0x02 + xvstelm.d D1, C3, 0x00, 0x02 + xvstelm.d D0, C2, 0x08, 0x03 + xvstelm.d D1, C3, 0x08, 0x03 +.elseif 
\N == 2 + GST v, , $vr16, C0, 0, $vr17, C1, 0 +.elseif \N == 1 + GST v, , $vr16, C0, 0 +.endif + +#undef G0 +#undef G1 +.endm + +// dgemm_dsolve_16x4: optional GEMM update (only when L > 0) followed by the 16-wide, 4-column solve. +.macro dgemm_dsolve_16x4 + // Keep copies of the incoming A0 and B0 in T1 and T2; the solver below rebuilds both pointers from them. + // NOTE(review): presumably dgemm_16x4 advances A0/B0 while it runs - confirm in dtrsm_kernel_macro.S + or T1, A0, A0 + or T2, B0, B0 + // 0 >= L: nothing to accumulate, so take C straight from memory + bge ZERO, L, .L_dsolve_16x4_load + dgemm_16x4 + b .L_dsolve_16x4 +.L_dsolve_16x4_load: + // Load C + GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 + GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 + GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60 + GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 +/********************** solver ******************/ +.L_dsolve_16x4: + // Step back from the saved pointers: A0 by 2 * (16 * 8 * 8) = 16 * 16 * 8 bytes (done in two adds), B0 by 16 * 4 * 8 bytes + PTR_ADDI A0, T1, -(16 * 8 * 8) + PTR_ADDI A0, A0, -(16 * 8 * 8) + PTR_ADDI B0, T2, -(16 * 4 * 8) + dsolve_16 4 +.endm + +// dgemm_dsolve_1x4: optional GEMM update (only when L > 0), then a single-row solve of four C values. +.macro dgemm_dsolve_1x4 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_1x4_load + dgemm_1x4 + b .L_dsolve_1x4 +.L_dsolve_1x4_load: + // Load C + // One scalar per C column, then gathered into lanes 1..3 of U0 (assumes U0..U3 alias $f0..$f3 - TODO confirm against the register defines) + fld.d $f0, C0, 0x00 + fld.d $f1, C1, 0x00 + fld.d $f2, C2, 0x00 + fld.d $f3, C3, 0x00 + xvinsve0.d U0, U1, 0x01 + xvinsve0.d U0, U2, 0x02 + xvinsve0.d U0, U3, 0x03 +.L_dsolve_1x4: + // Restore A0/B0 to their entry values; the A element used sits 8 bytes below A0 and the results go 32 bytes below B0 + or A0, T1, T1 + or B0, T2, T2 + GLDREPL xv, d, D0, A0, -1 * 8 + GMUL xvf, d, U0, U0, D0 + // Store C + xvstelm.d U0, C0, 0x00, 0x00 + xvstelm.d U0, C1, 0x00, 0x01 + xvstelm.d U0, C2, 0x00, 0x02 + xvstelm.d U0, C3, 0x00, 0x03 + // Store B + xvst U0, B0, -32 +.endm + +// dgemm_dsolve_2x4: optional GEMM update (only when L > 0), then the 2-wide, 4-column solve. +.macro dgemm_dsolve_2x4 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_2x4_load + dgemm_2x4 + b .L_dsolve_2x4 +.L_dsolve_2x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 + + // Fold four loaded rows into two registers. NOTE(review): with selector 0x02 this presumably places the low 128 bits of U2/U3 in the high half of U0/U1 - confirm against the xvpermi.q definition + xvpermi.q U0, U2, 0x02 + xvpermi.q U1, U3, 0x02 +/********************** solver ******************/ +.L_dsolve_2x4: + // Step back from the saved pointers: A0 by 2 * 2 * 8 bytes, B0 by 2 * 4 * 8 bytes + PTR_ADDI A0, T1, -(2 * 2 * 8) + PTR_ADDI B0, T2, -(2 * 4 * 8) + dsolve_2 4 +.endm + +.macro dgemm_dsolve_4x4 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_4x4_load + dgemm_4x4 + b .L_dsolve_4x4 +.L_dsolve_4x4_load: + /* Load
C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 +/************** solver *****************/ +.L_dsolve_4x4: + PTR_ADDI A0, T1, -(4 * 4 * 8) + PTR_ADDI B0, T2, -(4 * 4 * 8) + + dsolve_4 4 +.endm + +.macro dgemm_dsolve_8x4 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_8x4_load + dgemm_8x4 + b .L_dsolve_8x4 +.L_dsolve_8x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 + + /* Load C2 */ + xvld U4, C2, 0x00 + xvld U5, C2, 0x20 + + /* Load C3 */ + xvld U6, C3, 0x00 + xvld U7, C3, 0x20 +/********* solver *********/ +.L_dsolve_8x4: + PTR_ADDI A0, T1, -(8 * 8 * 8) + PTR_ADDI B0, T2, -(8 * 4 * 8) + + dsolve_8 4 +.endm + +.macro dgemm_dsolve_4x2 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_4x2_load + dgemm_4x2 + b .L_dsolve_4x2 +.L_dsolve_4x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 +.L_dsolve_4x2: + PTR_ADDI A0, T1, -(4 * 4 * 8) + PTR_ADDI B0, T2, -(4 * 2 * 8) + + dsolve_4 2 +.endm + +.macro dgemm_dsolve_2x2 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_2x2_load + dgemm_2x2 + b .L_dsolve_2x2 +.L_dsolve_2x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 +.L_dsolve_2x2: + PTR_ADDI A0, T1, -(2 * 2 * 8) + PTR_ADDI B0, T2, -(2 * 2 * 8) + + dsolve_2 2 +.endm + +.macro dgemm_dsolve_8x2 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_8x2_load + dgemm_8x2 + b .L_dsolve_8x2 +.L_dsolve_8x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 +.L_dsolve_8x2: + PTR_ADDI A0, T1, -(8 * 8 * 8) + PTR_ADDI B0, T2, -(8 * 2 * 8) + + dsolve_8 2 +.endm + +.macro dgemm_dsolve_16x2 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_16x2_load + dgemm_16x2 + b .L_dsolve_16x2 +.L_dsolve_16x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 
0x40 + xvld U3, C0, 0x60 + /* Load C1 */ + xvld U4, C1, 0x00 + xvld U5, C1, 0x20 + xvld U6, C1, 0x40 + xvld U7, C1, 0x60 +.L_dsolve_16x2: + PTR_ADDI A0, T1, -(16 * 8 * 8) + PTR_ADDI A0, A0, -(16 * 8 * 8) + PTR_ADDI B0, T2, -(16 * 2 * 8) + + dsolve_16 2 +.endm + +.macro dgemm_dsolve_2x1 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_2x1_load + dgemm_2x1 + b .L_dsolve_2x1 +.L_dsolve_2x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 +.L_dsolve_2x1: + PTR_ADDI A0, T1, -(2 * 2 * 8) + PTR_ADDI B0, T2, -(2 * 1 * 8) + + dsolve_2 1 +.endm + +.macro dgemm_dsolve_4x1 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_4x1_load + dgemm_4x1 + b .L_dsolve_4x1 +.L_dsolve_4x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 +.L_dsolve_4x1: + PTR_ADDI A0, T1, -(4 * 4 * 8) + PTR_ADDI B0, T2, -(4 * 1 * 8) + + dsolve_4 1 +.endm + +.macro dgemm_dsolve_8x1 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_8x1_load + dgemm_8x1 + b .L_dsolve_8x1 +.L_dsolve_8x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 +.L_dsolve_8x1: + PTR_ADDI A0, T1, -(8 * 8 * 8) + PTR_ADDI B0, T2, -(8 * 1 * 8) + + dsolve_8 1 +.endm + +.macro dgemm_dsolve_16x1 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_16x1_load + dgemm_16x1 + b .L_dsolve_16x1 +.L_dsolve_16x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 +.L_dsolve_16x1: + PTR_ADDI A0, T1, -(16 * 8 * 8) + PTR_ADDI A0, A0, -(16 * 8 * 8) + PTR_ADDI B0, T2, -(16 * 1 * 8) + + dsolve_16 1 +.endm + + PROLOGUE + push_if_used 26, 32 + PTR_SLLI LDC, LDC, 3 + /* if (!(N >> 2)) goto L_N3 */ + PTR_SRAI J, N, 2 /* J = bn >> 2 */ + andi N, N, 0x03 + beq ZERO, J, .L_N3 +.align 5 +.L_J1: + PTR_ADDI J, J, -1 + PTR_ADD KK, M, OFFSET + + andi I, M, 15 + beq ZERO, I, .L_M16 + andi I, M, 1 + beqz I, .L_M2 +.L_M1: + PTR_ADDI T0, M, -1 + PTR_SLLI T0, T0, 3 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ALSL A0, KK, AA, 3 /* a + (m - 1) * k + kk */ + PTR_ADD CC, T0, C /* c + (m - 1) */ + + 
PTR_SLLI T0, KK, 5 + PTR_ADD B0, B, T0 /* b + 4 * kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + dgemm_dsolve_1x4 + PTR_ADDI KK, KK, -1 +.L_M2: + andi I, M, 2 + beqz I, .L_M4 + PTR_SRLI T0, M, 1 + PTR_SLLI T0, T0, 1 + PTR_ADDI T0, T0, -2 + PTR_SLLI T0, T0, 3 /* ((m & -2) - 2) */ + PTR_ADD CC, T0, C /* c + ((m & -2) - 2)*/ + PTR_SLLI T1, KK, 4 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -2) - 2) * k + 2 * kk */ + PTR_SLLI T0, KK, 5 + PTR_ADD B0, B, T0 /* b + 4 * kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + dgemm_dsolve_2x4 + PTR_ADDI KK, KK, -2 +.L_M4: + andi I, M, 4 + beqz I, .L_M8 + PTR_SRLI T0, M, 2 + PTR_SLLI T0, T0, 2 + PTR_ADDI T0, T0, -4 + PTR_SLLI T0, T0, 3 /* ((m & -4) - 4) */ + PTR_ADD CC, T0, C /* c + ((m & -4) - 4)*/ + PTR_SLLI T1, KK, 5 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -4) - 4) * k + 4 * kk */ + PTR_SLLI T0, KK, 5 + PTR_ADD B0, B, T0 /* b + 4 * kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + dgemm_dsolve_4x4 + PTR_ADDI KK, KK, -4 +.L_M8: + andi I, M, 8 + beqz I, .L_M16 + PTR_SRLI T0, M, 3 + PTR_SLLI T0, T0, 3 + PTR_ADDI T0, T0, -8 + PTR_SLLI T0, T0, 3 /* ((m & -8) - 8) */ + PTR_ADD CC, T0, C /* c + ((m & -8) - 8)*/ + PTR_SLLI T1, KK, 6 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -8) - 8) * k + 8 * kk */ + PTR_SLLI T0, KK, 5 + PTR_ADD B0, B, T0 /* b + 4 * kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + dgemm_dsolve_8x4 + PTR_ADDI KK, KK, -8 +.L_M16: + PTR_SRAI I, M, 4 /* I = bm >> 4 */ + beq ZERO, I, .L_M0 + + PTR_SRLI T0, M, 4 + PTR_SLLI T0, T0, 4 + PTR_ADDI T0, T0, -16 /* ((M & -16)) - 16) */ + PTR_SLLI T0, T0, 3 + PTR_MUL AA, T0, K + PTR_ADD AA, A, AA + PTR_ADD CC, C, T0 +.align 5 +.L_I1: + PTR_SLLI T0, KK, 5 + PTR_ADD B0, B, T0 + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, 
C0, LDC, C2, C1, LDC, C3, C2, LDC + PTR_SLLI T0, KK, 7 + PTR_ADD A0, AA, T0 + dgemm_dsolve_16x4 + PTR_ADDI I, I, -1 + PTR_ADDI KK, KK, -16 + PTR_ADDI CC, CC, -(16 * 8) + PTR_SLLI T0, K, 7 + PTR_SUB AA, AA, T0 + blt ZERO, I, .L_I1 +.L_M0: + PTR_SLLI T0, K, 3 + PTR_ALSL B, T0, B, 2 // b += 4 * k; + PTR_ALSL C, LDC, C, 2 // c += 4 * ldc + blt ZERO, J, .L_J1 +.L_N3: + // Two-column remainder: taken when bit 1 of N is set + andi J, N, 2 + beq ZERO, J, .L_N1 + + PTR_ADD KK, M, OFFSET + andi I, M, 15 + beq ZERO, I, .L_N3_M16 + andi I, M, 1 + beqz I, .L_N3_M2 +.L_N3_M1: + PTR_ADDI KK, KK, -1 + + PTR_ADDI T0, M, -1 + PTR_SLLI T0, T0, 3 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ALSL A0, KK, AA, 3 /* a + (m - 1) * k + kk */ + PTR_ADD CC, T0, C /* c + (m - 1) */ + + PTR_SLLI T0, KK, 4 + PTR_ADD B0, B, T0 /* b + 2 * kk */ + GADD , d, C0, CC, ZERO, C1, C0, LDC + // dgemm_dsolve_1x2 (inlined): scale the two C values by the A element at A0 and write them to C and to B. + // NOTE(review): unlike dgemm_dsolve_1x4 this path sets no L = K - KK and runs no GEMM update, and KK is decremented before the addresses are formed; + // that matches the other tails only if K - (M + OFFSET) <= 0 on entry - confirm with the callers. + GLD f, d, $f0, A0, 0, $f1, C0, 0, $f2, C1, 0 + GMUL f, d, $f1, $f1, $f0, $f2, $f2, $f0 + GST f, d, $f1, C0, 0, $f2, C1, 0, $f1, B0, 0, $f2, B0, 8 +.L_N3_M2: + andi I, M, 2 + beqz I, .L_N3_M4 + PTR_SRLI T0, M, 1 + PTR_SLLI T0, T0, 1 + PTR_ADDI T0, T0, -2 + PTR_SLLI T0, T0, 3 /* ((m & -2) - 2) */ + PTR_ADD CC, T0, C /* c + ((m & -2) - 2)*/ + PTR_SLLI T1, KK, 4 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -2) - 2) * k + 2 * kk */ + PTR_SLLI T0, KK, 4 + PTR_ADD B0, B, T0 /* b + 2 * kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, C0, LDC + dgemm_dsolve_2x2 + PTR_ADDI KK, KK, -2 +.L_N3_M4: + andi I, M, 4 + beqz I, .L_N3_M8 + PTR_SRLI T0, M, 2 + PTR_SLLI T0, T0, 2 + PTR_ADDI T0, T0, -4 + PTR_SLLI T0, T0, 3 /* ((m & -4) - 4) */ + PTR_ADD CC, T0, C /* c + ((m & -4) - 4)*/ + PTR_SLLI T1, KK, 5 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -4) - 4) * k + 4 * kk */ + PTR_SLLI T0, KK, 4 + PTR_ADD B0, B, T0 /* b + 2 * kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, C0, LDC + dgemm_dsolve_4x2 + PTR_ADDI KK, KK, -4 +.L_N3_M8: + andi I, M, 8 + beqz I, .L_N3_M16 + PTR_SRLI T0, M, 3 + PTR_SLLI T0,
T0, 3 + PTR_ADDI T0, T0, -8 + PTR_SLLI T0, T0, 3 /* ((m & -8) - 8) */ + PTR_ADD CC, T0, C /* c + ((m & -8) - 8)*/ + PTR_SLLI T1, KK, 6 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -8) - 8) * k + 8 * kk */ + PTR_SLLI T0, KK, 4 + PTR_ADD B0, B, T0 /* b + 2 * kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, C0, LDC + dgemm_dsolve_8x2 + PTR_ADDI KK, KK, -8 +.L_N3_M16: + PTR_SRAI I, M, 4 /* I = bm >> 4 */ + beq ZERO, I, .L_N3_M0 + + PTR_SRLI T0, M, 4 + PTR_SLLI T0, T0, 4 + PTR_ADDI T0, T0, -16 /* ((M & -16) - 16) */ + PTR_SLLI T0, T0, 3 + PTR_MUL AA, T0, K + PTR_ADD AA, A, AA + PTR_ADD CC, C, T0 +.align 5 +.L_N3_I1: + PTR_SLLI T0, KK, 4 + PTR_ADD B0, B, T0 + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO, C1, C0, LDC + PTR_SLLI T0, KK, 7 + PTR_ADD A0, AA, T0 + dgemm_dsolve_16x2 + PTR_ADDI I, I, -1 + PTR_ADDI KK, KK, -16 + PTR_ADDI CC, CC, -(16 * 8) + PTR_SLLI T0, K, 7 + PTR_SUB AA, AA, T0 + blt ZERO, I, .L_N3_I1 +.L_N3_M0: + PTR_SLLI T0, K, 3 + PTR_ALSL B, T0, B, 1 // b += 2 * k; + PTR_ALSL C, LDC, C, 1 // c += 2 * ldc +.L_N1: + // Single-column remainder: taken when bit 0 of N is set + andi J, N, 1 + beq ZERO, J, .L_N0 + + PTR_ADD KK, M, OFFSET + andi I, M, 15 + beq ZERO, I, .L_N1_M16 + andi I, M, 1 + beqz I, .L_N1_M2 +.L_N1_M1: + PTR_ADDI KK, KK, -1 + + PTR_ADDI T0, M, -1 + PTR_SLLI T0, T0, 3 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ALSL A0, KK, AA, 3 /* a + (m - 1) * k + kk */ + PTR_ADD CC, T0, C /* c + (m - 1) */ + + PTR_SLLI T0, KK, 3 + PTR_ADD B0, B, T0 /* b + kk */ + GADD , d, C0, CC, ZERO + // dgemm_dsolve_1x1 (inlined): scale the single C value by the A element at A0 and write it to C and to B. + // NOTE(review): unlike dgemm_dsolve_1x4 this path sets no L = K - KK and runs no GEMM update, and KK is decremented before the addresses are formed; + // that matches the other tails only if K - (M + OFFSET) <= 0 on entry - confirm with the callers. + GLD f, d, $f0, A0, 0, $f1, C0, 0 + GMUL f, d, $f1, $f1, $f0 + GST f, d, $f1, C0, 0, $f1, B0, 0 +.L_N1_M2: + andi I, M, 2 + beqz I, .L_N1_M4 + PTR_SRLI T0, M, 1 + PTR_SLLI T0, T0, 1 + PTR_ADDI T0, T0, -2 + PTR_SLLI T0, T0, 3 /* ((m & -2) - 2) */ + PTR_ADD CC, T0, C /* c + ((m & -2) - 2)*/ + PTR_SLLI T1, KK, 4 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -2) - 2) * k + 2 * kk */ + PTR_SLLI T0, KK, 3 + PTR_ADD B0, B, T0 /* b + kk */ + PTR_SUB L, K, KK
+ GADD , d, C0, CC, ZERO + dgemm_dsolve_2x1 + PTR_ADDI KK, KK, -2 +.L_N1_M4: + andi I, M, 4 + beqz I, .L_N1_M8 + PTR_SRLI T0, M, 2 + PTR_SLLI T0, T0, 2 + PTR_ADDI T0, T0, -4 + PTR_SLLI T0, T0, 3 /* ((m & -4) - 4) */ + PTR_ADD CC, T0, C /* c + ((m & -4) - 4)*/ + PTR_SLLI T1, KK, 5 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -4) - 4) * k + 4 * kk */ + PTR_SLLI T0, KK, 3 + PTR_ADD B0, B, T0 /* b + kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO + dgemm_dsolve_4x1 + PTR_ADDI KK, KK, -4 +.L_N1_M8: + andi I, M, 8 + beqz I, .L_N1_M16 + PTR_SRLI T0, M, 3 + PTR_SLLI T0, T0, 3 + PTR_ADDI T0, T0, -8 + PTR_SLLI T0, T0, 3 /* ((m & -8) - 8) */ + PTR_ADD CC, T0, C /* c + ((m & -8) - 8)*/ + PTR_SLLI T1, KK, 6 + PTR_MUL AA, T0, K + PTR_ADD AA, AA, A + PTR_ADD A0, AA, T1 /* a + ((m & -8) - 8) * k + 8 * kk */ + PTR_SLLI T0, KK, 3 + PTR_ADD B0, B, T0 /* b + kk */ + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO + dgemm_dsolve_8x1 + PTR_ADDI KK, KK, -8 +.L_N1_M16: + PTR_SRAI I, M, 4 /* I = bm >> 4 */ + beq ZERO, I, .L_N1_M0 + + PTR_SRLI T0, M, 4 + PTR_SLLI T0, T0, 4 + PTR_ADDI T0, T0, -16 /* ((M & -16)) - 16) */ + PTR_SLLI T0, T0, 3 + PTR_MUL AA, T0, K + PTR_ADD AA, A, AA + PTR_ADD CC, C, T0 +.align 5 +.L_N1_I1: + PTR_SLLI T0, KK, 3 + PTR_ADD B0, B, T0 + PTR_SUB L, K, KK + GADD , d, C0, CC, ZERO + PTR_SLLI T0, KK, 7 + PTR_ADD A0, AA, T0 + dgemm_dsolve_16x1 + PTR_ADDI I, I, -1 + PTR_ADDI KK, KK, -16 + PTR_ADDI CC, CC, -(16 * 8) + PTR_SLLI T0, K, 7 + PTR_SUB AA, AA, T0 + blt ZERO, I, .L_N1_I1 +.L_N1_M0: +.L_N0: + pop_if_used 26, 32 + jirl $r0, $r1, 0x0 + EPILOGUE diff --git a/kernel/loongarch64/dtrsm_kernel_LT_16x4_lasx.S b/kernel/loongarch64/dtrsm_kernel_LT_16x4_lasx.S new file mode 100644 index 000000000..0e2cacccf --- /dev/null +++ b/kernel/loongarch64/dtrsm_kernel_LT_16x4_lasx.S @@ -0,0 +1,959 @@ +/******************************************************************************* +Copyright (c) 2023, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*******************************************************************************/ +#define ASSEMBLER + +#include "common.h" +#include "loongarch64_asm.S" + +/********************************************************************* +* 2023/08/26 guxiwei +* UTEST : OK +* CTEST : OK +* TEST : OK +* +* +*********************************************************************/ + +/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b, + * FLOAT *c, BLASLONG ldc, BLASLONG offset) + */ + +#define M $r4 // param 1: bm +#define N $r5 // param 2: bn +#define K $r6 // param 3: bk +#define A $r7 // param 5: ba +#define B $r8 // param 6: bb +#define C $r9 // param 7: bc +#define LDC $r10 // param 8: ldc +#define OFFSET $r11 // param 9: offset + +/* Cycle control parameters */ +#define I $r13 +#define J $r14 +#define L $r15 +#define TL $r16 +/* Matrix address */ +#define A0 $r17 +#define B0 $r18 +#define C0 $r19 +#define C1 $r20 +#define C2 $r23 +#define C3 $r24 +#define T0 $r25 +#define T1 $r26 +#define T2 $r27 +#define KK $r28 +#define AA $r29 +#define CC $r30 +#define BB B0 +#undef ZERO +#define ZERO $r0 + +#define U0 $xr0 +#define U1 $xr1 +#define U2 $xr2 +#define U3 $xr3 +#define U4 $xr4 +#define U5 $xr5 +#define U6 $xr6 +#define U7 $xr7 +#define U8 $xr8 +#define U9 $xr9 +#define U10 $xr10 +#define U11 $xr11 +#define U12 $xr12 +#define U13 $xr13 +#define U14 $xr14 +#define U15 $xr15 +#define D0 $xr16 +#define D1 $xr17 +#define D2 $xr18 +#define D3 $xr19 +#define D4 $xr20 +#define D5 $xr21 +#define D6 $xr22 +#define D7 $xr23 +#define D8 $xr24 +#define D9 $xr25 +#define D10 $xr26 +#define D11 $xr27 +#define D12 $xr28 +#define D13 $xr29 +#define D14 $xr30 +#define D15 $xr31 +#define G0 D0 +#define G1 D1 +#define G2 D2 +#define G3 D3 +#define G4 D4 +#define G5 D5 +#define G6 D6 +#define G7 D7 +#define G8 D8 +#define G9 D9 +#define G10 D10 +#define G11 D11 +#define G12 D12 +#define G13 D13 +#define G14 D14 +#define G15 D15 + +/* Prefetch interval */ +#define 
A_PRE 0x400 +#define B_PRE 0x100 + +#include "dtrsm_kernel_macro.S" + +.macro ldrepl_macro start, end, stride +// Load Ux (x = 0...15) +// Recursive expansion: emits GLDREPL for $xr<start> at A0 offset stride * 8, then re-invokes itself +// with start and stride each advanced by one, stopping once start exceeds end. +// NOTE(review): the %expr arguments presumably need .altmacro to be in effect - confirm where it is enabled. +.if \start <= \end + GLDREPL xv, d, $xr\start, A0, \stride * 8 + ldrepl_macro %start + 1, \end, %stride + 1 +.endif +.endm +.macro nmsub_macro start0, end0, start1, reg +// Gx -= reg * Ux +// Recursive expansion: one xvfnmsub.d per register pair ($xr<start0>, $xr<start1>), with both indices +// advanced by one on each step, stopping once start0 exceeds end0. +.if \start0 <= \end0 + xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0 + nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg +.endif +.endm +.macro B_st_macro start, end, stride, N +// Store Gx(x = 16...31) +// Recursive expansion: writes register number <start> to B0 at slot <stride>, where a slot is +// 0x20 bytes (xvst) for N = 4, 0x10 bytes (vst) for N = 2 and 0x08 bytes (fst.d) for N = 1. +.if \start <= \end +.if \N == 4 + xvst $xr\start, B0, \stride * 0x20 +.elseif \N == 2 + vst $vr\start, B0, \stride * 0x10 +.elseif \N == 1 + fst.d $f\start, B0, \stride * 0x08 +.endif + B_st_macro %start + 1, \end, %stride + 1, \N +.endif +.endm + +.macro dsolve_16 N +// The data layout of C (4x16) is as follows (store 4 data in each register): +// U0 U1 U2 U3 +// U4 U5 U6 U7 +// U8 U9 U10 U11 +// U12 U13 U14 U15 +// The first step is to transpose the result of C + GTRANSPOSE4x4_D U3, U7, U11, U15, G12, G13, G14, G15, D0, D1 + GTRANSPOSE4x4_D U2, U6, U10, U14, G8, G9, G10, G11, D0, D1 + GTRANSPOSE4x4_D U1, U5, U9, U13, G4, G5, G6, G7, U3, U7 + GTRANSPOSE4x4_D U0, U4, U8, U12, G0, G1, G2, G3, U3, U7 +// Now we have the following memory layout of C: +// 0 1 2 3 ... 15 +// 0 | | | | | | | +// 1 | G0 | G1 | G2 | G3 | ... | G15 | +// 2 | | | | | | | +// 3 | | | | | | | +// Next we are going to process matrix A with a size of 16x16, +// using only the upper triangular portion. The memory layout of +// matrix A is as follows, quite large.
+//0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +// 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 +// 34 35 36 37 38 39 40 41 42 43 44 45 46 47 +// 51 52 53 54 55 56 57 58 59 60 61 62 63 +// 68 69 70 71 72 73 74 75 76 77 78 79 +// 85 86 87 88 89 90 91 92 93 94 95 +// 102 103 104 105 106 107 108 109 110 111 +// 119 120 121 122 123 124 125 126 127 +// 136 137 138 139 140 141 142 143 +// 153 154 155 156 157 158 159 +// 170 171 172 173 174 175 +// 187 188 189 190 191 +// 204 205 206 207 +// 221 222 223 +// 238 239 +// 255 +// Sequentially extract data from A in row order +// Load 0 + ldrepl_macro 0, 15, 0 + GMUL xvf, d, G0, G0, U0 + nmsub_macro 17, 31, 1, G0 + PTR_ADDI A0, A0, 17 * 8 +// Load 1 + ldrepl_macro 1, 15, 0 + GMUL xvf, d, G1, G1, U1 + nmsub_macro 18, 31, 2, G1 + PTR_ADDI A0, A0, 17 * 8 +// Load 2 + ldrepl_macro 2, 15, 0 + GMUL xvf, d, G2, G2, U2 + nmsub_macro 19, 31, 3, G2 + PTR_ADDI A0, A0, 17 * 8 +// Load 3 + ldrepl_macro 3, 15, 0 + GMUL xvf, d, G3, G3, U3 + nmsub_macro 20, 31, 4, G3 + PTR_ADDI A0, A0, 17 * 8 +// Load 4 + ldrepl_macro 4, 15, 0 + GMUL xvf, d, G4, G4, U4 + nmsub_macro 21, 31, 5, G4 + PTR_ADDI A0, A0, 17 * 8 +// Load 5 + ldrepl_macro 5, 15, 0 + GMUL xvf, d, G5, G5, U5 + nmsub_macro 22, 31, 6, G5 + PTR_ADDI A0, A0, 17 * 8 +// Load 6 + ldrepl_macro 6, 15, 0 + GMUL xvf, d, G6, G6, U6 + nmsub_macro 23, 31, 7, G6 + PTR_ADDI A0, A0, 17 * 8 +// Load 7 + ldrepl_macro 7, 15, 0 + GMUL xvf, d, G7, G7, U7 + nmsub_macro 24, 31, 8, G7 + PTR_ADDI A0, A0, 17 * 8 +// Load 8 + ldrepl_macro 8, 15, 0 + GMUL xvf, d, G8, G8, U8 + nmsub_macro 25, 31, 9, G8 + PTR_ADDI A0, A0, 17 * 8 +// Load 9 + ldrepl_macro 9, 15, 0 + GMUL xvf, d, G9, G9, U9 + nmsub_macro 26, 31, 10, G9 + PTR_ADDI A0, A0, 17 * 8 +// Load 10 + ldrepl_macro 10, 15, 0 + GMUL xvf, d, G10, G10, U10 + nmsub_macro 27, 31, 11, G10 + PTR_ADDI A0, A0, 17 * 8 +// Load 11 + ldrepl_macro 11, 15, 0 + GMUL xvf, d, G11, G11, U11 + nmsub_macro 28, 31, 12, G11 + PTR_ADDI A0, A0, 17 * 8 +// Load 12 + ldrepl_macro 12, 15, 0 + 
GMUL xvf, d, G12, G12, U12 + nmsub_macro 29, 31, 13, G12 + PTR_ADDI A0, A0, 17 * 8 +// Load 13 + ldrepl_macro 13, 15, 0 + GMUL xvf, d, G13, G13, U13 + nmsub_macro 30, 31, 14, G13 + PTR_ADDI A0, A0, 17 * 8 +// Load 14 + ldrepl_macro 14, 15, 0 + GMUL xvf, d, G14, G14, U14 + nmsub_macro 31, 31, 15, G14 + PTR_ADDI A0, A0, 17 * 8 +// Load 15 + ldrepl_macro 15, 15, 0 + GMUL xvf, d, G15, G15, U15 +// Finally, we can store the result. +// B is stored sequentially; C is transposed first and then stored. + B_st_macro 16, 31, 0, \N + GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1 + GTRANSPOSE4x4_D G4, G5, G6, G7, G4, G5, G6, G7, U0, U1 + GTRANSPOSE4x4_D G8, G9, G10, G11, G8, G9, G10, G11, U0, U1 + GTRANSPOSE4x4_D G12, G13, G14, G15, G12, G13, G14, G15, U0, U1 +.if \N == 4 + GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60, \ + G1, C1, 0x00, G5, C1, 0x20, G9, C1, 0x40, G13, C1, 0x60, \ + G2, C2, 0x00, G6, C2, 0x20, G10, C2, 0x40, G14, C2, 0x60, \ + G3, C3, 0x00, G7, C3, 0x20, G11, C3, 0x40, G15, C3, 0x60 +.elseif \N == 2 + GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60, \ + G1, C1, 0x00, G5, C1, 0x20, G9, C1, 0x40, G13, C1, 0x60 +.elseif \N == 1 + GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60 +.endif +.endm + +// dgemm_dsolve_16x4: optional GEMM update followed by the 16-wide, 4-column solve. +// NOTE(review): the skip test below reads KK, whereas dgemm_dsolve_8x4 and dgemm_dsolve_4x4 in this file test L; +// the two agree only if the caller has set L equal to KK beforehand - confirm at the call site. +.macro dgemm_dsolve_16x4 + bge ZERO, KK, .L_dsolve_16x4_load + dgemm_16x4 + b .L_dsolve_16x4 +.L_dsolve_16x4_load: + // Load C + GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 + GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 + GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60 + GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 +/********************** solver ******************/ +.L_dsolve_16x4: + dsolve_16 4 +.endm + +.macro dsolve_8 N +// The data layout of C (4x8) is as follows (store 4 data in each register): +// U0 U1 +// U2 U3 +// U4 U5 +// U6 U7 +// The first step is to transpose the result of C + GTRANSPOSE4x4_D U1, U3, U5, U7,
G4, G5, G6, G7, G8, G9 + GTRANSPOSE4x4_D U0, U2, U4, U6, G0, G1, G2, G3, G8, G9 +// Now we have the following memory layout of C: +// 0 1 2 3 ... 7 +// 0 | | | | | | | +// 1 | G0 | G1 | G2 | G3 | ... | G7 | +// 2 | | | | | | | +// 3 | | | | | | | +// Next we are going to process matrix A with a size of 8x8, +// using only the upper triangular portion. The memory layout of +// matrix A is as follows: +//0 1 2 3 4 5 6 7 +// 9 10 11 12 13 14 15 +// 18 19 20 21 22 23 +// 27 28 29 30 31 +// 36 37 38 39 +// 45 46 47 +// 54 55 +// 63 +// Sequentially extract data from A in row order +// Load 0 + ldrepl_macro 0, 7, 0 + GMUL xvf, d, G0, G0, U0 + nmsub_macro 17, 23, 1, G0 + PTR_ADDI A0, A0, 9 * 8 +// Load 1 + ldrepl_macro 1, 7, 0 + GMUL xvf, d, G1, G1, U1 + nmsub_macro 18, 23, 2, G1 + PTR_ADDI A0, A0, 9 * 8 +// Load 2 + ldrepl_macro 2, 7, 0 + GMUL xvf, d, G2, G2, U2 + nmsub_macro 19, 23, 3, G2 + PTR_ADDI A0, A0, 9 * 8 +// Load 3 + ldrepl_macro 3, 7, 0 + GMUL xvf, d, G3, G3, U3 + nmsub_macro 20, 23, 4, G3 + PTR_ADDI A0, A0, 9 * 8 +// Load 4 + ldrepl_macro 4, 7, 0 + GMUL xvf, d, G4, G4, U4 + nmsub_macro 21, 23, 5, G4 + PTR_ADDI A0, A0, 9 * 8 +// Load 5 + ldrepl_macro 5, 7, 0 + GMUL xvf, d, G5, G5, U5 + nmsub_macro 22, 23, 6, G5 + PTR_ADDI A0, A0, 9 * 8 +// Load 6 + ldrepl_macro 6, 7, 0 + GMUL xvf, d, G6, G6, U6 + nmsub_macro 23, 23, 7, G6 + PTR_ADDI A0, A0, 9 * 8 +// Load 7 + ldrepl_macro 7, 7, 0 + GMUL xvf, d, G7, G7, U7 +// Finally, We can store the result. 
+// For B, stored sequentially, and C, first transpose and then store + B_st_macro 16, 23, 0, \N + GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1 + GTRANSPOSE4x4_D G4, G5, G6, G7, G4, G5, G6, G7, U0, U1 +.if \N == 4 + GST xv, , G0, C0, 0x00, G4, C0, 0x20, \ + G1, C1, 0x00, G5, C1, 0x20, \ + G2, C2, 0x00, G6, C2, 0x20, \ + G3, C3, 0x00, G7, C3, 0x20 +.elseif \N == 2 + GST xv, , G0, C0, 0x00, G4, C0, 0x20, \ + G1, C1, 0x00, G5, C1, 0x20 +.elseif \N == 1 + GST xv, , G0, C0, 0x00, G4, C0, 0x20 +.endif +.endm + +.macro dgemm_dsolve_8x4 + bge ZERO, L, .L_dsolve_8x4_load + dgemm_8x4 + b .L_dsolve_8x4 +.L_dsolve_8x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 + + /* Load C2 */ + xvld U4, C2, 0x00 + xvld U5, C2, 0x20 + + /* Load C3 */ + xvld U6, C3, 0x00 + xvld U7, C3, 0x20 +/********* solver *********/ +.L_dsolve_8x4: + dsolve_8 4 +.endm + +.macro dsolve_4 N +// The data layout of C (4x4) is as follows (store 4 data in each register): +// U0 +// U1 +// U2 +// U3 +// The first step is to transpose the result of C + GTRANSPOSE4x4_D U0, U1, U2, U3, G0, G1, G2, G3, G4, G5 +// Now we have the following memory layout of C: +// 0 1 2 3 +// 0 | | | | | +// 1 | G0 | G1 | G2 | G3 | +// 2 | | | | | +// 3 | | | | | +// Next we are going to process matrix A with a size of 4x4, +// using only the upper triangular portion. The memory layout of +// matrix A is as follows: +//0 1 2 3 +// 5 6 7 +// 10 11 +// 15 +// Sequentially extract data from A in row order +// Load 0 + ldrepl_macro 0, 3, 0 + GMUL xvf, d, G0, G0, U0 + nmsub_macro 17, 19, 1, G0 + PTR_ADDI A0, A0, 5 * 8 +// Load 1 + ldrepl_macro 1, 3, 0 + GMUL xvf, d, G1, G1, U1 + nmsub_macro 18, 19, 2, G1 + PTR_ADDI A0, A0, 5 * 8 +// Load 2 + ldrepl_macro 2, 3, 0 + GMUL xvf, d, G2, G2, U2 + nmsub_macro 19, 19, 3, G2 + PTR_ADDI A0, A0, 5 * 8 +// Load 3 + ldrepl_macro 3, 3, 0 + GMUL xvf, d, G3, G3, U3 +// Finally, We can store the result. 
+// For B, stored sequentially, and C, first transpose and then store + B_st_macro 16, 19, 0, \N + GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1 +.if \N == 4 + GST xv, , G0, C0, 0x00, G1, C1, 0x00, G2, C2, 0x00, G3, C3, 0x00 +.elseif \N == 2 + GST xv, , G0, C0, 0x00, G1, C1, 0x00 +.elseif \N == 1 + GST xv, , G0, C0, 0x00 +.endif +.endm + +.macro dgemm_dsolve_4x4 + bge ZERO, L, .L_dsolve_4x4_load + dgemm_4x4 + b .L_dsolve_4x4 +.L_dsolve_4x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 +/************** solver *****************/ +.L_dsolve_4x4: + dsolve_4 4 +.endm + +.macro dsolve_2 N +// Transpose + GSBUTTERFLY xv, d, G0, G1, U1, U0 +// Now we have the following memory layout of C: +// 0 1 +// 0 | | | +// 1 | G0 | G1 | +// 2 | | | +// 3 | | | +// Next we are going to process matrix A with a size of 2x2, +// using only the upper triangular portion. The memory layout of +// matrix A is as follows: +//0 1 +// 3 +// Sequentially extract data from A in row order +// Load 0 + ldrepl_macro 0, 1, 0 + GMUL xvf, d, G0, G0, U0 + nmsub_macro 17, 17, 1, G0 + PTR_ADDI A0, A0, 3 * 8 +// Load 1 + ldrepl_macro 1, 1, 0 + GMUL xvf, d, G1, G1, U1 +// Finally, We can store the result. 
+// For B, stored sequentially, and C, first transpose and then store + B_st_macro 16, 17, 0, \N + GSBUTTERFLY xv, d, U0, U1, G1, G0 +.if \N == 4 + vst $vr0, C0, 0x00 + vst $vr1, C1, 0x00 + xvstelm.d U0, C2, 0x00, 0x02 + xvstelm.d U1, C3, 0x00, 0x02 + xvstelm.d U0, C2, 0x08, 0x03 + xvstelm.d U1, C3, 0x08, 0x03 +.elseif \N == 2 + vst $vr0, C0, 0x00 + vst $vr1, C1, 0x00 +.elseif \N == 1 + vst $vr0, C0, 0x00 +.endif +.endm + +.macro dgemm_dsolve_2x4 + bge ZERO, L, .L_dsolve_2x4_load + dgemm_2x4 + b .L_dsolve_2x4 +.L_dsolve_2x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 + + xvpermi.q U0, U2, 0x02 + xvpermi.q U1, U3, 0x02 +/********************** solver ******************/ +.L_dsolve_2x4: + dsolve_2 4 +.endm + +.macro dgemm_dsolve_1x4 + bge ZERO, L, .L_dsolve_1x4_load + dgemm_1x4 + b .L_dsolve_1x4 +.L_dsolve_1x4_load: + // Load C + fld.d $f0, C0, 0x00 + fld.d $f1, C1, 0x00 + fld.d $f2, C2, 0x00 + fld.d $f3, C3, 0x00 + xvinsve0.d U0, U1, 0x01 + xvinsve0.d U0, U2, 0x02 + xvinsve0.d U0, U3, 0x03 +.L_dsolve_1x4: + GLDREPL xv, d, D0, A0, 0x00 + GMUL xvf, d, U0, U0, D0 + // Store C + xvstelm.d U0, C0, 0x00, 0x00 + xvstelm.d U0, C1, 0x00, 0x01 + xvstelm.d U0, C2, 0x00, 0x02 + xvstelm.d U0, C3, 0x00, 0x03 + // Store B + xvst U0, B0, 0x00 +.endm + +.macro dgemm_dsolve_16x2 + bge ZERO, L, .L_dsolve_16x2_load + dgemm_16x2 + b .L_dsolve_16x2 +.L_dsolve_16x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 + /* Load C1 */ + xvld U4, C1, 0x00 + xvld U5, C1, 0x20 + xvld U6, C1, 0x40 + xvld U7, C1, 0x60 +.L_dsolve_16x2: + dsolve_16 2 +.endm + +.macro dgemm_dsolve_8x2 + bge ZERO, L, .L_dsolve_8x2_load + dgemm_8x2 + b .L_dsolve_8x2 +.L_dsolve_8x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 +.L_dsolve_8x2: + dsolve_8 2 +.endm + +.macro dgemm_dsolve_4x2 + bge 
ZERO, L, .L_dsolve_4x2_load + dgemm_4x2 + b .L_dsolve_4x2 +.L_dsolve_4x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 +.L_dsolve_4x2: + dsolve_4 2 +.endm + +.macro dgemm_dsolve_1x2 + bge ZERO, L, .L_dsolve_1x2_load + dgemm_1x2 + b .L_dsolve_1x2 +.L_dsolve_1x2_load: + // Load C + fld.d $f0, C0, 0x00 + fld.d $f1, C1, 0x00 + xvinsve0.d U0, U1, 0x01 +.L_dsolve_1x2: + GLDREPL xv, d, D0, A0, 0x00 + GMUL xvf, d, U0, U0, D0 + // Store C + xvstelm.d U0, C0, 0x00, 0x00 + xvstelm.d U0, C1, 0x00, 0x01 + // Store B + vst $vr0, B0, 0x00 +.endm + +.macro dgemm_dsolve_2x2 + bge ZERO, L, .L_dsolve_2x2_load + dgemm_2x2 + b .L_dsolve_2x2 +.L_dsolve_2x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 +.L_dsolve_2x2: + dsolve_2 2 +.endm + +.macro dgemm_dsolve_16x1 + bge ZERO, L, .L_dsolve_16x1_load + dgemm_16x1 + b .L_dsolve_16x1 +.L_dsolve_16x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 +.L_dsolve_16x1: + dsolve_16 1 +.endm + +.macro dgemm_dsolve_8x1 + bge ZERO, L, .L_dsolve_8x1_load + dgemm_8x1 + b .L_dsolve_8x1 +.L_dsolve_8x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 +.L_dsolve_8x1: + dsolve_8 1 +.endm + +.macro dgemm_dsolve_4x1 + bge ZERO, L, .L_dsolve_4x1_load + dgemm_4x1 + b .L_dsolve_4x1 +.L_dsolve_4x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 +.L_dsolve_4x1: + dsolve_4 1 +.endm + +.macro dgemm_dsolve_2x1 + bge ZERO, L, .L_dsolve_2x1_load + dgemm_2x1 + b .L_dsolve_2x1 +.L_dsolve_2x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 +.L_dsolve_2x1: + dsolve_2 1 +.endm + +.macro dgemm_dsolve_1x1 + bge ZERO, L, .L_dsolve_1x1_load + dgemm_1x1 + b .L_dsolve_1x1 +.L_dsolve_1x1_load: + // Load C + fld.d $f0, C0, 0x00 +.L_dsolve_1x1: + GLDREPL xv, d, D0, A0, 0x00 + GMUL xvf, d, U0, U0, D0 + // Store C + xvstelm.d U0, C0, 0x00, 0x00 + // Store B + xvstelm.d U0, B0, 0x00, 0x00 +.endm + + PROLOGUE + push_if_used 26, 32 + PTR_SLLI LDC, LDC, 3 + /* if (!(N >> 
2)) goto L_N3 */ + PTR_SRAI J, N, 2 /* J = bn >> 2 */ + andi N, N, 0x03 + beq ZERO, J, .L_N3 +.align 5 +.L_J1: + PTR_ADDI J, J, -1 + move KK, OFFSET + move AA, A + move CC, C + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_M15 +.align 4 +.L_I1: + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_16x4 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADDI KK, KK, 0x10 // kk += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_I1 +.L_M15: + andi I, M, 8 + beqz I, .L_M7 +.L_M8: + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_8x4 + PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADDI KK, KK, 0x08 // kk += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_M7: + andi I, M, 4 + beqz I, .L_M3 +.L_M4: + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_4x4 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC, 0x20 // cc += 4 + PTR_ADDI KK, KK, 0x04 // kk += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_M3: + andi I, M, 2 + beqz I, .L_M1 +.L_M2: + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_2x4 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADDI KK, KK, 0x02 // kk += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_M1: + andi I, M, 1 + beqz I, .L_M0 + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_1x4 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADDI KK, KK, 0x01 // kk += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_M0: + PTR_SLLI T0, K, 5 + PTR_SLLI T1, LDC, 2 + PTR_ADD B, B, T0 // b += 4 * k + PTR_ADD C, C, T1 // c += 4 * ldc + bnez J, .L_J1 +.L_N3: + andi J, N, 2 + beq ZERO, J, .L_N1 +.L_N2: + move KK, OFFSET + move AA, A + move CC, C + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_N2_M15 +.align 4 
+.L_N2_I1: + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_16x2 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADDI KK, KK, 0x10 // kk += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_N2_I1 +.L_N2_M15: + andi I, M, 8 + beqz I, .L_N2_M7 +.L_N2_M8: + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_8x2 + PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADDI KK, KK, 0x08 // kk += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_N2_M7: + andi I, M, 4 + beqz I, .L_N2_M3 +.L_N2_M4: + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_4x2 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC, 0x20 // cc += 4 + PTR_ADDI KK, KK, 0x04 // kk += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_N2_M3: + andi I, M, 2 + beqz I, .L_N2_M1 +.L_N2_M2: + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_2x2 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADDI KK, KK, 0x02 // kk += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_N2_M1: + andi I, M, 1 + beqz I, .L_N2_M0 + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_1x2 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADDI KK, KK, 0x01 // kk += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_N2_M0: + PTR_SLLI T0, K, 4 + PTR_SLLI T1, LDC, 1 + PTR_ADD B, B, T0 // b += 2 * k + PTR_ADD C, C, T1 // c += 2 * ldc +.L_N1: + andi J, N, 1 + beq ZERO, J, .L_N0 + + move KK, OFFSET + move AA, A + move CC, C + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_N1_M15 +.align 4 +.L_N1_I1: + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_16x1 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADDI KK, KK, 0x10 // kk += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_N1_I1 +.L_N1_M15: + andi I, M, 8 + beqz I, .L_N1_M7 +.L_N1_M8: + GADD , d, C0, CC, ZERO 
+ move A0, AA + move B0, B + move L, KK + dgemm_dsolve_8x1 + PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADDI KK, KK, 0x08 // kk += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_N1_M7: + andi I, M, 4 + beqz I, .L_N1_M3 +.L_N1_M4: + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_4x1 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC, 0x20 // cc += 4 + PTR_ADDI KK, KK, 0x04 // kk += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_N1_M3: + andi I, M, 2 + beqz I, .L_N1_M1 +.L_N1_M2: + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_2x1 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADDI KK, KK, 0x02 // kk += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_N1_M1: + andi I, M, 1 + beqz I, .L_N1_M0 + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_1x1 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADDI KK, KK, 0x01 // kk += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_N1_M0: +.L_N0: + pop_if_used 26, 32 + jirl $r0, $r1, 0x0 + EPILOGUE diff --git a/kernel/loongarch64/dtrsm_kernel_RN_16x4_lasx.S b/kernel/loongarch64/dtrsm_kernel_RN_16x4_lasx.S new file mode 100644 index 000000000..421339736 --- /dev/null +++ b/kernel/loongarch64/dtrsm_kernel_RN_16x4_lasx.S @@ -0,0 +1,882 @@ +/******************************************************************************* +Copyright (c) 2023, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ +#define ASSEMBLER + +#include "common.h" +#include "loongarch64_asm.S" + +/********************************************************************* +* 2023/09/26 guxiwei +* UTEST : OK +* CTEST : OK +* TEST : OK +* +* +*********************************************************************/ + +/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b, + * FLOAT *c, BLASLONG ldc, BLASLONG offset) + */ + +#define M $r4 // param 1: bm +#define N $r5 // param 2: bn +#define K $r6 // param 3: bk +#define A $r7 // param 5: ba +#define B $r8 // param 6: bb +#define C $r9 // param 7: bc +#define LDC $r10 // param 8: ldc +#define OFFSET $r11 // param 9: offset + +/* Cycle control parameters */ +#define I $r13 +#define J $r14 +#define L $r15 +#define TL $r16 +/* Matrix address */ +#define A0 $r17 +#define B0 $r18 +#define C0 $r19 +#define C1 $r20 +#define C2 $r23 +#define C3 $r24 +#define T0 $r25 +#define T1 $r26 +#define T2 $r27 +#define 
KK $r28 +#define AA $r29 +#define CC $r30 +#define BB B0 +#undef ZERO +#define ZERO $r0 + +#define U0 $xr0 +#define U1 $xr1 +#define U2 $xr2 +#define U3 $xr3 +#define U4 $xr4 +#define U5 $xr5 +#define U6 $xr6 +#define U7 $xr7 +#define U8 $xr8 +#define U9 $xr9 +#define U10 $xr10 +#define U11 $xr11 +#define U12 $xr12 +#define U13 $xr13 +#define U14 $xr14 +#define U15 $xr15 +#define D0 $xr16 +#define D1 $xr17 +#define D2 $xr18 +#define D3 $xr19 +#define D4 $xr20 +#define D5 $xr21 +#define D6 $xr22 +#define D7 $xr23 +#define D8 $xr24 +#define D9 $xr25 +#define D10 $xr26 +#define D11 $xr27 +#define D12 $xr28 +#define D13 $xr29 +#define D14 $xr30 +#define D15 $xr31 +#define G0 D0 +#define G1 D1 +#define G2 D2 +#define G3 D3 +#define G4 D4 +#define G5 D5 +#define G6 D6 +#define G7 D7 +#define G8 D8 +#define G9 D9 +#define G10 D10 +#define G11 D11 +#define G12 D12 +#define G13 D13 +#define G14 D14 +#define G15 D15 + +/* Prefetch interval */ +#define A_PRE 0x400 +#define B_PRE 0x100 + +#include "dtrsm_kernel_macro.S" + +.macro ldrepl_macro start, end, stride +// Load registers $xr[start..end] via GLDREPL, one double each, read from B0 at byte offsets stride * 8, (stride plus 1) * 8, ... (callers in this file pass the D registers, 16..25) +.if \start <= \end + GLDREPL xv, d, $xr\start, B0, \stride * 8 + ldrepl_macro %start + 1, \end, %stride + 1 +.endif +.endm + +.macro nmsub_macro start0, end0, start1, reg +// $xr[start0..end0] -= reg * $xr[start1..], register by register (start1 advances in step with start0) +.if \start0 <= \end0 + xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0 + nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg +.endif +.endm + +.macro A_st_macro start, end, stride, N +// Store registers start..end to A0 back to back, N doubles per register (N=4: xvst 0x20 bytes, N=2: vst 0x10 bytes, N=1: fst.d 0x08 bytes) +.if \start <= \end +.if \N == 4 + xvst $xr\start, A0, \stride * 0x20 +.elseif \N == 2 + vst $vr\start, A0, \stride * 0x10 +.elseif \N == 1 + fst.d $f\start, A0, \stride * 0x08 +.endif + A_st_macro %start + 1, \end, %stride + 1, \N +.endif +.endm + +.macro dsolve_16x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion. 
The memory layout of +// matrix B is as follows: +//0 1 2 3 +// 5 6 7 +// 10 11 +// 15 +// Sequentially extract data from B in row order + ldrepl_macro 16, 19, 0 + GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 + ldrepl_macro 20, 22, 5 + nmsub_macro 4, 7, 0, D1 + ldrepl_macro 23, 24, 10 + GMUL xvf, d, U4, D4, U4, U5, D4, U5, U6, D4, U6, U7, D4, U7 + ldrepl_macro 25, 25, 15 + nmsub_macro 8, 11, 0, D2 + nmsub_macro 8, 11, 4, D5 + GMUL xvf, d, U8, D7, U8, U9, D7, U9, U10, D7, U10, U11, D7, U11 + nmsub_macro 12, 15, 0, D3 + nmsub_macro 12, 15, 4, D6 + nmsub_macro 12, 15, 8, D8 + GMUL xvf, d, U12, D9, U12, U13, D9, U13, U14, D9, U14, U15, D9, U15 +// Store A + A_st_macro 0, 15, 0, 4 +// Store C + GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \ + U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60, \ + U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60, \ + U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 +.endm + +.macro dsolve_16x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 1 +// 3 +// Sequentially extract data from B in row order + ldrepl_macro 16, 17, 0 + GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 + ldrepl_macro 18, 18, 3 + nmsub_macro 4, 7, 0, D1 + GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7 +// Store A + A_st_macro 0, 7, 0, 4 +// Store C + GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \ + U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 +.endm + +.macro dsolve_8x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion. 
The memory layout of +// matrix B is as follows: +//0 1 2 3 +// 5 6 7 +// 10 11 +// 15 +// Sequentially extract data from B in row order + ldrepl_macro 16, 19, 0 + GMUL xvf, d, U0, D0, U0, U1, D0, U1 + ldrepl_macro 20, 22, 5 + nmsub_macro 2, 3, 0, D1 + ldrepl_macro 23, 24, 10 + GMUL xvf, d, U2, D4, U2, U3, D4, U3 + ldrepl_macro 25, 25, 15 + nmsub_macro 4, 5, 0, D2 + nmsub_macro 4, 5, 2, D5 + GMUL xvf, d, U4, D7, U4, U5, D7, U5 + nmsub_macro 6, 7, 0, D3 + nmsub_macro 6, 7, 2, D6 + nmsub_macro 6, 7, 4, D8 + GMUL xvf, d, U6, D9, U6, U7, D9, U7 +// Store A + A_st_macro 0, 7, 0, 4 +// Store C + GST xv, , U0, C0, 0x00, U1, C0, 0x20, \ + U2, C1, 0x00, U3, C1, 0x20, \ + U4, C2, 0x00, U5, C2, 0x20, \ + U6, C3, 0x00, U7, C3, 0x20 +.endm + +.macro dsolve_8x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 1 +// 3 +// Sequentially extract data from B in row order + ldrepl_macro 16, 17, 0 + GMUL xvf, d, U0, D0, U0, U1, D0, U1 + ldrepl_macro 18, 18, 3 + nmsub_macro 2, 3, 0, D1 + GMUL xvf, d, U2, D2, U2, U3, D2, U3 +// Store A + A_st_macro 0, 3, 0, 4 +// Store C + GST xv, , U0, C0, 0x00, U1, C0, 0x20, \ + U2, C1, 0x00, U3, C1, 0x20 +.endm + +.macro dsolve_4x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion. 
The memory layout of +// matrix B is as follows: +//0 1 2 3 +// 5 6 7 +// 10 11 +// 15 +// Sequentially extract data from B in row order + ldrepl_macro 16, 19, 0 + GMUL xvf, d, U0, D0, U0 + ldrepl_macro 20, 22, 5 + nmsub_macro 1, 1, 0, D1 + ldrepl_macro 23, 24, 10 + GMUL xvf, d, U1, D4, U1 + ldrepl_macro 25, 25, 15 + nmsub_macro 2, 2, 0, D2 + nmsub_macro 2, 2, 1, D5 + GMUL xvf, d, U2, D7, U2 + nmsub_macro 3, 3, 0, D3 + nmsub_macro 3, 3, 1, D6 + nmsub_macro 3, 3, 2, D8 + GMUL xvf, d, U3, D9, U3 +// Store A + A_st_macro 0, 3, 0, 4 +// Store C + GST xv, , U0, C0, 0x00, U1, C1, 0x00, U2, C2, 0x00, U3, C3, 0x00 +.endm + +.macro dsolve_4x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 1 +// 3 +// Sequentially extract data from B in row order + ldrepl_macro 16, 17, 0 + GMUL xvf, d, U0, D0, U0 + ldrepl_macro 18, 18, 3 + nmsub_macro 1, 1, 0, D1 + GMUL xvf, d, U1, D2, U1 +// Store A + A_st_macro 0, 1, 0, 4 +// Store C + GST xv, , U0, C0, 0x00, U1, C1, 0x00 +.endm + +.macro dsolve_2x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 1 2 3 +// 5 6 7 +// 10 11 +// 15 +// Sequentially extract data from B in row order + ldrepl_macro 16, 19, 0 + GMUL xvf, d, U0, D0, U0 + ldrepl_macro 20, 22, 5 + nmsub_macro 1, 1, 0, D1 + ldrepl_macro 23, 24, 10 + GMUL xvf, d, U1, D4, U1 + + ldrepl_macro 25, 25, 15 + nmsub_macro 2, 2, 0, D2 + nmsub_macro 2, 2, 1, D5 + GMUL xvf, d, U2, D7, U2 + nmsub_macro 3, 3, 0, D3 + nmsub_macro 3, 3, 1, D6 + nmsub_macro 3, 3, 2, D8 + GMUL xvf, d, U3, D9, U3 +// Store A + A_st_macro 0, 3, 0, 2 +// Store C + GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00, $vr2, C2, 0x00, $vr3, C3, 0x00, +.endm + +.macro dsolve_2x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. 
The memory layout of +// matrix B is as follows: +//0 1 +// 3 +// Sequentially extract data from B in row order + ldrepl_macro 16, 17, 0 + GMUL xvf, d, U0, D0, U0 + ldrepl_macro 18, 18, 3 + nmsub_macro 1, 1, 0, D1 + GMUL xvf, d, U1, D2, U1 +// Store A + A_st_macro 0, 1, 0, 2 +// Store C + GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00 +.endm + +.macro dsolve_1x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 1 2 3 +// 5 6 7 +// 10 11 +// 15 +// Sequentially extract data from B in row order + ldrepl_macro 16, 19, 0 + GMUL xvf, d, U0, D0, U0 + ldrepl_macro 20, 22, 5 + nmsub_macro 1, 1, 0, D1 + ldrepl_macro 23, 24, 10 + GMUL xvf, d, U1, D4, U1 + + ldrepl_macro 25, 25, 15 + nmsub_macro 2, 2, 0, D2 + nmsub_macro 2, 2, 1, D5 + GMUL xvf, d, U2, D7, U2 + nmsub_macro 3, 3, 0, D3 + nmsub_macro 3, 3, 1, D6 + nmsub_macro 3, 3, 2, D8 + GMUL xvf, d, U3, D9, U3 +// Store A + A_st_macro 0, 3, 0, 1 +// Store C + GST f, d, $f0, C0, 0x00, $f1, C1, 0x00, $f2, C2, 0x00, $f3, C3, 0x00, +.endm + +.macro dsolve_1x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. 
The memory layout of +// matrix B is as follows: +//0 1 +// 3 +// Sequentially extract data from B in row order + ldrepl_macro 16, 17, 0 + GMUL xvf, d, U0, D0, U0 + ldrepl_macro 18, 18, 3 + nmsub_macro 1, 1, 0, D1 + GMUL xvf, d, U1, D2, U1 +// Store A + A_st_macro 0, 1, 0, 1 +// Store C + GST f, d, $f0, C0, 0x00, $f1, C1, 0x00 +.endm + +.macro dgemm_dsolve_16x4 + bge ZERO, L, .L_dsolve_16x4_load + dgemm_16x4 + b .L_dsolve_16x4 +.L_dsolve_16x4_load: + // Load C + GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 + GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 + GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60 + GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 +/********************** solver ******************/ +.L_dsolve_16x4: + dsolve_16x4 +.endm + +.macro dgemm_dsolve_8x4 + bge ZERO, L, .L_dsolve_8x4_load + dgemm_8x4 + b .L_dsolve_8x4 +.L_dsolve_8x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 + + /* Load C2 */ + xvld U4, C2, 0x00 + xvld U5, C2, 0x20 + + /* Load C3 */ + xvld U6, C3, 0x00 + xvld U7, C3, 0x20 +/********* solver *********/ +.L_dsolve_8x4: + dsolve_8x4 +.endm + +.macro dgemm_dsolve_4x4 + bge ZERO, L, .L_dsolve_4x4_load + dgemm_4x4 + b .L_dsolve_4x4 +.L_dsolve_4x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 +/************** solver *****************/ +.L_dsolve_4x4: + dsolve_4x4 +.endm + +.macro dgemm_dsolve_2x4 + bge ZERO, L, .L_dsolve_2x4_load + dgemm_2x4 + xvpermi.q U2, U0, 0x01 + xvpermi.q U3, U1, 0x01 + b .L_dsolve_2x4 +.L_dsolve_2x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 +/********************** solver ******************/ +.L_dsolve_2x4: + dsolve_2x4 +.endm + +.macro dgemm_dsolve_1x4 + bge 
ZERO, L, .L_dsolve_1x4_load + dgemm_1x4 + xvpackod.d U1, U0, U0 + xvpermi.q U2, U0, 0x01 + xvpermi.q U3, U1, 0x01 + b .L_dsolve_1x4 +.L_dsolve_1x4_load: + // Load C + fld.d $f0, C0, 0x00 + fld.d $f1, C1, 0x00 + fld.d $f2, C2, 0x00 + fld.d $f3, C3, 0x00 +.L_dsolve_1x4: + dsolve_1x4 +.endm + +.macro dgemm_dsolve_16x2 + bge ZERO, L, .L_dsolve_16x2_load + dgemm_16x2 + b .L_dsolve_16x2 +.L_dsolve_16x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 + /* Load C1 */ + xvld U4, C1, 0x00 + xvld U5, C1, 0x20 + xvld U6, C1, 0x40 + xvld U7, C1, 0x60 +.L_dsolve_16x2: + dsolve_16x2 +.endm + +.macro dgemm_dsolve_8x2 + bge ZERO, L, .L_dsolve_8x2_load + dgemm_8x2 + b .L_dsolve_8x2 +.L_dsolve_8x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 +.L_dsolve_8x2: + dsolve_8x2 +.endm + +.macro dgemm_dsolve_4x2 + bge ZERO, L, .L_dsolve_4x2_load + dgemm_4x2 + b .L_dsolve_4x2 +.L_dsolve_4x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 +.L_dsolve_4x2: + dsolve_4x2 +.endm + +.macro dgemm_dsolve_2x2 + bge ZERO, L, .L_dsolve_2x2_load + dgemm_2x2 + b .L_dsolve_2x2 +.L_dsolve_2x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 +.L_dsolve_2x2: + dsolve_2x2 +.endm + +.macro dgemm_dsolve_1x2 + bge ZERO, L, .L_dsolve_1x2_load + dgemm_1x2 + xvpackod.d U1, U0, U0 + b .L_dsolve_1x2 +.L_dsolve_1x2_load: + // Load C + fld.d $f0, C0, 0x00 + fld.d $f1, C1, 0x00 +.L_dsolve_1x2: + dsolve_1x2 +.endm + +.macro dgemm_dsolve_16x1 + bge ZERO, L, .L_dsolve_16x1_load + dgemm_16x1 + b .L_dsolve_16x1 +.L_dsolve_16x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 +.L_dsolve_16x1: + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 + // Store A + A_st_macro 0, 3, 0, 4 + // Store C + GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 
+.endm + +.macro dgemm_dsolve_8x1 + bge ZERO, L, .L_dsolve_8x1_load + dgemm_8x1 + b .L_dsolve_8x1 +.L_dsolve_8x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 +.L_dsolve_8x1: + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0, U1, D0, U1 + // Store A + A_st_macro 0, 1, 0, 4 + // Store C + GST xv, , U0, C0, 0x00, U1, C0, 0x20 +.endm + +.macro dgemm_dsolve_4x1 + bge ZERO, L, .L_dsolve_4x1_load + dgemm_4x1 + b .L_dsolve_4x1 +.L_dsolve_4x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 +.L_dsolve_4x1: + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0 + // Store A + A_st_macro 0, 0, 0, 4 + // Store C + GST xv, , U0, C0, 0x00 +.endm + +.macro dgemm_dsolve_2x1 + bge ZERO, L, .L_dsolve_2x1_load + dgemm_2x1 + b .L_dsolve_2x1 +.L_dsolve_2x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 +.L_dsolve_2x1: + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0 + // Store A + A_st_macro 0, 0, 0, 2 + // Store C + GST v, , $vr0, C0, 0x00 +.endm + +.macro dgemm_dsolve_1x1 + bge ZERO, L, .L_dsolve_1x1_load + dgemm_1x1 + b .L_dsolve_1x1 +.L_dsolve_1x1_load: + // Load C + fld.d $f0, C0, 0x00 +.L_dsolve_1x1: + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0 + // Store A + A_st_macro 0, 0, 0, 1 + // Store C + GST f, d, $f0, C0, 0x00 +.endm + + PROLOGUE + push_if_used 26, 32 + PTR_SLLI LDC, LDC, 3 + PTR_SUB KK, ZERO, OFFSET + /* if (!(N >> 2)) goto L_N3 */ + PTR_SRAI J, N, 2 /* J = bn >> 2 */ + andi N, N, 0x03 + beq ZERO, J, .L_N3 +.align 5 +.L_J1: + PTR_ADDI J, J, -1 + move AA, A + move CC, C + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_M15 +.align 4 +.L_I1: + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_16x4 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_I1 +.L_M15: + andi I, M, 8 + beqz I, .L_M7 +.L_M8: + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_8x4 + 
PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_M7: + andi I, M, 4 + beqz I, .L_M3 +.L_M4: + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_4x4 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC, 0x20 // cc += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_M3: + andi I, M, 2 + beqz I, .L_M1 +.L_M2: + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_2x4 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_M1: + andi I, M, 1 + beqz I, .L_M0 + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_1x4 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_M0: + PTR_SLLI T0, K, 5 + PTR_SLLI T1, LDC, 2 + PTR_ADD B, B, T0 // b += 4 * k + PTR_ADD C, C, T1 // c += 4 * ldc + PTR_ADDI KK, KK, 4 // kk += 4 + bnez J, .L_J1 +.L_N3: + andi J, N, 2 + beq ZERO, J, .L_N1 +.L_N2: + move AA, A + move CC, C + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_N2_M15 +.align 4 +.L_N2_I1: + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_16x2 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_N2_I1 +.L_N2_M15: + andi I, M, 8 + beqz I, .L_N2_M7 +.L_N2_M8: + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_8x2 + PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_N2_M7: + andi I, M, 4 + beqz I, .L_N2_M3 +.L_N2_M4: + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_4x2 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC, 0x20 // cc += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_N2_M3: + andi I, M, 2 + beqz I, .L_N2_M1 +.L_N2_M2: + GADD , d, C0, CC, ZERO, C1, C0, LDC + move 
A0, AA + move B0, B + move L, KK + dgemm_dsolve_2x2 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_N2_M1: + andi I, M, 1 + beqz I, .L_N2_M0 + GADD , d, C0, CC, ZERO, C1, C0, LDC + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_1x2 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_N2_M0: + PTR_SLLI T0, K, 4 + PTR_SLLI T1, LDC, 1 + PTR_ADD B, B, T0 // b += 2 * k + PTR_ADD C, C, T1 // c += 2 * ldc + PTR_ADDI KK, KK, 2 // kk += 2 +.L_N1: + andi J, N, 1 + beq ZERO, J, .L_N0 + move AA, A + move CC, C + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_N1_M15 +.align 4 +.L_N1_I1: + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_16x1 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_N1_I1 +.L_N1_M15: + andi I, M, 8 + beqz I, .L_N1_M7 +.L_N1_M8: + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_8x1 + PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_N1_M7: + andi I, M, 4 + beqz I, .L_N1_M3 +.L_N1_M4: + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_4x1 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC, 0x20 // cc += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_N1_M3: + andi I, M, 2 + beqz I, .L_N1_M1 +.L_N1_M2: + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_2x1 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_N1_M1: + andi I, M, 1 + beqz I, .L_N1_M0 + GADD , d, C0, CC, ZERO + move A0, AA + move B0, B + move L, KK + dgemm_dsolve_1x1 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_N1_M0: +.L_N0: + pop_if_used 26, 32 + jirl $r0, $r1, 0x0 + EPILOGUE diff --git a/kernel/loongarch64/dtrsm_kernel_RT_16x4_lasx.S b/kernel/loongarch64/dtrsm_kernel_RT_16x4_lasx.S new file mode 
100644 index 000000000..5f86d75b5 --- /dev/null +++ b/kernel/loongarch64/dtrsm_kernel_RT_16x4_lasx.S @@ -0,0 +1,953 @@ +/******************************************************************************* +Copyright (c) 2023, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*******************************************************************************/ +#define ASSEMBLER + +#include "common.h" +#include "loongarch64_asm.S" + +/********************************************************************* +* 2023/09/26 guxiwei +* UTEST : OK +* CTEST : OK +* TEST : OK +* +* +*********************************************************************/ + +/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b, + * FLOAT *c, BLASLONG ldc, BLASLONG offset) + */ +#define M $r4 // param 1: bm +#define N $r5 // param 2: bn +#define K $r6 // param 3: bk +#define A $r7 // param 5: ba +#define B $r8 // param 6: bb +#define C $r9 // param 7: bc +#define LDC $r10 // param 8: ldc +#define OFFSET $r11 // param 9: offset + +/* Loop control: I counts tiles along M, J counts column groups along N, + * L is the k-length handed to the dgemm_dsolve_* macros (L = K - KK) + */ +#define I $r13 +#define J $r14 +#define L $r15 +#define TL $r16 +/* Working pointers: A0/B0 walk the packed A/B panels and C0..C3 address up to + * four columns of C. T0..T2 are scratch, KK is the running offset, and + * AA/CC/BB are the base pointers of the current column group. + */ +#define A0 $r17 +#define B0 $r18 +#define C0 $r19 +#define C1 $r20 +#define C2 $r23 +#define C3 $r24 +#define T0 $r25 +#define T1 $r26 +#define T2 $r27 +#define KK $r28 +#define AA $r29 +#define CC $r30 +#define BB $r31 +#undef ZERO +#define ZERO $r0 + +#define U0 $xr0 +#define U1 $xr1 +#define U2 $xr2 +#define U3 $xr3 +#define U4 $xr4 +#define U5 $xr5 +#define U6 $xr6 +#define U7 $xr7 +#define U8 $xr8 +#define U9 $xr9 +#define U10 $xr10 +#define U11 $xr11 +#define U12 $xr12 +#define U13 $xr13 +#define U14 $xr14 +#define U15 $xr15 +#define D0 $xr16 +#define D1 $xr17 +#define D2 $xr18 +#define D3 $xr19 +#define D4 $xr20 +#define D5 $xr21 +#define D6 $xr22 +#define D7 $xr23 +#define D8 $xr24 +#define D9 $xr25 +#define D10 $xr26 +#define D11 $xr27 +#define D12 $xr28 +#define D13 $xr29 +#define D14 $xr30 +#define D15 $xr31 + +/* Prefetch interval */ +#define A_PRE 0x400 +#define B_PRE 0x100 + +#include "dtrsm_kernel_macro.S" + +.macro ldrepl_macro start, end, stride +// Recursively load consecutive doubles of B: $xr<start + i> is filled from +// B0 + (stride + i) * 8, for i = 0 .. end - start. The callers in this file +// pass start >= 16, i.e. the D registers (D0 = $xr16). +// GLDREPL comes from loongarch64_asm.S; presumably a broadcast load (xvldrepl.d). +.if \start <= \end + GLDREPL xv, d, $xr\start, B0, \stride * 8 + ldrepl_macro %start + 1, \end, %stride + 1 +.endif
+.endm + +.macro nmsub_macro start0, end0, start1, reg +// $xr<start0 + i> -= reg * $xr<start1 + i>, for i = 0 .. end0 - start0 +.if \start0 <= \end0 + xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0 + nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg +.endif +.endm + +.macro A_st_macro start, end, stride, N +// Store $xr<start> .. $xr<end> to consecutive slots at A0, beginning at slot <stride>. +// N selects how many doubles of each register are written: +// 4 -> xvst (0x20 bytes), 2 -> vst (0x10 bytes), 1 -> fst.d (0x08 bytes). +.if \start <= \end +.if \N == 4 + xvst $xr\start, A0, \stride * 0x20 +.elseif \N == 2 + vst $vr\start, A0, \stride * 0x10 +.elseif \N == 1 + fst.d $f\start, A0, \stride * 0x08 +.endif + A_st_macro %start + 1, \end, %stride + 1, \N +.endif +.endm + +.macro dsolve_16x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 +//2 3 +// NOTE(review): the offsets drawn above form the lower-left triangle of the +// row-major 2x2 block; upper triangular presumably refers to the column-major +// view - confirm. +// D0 = B[0], D1 = B[2], D2 = B[3] + ldrepl_macro 16, 16, 0 + ldrepl_macro 17, 18, 2 +// Column 1 (U4..U7) is scaled by D2, then removed from column 0 (U0..U3), +// which is finally scaled by D0 (assuming GMUL x, y, z computes x = y * z). + GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7 + nmsub_macro 0, 3, 4, D1 + GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 +// Store A: write the solved 16x2 tile (U0..U7) back to the buffer at A0 + A_st_macro 0, 7, 0, 4 +// Store C: column 0 to C0, column 1 to C1 + GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \ + U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 +.endm + +.macro dsolve_8x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 +//2 3 +// D0 = B[0], D1 = B[2], D2 = B[3] + ldrepl_macro 16, 16, 0 + ldrepl_macro 17, 18, 2 +// Column 1 is U2..U3, column 0 is U0..U1; same solve order as dsolve_16x2 + GMUL xvf, d, U2, D2, U2, U3, D2, U3 + nmsub_macro 0, 1, 2, D1 + GMUL xvf, d, U0, D0, U0, U1, D0, U1 +// Store A: write the solved 8x2 tile (U0..U3) back to the buffer at A0 + A_st_macro 0, 3, 0, 4 +// Store C: column 0 to C0, column 1 to C1 + GST xv, , U0, C0, 0x00, U1, C0, 0x20, \ + U2, C1, 0x00, U3, C1, 0x20 +.endm + +.macro dsolve_4x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion.
The memory layout of +// matrix B is as follows: +//0 +//2 3 +// D0 = B[0], D1 = B[2], D2 = B[3] + ldrepl_macro 16, 16, 0 + ldrepl_macro 17, 18, 2 +// U1 (column 1) is scaled by D2, removed from U0 (column 0) via D1, +// then U0 is scaled by D0 (assuming GMUL x, y, z computes x = y * z). + GMUL xvf, d, U1, D2, U1 + nmsub_macro 0, 0, 1, D1 + GMUL xvf, d, U0, D0, U0 +// Store A: U0 and U1 as two 4-double slots at A0 + A_st_macro 0, 1, 0, 4 +// Store C: column 0 to C0, column 1 to C1 + GST xv, , U0, C0, 0x00, U1, C1, 0x00 +.endm + +.macro dsolve_2x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 +//2 3 +// NOTE(review): the offsets drawn above form the lower-left triangle of the +// row-major 2x2 block; upper triangular presumably refers to the column-major +// view - confirm. +// D0 = B[0], D1 = B[2], D2 = B[3] + ldrepl_macro 16, 16, 0 + ldrepl_macro 17, 18, 2 +// The arithmetic runs on full 256-bit registers; only the low two doubles +// of U0/U1 are stored below (vst on $vr0/$vr1). + GMUL xvf, d, U1, D2, U1 + nmsub_macro 0, 0, 1, D1 + GMUL xvf, d, U0, D0, U0 +// Store A: two 2-double slots at A0 + A_st_macro 0, 1, 0, 2 +// Store C: low two doubles of each column + GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00 +.endm + +.macro dsolve_1x2 +// We are going to process matrix B with a size of 2x2, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 +//2 3 +// D0 = B[0], D1 = B[2], D2 = B[3] + ldrepl_macro 16, 16, 0 + ldrepl_macro 17, 18, 2 +// The arithmetic runs on full 256-bit registers; only the lowest double +// of U0/U1 is stored below (fst.d on $f0/$f1). + GMUL xvf, d, U1, D2, U1 + nmsub_macro 0, 0, 1, D1 + GMUL xvf, d, U0, D0, U0 +// Store A: two 1-double slots at A0 + A_st_macro 0, 1, 0, 1 +// Store C: one double per column + GST f, d, $f0, C0, 0x00, $f1, C1, 0x00 +.endm + +.macro dsolve_16x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion.
The memory layout of +// matrix B is as follows: +//0 +//4 5 +//8 9 10 +//12 13 14 15 +// B is read from its last row up to its first, interleaved with the solve: +// D6..D9 = B[12..15], D3..D5 = B[8..10], D1..D2 = B[4..5], D0 = B[0]. +// Columns are solved last to first: U12..U15 (column 3), U8..U11 (column 2), +// U4..U7 (column 1), U0..U3 (column 0). + ldrepl_macro 22, 25, 12 + GMUL xvf, d, U12, D9, U12, U13, D9, U13, U14, D9, U14, U15, D9, U15 + ldrepl_macro 19, 21, 8 + nmsub_macro 8, 11, 12, D8 + ldrepl_macro 17, 18, 4 + GMUL xvf, d, U8, D5, U8, U9, D5, U9, U10, D5, U10, U11, D5, U11 + ldrepl_macro 16, 16, 0 + nmsub_macro 4, 7, 12, D7 + nmsub_macro 4, 7, 8, D4 + GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7 + nmsub_macro 0, 3, 12, D6 + nmsub_macro 0, 3, 8, D3 + nmsub_macro 0, 3, 4, D1 + GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 +// Store A: write the solved 16x4 tile (U0..U15) back to the buffer at A0 + A_st_macro 0, 15, 0, 4 +// Store C: four registers per column, columns at C0..C3 + GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \ + U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60, \ + U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60, \ + U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 +.endm + +.macro dsolve_8x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 +//4 5 +//8 9 10 +//12 13 14 15 +// NOTE(review): the offsets drawn above form the lower-left triangle of the +// row-major 4x4 block; upper triangular presumably refers to the column-major +// view - confirm. +// B is read from its last row up to its first: +// D6..D9 = B[12..15], D3..D5 = B[8..10], D1..D2 = B[4..5], D0 = B[0]. +// Columns are solved last to first: U6..U7, U4..U5, U2..U3, U0..U1. + ldrepl_macro 22, 25, 12 + GMUL xvf, d, U6, D9, U6, U7, D9, U7 + ldrepl_macro 19, 21, 8 + nmsub_macro 4, 5, 6, D8 + ldrepl_macro 17, 18, 4 + GMUL xvf, d, U4, D5, U4, U5, D5, U5 + ldrepl_macro 16, 16, 0 + nmsub_macro 2, 3, 6, D7 + nmsub_macro 2, 3, 4, D4 + GMUL xvf, d, U2, D2, U2, U3, D2, U3 + nmsub_macro 0, 1, 6, D6 + nmsub_macro 0, 1, 4, D3 + nmsub_macro 0, 1, 2, D1 + GMUL xvf, d, U0, D0, U0, U1, D0, U1 +// Store A: write the solved 8x4 tile (U0..U7) back to the buffer at A0 + A_st_macro 0, 7, 0, 4 +// Store C: two registers per column, columns at C0..C3 + GST xv, , U0, C0, 0x00, U1, C0, 0x20, \ + U2, C1, 0x00, U3, C1, 0x20, \ + U4, C2, 0x00, U5, C2, 0x20, \ + U6, C3, 0x00, U7, C3, 0x20 +.endm + +.macro dsolve_4x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion.
The memory layout of +// matrix B is as follows: +//0 +//4 5 +//8 9 10 +//12 13 14 15 +// B is read from its last row up to its first: +// D6..D9 = B[12..15], D3..D5 = B[8..10], D1..D2 = B[4..5], D0 = B[0]. +// One register per column; columns are solved last to first: U3, U2, U1, U0. + ldrepl_macro 22, 25, 12 + GMUL xvf, d, U3, D9, U3 + ldrepl_macro 19, 21, 8 + nmsub_macro 2, 2, 3, D8 + ldrepl_macro 17, 18, 4 + GMUL xvf, d, U2, D5, U2 + ldrepl_macro 16, 16, 0 + nmsub_macro 1, 1, 3, D7 + nmsub_macro 1, 1, 2, D4 + GMUL xvf, d, U1, D2, U1 + nmsub_macro 0, 0, 3, D6 + nmsub_macro 0, 0, 2, D3 + nmsub_macro 0, 0, 1, D1 + GMUL xvf, d, U0, D0, U0 +// Store A: U0..U3 as four 4-double slots at A0 + A_st_macro 0, 3, 0, 4 +// Store C: one register per column, columns at C0..C3 + GST xv, , U0, C0, 0x00, U1, C1, 0x00, U2, C2, 0x00, U3, C3, 0x00 +.endm + +.macro dsolve_2x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion. The memory layout of +// matrix B is as follows: +//0 +//4 5 +//8 9 10 +//12 13 14 15 +// NOTE(review): the offsets drawn above form the lower-left triangle of the +// row-major 4x4 block; upper triangular presumably refers to the column-major +// view - confirm. +// B is read from its last row up to its first; columns are solved U3, U2, U1, U0. +// The arithmetic runs on full 256-bit registers; only the low two doubles of +// each register are stored below (vst on $vr0..$vr3). + ldrepl_macro 22, 25, 12 + GMUL xvf, d, U3, D9, U3 + ldrepl_macro 19, 21, 8 + nmsub_macro 2, 2, 3, D8 + ldrepl_macro 17, 18, 4 + GMUL xvf, d, U2, D5, U2 + ldrepl_macro 16, 16, 0 + nmsub_macro 1, 1, 3, D7 + nmsub_macro 1, 1, 2, D4 + GMUL xvf, d, U1, D2, U1 + nmsub_macro 0, 0, 3, D6 + nmsub_macro 0, 0, 2, D3 + nmsub_macro 0, 0, 1, D1 + GMUL xvf, d, U0, D0, U0 +// Store A: four 2-double slots at A0 + A_st_macro 0, 3, 0, 2 +// Store C: low two doubles of each column + GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00, $vr2, C2, 0x00, $vr3, C3, 0x00 +.endm + +.macro dsolve_1x4 +// We are going to process matrix B with a size of 4x4, +// using only the upper triangular portion.
The memory layout of +// matrix B is as follows: +//0 +//4 5 +//8 9 10 +//12 13 14 15 +// B is read from its last row up to its first: +// D6..D9 = B[12..15], D3..D5 = B[8..10], D1..D2 = B[4..5], D0 = B[0]. +// One register per column; columns are solved last to first: U3, U2, U1, U0. +// Only the lowest double of each register is stored below (fst.d). + ldrepl_macro 22, 25, 12 + GMUL xvf, d, U3, D9, U3 + ldrepl_macro 19, 21, 8 + nmsub_macro 2, 2, 3, D8 + ldrepl_macro 17, 18, 4 + GMUL xvf, d, U2, D5, U2 + ldrepl_macro 16, 16, 0 + nmsub_macro 1, 1, 3, D7 + nmsub_macro 1, 1, 2, D4 + GMUL xvf, d, U1, D2, U1 + nmsub_macro 0, 0, 3, D6 + nmsub_macro 0, 0, 2, D3 + nmsub_macro 0, 0, 1, D1 + GMUL xvf, d, U0, D0, U0 +// Store A: four 1-double slots at A0 + A_st_macro 0, 3, 0, 1 +// Store C: one double per column, columns at C0..C3 + GST f, d, $f0, C0, 0x00, $f1, C1, 0x00, $f2, C2, 0x00, $f3, C3, 0x00 +.endm + +.macro dgemm_dsolve_16x1 +// GEMM update followed by the solve for a 16x1 tile. +// T1/T2 keep the incoming A0/B0. If L <= 0 the tile is taken straight from C; +// otherwise dgemm_16x1 (defined elsewhere) is expected to leave it in U0..U3. +// The labels are not macro-local, so this macro can be expanded only once per file. + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_16x1_load + dgemm_16x1 + b .L_dsolve_16x1 +.L_dsolve_16x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 +.L_dsolve_16x1: + // Step back 16 doubles in A and 1 double in B from the saved pointers + PTR_ADDI A0, T1, -16 * 8 + PTR_ADDI B0, T2, -1 * 8 + // D0 = B[0]; the whole column is scaled by it + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 + // Store A + A_st_macro 0, 3, 0, 4 + // Store C + GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 +.endm + +.macro dgemm_dsolve_8x1 +// Same scheme as dgemm_dsolve_16x1 for an 8x1 tile held in U0..U1 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_8x1_load + dgemm_8x1 + b .L_dsolve_8x1 +.L_dsolve_8x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 +.L_dsolve_8x1: + PTR_ADDI A0, T1, -8 * 8 + PTR_ADDI B0, T2, -1 * 8 + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0, U1, D0, U1 + // Store A + A_st_macro 0, 1, 0, 4 + // Store C + GST xv, , U0, C0, 0x00, U1, C0, 0x20 +.endm + +.macro dgemm_dsolve_4x1 +// Same scheme as dgemm_dsolve_16x1 for a 4x1 tile held in U0 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_4x1_load + dgemm_4x1 + b .L_dsolve_4x1 +.L_dsolve_4x1_load: + /* Load C0 */ + xvld U0, C0, 0x00 +.L_dsolve_4x1: + PTR_ADDI A0, T1, -4 * 8 + PTR_ADDI B0, T2, -1 * 8 + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0 + // Store A + A_st_macro 0, 0, 0, 4 + // Store C + GST xv, , U0, C0, 0x00 +.endm + +.macro dgemm_dsolve_2x1 + or T1,
A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_2x1_load + dgemm_2x1 + b .L_dsolve_2x1 +.L_dsolve_2x1_load: + /* Load C0. NOTE(review): xvld reads 4 doubles although only the low 2 are + * stored back by the vst below - confirm the extra read cannot run past + * the end of C. + */ + xvld U0, C0, 0x00 +.L_dsolve_2x1: + PTR_ADDI A0, T1, -2 * 8 + PTR_ADDI B0, T2, -1 * 8 + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0 + // Store A + A_st_macro 0, 0, 0, 2 + // Store C + GST v, , $vr0, C0, 0x00 +.endm + +.macro dgemm_dsolve_1x1 +// GEMM update followed by the solve for a single element. +// T1/T2 keep the incoming A0/B0. If L <= 0 the value is loaded from C; +// otherwise dgemm_1x1 (defined elsewhere) is expected to leave it in U0. + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_1x1_load + dgemm_1x1 + b .L_dsolve_1x1 +.L_dsolve_1x1_load: + // Load C + fld.d $f0, C0, 0x00 +.L_dsolve_1x1: + // Step back 1 double in A and 1 double in B from the saved pointers + PTR_ADDI A0, T1, -1 * 8 + PTR_ADDI B0, T2, -1 * 8 + ldrepl_macro 16, 16, 0 + GMUL xvf, d, U0, D0, U0 + // Store A + A_st_macro 0, 0, 0, 1 + // Store C + GST f, d, $f0, C0, 0x00 +.endm + +.macro dgemm_dsolve_16x2 +// GEMM update followed by dsolve_16x2. +// T1/T2 keep the incoming A0/B0. If L <= 0 the tile is loaded from C0/C1; +// otherwise dgemm_16x2 (defined elsewhere) is expected to leave it in U0..U7. + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_16x2_load + dgemm_16x2 + b .L_dsolve_16x2 +.L_dsolve_16x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 + /* Load C1 */ + xvld U4, C1, 0x00 + xvld U5, C1, 0x20 + xvld U6, C1, 0x40 + xvld U7, C1, 0x60 +.L_dsolve_16x2: + // Step back one 16x2 block in A and one 2x2 block in B + PTR_ADDI A0, T1, -(16 * 2) * 8 + PTR_ADDI B0, T2, -(2 * 2) * 8 + dsolve_16x2 +.endm + +.macro dgemm_dsolve_8x2 +// Same scheme as dgemm_dsolve_16x2 for an 8x2 tile held in U0..U3 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_8x2_load + dgemm_8x2 + b .L_dsolve_8x2 +.L_dsolve_8x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 +.L_dsolve_8x2: + PTR_ADDI A0, T1, -(8 * 2) * 8 + PTR_ADDI B0, T2, -(2 * 2) * 8 + dsolve_8x2 +.endm + +.macro dgemm_dsolve_4x2 +// Same scheme as dgemm_dsolve_16x2 for a 4x2 tile held in U0..U1 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_4x2_load + dgemm_4x2 + b .L_dsolve_4x2 +.L_dsolve_4x2_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 +.L_dsolve_4x2: + PTR_ADDI A0, T1, -(4 * 2) * 8 + PTR_ADDI B0, T2, -(2 * 2) * 8 + dsolve_4x2 +.endm + +.macro dgemm_dsolve_2x2 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_2x2_load + dgemm_2x2 + b .L_dsolve_2x2 +.L_dsolve_2x2_load: + /* Load
C0 */ + xvld U0, C0, 0x00 + /* Load C1. NOTE(review): xvld reads 4 doubles for a 2-row tile and only + * the low 2 are stored back - confirm the extra read is safe. + */ + xvld U1, C1, 0x00 +.L_dsolve_2x2: + PTR_ADDI A0, T1, -(2 * 2) * 8 + PTR_ADDI B0, T2, -(2 * 2) * 8 + dsolve_2x2 +.endm + +.macro dgemm_dsolve_1x2 +// GEMM update followed by dsolve_1x2. +// T1/T2 keep the incoming A0/B0. If L <= 0 the two values are loaded from C0/C1. + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_1x2_load + dgemm_1x2 + // presumably dgemm_1x2 leaves both column results in U0; this moves the + // odd lane into the low lane of U1 so that U0/U1 hold one column each - confirm + xvpackod.d U1, U0, U0 + b .L_dsolve_1x2 +.L_dsolve_1x2_load: + // Load C + fld.d $f0, C0, 0x00 + fld.d $f1, C1, 0x00 +.L_dsolve_1x2: + // Step back one 1x2 block in A and one 2x2 block in B + PTR_ADDI A0, T1, -(1 * 2) * 8 + PTR_ADDI B0, T2, -(2 * 2) * 8 + dsolve_1x2 +.endm + +.macro dgemm_dsolve_16x4 +// GEMM update followed by dsolve_16x4. +// T1/T2 keep the incoming A0/B0. If L <= 0 the tile is loaded from C0..C3; +// otherwise dgemm_16x4 (defined elsewhere) is expected to leave it in U0..U15. + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_16x4_load + dgemm_16x4 + b .L_dsolve_16x4 +.L_dsolve_16x4_load: + // Load C + GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 + GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 + GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60 + GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 +/********************** solver ******************/ +.L_dsolve_16x4: + // Step back one 16x4 block in A and one 4x4 block in B + PTR_ADDI A0, T1, -(16 * 4) * 8 + PTR_ADDI B0, T2, -(4 * 4) * 8 + dsolve_16x4 +.endm + +.macro dgemm_dsolve_8x4 +// Same scheme as dgemm_dsolve_16x4 for an 8x4 tile held in U0..U7 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_8x4_load + dgemm_8x4 + b .L_dsolve_8x4 +.L_dsolve_8x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 + + /* Load C2 */ + xvld U4, C2, 0x00 + xvld U5, C2, 0x20 + + /* Load C3 */ + xvld U6, C3, 0x00 + xvld U7, C3, 0x20 +/********* solver *********/ +.L_dsolve_8x4: + PTR_ADDI A0, T1, -(8 * 4) * 8 + PTR_ADDI B0, T2, -(4 * 4) * 8 + dsolve_8x4 +.endm + +.macro dgemm_dsolve_4x4 +// Same scheme as dgemm_dsolve_16x4 for a 4x4 tile held in U0..U3 + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_4x4_load + dgemm_4x4 + b .L_dsolve_4x4 +.L_dsolve_4x4_load: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 +/************** solver *****************/ +.L_dsolve_4x4: + PTR_ADDI A0, T1, -(4 * 4) * 8 + PTR_ADDI B0, T2, -(4 * 4) * 8 +
dsolve_4x4 +.endm + +.macro dgemm_dsolve_2x4 +// GEMM update followed by dsolve_2x4. +// T1/T2 keep the incoming A0/B0. If L <= 0 the tile is loaded from C0..C3. + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_2x4_load + dgemm_2x4 + // presumably dgemm_2x4 leaves columns 0/1 in the low halves and columns 2/3 + // in the high halves of U0/U1; these copy the high halves into U2/U3 - confirm + xvpermi.q U2, U0, 0x01 + xvpermi.q U3, U1, 0x01 + b .L_dsolve_2x4 +.L_dsolve_2x4_load: + /* NOTE(review): each xvld below reads 4 doubles for a 2-row tile and only + * the low 2 are stored back - confirm the extra read is safe. + */ + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 +/********************** solver ******************/ +.L_dsolve_2x4: + // Step back one 2x4 block in A and one 4x4 block in B + PTR_ADDI A0, T1, -(2 * 4) * 8 + PTR_ADDI B0, T2, -(4 * 4) * 8 + dsolve_2x4 +.endm + +.macro dgemm_dsolve_1x4 +// GEMM update followed by dsolve_1x4. +// T1/T2 keep the incoming A0/B0. If L <= 0 the four values are loaded from C0..C3. + or T1, A0, A0 + or T2, B0, B0 + bge ZERO, L, .L_dsolve_1x4_load + dgemm_1x4 + // presumably dgemm_1x4 leaves the four column results in the lanes of U0; + // the shuffles below spread them so that U0..U3 hold one column each - confirm + xvpackod.d U1, U0, U0 + xvpermi.q U2, U0, 0x01 + xvpermi.q U3, U1, 0x01 + b .L_dsolve_1x4 +.L_dsolve_1x4_load: + // Load C + fld.d $f0, C0, 0x00 + fld.d $f1, C1, 0x00 + fld.d $f2, C2, 0x00 + fld.d $f3, C3, 0x00 +.L_dsolve_1x4: + // Step back one 1x4 block in A and one 4x4 block in B + PTR_ADDI A0, T1, -(1 * 4) * 8 + PTR_ADDI B0, T2, -(4 * 4) * 8 + dsolve_1x4 +.endm + +// Entry point. B and C are first advanced to their ends and then walked +// backwards: the N & 1 column first, then the N & 2 pair, then groups of 4. +// KK starts at N - OFFSET and is decreased by the width of each group. + PROLOGUE + push_if_used 26, 32 + PTR_SLLI LDC, LDC, 3 // ldc in bytes + PTR_SUB KK, N, OFFSET // kk = n - offset + PTR_MUL T0, N, LDC + PTR_MUL T1, N, K + PTR_ADD C, C, T0 // c += n * ldc + PTR_SLLI T1, T1, 3 + PTR_ADD B, B, T1 // b += n * k + + andi J, N, 1 + beqz J, .L_N2 +.L_N1: + move AA, A + PTR_SUB C, C, LDC // c -= ldc + PTR_SLLI T0, K, 3 + PTR_SLLI T1, KK, 3 + PTR_SUB B, B, T0 // b -= k + PTR_ADD BB, B, T1 // bb = b + kk + move CC, C + + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_N1_M15 +.align 4 +.L_N1_I1: + PTR_SLLI T1, KK, 7 + GADD , d, C0, CC, ZERO + PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_16x1 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_N1_I1 +.L_N1_M15: + andi I, M, 8 + beqz I, .L_N1_M7 +.L_N1_M8: + PTR_SLLI T1, KK, 6 + GADD , d, C0, CC, ZERO + PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_8x1 + PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_N1_M7: +
andi I, M, 4 + beqz I, .L_N1_M3 +.L_N1_M4: + PTR_SLLI T1, KK, 5 + GADD , d, C0, CC, ZERO + PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_4x1 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC, 0x20 // cc += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_N1_M3: + andi I, M, 2 + beqz I, .L_N1_M1 +.L_N1_M2: + PTR_SLLI T1, KK, 4 + GADD , d, C0, CC, ZERO + PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_2x1 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_N1_M1: + andi I, M, 1 + beqz I, .L_N1_M0 + PTR_SLLI T1, KK, 3 + GADD , d, C0, CC, ZERO + PTR_ADD A0, AA, T1 // a0 = aa + kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_1x1 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_N1_M0: + PTR_ADDI KK, KK, -1 // kk -= 1 +.L_N2: + // Two-column group, taken when bit 1 of N is set + andi J, N, 2 + beq ZERO, J, .L_N4 + move AA, A + PTR_SLLI T0, LDC, 1 + PTR_SLLI T1, K, 4 + PTR_SLLI T2, KK, 4 + PTR_SUB B, B, T1 // b -= 2 * k + PTR_SUB C, C, T0 // c -= 2 * ldc + PTR_ADD BB, B, T2 // bb = b + 2 * kk + move CC, C + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_N2_M15 +.align 4 +.L_N2_I1: + PTR_SLLI T1, KK, 7 + GADD , d, C0, CC, ZERO, C1, C0, LDC + PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_16x2 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_N2_I1 +.L_N2_M15: + andi I, M, 8 + beqz I, .L_N2_M7 +.L_N2_M8: + PTR_SLLI T1, KK, 6 + GADD , d, C0, CC, ZERO, C1, C0, LDC + PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_8x2 + PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_N2_M7: + andi I, M, 4 + beqz I, .L_N2_M3 +.L_N2_M4: + PTR_SLLI T1, KK, 5 + GADD , d, C0, CC, ZERO, C1, C0, LDC + PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK +
dgemm_dsolve_4x2 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC, 0x20 // cc += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_N2_M3: + andi I, M, 2 + beqz I, .L_N2_M1 +.L_N2_M2: + PTR_SLLI T1, KK, 4 + GADD , d, C0, CC, ZERO, C1, C0, LDC + PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_2x2 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_N2_M1: + andi I, M, 1 + beqz I, .L_N2_M0 + PTR_SLLI T1, KK, 3 + GADD , d, C0, CC, ZERO, C1, C0, LDC + PTR_ADD A0, AA, T1 // a0 = aa + kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_1x2 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_N2_M0: + PTR_ADDI KK, KK, -2 // kk -= 2 +.L_N4: + // Remaining columns in groups of four + PTR_SRAI J, N, 2 /* J = bn >> 2 */ + beq ZERO, J, .L_N0 +.align 5 +.L_J1: + PTR_ADDI J, J, -1 + move AA, A + PTR_SLLI T0, LDC, 2 + PTR_SLLI T1, K, 5 + PTR_SLLI T2, KK, 5 + PTR_SUB B, B, T1 // b -= 4 * k + PTR_SUB C, C, T0 // c -= 4 * ldc + PTR_ADD BB, B, T2 // bb = b + 4 * kk + move CC, C + PTR_SRAI I, M, 4 // M >> 4 + beqz I, .L_M15 +.align 4 +.L_I1: + PTR_SLLI T1, KK, 7 + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_16x4 + PTR_ADDI I, I, -1 + PTR_SLLI T0, K, 7 + PTR_ADDI CC, CC, 0x80 // cc += 16 + PTR_ADD AA, AA, T0 // aa += 16 * k + bnez I, .L_I1 +.L_M15: + andi I, M, 8 + beqz I, .L_M7 +.L_M8: + PTR_SLLI T1, KK, 6 + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_8x4 + PTR_SLLI T0, K, 6 + PTR_ADDI CC, CC, 0x40 // cc += 8 + PTR_ADD AA, AA, T0 // aa += 8 * k +.L_M7: + andi I, M, 4 + beqz I, .L_M3 +.L_M4: + PTR_SLLI T1, KK, 5 + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_4x4 + PTR_SLLI T0, K, 5 + PTR_ADDI CC, CC,
0x20 // cc += 4 + PTR_ADD AA, AA, T0 // aa += 4 * k +.L_M3: + andi I, M, 2 + beqz I, .L_M1 +.L_M2: + PTR_SLLI T1, KK, 4 + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_2x4 + PTR_SLLI T0, K, 4 + PTR_ADDI CC, CC, 0x10 // cc += 2 + PTR_ADD AA, AA, T0 // aa += 2 * k +.L_M1: + andi I, M, 1 + beqz I, .L_M0 + PTR_SLLI T1, KK, 3 + GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC + PTR_ADD A0, AA, T1 // a0 = aa + kk + move B0, BB + PTR_SUB L, K, KK // L = K - KK + dgemm_dsolve_1x4 + PTR_SLLI T0, K, 3 + PTR_ADDI CC, CC, 0x08 // cc += 1 + PTR_ADD AA, AA, T0 // aa += 1 * k +.L_M0: + PTR_ADDI KK, KK, -4 // kk -= 4 + bnez J, .L_J1 // next group of four columns +.L_N0: + // Undo push_if_used from the prologue, then jump to the address in $r1 + pop_if_used 26, 32 + jirl $r0, $r1, 0x0 + EPILOGUE diff --git a/kernel/loongarch64/dtrsm_kernel_macro.S b/kernel/loongarch64/dtrsm_kernel_macro.S new file mode 100644 index 000000000..88b7121d1 --- /dev/null +++ b/kernel/loongarch64/dtrsm_kernel_macro.S @@ -0,0 +1,2147 @@ +/******************************************************************************* +Copyright (c) 2023, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + +/************** Dgemm Kernel 16x4 ****************/ +.macro KERNEL2x16x4 +// Two pipelined k-steps of the 16x4 tile. +// First half: accumulate the A values in U8..U11 times the B values in +// U12..U15 into D0..D15 while loading the next step into U0..U3 / U4..U7. +// Second half: the same with the two register sets swapped. +// Accumulators: D0..D3 column 0, D4..D7 column 1, D8..D11 column 2, +// D12..D15 column 3. A0 advances 0x80 and B0 0x20 bytes per half. + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U1, A0, 0x20 + xvfmadd.d D2, U10, U12, D2 + xvfmadd.d D3, U11, U12, D3 + + xvld U2, A0, 0x40 + xvfmadd.d D4, U8, U13, D4 + xvfmadd.d D5, U9, U13, D5 + + xvld U3, A0, 0x60 + xvfmadd.d D6, U10, U13, D6 + xvfmadd.d D7, U11, U13, D7 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D8, U8, U14, D8 + xvfmadd.d D9, U9, U14, D9 + + preld 0, B0, B_PRE + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D10, U10, U14, D10 + xvfmadd.d D11, U11, U14, D11 + + preld 0, A0, A_PRE + xvldrepl.d U6, B0, 0x10 + xvfmadd.d D12, U8, U15, D12 + xvfmadd.d D13, U9, U15, D13 + + preld 0, A0, A_PRE + 0x40 + xvldrepl.d U7, B0, 0x18 + xvfmadd.d D14, U10, U15, D14 + xvfmadd.d D15, U11, U15, D15 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x20 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvld U9, A0, 0x20 + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 + + xvld U10, A0, 0x40 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + xvld U11, A0, 0x60 + xvfmadd.d D6, U2, U5, D6 + xvfmadd.d
D7, U3, U5, D7 + + xvldrepl.d U12, B0, 0x00 + xvfmadd.d D8, U0, U6, D8 + xvfmadd.d D9, U1, U6, D9 + + preld 0, B0, B_PRE + xvldrepl.d U13, B0, 0x08 + xvfmadd.d D10, U2, U6, D10 + xvfmadd.d D11, U3, U6, D11 + + preld 0, A0, A_PRE + xvldrepl.d U14, B0, 0x10 + xvfmadd.d D12, U0, U7, D12 + xvfmadd.d D13, U1, U7, D13 + + preld 0, A0, A_PRE + 0x40 + xvldrepl.d U15, B0, 0x18 + xvfmadd.d D14, U2, U7, D14 + xvfmadd.d D15, U3, U7, D15 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x20 +.endm + +.macro KERNEL2x16x4_END +// Final two k-steps of the 16x4 tile: the first half is the same as in +// KERNEL2x16x4, the second half only consumes U0..U3 / U4..U7 and neither +// loads further data nor advances A0/B0 again. + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U1, A0, 0x20 + xvfmadd.d D2, U10, U12, D2 + xvfmadd.d D3, U11, U12, D3 + + xvld U2, A0, 0x40 + xvfmadd.d D4, U8, U13, D4 + xvfmadd.d D5, U9, U13, D5 + + xvld U3, A0, 0x60 + xvfmadd.d D6, U10, U13, D6 + xvfmadd.d D7, U11, U13, D7 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D8, U8, U14, D8 + xvfmadd.d D9, U9, U14, D9 + + preld 0, B0, B_PRE + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D10, U10, U14, D10 + xvfmadd.d D11, U11, U14, D11 + + preld 0, A0, A_PRE + xvldrepl.d U6, B0, 0x10 + xvfmadd.d D12, U8, U15, D12 + xvfmadd.d D13, U9, U15, D13 + + preld 0, A0, A_PRE + 0x40 + xvldrepl.d U7, B0, 0x18 + xvfmadd.d D14, U10, U15, D14 + xvfmadd.d D15, U11, U15, D15 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x20 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 + + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + xvfmadd.d D6, U2, U5, D6 + xvfmadd.d D7, U3, U5, D7 + + xvfmadd.d D8, U0, U6, D8 + xvfmadd.d D9, U1, U6, D9 + + preld 0, B0, B_PRE + xvfmadd.d D10, U2, U6, D10 + xvfmadd.d D11, U3, U6, D11 + + preld 0, A0, A_PRE + xvfmadd.d D12, U0, U7, D12 + xvfmadd.d D13, U1, U7, D13 + + preld 0, A0, A_PRE + 0x40 + xvfmadd.d D14, U2, U7, D14 + xvfmadd.d D15, U3, U7, D15 +.endm + +.macro KERNEL8x16x4 +// Eight k-steps: four back-to-back KERNEL2x16x4 expansions +.rept 4 + KERNEL2x16x4 +.endr +.endm + +.macro KERNEL8x16x4_END +// Eight k-steps ending the pipeline: three KERNEL2x16x4 plus KERNEL2x16x4_END +.rept 3 + KERNEL2x16x4 +.endr + KERNEL2x16x4_END +.endm + +.macro KERNEL2x8x4 +// Two pipelined k-steps of the 8x4 tile; accumulators D0..D1, D4..D5, +// D8..D9 and D12..D13 hold columns 0..3. + 
xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U8, U13, D4 + xvfmadd.d D5, U9, U13, D5 + + xvldrepl.d U6, B0, 0x10 + xvfmadd.d D8, U8, U14, D8 + xvfmadd.d D9, U9, U14, D9 + + xvldrepl.d U7, B0, 0x18 + xvfmadd.d D12, U8, U15, D12 + xvfmadd.d D13, U9, U15, D13 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x20 + + xvld U8, A0, 0x00 + xvld U9, A0, 0x20 + + xvldrepl.d U12, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvldrepl.d U13, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + xvldrepl.d U14, B0, 0x10 + xvfmadd.d D8, U0, U6, D8 + xvfmadd.d D9, U1, U6, D9 + + xvldrepl.d U15, B0, 0x18 + xvfmadd.d D12, U0, U7, D12 + xvfmadd.d D13, U1, U7, D13 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x20 +.endm + +.macro KERNEL2x8x4_END +// Final two k-steps of the 8x4 tile: the second half only consumes +// U0..U1 / U4..U7 and loads nothing further. + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U8, U13, D4 + xvfmadd.d D5, U9, U13, D5 + + xvldrepl.d U6, B0, 0x10 + xvfmadd.d D8, U8, U14, D8 + xvfmadd.d D9, U9, U14, D9 + + xvldrepl.d U7, B0, 0x18 + xvfmadd.d D12, U8, U15, D12 + xvfmadd.d D13, U9, U15, D13 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x20 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + xvfmadd.d D8, U0, U6, D8 + xvfmadd.d D9, U1, U6, D9 + + xvfmadd.d D12, U0, U7, D12 + xvfmadd.d D13, U1, U7, D13 +.endm + +.macro KERNEL8x8x4 +// Eight k-steps: four back-to-back KERNEL2x8x4 expansions +.rept 4 + KERNEL2x8x4 +.endr +.endm + +.macro KERNEL8x8x4_END +// Eight k-steps ending the pipeline: three KERNEL2x8x4 plus KERNEL2x8x4_END +.rept 3 + KERNEL2x8x4 +.endr + KERNEL2x8x4_END +.endm + +.macro KERNEL2x4x4 +// Two pipelined k-steps of the 4x4 tile; one A vector per step, +// accumulators D0, D4, D8 and D12 hold columns 0..3. + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U8, U12, D0 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U8, U13, D4 + + xvldrepl.d U6, B0, 0x10 + xvfmadd.d D8, U8, U14, D8 + + xvldrepl.d U7, B0, 0x18 + xvfmadd.d D12, U8, U15, D12 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x20 + +
xvld U8, A0, 0x00 + + xvldrepl.d U12, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + xvldrepl.d U13, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + + xvldrepl.d U14, B0, 0x10 + xvfmadd.d D8, U0, U6, D8 + + xvldrepl.d U15, B0, 0x18 + xvfmadd.d D12, U0, U7, D12 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x20 +.endm + +.macro KERNEL2x4x4_END +// Final two k-steps of the 4x4 tile: the second half only consumes +// U0 / U4..U7 and loads nothing further. + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U8, U12, D0 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U8, U13, D4 + + xvldrepl.d U6, B0, 0x10 + xvfmadd.d D8, U8, U14, D8 + + xvldrepl.d U7, B0, 0x18 + xvfmadd.d D12, U8, U15, D12 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x20 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D8, U0, U6, D8 + xvfmadd.d D12, U0, U7, D12 +.endm + +.macro KERNEL8x4x4 +// Eight k-steps: four back-to-back KERNEL2x4x4 expansions +.rept 4 + KERNEL2x4x4 +.endr +.endm + +.macro KERNEL8x4x4_END +// Eight k-steps ending the pipeline: three KERNEL2x4x4 plus KERNEL2x4x4_END +.rept 3 + KERNEL2x4x4 +.endr + KERNEL2x4x4_END +.endm + +.macro KERNEL2x2x4 +// Two pipelined k-steps of the 2x4 tile. Here the roles are swapped: each A +// element is broadcast (xvldrepl.d) and B is loaded as a 4-double vector, so +// D0 accumulates row 0 and D1 row 1 across the four columns. + xvldrepl.d U0, A0, 0x00 + xvldrepl.d U1, A0, 0x08 + + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U4, B0, 0x00 + addi.d A0, A0, 0x10 + addi.d B0, B0, 0x20 + + xvldrepl.d U8, A0, 0x00 + xvldrepl.d U9, A0, 0x08 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvld U12, B0, 0x00 + addi.d A0, A0, 0x10 + addi.d B0, B0, 0x20 +.endm + +.macro KERNEL2x2x4_END +// Final two k-steps of the 2x4 tile: the second half loads nothing further + xvldrepl.d U0, A0, 0x00 + xvldrepl.d U1, A0, 0x08 + + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U4, B0, 0x00 + addi.d A0, A0, 0x10 + addi.d B0, B0, 0x20 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 +.endm + +.macro KERNEL8x2x4 +// Eight k-steps: four back-to-back KERNEL2x2x4 expansions +.rept 4 + KERNEL2x2x4 +.endr +.endm + +.macro KERNEL8x2x4_END +// Eight k-steps ending the pipeline: three KERNEL2x2x4 plus KERNEL2x2x4_END +.rept 3 + KERNEL2x2x4 +.endr + KERNEL2x2x4_END +.endm + +.macro KERNEL2x1x4 +// Two pipelined k-steps of the 1x4 tile: the A element is broadcast, B is a +// 4-double vector, and D0 accumulates the single row across the four columns. + xvldrepl.d U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvld U4, B0, 0x00 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x20 + + xvldrepl.d U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvld U12, B0, 0x00 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x20 +.endm + +.macro KERNEL2x1x4_END + xvldrepl.d U0, A0, 0x00 + xvfmadd.d D0,
U8, U12, D0 + xvld U4, B0, 0x00 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x20 + + xvfmadd.d D0, U0, U4, D0 +.endm + +.macro KERNEL8x1x4 +.rept 4 + KERNEL2x1x4 +.endr +.endm + +.macro KERNEL8x1x4_END +.rept 3 + KERNEL2x1x4 +.endr + KERNEL2x1x4_END +.endm + +.macro KERNEL2x16x2 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U1, A0, 0x20 + xvfmadd.d D2, U10, U12, D2 + xvfmadd.d D3, U11, U12, D3 + + xvld U2, A0, 0x40 + xvfmadd.d D4, U8, U13, D4 + xvfmadd.d D5, U9, U13, D5 + + xvld U3, A0, 0x60 + xvfmadd.d D6, U10, U13, D6 + xvfmadd.d D7, U11, U13, D7 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x10 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvld U9, A0, 0x20 + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 + + xvld U10, A0, 0x40 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + xvld U11, A0, 0x60 + xvfmadd.d D6, U2, U5, D6 + xvfmadd.d D7, U3, U5, D7 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x10 +.endm + +.macro KERNEL2x16x2_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U1, A0, 0x20 + xvfmadd.d D2, U10, U12, D2 + xvfmadd.d D3, U11, U12, D3 + + xvld U2, A0, 0x40 + xvfmadd.d D4, U8, U13, D4 + xvfmadd.d D5, U9, U13, D5 + + xvld U3, A0, 0x60 + xvfmadd.d D6, U10, U13, D6 + xvfmadd.d D7, U11, U13, D7 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x10 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 + + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + xvfmadd.d D6, U2, U5, D6 + xvfmadd.d D7, U3, U5, D7 +.endm + +.macro KERNEL8x16x2 +.rept 4 + KERNEL2x16x2 +.endr +.endm + +.macro KERNEL8x16x2_END +.rept 3 + KERNEL2x16x2 +.endr + KERNEL2x16x2_END +.endm + +.macro KERNEL2x8x2 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + 
xvfmadd.d D1, U9, U12, D1 + + xvld U1, A0, 0x20 + xvfmadd.d D4, U8, U13, D4 + xvfmadd.d D5, U9, U13, D5 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x10 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvld U9, A0, 0x20 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x10 +.endm + +.macro KERNEL2x8x2_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U1, A0, 0x20 + xvfmadd.d D4, U8, U13, D4 + xvfmadd.d D5, U9, U13, D5 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x10 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 +.endm + +.macro KERNEL8x8x2 +.rept 4 + KERNEL2x8x2 +.endr +.endm + +.macro KERNEL8x8x2_END +.rept 3 + KERNEL2x8x2 + .endr + KERNEL2x8x2_END +.endm + +.macro KERNEL2x4x2 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D4, U8, U13, D4 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x10 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D4, U0, U5, D4 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x10 +.endm + +.macro KERNEL2x4x2_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D4, U8, U13, D4 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x10 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D4, U0, U5, D4 +.endm + +.macro KERNEL8x4x2 +.rept 4 + KERNEL2x4x2 +.endr +.endm + +.macro KERNEL8x4x2_END +.rept 3 + KERNEL2x4x2 +.endr + KERNEL2x4x2_END +.endm + +.macro KERNEL2x2x2 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D4, U8, U13, D4 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x10 + addi.d B0, B0, 0x10 + + xvld 
U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D4, U0, U5, D4 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + + addi.d A0, A0, 0x10 + addi.d B0, B0, 0x10 +.endm + +.macro KERNEL2x2x2_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D4, U8, U13, D4 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x10 + addi.d B0, B0, 0x10 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D4, U0, U5, D4 +.endm + +.macro KERNEL8x2x2 +.rept 4 + KERNEL2x2x2 +.endr +.endm + +.macro KERNEL8x2x2_END +.rept 3 + KERNEL2x2x2 +.endr + KERNEL2x2x2_END +.endm + +.macro KERNEL2x1x2 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D4, U8, U13, D4 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x10 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D4, U0, U5, D4 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x10 +.endm + +.macro KERNEL2x1x2_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D4, U8, U13, D4 + + xvldrepl.d U4, B0, 0x00 + xvldrepl.d U5, B0, 0x08 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x10 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D4, U0, U5, D4 +.endm + +.macro KERNEL8x1x2 +.rept 4 + KERNEL2x1x2 +.endr +.endm + +.macro KERNEL8x1x2_END +.rept 3 + KERNEL2x1x2 +.endr + KERNEL2x1x2_END +.endm + +.macro KERNEL2x16x1 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U1, A0, 0x20 + xvfmadd.d D2, U10, U12, D2 + xvfmadd.d D3, U11, U12, D3 + + xvld U2, A0, 0x40 + xvld U3, A0, 0x60 + + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x08 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvld U9, A0, 0x20 + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 + + xvld U10, A0, 0x40 + xvld U11, A0, 0x60 + + xvldrepl.d U12, B0, 0x00 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x08 +.endm + +.macro KERNEL2x16x1_END + xvld U0, A0, 0x00 + 
xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + + xvld U1, A0, 0x20 + xvfmadd.d D2, U10, U12, D2 + xvfmadd.d D3, U11, U12, D3 + + xvld U2, A0, 0x40 + xvld U3, A0, 0x60 + + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x80 + addi.d B0, B0, 0x08 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 +.endm + +.macro KERNEL8x16x1 +.rept 4 + KERNEL2x16x1 +.endr +.endm + +.macro KERNEL8x16x1_END +.rept 3 + KERNEL2x16x1 +.endr + KERNEL2x16x1_END +.endm + +.macro KERNEL2x8x1 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + xvld U1, A0, 0x20 + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x08 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + xvld U9, A0, 0x20 + xvldrepl.d U12, B0, 0x00 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x08 +.endm + +.macro KERNEL2x8x1_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvfmadd.d D1, U9, U12, D1 + xvld U1, A0, 0x20 + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x40 + addi.d B0, B0, 0x08 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 +.endm + +.macro KERNEL8x8x1 +.rept 4 + KERNEL2x8x1 +.endr +.endm + +.macro KERNEL8x8x1_END +.rept 3 + KERNEL2x8x1 +.endr + KERNEL2x8x1_END +.endm + +.macro KERNEL2x4x1 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x08 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvldrepl.d U12, B0, 0x00 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x08 +.endm + +.macro KERNEL2x4x1_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x20 + addi.d B0, B0, 0x08 + + xvfmadd.d D0, U0, U4, D0 +.endm + +.macro KERNEL8x4x1 +.rept 4 + KERNEL2x4x1 +.endr +.endm + +.macro KERNEL8x4x1_END +.rept 3 + KERNEL2x4x1 +.endr + KERNEL2x4x1_END +.endm + +.macro KERNEL2x2x1 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x10 + addi.d 
B0, B0, 0x08 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvldrepl.d U12, B0, 0x00 + + addi.d A0, A0, 0x10 + addi.d B0, B0, 0x08 +.endm + +.macro KERNEL2x2x1_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x10 + addi.d B0, B0, 0x08 + + xvfmadd.d D0, U0, U4, D0 +.endm + +.macro KERNEL8x2x1 +.rept 4 + KERNEL2x2x1 +.endr +.endm + +.macro KERNEL8x2x1_END +.rept 3 + KERNEL2x2x1 +.endr + KERNEL2x2x1_END +.endm + +.macro KERNEL2x1x1 + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x08 + + xvld U8, A0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvldrepl.d U12, B0, 0x00 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x08 +.endm + +.macro KERNEL2x1x1_END + xvld U0, A0, 0x00 + xvfmadd.d D0, U8, U12, D0 + xvldrepl.d U4, B0, 0x00 + + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x08 + + xvfmadd.d D0, U0, U4, D0 +.endm + +.macro KERNEL8x1x1 +.rept 4 + KERNEL2x1x1 +.endr +.endm + +.macro KERNEL8x1x1_END +.rept 3 + KERNEL2x1x1 +.endr + KERNEL2x1x1_END +.endm + +.macro dgemm_16x4 +.L_dgemm_16x4: // See dgemm_kernel_16x4.S + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + xvld U2, A0, 0x40 + xvld U3, A0, 0x60 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + xvfmul.d D1, U1, U4 + xvfmul.d D2, U2, U4 + xvfmul.d D3, U3, U4 + + xvldrepl.d U5, B0, 0x08 + /* line 2 */ + xvfmul.d D4, U0, U5 + xvfmul.d D5, U1, U5 + xvfmul.d D6, U2, U5 + xvfmul.d D7, U3, U5 + + xvldrepl.d U6, B0, 0x10 + /* line 3 */ + xvfmul.d D8, U0, U6 + xvfmul.d D9, U1, U6 + xvfmul.d D10, U2, U6 + xvfmul.d D11, U3, U6 + + xvldrepl.d U7, B0, 0x18 + /* line 4 */ + xvfmul.d D12, U0, U7 + xvfmul.d D13, U1, U7 + xvfmul.d D14, U2, U7 + xvfmul.d D15, U3, U7 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x20 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_L7 */ + beq ZERO,TL, .L_dgemm_16x4_L7 + + xvld U8, A0, 0x00 + xvld U9, A0, 0x20 + 
xvld U10, A0, 0x40 + xvld U11, A0, 0x60 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + xvldrepl.d U14, B0, 0x10 + xvldrepl.d U15, B0, 0x18 + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x20 + + beq ZERO, TL, .L_dgemm_16x4_TL1_END +.align 5 +.L_dgemm_16x4_TL1: + KERNEL8x16x4 + PTR_ADDI TL, TL, -1 + blt ZERO, TL, .L_dgemm_16x4_TL1 +.L_dgemm_16x4_TL1_END: + KERNEL8x16x4_END +.L_dgemm_16x4_L7: + andi TL, L, 7 + beq TL, ZERO, .L_dgemm_16x4_L0 +.align 5 +.L_dgemm_16x4_L71: + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + xvld U2, A0, 0x40 + xvld U3, A0, 0x60 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + xvfmadd.d D6, U2, U5, D6 + xvfmadd.d D7, U3, U5, D7 + + xvldrepl.d U6, B0, 0x10 + xvfmadd.d D8, U0, U6, D8 + xvfmadd.d D9, U1, U6, D9 + xvfmadd.d D10, U2, U6, D10 + xvfmadd.d D11, U3, U6, D11 + + xvldrepl.d U7, B0, 0x18 + xvfmadd.d D12, U0, U7, D12 + xvfmadd.d D13, U1, U7, D13 + xvfmadd.d D14, U2, U7, D14 + xvfmadd.d D15, U3, U7, D15 + + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x20 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_16x4_L71 +.L_dgemm_16x4_L0: + // Load C + GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 + GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 + GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60 + GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 + GSUB xvf, d, U0, U0, D0, U1, U1, D1, U2, U2, D2, U3, U3, D3, \ + U4, U4, D4, U5, U5, D5, U6, U6, D6, U7, U7, D7, \ + U8, U8, D8, U9, U9, D9, U10, U10, D10, U11, U11, D11, \ + U12, U12, D12, U13, U13, D13, U14, U14, D14, U15, U15, D15 +.endm + +.macro dgemm_1x4 +.L_dgemm_1x4: // See dgemm_kernel_16x4.S + xvldrepl.d U0, A0, 0x00 + xvld U4, B0, 0x00 + xvfmul.d D0, U0, U4 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x08 + PTR_ADDI B0, 
B0, 0x20 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_M1_L7 */ + beq ZERO,TL, .L_dgemm_1x4_M1_L7 + xvldrepl.d U8, A0, 0x00 + + PTR_ADDI TL, TL, -1 + xvld U12, B0, 0x00 + PTR_ADDI A0, A0, 0x08 + PTR_ADDI B0, B0, 0x20 + + beq ZERO, TL, .L_dgemm_1x4_M1_TL1_END +.align 5 +.L_dgemm_1x4_M1_TL1: + KERNEL8x1x4 + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_1x4_M1_TL1 +.L_dgemm_1x4_M1_TL1_END: + KERNEL8x1x4_END +.L_dgemm_1x4_M1_L7: + /* if (!(L & 7)) goto L_M1_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_1x4_M1_L0 +.align 5 +.L_dgemm_1x4_M1_L71: + xvldrepl.d U0, A0, 0x00 + xvld U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x08 + PTR_ADDI B0, B0, 0x20 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_1x4_M1_L71 +.L_dgemm_1x4_M1_L0: + // Load C + fld.d $f0, C0, 0x00 + fld.d $f1, C1, 0x00 + fld.d $f2, C2, 0x00 + fld.d $f3, C3, 0x00 + xvinsve0.d U0, U1, 0x01 + xvinsve0.d U0, U2, 0x02 + xvinsve0.d U0, U3, 0x03 + GSUB xvf, d, U0, U0, D0 +.endm + +.macro dgemm_2x4 +.L_dgemm_2x4: + /* Load 2 * 64 from A0 */ + xvldrepl.d U0, A0, 0x00 + xvldrepl.d U1, A0, 0x08 + xvld U4, B0, 0x00 + xvfmul.d D0, U0, U4 + xvfmul.d D1, U1, U4 + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x20 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_M2_L7 */ + beq ZERO,TL, .L_dgemm_2x4_M2_L7 + + xvldrepl.d U8, A0, 0x00 + xvldrepl.d U9, A0, 0x08 + + PTR_ADDI TL, TL, -1 + + xvld U12, B0, 0x00 + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x20 + + beq ZERO, TL, .L_dgemm_2x4_M2_TL1_END +.align 5 +.L_dgemm_2x4_M2_TL1: + KERNEL8x2x4 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_2x4_M2_TL1 +.L_dgemm_2x4_M2_TL1_END: + KERNEL8x2x4_END + +.L_dgemm_2x4_M2_L7: + /* if (!(L & 7)) goto L_M2_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_2x4_M2_L0 +.align 5 +.L_dgemm_2x4_M2_L71: + xvldrepl.d U0, A0, 0x00 + xvldrepl.d U1, A0, 0x08 + + 
xvld U4, B0, 0x00 + + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x20 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_2x4_M2_L71 +.L_dgemm_2x4_M2_L0: + xvpackev.d D4, D1, D0 + xvpackod.d D5, D1, D0 + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 + + xvpermi.q U0, U2, 0x02 + xvpermi.q U1, U3, 0x02 + + GSUB xvf, d, U0, U0, D4, U1, U1, D5 +.endm + +.macro dgemm_4x4 +.L_dgemm_4x4: + /* Load 4 * 64 from A0 */ + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + + xvldrepl.d U5, B0, 0x08 + /* line 2 */ + xvfmul.d D4, U0, U5 + + xvldrepl.d U6, B0, 0x10 + /* line 3 */ + xvfmul.d D8, U0, U6 + + xvldrepl.d U7, B0, 0x18 + /* line 4 */ + xvfmul.d D12, U0, U7 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x20 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_M4_L7 */ + beq ZERO,TL, .L_dgemm_4x4_M4_L7 + + xvld U8, A0, 0x00 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + xvldrepl.d U14, B0, 0x10 + xvldrepl.d U15, B0, 0x18 + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x20 + + beq ZERO, TL, .L_dgemm_4x4_M4_TL1_END +.align 5 +.L_dgemm_4x4_M4_TL1: /* TL-- */ + KERNEL8x4x4 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_4x4_M4_TL1 +.L_dgemm_4x4_M4_TL1_END: + KERNEL8x4x4_END +.L_dgemm_4x4_M4_L7: + /* if (!(L & 7)) goto L_M4_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_4x4_M4_L0 +.align 5 +.L_dgemm_4x4_M4_L71: + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + xvldrepl.d U4, B0, 0x08 + xvfmadd.d D4, U0, U4, D4 + + xvldrepl.d U4, B0, 0x10 + xvfmadd.d D8, U0, U4, D8 + + xvldrepl.d U4, B0, 0x18 + xvfmadd.d D12, U0, U4, D12 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x20 + + PTR_ADDI TL, TL, -1 + blt 
ZERO,TL, .L_dgemm_4x4_M4_L71 + .L_dgemm_4x4_M4_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + /* Load C2 */ + xvld U2, C2, 0x00 + /* Load C3 */ + xvld U3, C3, 0x00 + + GSUB xvf, d, U0, U0, D0, U1, U1, D4, U2, U2, D8, U3, U3, D12 +.endm + +.macro dgemm_8x4 +.L_dgemm_8x4: + /* Load 8 * 64 from A0 */ + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + xvfmul.d D1, U1, U4 + + xvldrepl.d U5, B0, 0x08 + /* line 2 */ + xvfmul.d D4, U0, U5 + xvfmul.d D5, U1, U5 + + xvldrepl.d U6, B0, 0x10 + /* line 3 */ + xvfmul.d D8, U0, U6 + xvfmul.d D9, U1, U6 + + xvldrepl.d U7, B0, 0x18 + /* line 4 */ + xvfmul.d D12, U0, U7 + xvfmul.d D13, U1, U7 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x20 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_M8_L7 */ + beq ZERO,TL, .L_dgemm_8x4_M8_L7 + + xvld U8, A0, 0x00 + xvld U9, A0, 0x20 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + xvldrepl.d U14, B0, 0x10 + xvldrepl.d U15, B0, 0x18 + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x20 + + beq ZERO, TL, .L_dgemm_8x4_M8_TL1_END +.align 5 +.L_dgemm_8x4_M8_TL1: /* TL-- */ + KERNEL8x8x4 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_8x4_M8_TL1 + +.L_dgemm_8x4_M8_TL1_END: + KERNEL8x8x4_END + +.L_dgemm_8x4_M8_L7: + /* if (!(L & 7)) goto L_M8_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_8x4_M8_L0 +.align 5 +.L_dgemm_8x4_M8_L71: + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + xvldrepl.d U6, B0, 0x10 + xvfmadd.d D8, U0, U6, D8 + xvfmadd.d D9, U1, U6, D9 + + xvldrepl.d U7, B0, 0x18 + xvfmadd.d D12, U0, U7, D12 + xvfmadd.d D13, U1, U7, D13 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x20 + + PTR_ADDI TL, 
TL, -1 + blt ZERO,TL, .L_dgemm_8x4_M8_L71 +.L_dgemm_8x4_M8_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 + + /* Load C2 */ + xvld U4, C2, 0x00 + xvld U5, C2, 0x20 + + /* Load C3 */ + xvld U6, C3, 0x00 + xvld U7, C3, 0x20 + + GSUB xvf, d, U0, U0, D0, U1, U1, D1, \ + U2, U2, D4, U3, U3, D5, \ + U4, U4, D8, U5, U5, D9, \ + U6, U6, D12, U7, U7, D13 +.endm + +.macro dgemm_4x2 +.L_dgemm_4x2: + /* Load 4 * 64 from A0 */ + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + + xvldrepl.d U5, B0, 0x08 + /* line 2 */ + xvfmul.d D4, U0, U5 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x10 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_dgemm_4x2_N3_M4_L7 */ + beq ZERO,TL, .L_dgemm_4x2_N3_M4_L7 + + xvld U8, A0, 0x00 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x10 + + beq ZERO, TL, .L_dgemm_4x2_N3_M4_TL1_END +.align 5 +.L_dgemm_4x2_N3_M4_TL1: /* TL-- */ + KERNEL8x4x2 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_4x2_N3_M4_TL1 +.L_dgemm_4x2_N3_M4_TL1_END: + KERNEL8x4x2_END + +.L_dgemm_4x2_N3_M4_L7: + /* if (!(L & 7)) goto L_dgemm_4x2_N3_M4_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_4x2_N3_M4_L0 +.align 5 +.L_dgemm_4x2_N3_M4_L71: + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x10 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_4x2_N3_M4_L71 + +.L_dgemm_4x2_N3_M4_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + GSUB xvf, d, U0, U0, D0, U1, U1, D4 +.endm + +.macro dgemm_2x2 +.L_dgemm_2x2: + /* Load 2 * 64 from A0 */ + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + + xvldrepl.d 
U4, B0, 0x08 + /* line 2 */ + xvfmul.d D4, U0, U4 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x10 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_dgemm_2x2_N3_M2_L7 */ + beq ZERO,TL, .L_dgemm_2x2_N3_M2_L7 + + xvld U8, A0, 0x00 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x10 + + beq ZERO, TL, .L_dgemm_2x2_N3_M2_TL1_END +.align 5 +.L_dgemm_2x2_N3_M2_TL1: /* TL-- */ + KERNEL8x2x2 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_2x2_N3_M2_TL1 +.L_dgemm_2x2_N3_M2_TL1_END: + KERNEL8x2x2_END + +.L_dgemm_2x2_N3_M2_L7: + /* if (!(L & 7)) goto L_dgemm_2x2_N3_M2_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_2x2_N3_M2_L0 +.align 5 +.L_dgemm_2x2_N3_M2_L71: + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x10 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_2x2_N3_M2_L71 +.L_dgemm_2x2_N3_M2_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + /* Load C1 */ + xvld U1, C1, 0x00 + GSUB xvf, d, U0, U0, D0, U1, U1, D4 +.endm + +.macro dgemm_8x2 +.L_dgemm_8x2: + /* Load 8 * 64 from A0 */ + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + xvfmul.d D1, U1, U4 + + xvldrepl.d U5, B0, 0x08 + /* line 2 */ + xvfmul.d D4, U0, U5 + xvfmul.d D5, U1, U5 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x10 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_dgemm_8x2_N3_M8_L7 */ + beq ZERO,TL, .L_dgemm_8x2_N3_M8_L7 + + xvld U8, A0, 0x00 + xvld U9, A0, 0x20 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x10 + + beq ZERO, TL, .L_dgemm_8x2_N3_M8_TL1_END +.align 5 
+.L_dgemm_8x2_N3_M8_TL1: /* TL-- */ + KERNEL8x8x2 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_8x2_N3_M8_TL1 +.L_dgemm_8x2_N3_M8_TL1_END: + KERNEL8x8x2_END + +.L_dgemm_8x2_N3_M8_L7: + /* if (!(L & 7)) goto L_dgemm_8x2_N3_M8_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_8x2_N3_M8_L0 +.align 5 +.L_dgemm_8x2_N3_M8_L71: + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x10 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_8x2_N3_M8_L71 + +.L_dgemm_8x2_N3_M8_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + /* Load C1 */ + xvld U2, C1, 0x00 + xvld U3, C1, 0x20 + GSUB xvf, d, U0, U0, D0, U1, U1, D1, U2, U2, D4, U3, U3, D5 +.endm + +.macro dgemm_16x2 +.L_dgemm_16x2: + /* Load 16 * 64 from A0 + * U0 = {a3, a2, a1, a0} + * U1 = {a7, a6, a5, a4} + * U2 = {a11, a10, a9, a8} + * U3 = {a15, a14, a13, a12} + */ + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + xvld U2, A0, 0x40 + xvld U3, A0, 0x60 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + xvfmul.d D1, U1, U4 + xvfmul.d D2, U2, U4 + xvfmul.d D3, U3, U4 + + xvldrepl.d U5, B0, 0x08 + /* line 2 */ + xvfmul.d D4, U0, U5 + xvfmul.d D5, U1, U5 + xvfmul.d D6, U2, U5 + xvfmul.d D7, U3, U5 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x10 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_N3_L7 */ + beq ZERO,TL, .L_dgemm_16x2_N3_L7 + + xvld U8, A0, 0x00 + xvld U9, A0, 0x20 + xvld U10, A0, 0x40 + xvld U11, A0, 0x60 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x10 + + beq ZERO, TL, .L_dgemm_16x2_N3_TL1_END +.align 5 +.L_dgemm_16x2_N3_TL1: /* TL-- */ + KERNEL8x16x2 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, 
.L_dgemm_16x2_N3_TL1 +.L_dgemm_16x2_N3_TL1_END: + KERNEL8x16x2_END + +.L_dgemm_16x2_N3_L7: + /* if (!(L & 7)) goto L_dgemm_16x2_N3_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_16x2_N3_L0 +.align 5 +.L_dgemm_16x2_N3_L71: + /* Load 16 * 64 from A0 */ + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + xvld U2, A0, 0x40 + xvld U3, A0, 0x60 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + xvfmadd.d D5, U1, U5, D5 + xvfmadd.d D6, U2, U5, D6 + xvfmadd.d D7, U3, U5, D7 + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x10 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_16x2_N3_L71 + +.L_dgemm_16x2_N3_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 + /* Load C1 */ + xvld U4, C1, 0x00 + xvld U5, C1, 0x20 + xvld U6, C1, 0x40 + xvld U7, C1, 0x60 + GSUB xvf, d, U0, U0, D0, U1, U1, D1, U2, U2, D2, U3, U3, D3, \ + U4, U4, D4, U5, U5, D5, U6, U6, D6, U7, U7, D7 +.endm + +.macro dgemm_2x1 +.L_dgemm_2x1: + /* Load 2 * 64 from A0 */ + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x08 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_dgemm_2x1_N1_M2_L7 */ + beq ZERO,TL, .L_dgemm_2x1_N1_M2_L7 + + xvld U8, A0, 0x00 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x08 + + beq ZERO, TL, .L_dgemm_2x1_N1_M2_TL1_END +.align 5 +.L_dgemm_2x1_N1_M2_TL1: /* TL-- */ + KERNEL8x2x1 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_2x1_N1_M2_TL1 +.L_dgemm_2x1_N1_M2_TL1_END: + KERNEL8x2x1_END + +.L_dgemm_2x1_N1_M2_L7: + /* if (!(L & 7)) goto L_dgemm_2x1_N1_M2_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_2x1_N1_M2_L0 +.align 5 +.L_dgemm_2x1_N1_M2_L71: + xvld U0, A0, 0x00 
+ + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x10 + PTR_ADDI B0, B0, 0x08 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_2x1_N1_M2_L71 +.L_dgemm_2x1_N1_M2_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + GSUB xvf, d, U0, U0, D0 +.endm + +.macro dgemm_4x1 +.L_dgemm_4x1: + /* Load 4 * 64 from A0 */ + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x08 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_dgemm_4x1_N1_M4_L7 */ + beq ZERO,TL, .L_dgemm_4x1_N1_M4_L7 + + xvld U8, A0, 0x00 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x08 + + beq ZERO, TL, .L_dgemm_4x1_N1_M4_TL1_END +.align 5 +.L_dgemm_4x1_N1_M4_TL1: /* TL-- */ + KERNEL8x4x1 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_4x1_N1_M4_TL1 +.L_dgemm_4x1_N1_M4_TL1_END: + KERNEL8x4x1_END + +.L_dgemm_4x1_N1_M4_L7: + /* if (!(L & 7)) goto L_dgemm_4x1_N1_M4_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_4x1_N1_M4_L0 +.align 5 +.L_dgemm_4x1_N1_M4_L71: + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x20 + PTR_ADDI B0, B0, 0x08 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_4x1_N1_M4_L71 +.L_dgemm_4x1_N1_M4_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + GSUB xvf, d, U0, U0, D0 +.endm + +.macro dgemm_8x1 +.L_dgemm_8x1: + /* Load 8 * 64 from A0 */ + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + xvfmul.d D1, U1, U4 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x08 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_dgemm_8x1_N1_M8_L7 */ + beq ZERO,TL, .L_dgemm_8x1_N1_M8_L7 + + xvld U8, A0, 0x00 + xvld U9, A0, 0x20 + + PTR_ADDI TL, 
TL, -1 + + xvldrepl.d U12, B0, 0x00 + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x08 + + beq ZERO, TL, .L_dgemm_8x1_N1_M8_TL1_END +.align 5 +.L_dgemm_8x1_N1_M8_TL1: /* TL-- */ + KERNEL8x8x1 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_8x1_N1_M8_TL1 + +.L_dgemm_8x1_N1_M8_TL1_END: + KERNEL8x8x1_END + +.L_dgemm_8x1_N1_M8_L7: + /* if (!(L & 7)) goto L_dgemm_8x1_N1_M8_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_8x1_N1_M8_L0 +.align 5 +.L_dgemm_8x1_N1_M8_L71: + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x40 + PTR_ADDI B0, B0, 0x08 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_8x1_N1_M8_L71 +.L_dgemm_8x1_N1_M8_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + GSUB xvf, d, U0, U0, D0, U1, U1, D1 +.endm + +.macro dgemm_16x1 +.L_dgemm_16x1: + /* Load 16 * 64 from A0 + * U0 = {a3, a2, a1, a0} + * U1 = {a7, a6, a5, a4} + * U2 = {a11, a10, a9, a8} + * U3 = {a15, a14, a13, a12} + */ + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + xvld U2, A0, 0x40 + xvld U3, A0, 0x60 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + xvfmul.d D1, U1, U4 + xvfmul.d D2, U2, U4 + xvfmul.d D3, U3, U4 + + /* Add stride for A0 and B0 */ + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x08 + /* Reduce L */ + PTR_ADDI L, L, -1 + PTR_SRAI TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_dgemm_16x1_N1_L7 */ + beq ZERO,TL, .L_dgemm_16x1_N1_L7 + + xvld U8, A0, 0x00 + xvld U9, A0, 0x20 + xvld U10, A0, 0x40 + xvld U11, A0, 0x60 + + PTR_ADDI TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x08 + + beq ZERO, TL, .L_dgemm_16x1_N1_TL1_END +.align 5 +.L_dgemm_16x1_N1_TL1: /* TL-- */ + KERNEL8x16x1 + + PTR_ADDI TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_16x1_N1_TL1 +.L_dgemm_16x1_N1_TL1_END: + KERNEL8x16x1_END + +.L_dgemm_16x1_N1_L7: + /* if (!(L & 7)) goto L_dgemm_16x1_N1_L0 */ + andi TL, L, 7 + beq TL, 
ZERO,.L_dgemm_16x1_N1_L0 +.align 5 +.L_dgemm_16x1_N1_L71: + /* Load 16 * 64 from A0 */ + xvld U0, A0, 0x00 + xvld U1, A0, 0x20 + xvld U2, A0, 0x40 + xvld U3, A0, 0x60 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + xvfmadd.d D1, U1, U4, D1 + xvfmadd.d D2, U2, U4, D2 + xvfmadd.d D3, U3, U4, D3 + + /* Add stride for A0, B0 */ + PTR_ADDI A0, A0, 0x80 + PTR_ADDI B0, B0, 0x08 + + PTR_ADDI TL, TL, -1 + blt ZERO,TL, .L_dgemm_16x1_N1_L71 +.L_dgemm_16x1_N1_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + xvld U1, C0, 0x20 + xvld U2, C0, 0x40 + xvld U3, C0, 0x60 + GSUB xvf, d, U0, U0, D0, U1, U1, D1, U2, U2, D2, U3, U3, D3 +.endm + +.macro dgemm_1x2 +.L_dgemm_1x2: // See dgemm_kernel_16x4.S + /* Load 1 * 64 from A0 */ + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + + xvldrepl.d U4, B0, 0x08 + /* line 2 */ + xvfmul.d D4, U0, U4 + + /* Add stride for A0 and B0 */ + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x10 + /* Reduce L */ + addi.d L, L, -1 + srai.d TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_N3_M1_L7 */ + beq ZERO,TL, .L_dgemm_1x2_N3_M1_L7 + + xvld U8, A0, 0x00 + + addi.d TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + xvldrepl.d U13, B0, 0x08 + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x10 + beq ZERO, TL, .L_dgemm_1x2_N3_M1_TL1_END +.L_dgemm_1x2_N3_M1_TL1: /* TL-- */ + KERNEL8x1x2 + addi.d TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_dgemm_1x2_N3_M1_TL1 +.L_dgemm_1x2_N3_M1_TL1_END: + KERNEL8x1x2_END +.L_dgemm_1x2_N3_M1_L7: + /* if (!(L & 7)) goto L_dgemm_1x2_N3_M1_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_dgemm_1x2_N3_M1_L0 +.L_dgemm_1x2_N3_M1_L71: + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + xvldrepl.d U5, B0, 0x08 + xvfmadd.d D4, U0, U5, D4 + + /* Add stride for A0, B0 */ + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x10 + + addi.d TL, TL, -1 + blt ZERO,TL, .L_dgemm_1x2_N3_M1_L71 +.L_dgemm_1x2_N3_M1_L0: + xvld U0, C0, 0x00 + xvld U1, C1, 0x00 + xvinsve0.d U0, U1, 0x01 + xvinsve0.d D0, D4, 0x01 + GSUB xvf, d, 
U0, U0, D0 +.endm + +.macro dgemm_1x1 +.L_dgemm_1x1: + /* Load 1 * 64 from A0 */ + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + /* line 1 */ + xvfmul.d D0, U0, U4 + + /* Add stride for A0 and B0 */ + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x08 + /* Reduce L */ + addi.d L, L, -1 + srai.d TL, L, 3 /* TL = (L-1) >> 3 */ + /* if (TL < 1) goto L_N1_M1_L7 */ + beq ZERO,TL, .L_N1_M1_L7 + + xvld U8, A0, 0x00 + + addi.d TL, TL, -1 + + xvldrepl.d U12, B0, 0x00 + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x08 + + beq ZERO, TL, .L_N1_M1_TL1_END +.L_N1_M1_TL1: /* TL-- */ + KERNEL8x1x1 + addi.d TL, TL, -1 /* TL-- */ + blt ZERO,TL, .L_N1_M1_TL1 +.L_N1_M1_TL1_END: + KERNEL8x1x1_END +.L_N1_M1_L7: + /* if (!(L & 7)) goto L_N1_M1_L0 */ + andi TL, L, 7 + beq TL, ZERO,.L_N1_M1_L0 + +.L_N1_M1_L71: + xvld U0, A0, 0x00 + + xvldrepl.d U4, B0, 0x00 + xvfmadd.d D0, U0, U4, D0 + + /* Add stride for A0, B0 */ + addi.d A0, A0, 0x08 + addi.d B0, B0, 0x08 + + addi.d TL, TL, -1 + blt ZERO,TL, .L_N1_M1_L71 +.L_N1_M1_L0: + /* Load C0 */ + xvld U0, C0, 0x00 + GSUB xvf, d, U0, U0, D0 +.endm From d15e0a055cf73502f0639cec829a5396a780c753 Mon Sep 17 00:00:00 2001 From: gxw Date: Wed, 27 Sep 2023 09:40:40 +0800 Subject: [PATCH 025/125] LoongArch64: Fixed compilation issues when enable DYNAMIC_ARCH --- kernel/loongarch64/dgemv_n_8_lasx.S | 10 +++++----- kernel/loongarch64/dgemv_t_8_lasx.S | 6 +++--- kernel/loongarch64/sgemv_n_8_lasx.S | 10 +++++----- kernel/loongarch64/sgemv_t_8_lasx.S | 6 +++--- lapack/laswp/loongarch64/Makefile | 5 +++++ 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/kernel/loongarch64/dgemv_n_8_lasx.S b/kernel/loongarch64/dgemv_n_8_lasx.S index c6523f9ab..a49bf9bb1 100644 --- a/kernel/loongarch64/dgemv_n_8_lasx.S +++ b/kernel/loongarch64/dgemv_n_8_lasx.S @@ -341,7 +341,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
fmadd.d $f10, $f12, $f2, $f10 .endm -.macro DGEMV_N XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req +.macro DGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req PTR_SRLI J, N, 3 beqz J, .L_\XW\()_N_7 PTR_SLLI K_LDA, LDA, 3 @@ -541,13 +541,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .hword .L_GAP_1_0 - .L_GAP_TABLE .hword .L_GAP_1_1 - .L_GAP_TABLE .L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */ - DGEMV_N GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1 + DGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1 .L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */ - DGEMV_N GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1 + DGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1 .L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */ - DGEMV_N GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1 + DGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1 .L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */ - DGEMV_N GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1 + DGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1 .L_END: pop_if_used 17 + 7, 24 + 4 jirl $r0, $r1, 0x0 diff --git a/kernel/loongarch64/dgemv_t_8_lasx.S b/kernel/loongarch64/dgemv_t_8_lasx.S index 7f57c1d88..71f942b0f 100644 --- a/kernel/loongarch64/dgemv_t_8_lasx.S +++ b/kernel/loongarch64/dgemv_t_8_lasx.S @@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. GMADD xvf, d, TP0, A0, X0, TP0, TP1, A2, X0, TP1 .endm -.macro DGEMV_T XW:req X8:req, X4:req +.macro DGEMV_T_LASX XW:req X8:req, X4:req PTR_SRLI J, N, 3 beqz J, .L_\XW\()_N_7 PTR_SLLI K_LDA, LDA, 3 @@ -472,9 +472,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.hword .L_GAP_0 - .L_GAP_TABLE .hword .L_GAP_1 - .L_GAP_TABLE .L_GAP_0: /* if (incx == 1) */ - DGEMV_T GAP_0, X8, X4 + DGEMV_T_LASX GAP_0, X8, X4 .L_GAP_1: /* if (incx != 1) */ - DGEMV_T GAP_1, X8_GAP, X4_GAP + DGEMV_T_LASX GAP_1, X8_GAP, X4_GAP .L_END: pop_if_used 17 + 8, 24 + 3 jirl $r0, $r1, 0x0 diff --git a/kernel/loongarch64/sgemv_n_8_lasx.S b/kernel/loongarch64/sgemv_n_8_lasx.S index da172ca50..52ffc320e 100644 --- a/kernel/loongarch64/sgemv_n_8_lasx.S +++ b/kernel/loongarch64/sgemv_n_8_lasx.S @@ -274,7 +274,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. GST f, s, Y0_F, Y, 0 .endm -.macro SGEMV_N XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req +.macro SGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req PTR_SRLI J, N, 3 beqz J, .L_\XW\()_N_7 PTR_SLLI K_LDA, LDA, 3 @@ -450,13 +450,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .hword .L_GAP_1_0 - .L_GAP_TABLE .hword .L_GAP_1_1 - .L_GAP_TABLE .L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */ - SGEMV_N GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1 + SGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1 .L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */ - SGEMV_N GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1 + SGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1 .L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */ - SGEMV_N GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1 + SGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1 .L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */ - SGEMV_N GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1 + SGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1 .L_END: pop_if_used 17 + 7, 19 jirl $r0, $r1, 0x0 diff --git a/kernel/loongarch64/sgemv_t_8_lasx.S b/kernel/loongarch64/sgemv_t_8_lasx.S index dde3f4a30..f4bfffb42 100644 --- a/kernel/loongarch64/sgemv_t_8_lasx.S +++ b/kernel/loongarch64/sgemv_t_8_lasx.S 
@@ -160,7 +160,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1 .endm -.macro SGEMV_T XW:req X8:req, X4:req +.macro SGEMV_T_LASX XW:req X8:req, X4:req PTR_SRLI J, N, 3 beqz J, .L_\XW\()_N_7 PTR_SLLI K_LDA, LDA, 3 @@ -396,9 +396,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .hword .L_GAP_0 - .L_GAP_TABLE .hword .L_GAP_1 - .L_GAP_TABLE .L_GAP_0: /* if (incx == 1) */ - SGEMV_T GAP_0, X8, X4 + SGEMV_T_LASX GAP_0, X8, X4 .L_GAP_1: /* if (incx != 1) */ - SGEMV_T GAP_1, X8_GAP, X4_GAP + SGEMV_T_LASX GAP_1, X8_GAP, X4_GAP .L_END: pop_if_used 17 + 8, 18 jirl $r0, $r1, 0x0 diff --git a/lapack/laswp/loongarch64/Makefile b/lapack/laswp/loongarch64/Makefile index 71e5a87cb..1c85667ec 100644 --- a/lapack/laswp/loongarch64/Makefile +++ b/lapack/laswp/loongarch64/Makefile @@ -1,6 +1,11 @@ TOPDIR = ../../.. include ../../../Makefile.system +ifeq ($(DYNAMIC_ARCH), 1) +LASWP = ../generic/laswp_k_4.c +ZLASWP = ../generic/zlaswp_k_4.c +endif + ifndef LASWP LASWP = ../generic/laswp_k.c endif From 211bb35dee47ba6ade41a28171738f62e5621f4b Mon Sep 17 00:00:00 2001 From: gxw Date: Wed, 27 Sep 2023 10:10:41 +0800 Subject: [PATCH 026/125] gh-actions: Adding DYNAMIC_ARCH test for LoongArch64 --- .github/workflows/loongarch64.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml index 5501e98e0..e0236ca86 100644 --- a/.github/workflows/loongarch64.yml +++ b/.github/workflows/loongarch64.yml @@ -18,6 +18,9 @@ jobs: - target: LOONGSON2K1000 triple: loongarch64-unknown-linux-gnu opts: NO_SHARED=1 TARGET=LOONGSON2K1000 + - target: DYNAMIC_ARCH + triple: loongarch64-unknown-linux-gnu + opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC steps: - name: Checkout repository From a92dc25fb3a257ad418da2d1227b092db49bc2b4 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 29 Sep 2023 08:08:29 -0500 Subject: [PATCH 027/125] 
Fix Makefile.power for xlf --- Makefile.power | 4 ++++ f_check | 3 +++ 2 files changed, 7 insertions(+) diff --git a/Makefile.power b/Makefile.power index 33702c932..46afb2d4a 100644 --- a/Makefile.power +++ b/Makefile.power @@ -70,8 +70,12 @@ else FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif else +ifeq ($(F_COMPILER), IBM) +FCOMMON_OPT += -O2 -qrecur -qnosave +else FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif +endif else FCOMMON_OPT += -O2 -Mrecursive endif diff --git a/f_check b/f_check index f30231bc4..31f4376d0 100755 --- a/f_check +++ b/f_check @@ -117,6 +117,9 @@ else vendor=PGI openmp='-mp' ;; + *xlf*) + vendor=IBM + ;; *) vendor=G77 openmp='' From a69367c43b28e2e0029d42092e791415565fe804 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 29 Sep 2023 09:29:41 -0500 Subject: [PATCH 028/125] Fix Makefile.system for OpenXL. --- Makefile.system | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.system b/Makefile.system index ae6db40b0..af840f029 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1170,6 +1170,8 @@ CCOMMON_OPT += -DF_INTERFACE_IBM FEXTRALIB += -lxlf90 ifeq ($(C_COMPILER), GCC) FCOMMON_OPT += -qextname +else ifeq ($(C_COMPILER), CLANG) +FCOMMON_OPT += -qextname endif # FCOMMON_OPT += -qarch=440 ifdef BINARY64 From a11e1e10f436f4601275669bdf7b951e6e8df0e8 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Fri, 29 Sep 2023 10:32:34 -0500 Subject: [PATCH 029/125] powerpc: Fix build errors with xlf This patch fixes errors when using xlf as fortran compiler on Linux. Tested with gcc/xlf and clang/xlf compiler combinations. 
--- Makefile.power | 4 ++++ Makefile.system | 2 +- f_check | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Makefile.power b/Makefile.power index 33702c932..46afb2d4a 100644 --- a/Makefile.power +++ b/Makefile.power @@ -70,8 +70,12 @@ else FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif else +ifeq ($(F_COMPILER), IBM) +FCOMMON_OPT += -O2 -qrecur -qnosave +else FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif +endif else FCOMMON_OPT += -O2 -Mrecursive endif diff --git a/Makefile.system b/Makefile.system index ae6db40b0..b1a357fdf 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1168,7 +1168,7 @@ endif ifeq ($(F_COMPILER), IBM) CCOMMON_OPT += -DF_INTERFACE_IBM FEXTRALIB += -lxlf90 -ifeq ($(C_COMPILER), GCC) +ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG)) FCOMMON_OPT += -qextname endif # FCOMMON_OPT += -qarch=440 diff --git a/f_check b/f_check index f30231bc4..31f4376d0 100755 --- a/f_check +++ b/f_check @@ -117,6 +117,9 @@ else vendor=PGI openmp='-mp' ;; + *xlf*) + vendor=IBM + ;; *) vendor=G77 openmp='' From ccbb91e4a739ca05b3cefb130863f89dcc1e92f3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 12:46:34 +0200 Subject: [PATCH 030/125] fix improper function prototypes (empty parentheses) --- common_thread.h | 20 ++++++++++---------- cpuid_x86.c | 10 +++++----- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/common_thread.h b/common_thread.h index 06a7a1a38..70c724597 100644 --- a/common_thread.h +++ b/common_thread.h @@ -141,14 +141,14 @@ static __inline int num_cpu_avail(int level) { #ifdef USE_OPENMP int openmp_nthreads; - openmp_nthreads=omp_get_max_threads(); + openmp_nthreads=omp_get_max_threads(void); #endif #ifndef USE_OPENMP if (blas_cpu_number == 1 #endif #ifdef USE_OPENMP - if (openmp_nthreads == 1 || omp_in_parallel() + if (openmp_nthreads == 1 || omp_in_parallel(void) #endif ) return 1; @@ -192,27 +192,27 @@ int 
exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer); int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, - void *c, BLASLONG ldc, int (*function)(), int threads); + void *c, BLASLONG ldc, int (*function)(void), int threads); -int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); +int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *, void *, BLASLONG); -int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); +int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG), void *, void *, BLASLONG); -int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); +int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG); -int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG); +int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG, BLASLONG); int trsm_thread(int mode, BLASLONG m, BLASLONG n, double alpha_r, double alpha_i, void *a, BLASLONG lda, - void *c, BLASLONG ldc, int (*function)(), void *buffer); + void *c, BLASLONG ldc, int (*function)(void), void *buffer); -int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); +int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void*, void*, 
BLASLONG); int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *offsetA, BLASLONG lda, void *offsetB, BLASLONG jb, - void *ipiv, BLASLONG offset, int (*function)(), void *buffer); + void *ipiv, BLASLONG offset, int (*function)(void), void *buffer); #endif /* ENDIF ASSEMBLER */ diff --git a/cpuid_x86.c b/cpuid_x86.c index c485f3ddf..fdcead8bd 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -194,7 +194,7 @@ static C_INLINE void xgetbv(int op, int * eax, int * edx){ } #endif -int support_avx(){ +int support_avx(void){ #ifndef NO_AVX int eax, ebx, ecx, edx; int ret=0; @@ -212,7 +212,7 @@ int support_avx(){ #endif } -int support_avx2(){ +int support_avx2(void){ #ifndef NO_AVX2 int eax, ebx, ecx=0, edx; int ret=0; @@ -228,7 +228,7 @@ int support_avx2(){ #endif } -int support_avx512(){ +int support_avx512(void){ #if !defined(NO_AVX) && !defined(NO_AVX512) int eax, ebx, ecx, edx; int ret=0; @@ -250,7 +250,7 @@ int support_avx512(){ #endif } -int support_avx512_bf16(){ +int support_avx512_bf16(void){ #if !defined(NO_AVX) && !defined(NO_AVX512) int eax, ebx, ecx, edx; int ret=0; @@ -271,7 +271,7 @@ int support_avx512_bf16(){ #define BIT_AMX_BF16 0x00400000 #define BIT_AMX_ENBD 0x00060000 -int support_amx_bf16() { +int support_amx_bf16(void) { #if !defined(NO_AVX) && !defined(NO_AVX512) int eax, ebx, ecx, edx; int ret=0; From c4bd4a2e5dbbb648ac8198f6193657ea403d088b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 12:49:24 +0200 Subject: [PATCH 031/125] fix improper function prototypes (empty parentheses) --- driver/level3/gemm_thread_m.c | 2 +- driver/level3/gemm_thread_mn.c | 2 +- driver/level3/gemm_thread_n.c | 2 +- driver/level3/gemm_thread_variable.c | 2 +- driver/level3/syrk_thread.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/driver/level3/gemm_thread_m.c b/driver/level3/gemm_thread_m.c index 8813e5529..353ae0be9 100644 --- a/driver/level3/gemm_thread_m.c +++ b/driver/level3/gemm_thread_m.c @@ -40,7 +40,7 @@ 
#include #include "common.h" -int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) { +int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) { blas_queue_t queue[MAX_CPU_NUMBER]; BLASLONG range[MAX_CPU_NUMBER + 1]; diff --git a/driver/level3/gemm_thread_mn.c b/driver/level3/gemm_thread_mn.c index 6b52df884..4f370999a 100644 --- a/driver/level3/gemm_thread_mn.c +++ b/driver/level3/gemm_thread_mn.c @@ -60,7 +60,7 @@ static const int divide_rule[][2] = { 1, 61}, { 2, 31}, { 7, 9}, { 8, 8}, }; -int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) { +int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) { blas_queue_t queue[MAX_CPU_NUMBER]; diff --git a/driver/level3/gemm_thread_n.c b/driver/level3/gemm_thread_n.c index 9668841bb..d583456bd 100644 --- a/driver/level3/gemm_thread_n.c +++ b/driver/level3/gemm_thread_n.c @@ -40,7 +40,7 @@ #include #include "common.h" -int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) { +int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) { blas_queue_t queue[MAX_CPU_NUMBER]; BLASLONG range[MAX_CPU_NUMBER + 1]; diff --git a/driver/level3/gemm_thread_variable.c b/driver/level3/gemm_thread_variable.c index 162a75f70..75e49cb1a 100644 --- a/driver/level3/gemm_thread_variable.c +++ b/driver/level3/gemm_thread_variable.c @@ -42,7 +42,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, 
BLASLONG *range_n, - int (*function)(), void *sa, void *sb, BLASLONG divM, BLASLONG divN) { + int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG divM, BLASLONG divN) { blas_queue_t queue[MAX_CPU_NUMBER]; diff --git a/driver/level3/syrk_thread.c b/driver/level3/syrk_thread.c index 12808afd5..a40122e38 100644 --- a/driver/level3/syrk_thread.c +++ b/driver/level3/syrk_thread.c @@ -41,7 +41,7 @@ #include #include "common.h" -int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) { +int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) { blas_queue_t queue[MAX_CPU_NUMBER]; BLASLONG range[MAX_CPU_NUMBER + 1]; From c6b1d8e7a31f96b6e17fdd92fb6dbbbb2ef7b562 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 12:52:06 +0200 Subject: [PATCH 032/125] fix improper function prototypes (empty parentheses) --- driver/others/blas_l1_thread.c | 4 ++-- driver/others/blas_server.c | 2 +- driver/others/memory.c | 10 +++++----- driver/others/openblas_env.c | 18 +++++++++--------- driver/others/openblas_error_handle.c | 2 +- driver/others/openblas_get_config.c | 8 ++++---- driver/others/openblas_get_parallel.c | 6 +++--- driver/others/parameter.c | 2 +- 8 files changed, 26 insertions(+), 26 deletions(-) diff --git a/driver/others/blas_l1_thread.c b/driver/others/blas_l1_thread.c index 06039c952..01b254f5d 100644 --- a/driver/others/blas_l1_thread.c +++ b/driver/others/blas_l1_thread.c @@ -43,7 +43,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, - void *c, BLASLONG ldc, int (*function)(), int nthreads){ + void *c, BLASLONG ldc, int (*function)(void), int nthreads){ blas_queue_t queue[MAX_CPU_NUMBER]; blas_arg_t args 
[MAX_CPU_NUMBER]; @@ -141,7 +141,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, - void *c, BLASLONG ldc, int (*function)(), int nthreads){ + void *c, BLASLONG ldc, int (*function)(void), int nthreads){ blas_queue_t queue[MAX_CPU_NUMBER]; blas_arg_t args [MAX_CPU_NUMBER]; diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index a8a84acbb..2fcb37192 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -93,7 +93,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #endif -extern unsigned int openblas_thread_timeout(); +extern unsigned int openblas_thread_timeout(void); #ifdef SMP_SERVER diff --git a/driver/others/memory.c b/driver/others/memory.c index b27fec431..a74cdabd4 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1999,7 +1999,7 @@ int goto_get_num_procs (void) { return blas_cpu_number; } -void openblas_fork_handler() +void openblas_fork_handler(void) { // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is // built with "make USE_OPENMP=0". 
@@ -2016,9 +2016,9 @@ void openblas_fork_handler() #endif } -extern int openblas_num_threads_env(); -extern int openblas_goto_num_threads_env(); -extern int openblas_omp_num_threads_env(); +extern int openblas_num_threads_env(void); +extern int openblas_goto_num_threads_env(void); +extern int openblas_omp_num_threads_env(void); int blas_get_cpu_number(void){ #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU) @@ -3339,7 +3339,7 @@ static void gotoblas_memory_init(void) { /* Initialization for all function; this function should be called before main */ static int gotoblas_initialized = 0; -extern void openblas_read_env(); +extern void openblas_read_env(void); void CONSTRUCTOR gotoblas_init(void) { diff --git a/driver/others/openblas_env.c b/driver/others/openblas_env.c index 35b2270d4..c65f0f320 100644 --- a/driver/others/openblas_env.c +++ b/driver/others/openblas_env.c @@ -41,15 +41,15 @@ static int openblas_env_goto_num_threads=0; static int openblas_env_omp_num_threads=0; static int openblas_env_omp_adaptive=0; -int openblas_verbose() { return openblas_env_verbose;} -unsigned int openblas_thread_timeout() { return openblas_env_thread_timeout;} -int openblas_block_factor() { return openblas_env_block_factor;} -int openblas_num_threads_env() { return openblas_env_openblas_num_threads;} -int openblas_goto_num_threads_env() { return openblas_env_goto_num_threads;} -int openblas_omp_num_threads_env() { return openblas_env_omp_num_threads;} -int openblas_omp_adaptive_env() { return openblas_env_omp_adaptive;} - -void openblas_read_env() { +int openblas_verbose(void) { return openblas_env_verbose;} +unsigned int openblas_thread_timeout(void) { return openblas_env_thread_timeout;} +int openblas_block_factor(void) { return openblas_env_block_factor;} +int openblas_num_threads_env(void) { return 
openblas_env_openblas_num_threads;} +int openblas_goto_num_threads_env(void) { return openblas_env_goto_num_threads;} +int openblas_omp_num_threads_env(void) { return openblas_env_omp_num_threads;} +int openblas_omp_adaptive_env(void) { return openblas_env_omp_adaptive;} + +void openblas_read_env(void) { int ret=0; env_var_t p; if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p); diff --git a/driver/others/openblas_error_handle.c b/driver/others/openblas_error_handle.c index 9ac72c15d..aa0aa776a 100644 --- a/driver/others/openblas_error_handle.c +++ b/driver/others/openblas_error_handle.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -extern int openblas_verbose(); +extern int openblas_verbose(void); void openblas_warning(int verbose, const char * msg) { int current_verbose; diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index 7a5cbeb62..867d0e361 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -69,13 +69,13 @@ static char* openblas_config_str="" ; #ifdef DYNAMIC_ARCH -char *gotoblas_corename(); +char *gotoblas_corename(void); #endif static char tmp_config_str[256]; -int openblas_get_parallel(); +int openblas_get_parallel(void); -char* CNAME() { +char* CNAME(void) { char tmpstr[20]; strcpy(tmp_config_str, openblas_config_str); #ifdef DYNAMIC_ARCH @@ -90,7 +90,7 @@ char tmpstr[20]; } -char* openblas_get_corename() { +char* openblas_get_corename(void) { #ifndef DYNAMIC_ARCH return CHAR_CORENAME; #else diff --git a/driver/others/openblas_get_parallel.c b/driver/others/openblas_get_parallel.c index 5dfda6e59..becfa0a3a 100644 --- a/driver/others/openblas_get_parallel.c +++ b/driver/others/openblas_get_parallel.c @@ -42,17 +42,17 @@ static int parallel = 0; #ifdef NEEDBUNDERSCORE -int CNAME() { +int CNAME(void) { return parallel; } -int NAME() { +int NAME(void) { return parallel; } #else //The CNAME and NAME are the same. 
-int NAME() { +int NAME(void) { return parallel; } #endif diff --git a/driver/others/parameter.c b/driver/others/parameter.c index 0d5c6aec0..de6bf0de4 100644 --- a/driver/others/parameter.c +++ b/driver/others/parameter.c @@ -40,7 +40,7 @@ #include #include "common.h" -extern int openblas_block_factor(); +extern int openblas_block_factor(void); int get_L2_size(void); #define DEFAULT_GEMM_P 128 From 13ba4edf4373c324bc46a97ec6e96764d44fb873 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 12:53:35 +0200 Subject: [PATCH 033/125] fix function prototypes (empty parentheses) --- interface/lapack/laswp.c | 2 +- interface/lapack/zlaswp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/interface/lapack/laswp.c b/interface/lapack/laswp.c index 0dde33ae3..6544dbc5b 100644 --- a/interface/lapack/laswp.c +++ b/interface/lapack/laswp.c @@ -97,7 +97,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint * blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, - (int(*)())laswp[flag], nthreads); + (int(*)(void))laswp[flag], nthreads); } #endif diff --git a/interface/lapack/zlaswp.c b/interface/lapack/zlaswp.c index b77a40985..7bb4a659e 100644 --- a/interface/lapack/zlaswp.c +++ b/interface/lapack/zlaswp.c @@ -96,7 +96,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint * mode = BLAS_SINGLE | BLAS_COMPLEX; #endif - blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)())laswp[flag], nthreads); + blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)(void))laswp[flag], nthreads); } #endif From 675cd551da315af964b6b097e6e5ab7b35bd6e59 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 12:56:38 +0200 Subject: [PATCH 034/125] fix improper function prototypes (empty parentheses) --- kernel/x86_64/ddot.c | 2 +- kernel/x86_64/drot.c | 2 +- kernel/x86_64/srot.c | 2 +- kernel/x86_64/zdot.c 
| 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/x86_64/ddot.c b/kernel/x86_64/ddot.c index f3b9ee701..569ed2416 100644 --- a/kernel/x86_64/ddot.c +++ b/kernel/x86_64/ddot.c @@ -159,7 +159,7 @@ static int dot_thread_function(BLASLONG n, BLASLONG dummy0, extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, - void *c, BLASLONG ldc, int (*function)(), int nthreads); + void *c, BLASLONG ldc, int (*function)(void), int nthreads); #endif FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) diff --git a/kernel/x86_64/drot.c b/kernel/x86_64/drot.c index 40c9cf19d..6fdf4ae56 100644 --- a/kernel/x86_64/drot.c +++ b/kernel/x86_64/drot.c @@ -169,7 +169,7 @@ static int rot_thread_function(blas_arg_t *args) return 0; } -extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads); +extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads); #endif int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) { diff --git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index a49544616..05724b427 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -171,7 +171,7 @@ static int rot_thread_function(blas_arg_t *args) return 0; } -extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads); +extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads); #endif int CNAME(BLASLONG 
n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) { diff --git a/kernel/x86_64/zdot.c b/kernel/x86_64/zdot.c index 72a712a9e..51efa2dfe 100644 --- a/kernel/x86_64/zdot.c +++ b/kernel/x86_64/zdot.c @@ -92,7 +92,7 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) #if defined(SMP) extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, - void *c, BLASLONG ldc, int (*function)(), int nthreads); + void *c, BLASLONG ldc, int (*function)(void), int nthreads); #endif From cd8ac192a901b38980755583faaa35559df7910a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 12:58:30 +0200 Subject: [PATCH 035/125] fix improper function prototypes (empty parentheses) --- lapack-netlib/LAPACKE/src/lapacke_nancheck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack-netlib/LAPACKE/src/lapacke_nancheck.c b/lapack-netlib/LAPACKE/src/lapacke_nancheck.c index c7d5c33f1..bb894f351 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_nancheck.c +++ b/lapack-netlib/LAPACKE/src/lapacke_nancheck.c @@ -39,7 +39,7 @@ void LAPACKE_set_nancheck( int flag ) nancheck_flag = ( flag ) ? 
1 : 0; } -int LAPACKE_get_nancheck( ) +int LAPACKE_get_nancheck( void ) { char* env; if ( nancheck_flag != -1 ) { From f4f31fb53b5f4069ae19a1840035bb770e237945 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 12:59:44 +0200 Subject: [PATCH 036/125] fix improper function prototypes (empty parentheses) --- lapack/lauum/lauum_L_parallel.c | 4 ++-- lapack/lauum/lauum_U_parallel.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lapack/lauum/lauum_L_parallel.c b/lapack/lauum/lauum_L_parallel.c index 1b32e4519..0f4eaefaa 100644 --- a/lapack/lauum/lauum_L_parallel.c +++ b/lapack/lauum/lauum_L_parallel.c @@ -102,7 +102,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, newarg.c = a; syrk_thread(mode | BLAS_TRANSA_T | BLAS_TRANSB_N | BLAS_UPLO, - &newarg, NULL, NULL, (int (*)(void))HERK_LC, sa, sb, args -> nthreads); + &newarg, NULL, NULL, (int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG))HERK_LC, sa, sb, args -> nthreads); newarg.m = bk; newarg.n = i; @@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, newarg.b = a + (i ) * COMPSIZE; gemm_thread_n(mode | BLAS_TRANSA_T, - &newarg, NULL, NULL, (int (*)(void))TRMM_LCLN, sa, sb, args -> nthreads); + &newarg, NULL, NULL, (int (*)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG))TRMM_LCLN, sa, sb, args -> nthreads); newarg.m = bk; newarg.n = bk; diff --git a/lapack/lauum/lauum_U_parallel.c b/lapack/lauum/lauum_U_parallel.c index f5ea54c88..77bfeebc7 100644 --- a/lapack/lauum/lauum_U_parallel.c +++ b/lapack/lauum/lauum_U_parallel.c @@ -102,7 +102,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, newarg.c = a; syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T, - &newarg, NULL, NULL, (int (*)(void))HERK_UN, sa, sb, args -> nthreads); + &newarg, NULL, NULL, (int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG))HERK_UN, 
sa, sb, args -> nthreads); newarg.m = i; newarg.n = bk; @@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, newarg.b = a + ( i * lda) * COMPSIZE; gemm_thread_m(mode | BLAS_TRANSA_T | BLAS_RSIDE, - &newarg, NULL, NULL, (int (*)(void))TRMM_RCUN, sa, sb, args -> nthreads); + &newarg, NULL, NULL, (int (*)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG))TRMM_RCUN, sa, sb, args -> nthreads); newarg.m = bk; newarg.n = bk; From 1d4aa8d7d52f469724c26c1378ddf9cca778ce99 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 13:00:51 +0200 Subject: [PATCH 037/125] fix improper function prototypes (empty parentheses) --- lapack/potrf/potrf_L_parallel.c | 2 +- lapack/potrf/potrf_U_parallel.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lapack/potrf/potrf_L_parallel.c b/lapack/potrf/potrf_L_parallel.c index 986816d1a..7d6bcd776 100644 --- a/lapack/potrf/potrf_L_parallel.c +++ b/lapack/potrf/potrf_L_parallel.c @@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, newarg.b = a + (i + bk + i * lda) * COMPSIZE; gemm_thread_m(mode | BLAS_RSIDE | BLAS_TRANSA_T | BLAS_UPLO, - &newarg, NULL, NULL, (int (*)(void))TRSM_RCLN, sa, sb, args -> nthreads); + &newarg, NULL, NULL, (int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG))TRSM_RCLN, sa, sb, args -> nthreads); newarg.n = n - i - bk; newarg.k = bk; diff --git a/lapack/potrf/potrf_U_parallel.c b/lapack/potrf/potrf_U_parallel.c index cc6ff9912..1f1427276 100644 --- a/lapack/potrf/potrf_U_parallel.c +++ b/lapack/potrf/potrf_U_parallel.c @@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, newarg.b = a + (i + (i + bk) * lda) * COMPSIZE; gemm_thread_n(mode | BLAS_TRANSA_T, - &newarg, NULL, NULL, (int (*)(void))TRSM_LCUN, sa, sb, args -> nthreads); + &newarg, NULL, NULL, (int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, 
FLOAT *, BLASLONG))TRSM_LCUN, sa, sb, args -> nthreads); newarg.n = n - i - bk; newarg.k = bk; From 60ff5872af834658dbc2cf5d72f375c37052032e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 13:01:44 +0200 Subject: [PATCH 038/125] fix improper function prototypes (empty parentheses) --- utest/ctest.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utest/ctest.h b/utest/ctest.h index 79961badf..b158b4538 100644 --- a/utest/ctest.h +++ b/utest/ctest.h @@ -41,7 +41,7 @@ typedef void (*RunWithDataFunc)(void*); struct ctest { const char* ssname; // suite name const char* ttname; // test name - void (*run)(); + void (*run)(void); int skip; void* data; @@ -159,9 +159,9 @@ struct ctest { void WEAK sname##_teardown(struct sname##_data* data) #define __CTEST_INTERNAL(sname, tname, _skip) \ - void __FNAME(sname, tname)(); \ + void __FNAME(sname, tname)(void); \ __CTEST_STRUCT(sname, tname, _skip, NULL, NULL, NULL) \ - void __FNAME(sname, tname)() + void __FNAME(sname, tname)(void) #ifdef __CTEST_APPLE #define SETUP_FNAME(sname) NULL @@ -366,7 +366,7 @@ void __ctest_addTest(struct ctest *test) #ifndef __CTEST_MSVC /* Add all tests to linked list automatically. 
*/ -static void __ctest_linkTests() +static void __ctest_linkTests(void) { struct ctest ** test; struct ctest ** ctest_begin = (struct ctest **)__PNAME(suite, test); @@ -401,7 +401,7 @@ static void __ctest_linkTests() __ctest_head_p = ctest_begin; } #else //for msvc -static void __ctest_linkTests() +static void __ctest_linkTests(void) { struct ctest ** ctest_start = __ctest_head_p; struct ctest ** test; @@ -450,7 +450,7 @@ static void msg_start(const char* color, const char* title) { print_errormsg(" %s: ", title); } -static void msg_end() { +static void msg_end(void) { if (color_output) { print_errormsg(ANSI_NORMAL); } @@ -634,7 +634,7 @@ static int suite_test_filter(struct ctest* t) { #ifndef __CTEST_NO_TIME -static uint64_t getCurrentTime() { +static uint64_t getCurrentTime(void) { struct timeval now; gettimeofday(&now, NULL); uint64_t now64 = (uint64_t) now.tv_sec; From 2dba455d2e950da2ea40cd9c6696294419b2a538 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 15:33:55 +0200 Subject: [PATCH 039/125] revert accidental changes --- common_thread.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common_thread.h b/common_thread.h index 70c724597..6e18d2a8e 100644 --- a/common_thread.h +++ b/common_thread.h @@ -141,14 +141,14 @@ static __inline int num_cpu_avail(int level) { #ifdef USE_OPENMP int openmp_nthreads; - openmp_nthreads=omp_get_max_threads(void); + openmp_nthreads=omp_get_max_threads(); #endif #ifndef USE_OPENMP if (blas_cpu_number == 1 #endif #ifdef USE_OPENMP - if (openmp_nthreads == 1 || omp_in_parallel(void) + if (openmp_nthreads == 1 || omp_in_parallel() #endif ) return 1; From cf2174fb6967db717ec5ec640f3a88d0ccf0372e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 17:04:39 +0200 Subject: [PATCH 040/125] fix improper function prototypes (empty parentheses) --- driver/others/blas_server_omp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index fe6b4a7c0..3e2179373 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -70,7 +70,7 @@ int blas_server_avail = 0; int blas_omp_number_max = 0; -extern int openblas_omp_adaptive_env(); +extern int openblas_omp_adaptive_env(void); static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; #ifdef HAVE_C11 @@ -79,7 +79,7 @@ static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; #endif -static void adjust_thread_buffers() { +static void adjust_thread_buffers(void) { int i=0, j=0; @@ -124,9 +124,9 @@ void openblas_set_num_threads(int num_threads) { } int blas_thread_init(void){ -if(blas_omp_number_max <= 0) - blas_omp_number_max = omp_get_max_threads(); - + if(blas_omp_number_max <= 0) + blas_omp_number_max = omp_get_max_threads(); + blas_get_cpu_number(); adjust_thread_buffers(); From 90f890ee675945cdb7d7d9887e4baf50c7d5bb29 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Sep 2023 23:12:36 +0200 Subject: [PATCH 041/125] fix improper function prototypes (empty parentheses) (USE_TLS branch) --- driver/others/memory.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index a74cdabd4..caef3e2b7 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -427,9 +427,9 @@ int goto_get_num_procs (void) { return blas_cpu_number; } -static void blas_memory_init(); +static void blas_memory_init(void); -void openblas_fork_handler() +void openblas_fork_handler(void) { // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is // built with "make USE_OPENMP=0". 
@@ -446,9 +446,9 @@ void openblas_fork_handler() #endif } -extern int openblas_num_threads_env(); -extern int openblas_goto_num_threads_env(); -extern int openblas_omp_num_threads_env(); +extern int openblas_num_threads_env(void); +extern int openblas_goto_num_threads_env(void); +extern int openblas_omp_num_threads_env(void); int blas_get_cpu_number(void){ #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU) @@ -592,7 +592,7 @@ static BLASULONG key_lock = 0UL; #endif /* Returns a pointer to the start of the per-thread memory allocation data */ -static __inline struct alloc_t ** get_memory_table() { +static __inline struct alloc_t ** get_memory_table(void) { #if defined(SMP) LOCK_COMMAND(&key_lock); lsk=local_storage_key; @@ -1145,7 +1145,7 @@ static void blas_memory_cleanup(void* ptr){ } } -static void blas_memory_init(){ +static void blas_memory_init(void){ #if defined(SMP) # if defined(OS_WINDOWS) local_storage_key = TlsAlloc(); @@ -1502,7 +1502,7 @@ static void gotoblas_memory_init(void) { /* Initialization for all function; this function should be called before main */ static int gotoblas_initialized = 0; -extern void openblas_read_env(); +extern void openblas_read_env(void); void CONSTRUCTOR gotoblas_init(void) { From 3b1150fcee164922ed932c7d46b28a8ffec744a8 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:00:48 -0500 Subject: [PATCH 042/125] Fix CPU identification to work on AIX. 
--- driver/others/dynamic_power.c | 216 +++++++++------------------------- 1 file changed, 58 insertions(+), 158 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 2847ea9ae..7f8bfd5b9 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -20,12 +20,10 @@ static char *corename[] = { "POWER10" }; -#define NUM_CORETYPES 4 +#define NUM_CORETYPES 5 char *gotoblas_corename(void) { -#ifndef C_PGI if (gotoblas == &gotoblas_POWER6) return corename[1]; -#endif if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) if (gotoblas == &gotoblas_POWER9) return corename[3]; @@ -36,177 +34,81 @@ char *gotoblas_corename(void) { return corename[0]; } -#if defined(__clang__) -static int __builtin_cpu_supports(char* arg) -{ - return 0; -} -#endif - -#if defined(C_PGI) || defined(__clang__) -/* - * NV HPC compilers do not yet implement __builtin_cpu_is(). - * Fake a version here for use in the CPU detection code below. - * - * Strategy here is to first check the CPU to see what it actually is, - * and then test the input to see if what the CPU actually is matches - * what was requested. - */ +#ifdef _AIX +#include -#include - -/* - * Define POWER processor version table. 
- * - * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time - */ - -#define CPU_UNKNOWN 0 -#define CPU_POWER5 5 -#define CPU_POWER6 6 -#define CPU_POWER8 8 -#define CPU_POWER9 9 +#define CPU_UNKNOWN 0 +#define CPU_POWER6 6 +#define CPU_POWER7 7 +#define CPU_POWER8 8 +#define CPU_POWER9 9 #define CPU_POWER10 10 -static struct { - uint32_t pvr_mask; - uint32_t pvr_value; - const char* cpu_name; - uint32_t cpu_type; -} pvrPOWER [] = { - - { /* POWER6 in P5+ mode; 2.04-compliant processor */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000001, - .cpu_name = "POWER5+", - .cpu_type = CPU_POWER5, - }, - - { /* Power6 aka POWER6X*/ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003e0000, - .cpu_name = "POWER6 (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power7 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003f0000, - .cpu_name = "POWER7 (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power7+ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004A0000, - .cpu_name = "POWER7+ (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power8E */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004b0000, - .cpu_name = "POWER8E (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power8NVL */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004c0000, - .cpu_name = "POWER8NVL (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power8 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004d0000, - .cpu_name = "POWER8 (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power9 DD2.0 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0200, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power9 DD 2.1 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0201, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power9 DD2.2 or later */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004e0000, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power10 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00800000, - .cpu_name = "POWER10 (raw)", - .cpu_type = CPU_POWER10, - 
}, - - { /* End of table, pvr_mask and pvr_value must be zero */ - .pvr_mask = 0x0, - .pvr_value = 0x0, - .cpu_name = "Unknown", - .cpu_type = CPU_UNKNOWN, - }, -}; - -static int __builtin_cpu_is(const char *cpu) { - int i; - uint32_t pvr; - uint32_t cpu_type; - - asm("mfpvr %0" : "=r"(pvr)); - - for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { - if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { - break; - } - } - -#if defined(DEBUG) - printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, - pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); +int cpuid() +{ + int arch = _system_configuration.implementation; +#ifdef POWER_6 + if (arch == POWER_6) return CPU_POWER6; #endif - cpu_type = pvrPOWER[i].cpu_type; - - if (!strcmp(cpu, "power8")) - return cpu_type == CPU_POWER8; - if (!strcmp(cpu, "power9")) - return cpu_type == CPU_POWER9; - return 0; +#ifdef POWER_7 + else if (arch == POWER_7) return CPU_POWER7; +#endif +#ifdef POWER_8 + else if (arch == POWER_8) return CPU_POWER8; +#endif +#ifdef POWER_9 + else if (arch == POWER_9) return CPU_POWER9; +#endif +#ifdef POWER_10 + else if (arch == POWER_10) return CPU_POWER10; +#endif + return CPU_UNKNOWN; } -#endif /* C_PGI */ +#ifndef __BUILTIN_CPU_SUPPORTS__ +static int __builtin_cpu_supports(char* arg) +{ + static int ipinfo = -1; + if (ipinfo < 0) { + ipinfo = cpuid(); + } + if (ipinfo >= CPU_POWER10) { + if (!strcmp(arg, "power10")) return 1; + } + if (ipinfo >= CPU_POWER9) { + if (!strcmp(arg, "power9")) return 1; + } + if (ipinfo >= CPU_POWER8) { + if (!strcmp(arg, "power8")) return 1; + } + if (ipinfo >= CPU_POWER6) { + if (!strcmp(arg, "power6")) return 1; + } + return 0; +} +#endif static gotoblas_t *get_coretype(void) { -#ifndef C_PGI - if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x")) + if (__builtin_cpu_supports("power6")) return &gotoblas_POWER6; -#endif - if (__builtin_cpu_is("power8")) + if (__builtin_cpu_supports("power8")) return &gotoblas_POWER8; #if (!defined __GNUC__) || ( 
__GNUC__ >= 6) - if (__builtin_cpu_is("power9")) + if (__builtin_cpu_supports("power9")) return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) +#ifdef _AIX + if (__builtin_cpu_supports("power10")) +#else + if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) +#endif return &gotoblas_POWER10; #endif /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ -#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) - if (__builtin_cpu_is("power10")) +#if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2) + if (__builtin_cpu_supports("power10")) return &gotoblas_POWER9; #endif return NULL; @@ -229,9 +131,7 @@ static gotoblas_t *force_coretype(char * coretype) { switch (found) { -#ifndef C_PGI case 1: return (&gotoblas_POWER6); -#endif case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) case 3: return (&gotoblas_POWER9); From eb738d99293dc658bd6941cc6c2b76cd6ece0c11 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:14:46 -0500 Subject: [PATCH 043/125] Minor changes. 
--- Makefile.system | 2 -- driver/others/dynamic_power.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile.system b/Makefile.system index 1fd47e68e..b1a357fdf 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1170,8 +1170,6 @@ CCOMMON_OPT += -DF_INTERFACE_IBM FEXTRALIB += -lxlf90 ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG)) FCOMMON_OPT += -qextname -else ifeq ($(C_COMPILER), CLANG) -FCOMMON_OPT += -qextname endif # FCOMMON_OPT += -qarch=440 ifdef BINARY64 diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 7f8bfd5b9..1d3f36875 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -44,7 +44,7 @@ char *gotoblas_corename(void) { #define CPU_POWER9 9 #define CPU_POWER10 10 -int cpuid() +static int cpuid(void) { int arch = _system_configuration.implementation; #ifdef POWER_6 From 12130ee9613936f2fa49fd58a7f6bf8210a65552 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:19:22 -0500 Subject: [PATCH 044/125] Remove tab. --- driver/others/dynamic_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 1d3f36875..3c5f1f3c1 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -102,7 +102,7 @@ static gotoblas_t *get_coretype(void) { #ifdef _AIX if (__builtin_cpu_supports("power10")) #else - if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) + if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) #endif return &gotoblas_POWER10; #endif From a922a07e610e0508e2f2f84ae158c46e2e3d7a0e Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:24:30 -0500 Subject: [PATCH 045/125] Cleanup white spaces. 
--- driver/others/dynamic_power.c | 158 +++++++++++++++++----------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 3c5f1f3c1..40f00a634 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -13,25 +13,25 @@ extern gotoblas_t gotoblas_POWER10; extern void openblas_warning(int verbose, const char *msg); static char *corename[] = { - "unknown", - "POWER6", - "POWER8", - "POWER9", - "POWER10" + "unknown", + "POWER6", + "POWER8", + "POWER9", + "POWER10" }; #define NUM_CORETYPES 5 char *gotoblas_corename(void) { - if (gotoblas == &gotoblas_POWER6) return corename[1]; - if (gotoblas == &gotoblas_POWER8) return corename[2]; + if (gotoblas == &gotoblas_POWER6) return corename[1]; + if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (gotoblas == &gotoblas_POWER9) return corename[3]; + if (gotoblas == &gotoblas_POWER9) return corename[3]; #endif #ifdef HAVE_P10_SUPPORT - if (gotoblas == &gotoblas_POWER10) return corename[4]; + if (gotoblas == &gotoblas_POWER10) return corename[4]; #endif - return corename[0]; + return corename[0]; } #ifdef _AIX @@ -90,13 +90,13 @@ static int __builtin_cpu_supports(char* arg) static gotoblas_t *get_coretype(void) { - if (__builtin_cpu_supports("power6")) - return &gotoblas_POWER6; - if (__builtin_cpu_supports("power8")) - return &gotoblas_POWER8; + if (__builtin_cpu_supports("power6")) + return &gotoblas_POWER6; + if (__builtin_cpu_supports("power8")) + return &gotoblas_POWER8; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (__builtin_cpu_supports("power9")) - return &gotoblas_POWER9; + if (__builtin_cpu_supports("power9")) + return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT #ifdef _AIX @@ -104,84 +104,84 @@ static gotoblas_t *get_coretype(void) { #else if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) #endif - return &gotoblas_POWER10; + return 
&gotoblas_POWER10; #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ + /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ #if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2) - if (__builtin_cpu_supports("power10")) - return &gotoblas_POWER9; -#endif - return NULL; + if (__builtin_cpu_supports("power10")) + return &gotoblas_POWER9; +#endif + return NULL; } static gotoblas_t *force_coretype(char * coretype) { - int i ; - int found = -1; - char message[128]; - - for ( i = 0 ; i < NUM_CORETYPES; i++) - { - if (!strncasecmp(coretype, corename[i], 20)) - { - found = i; - break; - } - } - - switch (found) - { - case 1: return (&gotoblas_POWER6); - case 2: return (&gotoblas_POWER8); + int i ; + int found = -1; + char message[128]; + + for ( i = 0 ; i < NUM_CORETYPES; i++) + { + if (!strncasecmp(coretype, corename[i], 20)) + { + found = i; + break; + } + } + + switch (found) + { + case 1: return (&gotoblas_POWER6); + case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) - case 3: return (&gotoblas_POWER9); + case 3: return (&gotoblas_POWER9); #endif #ifdef HAVE_P10_SUPPORT - case 4: return (&gotoblas_POWER10); + case 4: return (&gotoblas_POWER10); #endif - default: return NULL; - } - snprintf(message, 128, "Core not found: %s\n", coretype); - openblas_warning(1, message); + default: return NULL; + } + snprintf(message, 128, "Core not found: %s\n", coretype); + openblas_warning(1, message); } void gotoblas_dynamic_init(void) { - char coremsg[128]; - char coren[22]; - char *p; - - - if (gotoblas) return; - - p = getenv("OPENBLAS_CORETYPE"); - if ( p ) - { - gotoblas = force_coretype(p); - } - else - { - gotoblas = get_coretype(); - } - - if (gotoblas == NULL) - { - snprintf(coremsg, 128, "Falling back to POWER8 core\n"); - openblas_warning(1, coremsg); - gotoblas = &gotoblas_POWER8; - } - - if (gotoblas && 
gotoblas -> init) { - strncpy(coren,gotoblas_corename(),20); - sprintf(coremsg, "Core: %s\n",coren); - openblas_warning(2, coremsg); - gotoblas -> init(); - } else { - openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); - exit(1); - } + char coremsg[128]; + char coren[22]; + char *p; + + + if (gotoblas) return; + + p = getenv("OPENBLAS_CORETYPE"); + if ( p ) + { + gotoblas = force_coretype(p); + } + else + { + gotoblas = get_coretype(); + } + + if (gotoblas == NULL) + { + snprintf(coremsg, 128, "Falling back to POWER8 core\n"); + openblas_warning(1, coremsg); + gotoblas = &gotoblas_POWER8; + } + + if (gotoblas && gotoblas -> init) { + strncpy(coren,gotoblas_corename(),20); + sprintf(coremsg, "Core: %s\n",coren); + openblas_warning(2, coremsg); + gotoblas -> init(); + } else { + openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); + exit(1); + } } void gotoblas_dynamic_quit(void) { - gotoblas = NULL; + gotoblas = NULL; } From 10210748de17a217fd67f6cb8501272b8bfa88c2 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:44:07 -0500 Subject: [PATCH 046/125] Revert PGI changes. 
--- driver/others/dynamic_power.c | 310 +++++++++++++++++++++++++--------- 1 file changed, 234 insertions(+), 76 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 40f00a634..0f5b06be5 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -13,27 +13,181 @@ extern gotoblas_t gotoblas_POWER10; extern void openblas_warning(int verbose, const char *msg); static char *corename[] = { - "unknown", - "POWER6", - "POWER8", - "POWER9", - "POWER10" + "unknown", + "POWER6", + "POWER8", + "POWER9", + "POWER10" }; #define NUM_CORETYPES 5 char *gotoblas_corename(void) { - if (gotoblas == &gotoblas_POWER6) return corename[1]; - if (gotoblas == &gotoblas_POWER8) return corename[2]; +#ifndef C_PGI + if (gotoblas == &gotoblas_POWER6) return corename[1]; +#endif + if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (gotoblas == &gotoblas_POWER9) return corename[3]; + if (gotoblas == &gotoblas_POWER9) return corename[3]; #endif #ifdef HAVE_P10_SUPPORT - if (gotoblas == &gotoblas_POWER10) return corename[4]; + if (gotoblas == &gotoblas_POWER10) return corename[4]; +#endif + return corename[0]; +} + +#if defined(__clang__) +static int __builtin_cpu_supports(char* arg) +{ + return 0; +} +#endif + +#if defined(C_PGI) || defined(__clang__) +/* + * NV HPC compilers do not yet implement __builtin_cpu_is(). + * Fake a version here for use in the CPU detection code below. + * + * Strategy here is to first check the CPU to see what it actually is, + * and then test the input to see if what the CPU actually is matches + * what was requested. + */ + +#include + +/* + * Define POWER processor version table. 
+ * + * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time + */ + +#define CPU_UNKNOWN 0 +#define CPU_POWER5 5 +#define CPU_POWER6 6 +#define CPU_POWER8 8 +#define CPU_POWER9 9 +#define CPU_POWER10 10 + +static struct { + uint32_t pvr_mask; + uint32_t pvr_value; + const char* cpu_name; + uint32_t cpu_type; +} pvrPOWER [] = { + + { /* POWER6 in P5+ mode; 2.04-compliant processor */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000001, + .cpu_name = "POWER5+", + .cpu_type = CPU_POWER5, + }, + + { /* Power6 aka POWER6X*/ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003e0000, + .cpu_name = "POWER6 (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power7 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003f0000, + .cpu_name = "POWER7 (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power8E */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, + .cpu_name = "POWER8 (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power9 DD2.0 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0200, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power9 DD 2.1 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power9 DD2.2 or later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_type = CPU_POWER10, + }, + + { /* End of table, pvr_mask and pvr_value must be zero */ + .pvr_mask = 0x0, + .pvr_value = 0x0, + 
.cpu_name = "Unknown", + .cpu_type = CPU_UNKNOWN, + }, +}; + +static int __builtin_cpu_is(const char *cpu) { + int i; + uint32_t pvr; + uint32_t cpu_type; + + asm("mfpvr %0" : "=r"(pvr)); + + for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { + if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { + break; + } + } + +#if defined(DEBUG) + printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, + pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); #endif - return corename[0]; + cpu_type = pvrPOWER[i].cpu_type; + + if (!strcmp(cpu, "power8")) + return cpu_type == CPU_POWER8; + if (!strcmp(cpu, "power9")) + return cpu_type == CPU_POWER9; + return 0; } +#endif /* C_PGI */ + #ifdef _AIX #include @@ -90,98 +244,102 @@ static int __builtin_cpu_supports(char* arg) static gotoblas_t *get_coretype(void) { - if (__builtin_cpu_supports("power6")) - return &gotoblas_POWER6; - if (__builtin_cpu_supports("power8")) - return &gotoblas_POWER8; +#ifndef C_PGI + if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x")) + return &gotoblas_POWER6; +#endif + if (__builtin_cpu_is("power8")) + return &gotoblas_POWER8; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (__builtin_cpu_supports("power9")) - return &gotoblas_POWER9; + if (__builtin_cpu_is("power9")) + return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT #ifdef _AIX - if (__builtin_cpu_supports("power10")) + if (__builtin_cpu_supports("power10")) #else - if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) -#endif - return &gotoblas_POWER10; + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ -#if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2) - if (__builtin_cpu_supports("power10")) - return &gotoblas_POWER9; + return &gotoblas_POWER10; #endif - return NULL; + /* Fall back to the POWER9 implementation if the toolchain is 
too old or the MMA feature is not set */ +#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) + if (__builtin_cpu_is("power10")) + return &gotoblas_POWER9; +#endif + return NULL; } static gotoblas_t *force_coretype(char * coretype) { - int i ; - int found = -1; - char message[128]; - - for ( i = 0 ; i < NUM_CORETYPES; i++) - { - if (!strncasecmp(coretype, corename[i], 20)) - { - found = i; - break; - } - } + int i ; + int found = -1; + char message[128]; - switch (found) - { - case 1: return (&gotoblas_POWER6); - case 2: return (&gotoblas_POWER8); + for ( i = 0 ; i < NUM_CORETYPES; i++) + { + if (!strncasecmp(coretype, corename[i], 20)) + { + found = i; + break; + } + } + + switch (found) + { +#ifndef C_PGI + case 1: return (&gotoblas_POWER6); +#endif + case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) - case 3: return (&gotoblas_POWER9); + case 3: return (&gotoblas_POWER9); #endif #ifdef HAVE_P10_SUPPORT - case 4: return (&gotoblas_POWER10); + case 4: return (&gotoblas_POWER10); #endif - default: return NULL; - } - snprintf(message, 128, "Core not found: %s\n", coretype); - openblas_warning(1, message); + default: return NULL; + } + snprintf(message, 128, "Core not found: %s\n", coretype); + openblas_warning(1, message); } void gotoblas_dynamic_init(void) { - char coremsg[128]; - char coren[22]; - char *p; + char coremsg[128]; + char coren[22]; + char *p; - if (gotoblas) return; + if (gotoblas) return; - p = getenv("OPENBLAS_CORETYPE"); - if ( p ) - { - gotoblas = force_coretype(p); - } - else - { - gotoblas = get_coretype(); - } + p = getenv("OPENBLAS_CORETYPE"); + if ( p ) + { + gotoblas = force_coretype(p); + } + else + { + gotoblas = get_coretype(); + } - if (gotoblas == NULL) - { - snprintf(coremsg, 128, "Falling back to POWER8 core\n"); - openblas_warning(1, coremsg); - gotoblas = &gotoblas_POWER8; - } + if (gotoblas == NULL) + { + snprintf(coremsg, 128, "Falling back to POWER8 core\n"); + 
openblas_warning(1, coremsg); + gotoblas = &gotoblas_POWER8; + } - if (gotoblas && gotoblas -> init) { - strncpy(coren,gotoblas_corename(),20); - sprintf(coremsg, "Core: %s\n",coren); - openblas_warning(2, coremsg); - gotoblas -> init(); - } else { - openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); - exit(1); - } + if (gotoblas && gotoblas -> init) { + strncpy(coren,gotoblas_corename(),20); + sprintf(coremsg, "Core: %s\n",coren); + openblas_warning(2, coremsg); + gotoblas -> init(); + } else { + openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); + exit(1); + } } void gotoblas_dynamic_quit(void) { - gotoblas = NULL; + gotoblas = NULL; } From e5dc376912dab278afdf677cb112008d36ead0fe Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:48:47 -0500 Subject: [PATCH 047/125] Remove duplicate defines. --- driver/others/dynamic_power.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 0f5b06be5..7b0b4ea01 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -43,6 +43,13 @@ static int __builtin_cpu_supports(char* arg) } #endif +#define CPU_UNKNOWN 0 +#define CPU_POWER6 6 +#define CPU_POWER7 7 +#define CPU_POWER8 8 +#define CPU_POWER9 9 +#define CPU_POWER10 10 + #if defined(C_PGI) || defined(__clang__) /* * NV HPC compilers do not yet implement __builtin_cpu_is(). 
@@ -61,13 +68,6 @@ static int __builtin_cpu_supports(char* arg) * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time */ -#define CPU_UNKNOWN 0 -#define CPU_POWER5 5 -#define CPU_POWER6 6 -#define CPU_POWER8 8 -#define CPU_POWER9 9 -#define CPU_POWER10 10 - static struct { uint32_t pvr_mask; uint32_t pvr_value; @@ -191,13 +191,6 @@ static int __builtin_cpu_is(const char *cpu) { #ifdef _AIX #include -#define CPU_UNKNOWN 0 -#define CPU_POWER6 6 -#define CPU_POWER7 7 -#define CPU_POWER8 8 -#define CPU_POWER9 9 -#define CPU_POWER10 10 - static int cpuid(void) { int arch = _system_configuration.implementation; From b677d0d5fd175768e63d02253b12d1b0ccb2d242 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 13:09:12 -0500 Subject: [PATCH 048/125] Adding missing endif --- driver/others/dynamic_power.c | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 7b0b4ea01..6ed26ad1e 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -234,6 +234,7 @@ static int __builtin_cpu_supports(char* arg) return 0; } #endif +#endif static gotoblas_t *get_coretype(void) { From a8c90eb3ed5cae583bdc289846fe7d37fdc42d28 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Tue, 3 Oct 2023 10:24:04 -0500 Subject: [PATCH 049/125] Added cpu_is --- driver/others/dynamic_power.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 6ed26ad1e..252e409b3 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -233,8 +233,27 @@ static int __builtin_cpu_supports(char* arg) } return 0; } + +static int __builtin_cpu_is(char *arg) +{ + static int ipinfo = -1; + if (ipinfo < 0) { + ipinfo = cpuid(); + } + if (ipinfo == CPU_POWER10) { + if (!strcmp(arg, "power10") return 1; + } else if (ipinfo == CPU_POWER9) { + if (!strcmp(arg, "power9") return 1; + } else if 
(ipinfo == CPU_POWER8) { + if (!strcmp(arg, "power8") return 1; + } else if (ipinfo == CPU_POWER6) { + if (!strcmp(arg, "power6") return 1; + } else { + return 0; + } +} #endif -#endif +#endif /* _AIX */ static gotoblas_t *get_coretype(void) { From 2d0b2334259d41c2003b51a07580dbd25cfe267c Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Tue, 3 Oct 2023 10:26:14 -0500 Subject: [PATCH 050/125] Fix missing parens. --- driver/others/dynamic_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 252e409b3..734122178 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -241,13 +241,13 @@ static int __builtin_cpu_is(char *arg) ipinfo = cpuid(); } if (ipinfo == CPU_POWER10) { - if (!strcmp(arg, "power10") return 1; + if (!strcmp(arg, "power10")) return 1; } else if (ipinfo == CPU_POWER9) { - if (!strcmp(arg, "power9") return 1; + if (!strcmp(arg, "power9")) return 1; } else if (ipinfo == CPU_POWER8) { - if (!strcmp(arg, "power8") return 1; + if (!strcmp(arg, "power8")) return 1; } else if (ipinfo == CPU_POWER6) { - if (!strcmp(arg, "power6") return 1; + if (!strcmp(arg, "power6")) return 1; } else { return 0; } From 09212f84bff0ca8173f928c59ec81da3ab00933b Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Tue, 3 Oct 2023 12:23:21 -0500 Subject: [PATCH 051/125] Fix default case for cpu_is. 
--- driver/others/dynamic_power.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 734122178..252baaeeb 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -248,9 +248,8 @@ static int __builtin_cpu_is(char *arg) if (!strcmp(arg, "power8")) return 1; } else if (ipinfo == CPU_POWER6) { if (!strcmp(arg, "power6")) return 1; - } else { - return 0; } + return 0; } #endif #endif /* _AIX */ From 3cc72a3797ac050841975ff38d317f34ecfeb503 Mon Sep 17 00:00:00 2001 From: Chip Kerchner Date: Wed, 4 Oct 2023 09:54:37 -0500 Subject: [PATCH 052/125] Only include cpu_id and cpu_supports in AIX and fix parameter types. --- driver/others/dynamic_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 252baaeeb..c01d112bc 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -36,7 +36,7 @@ char *gotoblas_corename(void) { return corename[0]; } -#if defined(__clang__) +#if defined(__clang__) && !defined(_AIX) static int __builtin_cpu_supports(char* arg) { return 0; @@ -50,7 +50,7 @@ static int __builtin_cpu_supports(char* arg) #define CPU_POWER9 9 #define CPU_POWER10 10 -#if defined(C_PGI) || defined(__clang__) +#if defined(C_PGI) || (defined(__clang__) && !defined(_AIX)) /* * NV HPC compilers do not yet implement __builtin_cpu_is(). * Fake a version here for use in the CPU detection code below. 
@@ -213,7 +213,7 @@ static int cpuid(void) } #ifndef __BUILTIN_CPU_SUPPORTS__ -static int __builtin_cpu_supports(char* arg) +static int __builtin_cpu_supports(const char* arg) { static int ipinfo = -1; if (ipinfo < 0) { @@ -234,7 +234,7 @@ static int __builtin_cpu_supports(char* arg) return 0; } -static int __builtin_cpu_is(char *arg) +static int __builtin_cpu_is(const char *arg) { static int ipinfo = -1; if (ipinfo < 0) { From db0805906ba0d7477ff2adad41bf815cd25e9d06 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Wed, 4 Oct 2023 14:04:03 -0500 Subject: [PATCH 053/125] powerpc: Fix build errors with Open XL C This patch fixes errors when using Open XL C compiler on AIX. Tested with gcc/xlf and ibm-clang/xlf compiler combinations. --- Makefile.power | 7 +++++++ c_check | 18 +++++++++++++----- kernel/Makefile | 6 ------ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/Makefile.power b/Makefile.power index 46afb2d4a..ada51b2e1 100644 --- a/Makefile.power +++ b/Makefile.power @@ -98,6 +98,9 @@ endif endif endif +ifeq ($(C_COMPILER), CLANG) +CCOMMON_OPT += -fno-integrated-as +endif # workaround for C->FORTRAN ABI violation in LAPACKE ifeq ($(F_COMPILER), GFORTRAN) FCOMMON_OPT += -fno-optimize-sibling-calls @@ -133,7 +136,11 @@ ifdef BINARY64 ifeq ($(OSNAME), AIX) +ifeq ($(C_COMPILER), GCC) CCOMMON_OPT += -mpowerpc64 -maix64 +else +CCOMMON_OPT += -m64 +endif ifeq ($(COMPILER_F77), g77) FCOMMON_OPT += -mpowerpc64 -maix64 endif diff --git a/c_check b/c_check index 4d12c1674..b018c10a8 100755 --- a/c_check +++ b/c_check @@ -96,11 +96,19 @@ esac defined=0 if [ "$os" = "AIX" ]; then - case "$BINARY" in - 32) compiler_name="$compiler_name -maix32" ;; - 64) compiler_name="$compiler_name -maix64" ;; - esac - defined=1 + if [ "$compiler" = "GCC" ]; then + case "$BINARY" in + 32) compiler_name="$compiler_name -maix32" ;; + 64) compiler_name="$compiler_name -maix64" ;; + esac + defined=1 + else + case "$BINARY" in + 32) 
compiler_name="$compiler_name -m32" ;; + 64) compiler_name="$compiler_name -m64" ;; + esac + defined=1 + fi fi case "$architecture" in diff --git a/kernel/Makefile b/kernel/Makefile index 1e0a0074f..3f9afd3fa 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,12 +5,6 @@ endif TOPDIR = .. include $(TOPDIR)/Makefile.system -ifeq ($(ARCH), power) -ifeq ($(C_COMPILER), CLANG) - override CFLAGS += -fno-integrated-as -endif -endif - AVX2OPT = ifeq ($(C_COMPILER), GCC) # AVX2 support was added in 4.7.0 From c60f9d9c084a97d1c416d63d921a8fcb30b090ac Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 09:49:17 -0500 Subject: [PATCH 054/125] Add missing CPU_POWER5. --- driver/others/dynamic_power.c | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index c01d112bc..8c5caada0 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -44,6 +44,7 @@ static int __builtin_cpu_supports(char* arg) #endif #define CPU_UNKNOWN 0 +#define CPU_POWER5 5 #define CPU_POWER6 6 #define CPU_POWER7 7 #define CPU_POWER8 8 From 71c6689af4e61cc4891eba3d996fb39920798e37 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 12:20:40 -0500 Subject: [PATCH 055/125] Fix dynamic dispatch to work for clang. 
--- driver/others/dynamic_power.c | 141 ++++++++++++++-------------------- 1 file changed, 59 insertions(+), 82 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 8c5caada0..c43738ef4 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -36,13 +36,6 @@ char *gotoblas_corename(void) { return corename[0]; } -#if defined(__clang__) && !defined(_AIX) -static int __builtin_cpu_supports(char* arg) -{ - return 0; -} -#endif - #define CPU_UNKNOWN 0 #define CPU_POWER5 5 #define CPU_POWER6 6 @@ -51,7 +44,31 @@ static int __builtin_cpu_supports(char* arg) #define CPU_POWER9 9 #define CPU_POWER10 10 -#if defined(C_PGI) || (defined(__clang__) && !defined(_AIX)) +#ifdef _AIX +#include + +static int cpuid(void) +{ + int arch = _system_configuration.implementation; +#ifdef POWER_6 + if (arch == POWER_6) return CPU_POWER6; +#endif +#ifdef POWER_7 + else if (arch == POWER_7) return CPU_POWER7; +#endif +#ifdef POWER_8 + else if (arch == POWER_8) return CPU_POWER8; +#endif +#ifdef POWER_9 + else if (arch == POWER_9) return CPU_POWER9; +#endif +#ifdef POWER_10 + else if (arch == POWER_10) return CPU_POWER10; +#endif + return CPU_UNKNOWN; +} +#else +#if defined(C_PGI) || defined(__clang__) /* * NV HPC compilers do not yet implement __builtin_cpu_is(). * Fake a version here for use in the CPU detection code below. @@ -61,8 +78,6 @@ static int __builtin_cpu_supports(char* arg) * what was requested. */ -#include - /* * Define POWER processor version table. 
* @@ -161,79 +176,32 @@ static struct { }, }; -static int __builtin_cpu_is(const char *cpu) { - int i; - uint32_t pvr; - uint32_t cpu_type; +static int cpuid(void) +{ + int i; + uint32_t pvr; + uint32_t cpu_type; - asm("mfpvr %0" : "=r"(pvr)); + asm("mfpvr %0" : "=r"(pvr)); - for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { - if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { - break; - } - } + for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { + if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { + break; + } + } #if defined(DEBUG) - printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, - pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); + printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, + pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); #endif - cpu_type = pvrPOWER[i].cpu_type; - - if (!strcmp(cpu, "power8")) - return cpu_type == CPU_POWER8; - if (!strcmp(cpu, "power9")) - return cpu_type == CPU_POWER9; - return 0; + cpu_type = pvrPOWER[i].cpu_type; + return (int)(cpu_type); } - #endif /* C_PGI */ - -#ifdef _AIX -#include - -static int cpuid(void) -{ - int arch = _system_configuration.implementation; -#ifdef POWER_6 - if (arch == POWER_6) return CPU_POWER6; -#endif -#ifdef POWER_7 - else if (arch == POWER_7) return CPU_POWER7; -#endif -#ifdef POWER_8 - else if (arch == POWER_8) return CPU_POWER8; -#endif -#ifdef POWER_9 - else if (arch == POWER_9) return CPU_POWER9; -#endif -#ifdef POWER_10 - else if (arch == POWER_10) return CPU_POWER10; -#endif - return CPU_UNKNOWN; -} +#endif /* _AIX */ #ifndef __BUILTIN_CPU_SUPPORTS__ -static int __builtin_cpu_supports(const char* arg) -{ - static int ipinfo = -1; - if (ipinfo < 0) { - ipinfo = cpuid(); - } - if (ipinfo >= CPU_POWER10) { - if (!strcmp(arg, "power10")) return 1; - } - if (ipinfo >= CPU_POWER9) { - if (!strcmp(arg, "power9")) return 1; - } - if (ipinfo >= CPU_POWER8) { - if (!strcmp(arg, "power8")) return 1; - } - if (ipinfo >= CPU_POWER6) { - if (!strcmp(arg, "power6")) 
return 1; - } - return 0; -} +#include static int __builtin_cpu_is(const char *arg) { @@ -241,19 +209,28 @@ static int __builtin_cpu_is(const char *arg) if (ipinfo < 0) { ipinfo = cpuid(); } +#ifdef HAVE_P10_SUPPORT if (ipinfo == CPU_POWER10) { if (!strcmp(arg, "power10")) return 1; - } else if (ipinfo == CPU_POWER9) { + } +#endif + if (ipinfo == CPU_POWER9) { if (!strcmp(arg, "power9")) return 1; } else if (ipinfo == CPU_POWER8) { if (!strcmp(arg, "power8")) return 1; +#ifndef C_PGI } else if (ipinfo == CPU_POWER6) { if (!strcmp(arg, "power6")) return 1; +#endif } return 0; } + +static int __builtin_cpu_supports(const char *arg) +{ + return 0; +} #endif -#endif /* _AIX */ static gotoblas_t *get_coretype(void) { @@ -268,18 +245,18 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT -#ifdef _AIX - if (__builtin_cpu_supports("power10")) +#if defined(_AIX) || defined(__clang__) + if (__builtin_cpu_is("power10")) #else if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) #endif return &gotoblas_POWER10; #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ + /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ #if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) - if (__builtin_cpu_is("power10")) - return &gotoblas_POWER9; -#endif + if (__builtin_cpu_is("power10")) + return &gotoblas_POWER9; +#endif return NULL; } From 298bf1f240afcac73d306f4c2da35b314c39dba6 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 12:50:28 -0500 Subject: [PATCH 056/125] Reduce differences. 
--- driver/others/dynamic_power.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index c43738ef4..4e8710bc7 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -178,23 +178,23 @@ static struct { static int cpuid(void) { - int i; - uint32_t pvr; - uint32_t cpu_type; + int i; + uint32_t pvr; + uint32_t cpu_type; - asm("mfpvr %0" : "=r"(pvr)); + asm("mfpvr %0" : "=r"(pvr)); - for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { - if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { - break; - } - } + for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { + if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { + break; + } + } #if defined(DEBUG) - printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, - pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); + printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, + pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); #endif - cpu_type = pvrPOWER[i].cpu_type; + cpu_type = pvrPOWER[i].cpu_type; return (int)(cpu_type); } #endif /* C_PGI */ @@ -252,10 +252,10 @@ static gotoblas_t *get_coretype(void) { #endif return &gotoblas_POWER10; #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ + /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ #if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) - if (__builtin_cpu_is("power10")) - return &gotoblas_POWER9; + if (__builtin_cpu_is("power10")) + return &gotoblas_POWER9; #endif return NULL; } From 36e08f69946321a7ca3f9ef495d198802e1b5b17 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 13:08:41 -0500 Subject: [PATCH 057/125] One more small change. 
--- driver/others/dynamic_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 4e8710bc7..311987d31 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -195,7 +195,7 @@ static int cpuid(void) pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); #endif cpu_type = pvrPOWER[i].cpu_type; - return (int)(cpu_type); + return (int)(cpu_type); } #endif /* C_PGI */ #endif /* _AIX */ From 3655632611173f191c22a36d7c9e0950cdcc202e Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 13:11:40 -0500 Subject: [PATCH 058/125] Another small change. --- driver/others/dynamic_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 311987d31..f98fedd45 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -246,7 +246,7 @@ static gotoblas_t *get_coretype(void) { #endif #ifdef HAVE_P10_SUPPORT #if defined(_AIX) || defined(__clang__) - if (__builtin_cpu_is("power10")) + if (__builtin_cpu_is("power10")) #else if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) #endif From 880af052dde230595328d8a19d10e42f39369a43 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Fri, 6 Oct 2023 13:41:49 -0500 Subject: [PATCH 059/125] Fix dynamic dispatch P9 for clang. 
--- driver/others/dynamic_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index f98fedd45..db04e635f 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -3,7 +3,7 @@ extern gotoblas_t gotoblas_POWER6; extern gotoblas_t gotoblas_POWER8; -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) extern gotoblas_t gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT @@ -27,7 +27,7 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_POWER6) return corename[1]; #endif if (gotoblas == &gotoblas_POWER8) return corename[2]; -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) if (gotoblas == &gotoblas_POWER9) return corename[3]; #endif #ifdef HAVE_P10_SUPPORT @@ -240,7 +240,7 @@ static gotoblas_t *get_coretype(void) { #endif if (__builtin_cpu_is("power8")) return &gotoblas_POWER8; -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) if (__builtin_cpu_is("power9")) return &gotoblas_POWER9; #endif @@ -281,7 +281,7 @@ static gotoblas_t *force_coretype(char * coretype) { case 1: return (&gotoblas_POWER6); #endif case 2: return (&gotoblas_POWER8); -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) case 3: return (&gotoblas_POWER9); #endif #ifdef HAVE_P10_SUPPORT From b626544ca32396cbd6bd138d75669bee05330877 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 7 Oct 2023 22:31:03 +0200 Subject: [PATCH 060/125] complete function prototypes and remove unused functions --- ctest/c_cblat1c.c | 303 +++-------------------------- ctest/c_cblat2c.c | 366 +++++++---------------------------- ctest/c_cblat3c.c | 124 ------------ ctest/c_dblat1c.c | 192 +++--------------- ctest/c_dblat2c.c | 352 
+++++++-------------------------- ctest/c_dblat3c.c | 435 ++++++----------------------------------- ctest/c_sblat1c.c | 318 +++--------------------------- ctest/c_sblat2c.c | 481 +++++++--------------------------------------- ctest/c_sblat3c.c | 437 ++++++----------------------------------- ctest/c_zblat1c.c | 304 +++-------------------------- ctest/c_zblat2c.c | 367 +++++++---------------------------- ctest/c_zblat3c.c | 463 +++++++------------------------------------- 12 files changed, 549 insertions(+), 3593 deletions(-) diff --git a/ctest/c_cblat1c.c b/ctest/c_cblat1c.c index 8c0dd140c..2f84da43b 100644 --- a/ctest/c_cblat1c.c +++ b/ctest/c_cblat1c.c @@ -242,251 +242,6 @@ typedef struct Namelist Namelist; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -#if 0 -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static 
_Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -#if 0 -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static 
_Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -/* -- translated by f2c (version 20000121). - You must link the resulting object file with the libraries: - -lf2c -lm (in that order) -*/ - /* Common Block Declarations */ @@ -396,7 +273,7 @@ static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; -/* Main program */ int main() +/* Main program */ int main(void) { /* Initialized data */ @@ -414,17 +291,21 @@ static logical c_false = FALSE_; static logical same; static integer ninc, nbet, ntra; static logical rewi; - extern /* Subroutine */ int cchk1_(), cchk2_(), cchk3_(), cchk4_(), - cchk5_(), cchk6_(); + extern /* Subroutine */ int cchk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen); + extern /* Subroutine */ int cchk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen); + extern /* Subroutine */ int cchk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, 
integer*, integer*, integer*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen); + extern /* Subroutine */ int cchk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen); + extern /* Subroutine */ int cchk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen); + extern /* Subroutine */ int cchk6_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen); static complex a[4225] /* was [65][65] */; static real g[65]; static integer i__, j, n; static logical fatal; static complex x[65], y[65], z__[130]; - extern doublereal sdiff_(); + extern doublereal sdiff_(real*, real*); static logical trace; static integer nidim; - extern /* Subroutine */ int cmvch_(); + extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); static char snaps[32], trans[1]; static integer isnum; static logical ltest[17]; @@ -438,11 +319,11 @@ static logical c_false = FALSE_; static char snamet[12]; static real thresh; static logical rorder; - extern /* Subroutine */ int cc2chke_(); + extern /* Subroutine */ void cc2chke_(char*, ftnlen); static integer 
layout; static logical ltestt, tsterr; static complex alf[7]; - extern logical lce_(); + extern logical lce_(complex*, complex*, integer*); static integer inc[7], nkb; static complex bet[7]; static real eps, err; @@ -983,22 +864,7 @@ L240: } /* MAIN__ */ -/* Subroutine */ int cchk1_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, - incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *nalf; -complex *alf; -integer *nbet; -complex *bet; -integer *ninc, *inc, *nmax, *incmax; -complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -real *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int cchk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1015,10 +881,10 @@ ftnlen sname_len; static integer incx, incy; static logical full, tran, null; static integer i__, m, n; - extern /* Subroutine */ int cmake_(); + extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); static complex alpha; static logical isame[13]; - extern /* Subroutine */ int cmvch_(); + extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); static integer nargs; static 
logical reset; static integer incxs, incys; @@ -1026,14 +892,15 @@ ftnlen sname_len; static integer ia, ib, ic; static logical banded; static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns; - extern /* Subroutine */ int ccgbmv_(), ccgemv_(); - extern logical lceres_(); + extern /* Subroutine */ int ccgbmv_(integer*, char*, integer*, integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen); + extern /* Subroutine */ void ccgemv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen); + extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); static char ctrans[14]; static real errmax; static complex transl; static char transs[1]; static integer laa, lda; - extern logical lce_(); + extern logical lce_(complex*, complex*, integer*); static complex als, bls; static real err; static integer iku, kls, kus; @@ -1448,22 +1315,7 @@ L140: } /* cchk1_ */ -/* Subroutine */ int cchk2_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, - incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *nalf; -complex *alf; -integer *nbet; -complex *bet; -integer *ninc, *inc, *nmax, *incmax; -complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -real *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int cchk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, 
complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1481,10 +1333,10 @@ ftnlen sname_len; static logical full, null; static char uplo[1]; static integer i__, k, n; - extern /* Subroutine */ int cmake_(); + extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); static complex alpha; static logical isame[13]; - extern /* Subroutine */ int cmvch_(); + extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; @@ -1495,13 +1347,14 @@ ftnlen sname_len; static integer nc, ik, in; static logical packed; static integer nk, ks, ix, iy, ns, lx, ly; - extern /* Subroutine */ int cchbmv_(), cchemv_(); - extern logical lceres_(); - extern /* Subroutine */ int cchpmv_(); + extern /* Subroutine */ void cchbmv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen); + extern /* Subroutine */ void cchemv_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen); + extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cchpmv_(integer*, char*, integer*, complex*, complex*, complex*, integer*, complex*, complex*, integer*, ftnlen); static real errmax; static complex transl; static integer laa, lda; - extern logical lce_(); + extern logical lce_(complex*, complex*, integer*); static complex als, bls; static real err; @@ -1906,19 +1759,7 @@ L130: } /* cchk2_ */ -/* Subroutine */ int cchk3_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, ninc, 
inc, nmax, incmax, a, aa, as, x, - xx, xs, xt, g, z__, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax; -complex *a, *aa, *as, *x, *xx, *xs, *xt; -real *g; -complex *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int cchk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* xt, real* g, complex* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1937,10 +1778,10 @@ ftnlen sname_len; static logical full, null; static char uplo[1], cdiag[14]; static integer i__, k, n; - extern /* Subroutine */ int cmake_(); + extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); static char diags[1]; static logical isame[13]; - extern /* Subroutine */ int cmvch_(); + extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; @@ -1950,17 +1791,19 @@ ftnlen sname_len; static integer nc, ik, in; static logical packed; static integer nk, ks, ix, ns, lx; - extern logical lceres_(); - extern /* Subroutine */ int cctbmv_(), cctbsv_(); + extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cctbmv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void 
cctbsv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen); static char ctrans[14]; - extern /* Subroutine */ int cctpmv_(); + extern /* Subroutine */ void cctpmv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen); static real errmax; - extern /* Subroutine */ int cctrmv_(), cctpsv_(); + extern /* Subroutine */ void cctrmv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cctpsv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen); static complex transl; - extern /* Subroutine */ int cctrsv_(); + extern /* Subroutine */ void cctrsv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen); static char transs[1]; static integer laa, icd, lda; - extern logical lce_(); + extern logical lce_(complex*, complex*, integer*); static integer ict, icu; static real err; @@ -2418,21 +2261,7 @@ L130: } /* cchk3_ */ -/* Subroutine */ int cchk4_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -complex *alf; -integer *ninc, *inc, *nmax, *incmax; -complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -real *g; -complex *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int cchk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, 
complex* z__, integer* iorder, ftnlen sname_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; @@ -2444,21 +2273,21 @@ ftnlen sname_len; static integer incx, incy; static logical null; static integer i__, j, m, n; - extern /* Subroutine */ int cmake_(); + extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); static complex alpha, w[1]; static logical isame[13]; - extern /* Subroutine */ int cmvch_(); + extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static integer incxs, incys, ia, nc, nd, im, in; - extern /* Subroutine */ int ccgerc_(); + extern /* Subroutine */ void ccgerc_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*); static integer ms, ix, iy, ns, lx, ly; - extern /* Subroutine */ int ccgeru_(); - extern logical lceres_(); + extern /* Subroutine */ void ccgeru_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*); + extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); static real errmax; static complex transl; static integer laa, lda; - extern logical lce_(); + extern logical lce_(complex*, complex*, integer*); static complex als; static real err; @@ -2786,21 +2615,7 @@ L150: } /* cchk4_ */ -/* Subroutine */ int cchk5_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -complex *alf; 
-integer *ninc, *inc, *nmax, *incmax; -complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -real *g; -complex *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int cchk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -2818,10 +2633,12 @@ ftnlen sname_len; static logical full, null; static char uplo[1]; static integer i__, j, n; - extern /* Subroutine */ int cmake_(), ccher_(); + extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void ccher_(integer*, char*, integer*, real*, complex*, integer*, complex*, integer*, ftnlen); static complex alpha, w[1]; static logical isame[13]; - extern /* Subroutine */ int cchpr_(), cmvch_(); + extern /* Subroutine */ void cchpr_(integer*, char*, integer*, real*, complex*, integer*, complex*, ftnlen); + extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; @@ -2832,11 +2649,11 @@ ftnlen sname_len; static logical packed; static integer ix, ns, lx; static real ralpha; - extern logical lceres_(); + extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); static real errmax; static complex transl; static integer laa, lda; - extern logical lce_(); + extern logical lce_(complex*, complex*, integer*); 
static real err; /* Tests CHER and CHPR. */ @@ -3160,21 +2977,7 @@ L130: } /* cchk5_ */ -/* Subroutine */ int cchk6_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -complex *alf; -integer *ninc, *inc, *nmax, *incmax; -complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -real *g; -complex *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int cchk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -3192,25 +2995,26 @@ ftnlen sname_len; static logical full, null; static char uplo[1]; static integer i__, j, n; - extern /* Subroutine */ int cmake_(); + extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); static complex alpha, w[2]; static logical isame[13]; - extern /* Subroutine */ int cmvch_(); + extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; static integer incxs, incys; static logical upper; static char uplos[1]; - extern /* Subroutine */ int ccher2_(), cchpr2_(); + extern /* Subroutine */ void ccher2_(integer*, char*, integer*, complex*, complex*, integer*, 
complex*, integer*, complex*, integer*, ftnlen); + extern /* Subroutine */ void cchpr2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, ftnlen); static integer ia, ja, ic, nc, jj, lj, in; static logical packed; static integer ix, iy, ns, lx, ly; - extern logical lceres_(); + extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); static real errmax; static complex transl; static integer laa, lda; - extern logical lce_(); + extern logical lce_(complex*, complex*, integer*); static complex als; static real err; @@ -3597,24 +3401,7 @@ L170: } /* cchk6_ */ -/* Subroutine */ int cmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y, - incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len) -char *trans; -integer *m, *n; -complex *alpha, *a; -integer *nmax; -complex *x; -integer *incx; -complex *beta, *y; -integer *incy; -complex *yt; -real *g; -complex *yy; -real *eps, *err; -logical *fatal; -integer *nout; -logical *mv; -ftnlen trans_len; +/* Subroutine */ int cmvch_(char* trans, integer* m, integer* n, complex* alpha, complex* a, integer* nmax, complex* x, integer* incx, complex* beta, complex* y, integer* incy, complex* yt, real* g, complex* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len) { /* System generated locals */ @@ -3812,9 +3599,7 @@ L80: } /* cmvch_ */ -logical lce_(ri, rj, lr) -complex *ri, *rj; -integer *lr; +logical lce_(complex* ri, complex* rj, integer* lr) { /* System generated locals */ integer i__1, i__2, i__3; @@ -3861,13 +3646,7 @@ L30: } /* lce_ */ -logical lceres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) -char *type__, *uplo; -integer *m, *n; -complex *aa, *as; -integer *lda; -ftnlen type_len; -ftnlen uplo_len; +logical lceres_(char* type__, char* uplo, integer* m, integer* n, complex* aa, complex* as, integer* lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, 
as_dim1, as_offset, i__1, i__2, i__3, i__4; @@ -3960,9 +3739,7 @@ L80: } /* lceres_ */ -/* Complex */ VOID cbeg_( ret_val, reset) -complex * ret_val; -logical *reset; +/* Complex */ VOID cbeg_(complex* ret_val, logical* reset) { /* System generated locals */ real r__1, r__2; @@ -4023,8 +3800,7 @@ L10: } /* cbeg_ */ -doublereal sdiff_(x, y) -real *x, *y; +doublereal sdiff_(real* x, real* y) { /* System generated locals */ real ret_val; @@ -4044,19 +3820,7 @@ real *x, *y; } /* sdiff_ */ -/* Subroutine */ int cmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl, - ku, reset, transl, type_len, uplo_len, diag_len) -char *type__, *uplo, *diag; -integer *m, *n; -complex *a; -integer *nmax; -complex *aa; -integer *lda, *kl, *ku; -logical *reset; -complex *transl; -ftnlen type_len; -ftnlen uplo_len; -ftnlen diag_len; +/* Subroutine */ int cmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, complex* a, integer* nmax, complex* aa, integer* lda, integer* kl, integer* ku, logical* reset, complex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -4064,7 +3828,7 @@ ftnlen diag_len; complex q__1, q__2; /* Local variables */ - extern /* Complex */ VOID cbeg_(); + extern /* Complex */ VOID cbeg_(complex*, logical*); static integer ibeg, iend, ioff; static logical unit; static integer i__, j; diff --git a/ctest/c_cblat3c.c b/ctest/c_cblat3c.c index 1f4b967b0..5ad9b8bd8 100644 --- a/ctest/c_cblat3c.c +++ b/ctest/c_cblat3c.c @@ -242,130 +242,6 @@ typedef struct Namelist Namelist; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -#if 0 -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return 
pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -/* -- translated by f2c (version 20000121). - You must link the resulting object file with the libraries: - -lf2c -lm (in that order) -*/ - - /* Common Block Declarations */ diff --git a/ctest/c_dblat1c.c b/ctest/c_dblat1c.c index bf2f7a781..f0141f2a5 100644 --- a/ctest/c_dblat1c.c +++ b/ctest/c_dblat1c.c @@ -21,19 +21,6 @@ typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; -#ifdef _MSC_VER -static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} -static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} -static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} -static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} -#else -static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} -static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} -static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} -static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} -#endif -#define pCf(z) (*_pCf(z)) -#define pCd(z) (*_pCd(z)) typedef int logical; typedef short int shortlogical; typedef char logical1; @@ -242,124 +229,6 @@ typedef struct Namelist Namelist; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus 
-typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -#if 0 -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != 
-1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif /* Common Block Declarations */ @@ -375,16 +244,16 @@ struct { static integer c__1 = 1; static doublereal c_b34 = 1.; -/* Main program */ int main() +/* Main program */ int main(void) { /* Initialized data */ static doublereal sfac = 9.765625e-4; /* Local variables */ - extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_(); + extern /* Subroutine */ int check0_(doublereal*), check1_(doublereal*), check2_(doublereal*), check3_(doublereal*); static integer ic; - extern /* Subroutine */ int header_(); + extern /* Subroutine */ int header_(void); /* Test program for the DOUBLE PRECISION Level 1 CBLAS. */ /* Based upon the original CBLAS test routine together with: */ @@ -431,7 +300,7 @@ static doublereal c_b34 = 1.; } /* MAIN__ */ -/* Subroutine */ int header_() +/* Subroutine */ int header_(void) { /* Initialized data */ @@ -450,8 +319,7 @@ static doublereal c_b34 = 1.; } /* header_ */ -/* Subroutine */ int check0_(sfac) -doublereal *sfac; +/* Subroutine */ int check0_(doublereal* sfac) { /* Initialized data */ @@ -464,7 +332,7 @@ doublereal *sfac; /* Local variables */ static integer k; - extern /* Subroutine */ int drotgtest_(), stest1_(); + extern /* Subroutine */ int drotgtest_(doublereal*,doublereal*,doublereal*,doublereal*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); static doublereal sa, sb, sc, ss; /* .. Parameters .. 
*/ @@ -509,8 +377,7 @@ L40: return 0; } /* check0_ */ -/* Subroutine */ int check1_(sfac) -doublereal *sfac; +/* Subroutine */ int check1_(doublereal* sfac) { /* Initialized data */ @@ -535,14 +402,14 @@ doublereal *sfac; /* Local variables */ static integer i__; - extern doublereal dnrm2test_(); + extern doublereal dnrm2test_(int*, doublereal*, int*); static doublereal stemp[1], strue[8]; - extern /* Subroutine */ int stest_(), dscaltest_(); - extern doublereal dasumtest_(); - extern /* Subroutine */ int itest1_(), stest1_(); + extern /* Subroutine */ int stest_(int*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(int*,doublereal*,doublereal*,int*); + extern doublereal dasumtest_(int*,doublereal*,int*); + extern /* Subroutine */ int itest1_(int*,int*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); static doublereal sx[8]; static integer np1; - extern integer idamaxtest_(); + extern integer idamaxtest_(int*,doublereal*,int*); static integer len; /* .. Parameters .. 
*/ @@ -603,8 +470,7 @@ doublereal *sfac; return 0; } /* check1_ */ -/* Subroutine */ int check2_(sfac) -doublereal *sfac; +/* Subroutine */ int check2_(doublereal* sfac) { /* Initialized data */ @@ -649,10 +515,10 @@ doublereal *sfac; /* Local variables */ static integer lenx, leny; - extern doublereal ddottest_(); + extern doublereal ddottest_(int*,doublereal*,int*,doublereal*,int*); static integer i__, j, ksize; - extern /* Subroutine */ int stest_(), dcopytest_(), dswaptest_(), - daxpytest_(), stest1_(); + extern /* Subroutine */ int stest_(int*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(int*,doublereal*,int*,doublereal*,int*), dswaptest_(int*,doublereal*,int*,doublereal*,int*), + daxpytest_(int*,doublereal*,doublereal*,int*,doublereal*,int*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); static integer ki, kn, mx, my; static doublereal sx[7], sy[7], stx[7], sty[7]; @@ -733,8 +599,7 @@ doublereal *sfac; return 0; } /* check2_ */ -/* Subroutine */ int check3_(sfac) -doublereal *sfac; +/* Subroutine */ int check3_(doublereal* sfac) { /* Initialized data */ @@ -753,9 +618,9 @@ doublereal *sfac; ; /* Local variables */ - extern /* Subroutine */ int drottest_(); + extern /* Subroutine */ int drottest_(int*,doublereal*,int*,doublereal*,int*,doublereal*,doublereal*); static integer i__, k, ksize; - extern /* Subroutine */int stest_(), drotmtest_(); + extern /* Subroutine */int stest_(int*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(int*,doublereal*,int*,doublereal*,int*,doublereal*); static integer ki, kn; static doublereal dparam[5], sx[10], sy[10], stx[10], sty[10]; @@ -826,9 +691,7 @@ doublereal *sfac; return 0; } /* check3_ */ -/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac) -integer *len; -doublereal *scomp, *strue, *ssize, *sfac; +/* Subroutine */ int stest_(int* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac) { /* System generated locals */ integer i__1; @@ -836,7 +699,7 
@@ doublereal *scomp, *strue, *ssize, *sfac; /* Local variables */ static integer i__; - extern doublereal sdiff_(); + extern doublereal sdiff_(doublereal*,doublereal*); static doublereal sd; /* ********************************* STEST ************************** */ @@ -892,11 +755,10 @@ L40: } /* stest_ */ -/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac) -doublereal *scomp1, *strue1, *ssize, *sfac; +/* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac) { static doublereal scomp[1], strue[1]; - extern /* Subroutine */ int stest_(); + extern /* Subroutine */ int stest_(int*, doublereal*, doublereal*, doublereal*, doublereal*); /* ************************* STEST1 ***************************** */ @@ -923,8 +785,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac; return 0; } /* stest1_ */ -doublereal sdiff_(sa, sb) -doublereal *sa, *sb; +doublereal sdiff_(doublereal* sa, doublereal* sb) { /* System generated locals */ doublereal ret_val; @@ -938,8 +799,7 @@ doublereal *sa, *sb; return ret_val; } /* sdiff_ */ -/* Subroutine */ int itest1_(icomp, itrue) -integer *icomp, *itrue; +/* Subroutine */ int itest1_(int* icomp, int* itrue) { /* Local variables */ static integer id; @@ -1188,4 +1048,4 @@ doublereal *dparam; return 0; } /* drotm_ */ -#endif \ No newline at end of file +#endif diff --git a/ctest/c_dblat2c.c b/ctest/c_dblat2c.c index f94dbc1fe..547aa808e 100644 --- a/ctest/c_dblat2c.c +++ b/ctest/c_dblat2c.c @@ -242,129 +242,6 @@ typedef struct Namelist Namelist; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -#if 0 -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - 
double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -/* -- translated by f2c (version 20000121). - You must link the resulting object file with the libraries: - -lf2c -lm (in that order) -*/ - /* Common Block Declarations */ @@ -395,7 +272,7 @@ static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; -/* Main program */ int main() +/* Main program */ int main(void) { /* Initialized data */ @@ -413,17 +290,21 @@ static logical c_false = FALSE_; static logical same; static integer ninc, nbet, ntra; static logical rewi; - extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(), - dchk5_(), dchk6_(); + extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); + extern 
/* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int dchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int dchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); static doublereal a[4225] /* was [65][65] */, g[65]; static integer i__, j; - extern doublereal ddiff_(); + extern doublereal ddiff_(doublereal*, doublereal*); static integer n; static logical fatal; static doublereal x[65], y[65], z__[130]; static logical trace; static integer nidim; - extern /* Subroutine */ int dmvch_(); + extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, 
doublereal*, logical*, integer*, logical*, ftnlen); static char snaps[32], trans[1]; static integer isnum; static logical ltest[16]; @@ -437,11 +318,11 @@ static logical c_false = FALSE_; static char snamet[12]; static doublereal thresh; static logical rorder; - extern /* Subroutine */ int cd2chke_(); + extern /* Subroutine */ void cd2chke_(char*, ftnlen); static integer layout; static logical ltestt, tsterr; static doublereal alf[7]; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static integer inc[7], nkb; static doublereal bet[7],eps,err; char tmpchar; @@ -977,21 +858,7 @@ L240: } /* MAIN__ */ -/* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, - incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *nalf; -doublereal *alf; -integer *nbet; -doublereal *bet; -integer *ninc, *inc, *nmax, *incmax; -doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1007,10 +874,10 @@ ftnlen sname_len; static integer incx, incy; static logical full, tran, null; static integer i__, m, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, 
doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha; static logical isame[13]; - extern /* Subroutine */ int dmvch_(); + extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static integer incxs, incys; @@ -1018,13 +885,14 @@ ftnlen sname_len; static integer ia, ib, ic; static logical banded; static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns; - extern /* Subroutine */ int cdgbmv_(), cdgemv_(); - extern logical lderes_(); + extern /* Subroutine */ void cdgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ void cdgemv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static char ctrans[14]; static doublereal errmax, transl; static char transs[1]; static integer laa, lda; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static doublereal als, bls, err; static integer iku, kls, kus; @@ -1429,21 +1297,7 @@ L140: } /* dchk1_ */ -/* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, - incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *nalf; -doublereal *alf; -integer *nbet; -doublereal *bet; -integer *ninc, 
*inc, *nmax, *incmax; -doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1460,10 +1314,10 @@ ftnlen sname_len; static logical full, null; static char uplo[1]; static integer i__, k, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha; static logical isame[13]; - extern /* Subroutine */ int dmvch_(); + extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; @@ -1474,12 +1328,13 @@ ftnlen sname_len; static integer nc, ik, in; static logical packed; static integer nk, ks, ix, iy, ns, lx, ly; - extern logical lderes_(); - extern /* Subroutine */ int cdsbmv_(), cdspmv_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cdsbmv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ 
void cdspmv_(integer*, char*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); static doublereal errmax, transl; - extern /* Subroutine */ int cdsymv_(); + extern /* Subroutine */ void cdsymv_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); static integer laa, lda; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static doublereal als, bls, err; @@ -1882,17 +1737,7 @@ L130: } /* dchk2_ */ -/* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, xt, g, z__, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax; -doublereal *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* xt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1911,10 +1756,10 @@ ftnlen sname_len; static logical full, null; static char uplo[1], cdiag[14]; static integer i__, k, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static char diags[1]; static logical isame[13]; - extern /* Subroutine */ int dmvch_(); + extern /* Subroutine */ int dmvch_(char*, integer*, integer*, 
doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; @@ -1924,16 +1769,19 @@ ftnlen sname_len; static integer nc, ik, in; static logical packed; static integer nk, ks, ix, ns, lx; - extern logical lderes_(); - extern /* Subroutine */ int cdtbmv_(), cdtbsv_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cdtbmv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cdtbsv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static char ctrans[14]; static doublereal errmax; - extern /* Subroutine */ int cdtpmv_(), cdtrmv_(); + extern /* Subroutine */ void cdtpmv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cdtrmv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static doublereal transl; - extern /* Subroutine */ int cdtpsv_(), cdtrsv_(); + extern /* Subroutine */ void cdtpsv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cdtrsv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static char transs[1]; static integer laa, icd, lda; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static integer ict, icu; static doublereal err; @@ -2388,19 +2236,7 @@ L130: } /* dchk3_ */ -/* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, 
nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublereal *alf; -integer *ninc, *inc, *nmax, *incmax; -doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; @@ -2411,17 +2247,18 @@ ftnlen sname_len; static integer incx, incy; static logical null; static integer i__, j, m, n; - extern /* Subroutine */ int dmake_(), cdger_(); + extern /* Subroutine */ void cdger_(integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*); + extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha, w[1]; static logical isame[13]; - extern /* Subroutine */ int dmvch_(); + extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly; - extern logical 
lderes_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static doublereal errmax, transl; static integer laa, lda; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static doublereal als, err; @@ -2727,19 +2564,7 @@ L150: } /* dchk4_ */ -/* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublereal *alf; -integer *ninc, *inc, *nmax, *incmax; -doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -2757,25 +2582,25 @@ ftnlen sname_len; static logical full, null; static char uplo[1]; static integer i__, j, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha, w[1]; static logical isame[13]; - extern /* Subroutine */ int dmvch_(); + extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, 
doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer nargs; - extern /* Subroutine */ int cdspr_(); + extern /* Subroutine */ void cdspr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, ftnlen); static logical reset; static char cuplo[14]; static integer incxs; - extern /* Subroutine */ int cdsyr_(); + extern /* Subroutine */ void cdsyr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen); static logical upper; static char uplos[1]; static integer ia, ja, ic, nc, jj, lj, in; static logical packed; static integer ix, ns, lx; - extern logical lderes_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static doublereal errmax, transl; static integer laa, lda; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static doublereal als, err; @@ -3096,19 +2921,7 @@ L130: } /* dchk5_ */ -/* Subroutine */ int dchk6_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublereal *alf; -integer *ninc, *inc, *nmax, *incmax; -doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ 
@@ -3125,24 +2938,25 @@ ftnlen sname_len; static logical full, null; static char uplo[1]; static integer i__, j, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha, w[2]; static logical isame[13]; - extern /* Subroutine */ int dmvch_(); + extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; static integer incxs, incys; static logical upper; static char uplos[1]; - extern /* Subroutine */ int cdspr2_(), cdsyr2_(); + extern /* Subroutine */ void cdspr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, ftnlen); + extern /* Subroutine */ void cdsyr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen); static integer ia, ja, ic, nc, jj, lj, in; static logical packed; static integer ix, iy, ns, lx, ly; - extern logical lderes_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static doublereal errmax, transl; static integer laa, lda; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static doublereal als, err; /* Tests DSYR2 and DSPR2. 
*/ @@ -3508,25 +3322,13 @@ L170: } /* dchk6_ */ -/* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl, - ku, reset, transl, type_len, uplo_len, diag_len) -char *type__, *uplo, *diag; -integer *m, *n; -doublereal *a; -integer *nmax; -doublereal *aa; -integer *lda, *kl, *ku; -logical *reset; -doublereal *transl; -ftnlen type_len; -ftnlen uplo_len; -ftnlen diag_len; +/* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ - extern doublereal dbeg_(); + extern doublereal dbeg_(logical* ); static integer ibeg, iend, ioff; static logical unit; static integer i__, j; @@ -3752,28 +3554,14 @@ ftnlen diag_len; } /* dmake_ */ -/* Subroutine */ int dmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y, - incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len) -char *trans; -integer *m, *n; -doublereal *alpha, *a; -integer *nmax; -doublereal *x; -integer *incx; -doublereal *beta, *y; -integer *incy; -doublereal *yt, *g, *yy, *eps, *err; -logical *fatal; -integer *nout; -logical *mv; -ftnlen trans_len; +/* Subroutine */ int dmvch_(char* trans, integer* m, integer* n, doublereal* alpha, doublereal* a, integer* nmax, doublereal* x, integer* incx, doublereal* beta, doublereal* y, integer* incy, doublereal* yt, doublereal* g, doublereal* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ - double sqrt(); + double sqrt(double); /* Local variables */ static doublereal erri; @@ -3902,9 +3690,7 @@ L70: } /* dmvch_ */ -logical lde_(ri, rj, lr) -doublereal *ri, *rj; -integer *lr; +logical lde_(doublereal* 
ri, doublereal* rj, integer* lr) { /* System generated locals */ integer i__1; @@ -3949,13 +3735,7 @@ L30: } /* lde_ */ -logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) -char *type__, *uplo; -integer *m, *n; -doublereal *aa, *as; -integer *lda; -ftnlen type_len; -ftnlen uplo_len; +logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; @@ -4042,8 +3822,7 @@ L80: } /* lderes_ */ -doublereal dbeg_(reset) -logical *reset; +doublereal dbeg_(logical* reset) { /* System generated locals */ doublereal ret_val; @@ -4094,8 +3873,7 @@ L10: } /* dbeg_ */ -doublereal ddiff_(x, y) -doublereal *x, *y; +doublereal ddiff_(doublereal* x, doublereal* y) { /* System generated locals */ doublereal ret_val; diff --git a/ctest/c_dblat3c.c b/ctest/c_dblat3c.c index 05d6b65b0..dc3d6f9e7 100644 --- a/ctest/c_dblat3c.c +++ b/ctest/c_dblat3c.c @@ -242,129 +242,6 @@ typedef struct Namelist Namelist; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -#if 0 -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - 
else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -/* -- translated by f2c (version 20000121). 
- You must link the resulting object file with the libraries: - -lf2c -lm (in that order) -*/ - /* Common Block Declarations */ @@ -393,7 +270,7 @@ static logical c_true = TRUE_; static integer c__0 = 0; static logical c_false = FALSE_; -/* Main program MAIN__() */ int main() +/* Main program MAIN__() */ int main(void) { /* Initialized data */ @@ -403,25 +280,24 @@ static logical c_false = FALSE_; integer i__1, i__2, i__3; doublereal d__1; - /* Builtin functions */ - integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(), - e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe(); - integer f_clos(); /* Local variables */ static integer nalf, idim[9]; static logical same; static integer nbet, ntra; static logical rewi; - extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(), - dchk5_(); + extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, 
doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); +/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len); static doublereal c__[4225] /* was [65][65] */, g[65]; static integer i__, j; - extern doublereal ddiff_(); + extern doublereal ddiff_(doublereal*, doublereal*); static integer n; static logical fatal; static doublereal w[130]; - extern /* Subroutine */ int dmmch_(); + extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical trace; static integer nidim; static char snaps[32]; @@ -433,11 +309,11 @@ static logical c_false = FALSE_; static char snamet[12], transa[1], transb[1]; static doublereal thresh; static logical rorder; - extern /* Subroutine */ int cd3chke_(); + extern /* Subroutine */ void cd3chke_(char*, ftnlen); static integer layout; static logical ltestt, tsterr; static doublereal alf[7]; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static doublereal bet[7], eps, err; char tmpchar; @@ -907,21 +783,7 @@ L230: } /* MAIN__ */ -/* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, 
sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublereal *alf; -integer *nbet; -doublereal *bet; -integer *nmax; -doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -931,29 +793,27 @@ ftnlen sname_len; integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; - /* Builtin functions */ - integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); /* Local variables */ static doublereal beta; static integer ldas, ldbs, ldcs; static logical same, null; static integer i__, k, m, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha; - extern /* Subroutine */ int dmmch_(); + extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical isame[13], trana, tranb; static integer nargs; static logical reset; - extern /* Subroutine */ void dprcn1_(); + extern /* Subroutine */ void dprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublereal*, integer*, 
integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static integer ia, ib, ma, mb, na, nb, nc, ik, im, in; - extern /* Subroutine */ int cdgemm_(); + extern /* Subroutine */ void cdgemm_(integer*, char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static integer ks, ms, ns; - extern logical lderes_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static char tranas[1], tranbs[1], transa[1], transb[1]; static doublereal errmax; static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static doublereal als, bls, err; /* Tests DGEMM. */ @@ -1283,23 +1143,8 @@ L130: } /* dchk1_ */ -/* Subroutine */ void dprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k, - alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *transa, *transb; -integer *m, *n, *k; -doublereal *alpha; -integer *lda, *ldb; -doublereal *beta; -integer *ldc; -ftnlen sname_len; -ftnlen transa_len; -ftnlen transb_len; +/* Subroutine */ void dprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char crc[14], cta[14], ctb[14]; @@ -1328,21 +1173,7 @@ ftnlen transb_len; } /* dprcn1_ */ -/* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, 
*idim, *nalf; -doublereal *alf; -integer *nbet; -doublereal *bet; -integer *nmax; -doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1353,8 +1184,6 @@ ftnlen sname_len; integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; - /* Builtin functions */ - integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); /* Local variables */ static doublereal beta; @@ -1364,21 +1193,21 @@ ftnlen sname_len; static logical left, null; static char uplo[1]; static integer i__, m, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha; - extern /* Subroutine */ int dmmch_(); + extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical isame[13]; static char sides[1]; static integer nargs; static logical reset; static char uplos[1]; - extern /* Subroutine */ void dprcn2_(); + extern /* Subroutine */ void dprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static integer ia, ib, 
na, nc, im, in, ms, ns; - extern logical lderes_(); - extern /* Subroutine */ int cdsymm_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cdsymm_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static doublereal errmax; static integer laa, lbb, lda, lcc, ldb, ldc; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static integer ics; static doublereal als, bls; static integer icu; @@ -1692,23 +1521,8 @@ L120: } /* dchk2_ */ -/* Subroutine */ void dprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha, - lda, ldb, beta, ldc, sname_len, side_len, uplo_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *side, *uplo; -integer *m, *n; -doublereal *alpha; -integer *lda, *ldb; -doublereal *beta; -integer *ldc; -ftnlen sname_len; -ftnlen side_len; -ftnlen uplo_len; +/* Subroutine */ void dprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char cs[14], cu[14], crc[14]; @@ -1733,19 +1547,7 @@ ftnlen uplo_len; } /* dprcn2_ */ -/* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__, - iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublereal *alf; -integer *nmax; -doublereal *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* 
ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* ct, doublereal* g, doublereal* c__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1766,24 +1568,24 @@ ftnlen sname_len; static logical left, null; static char uplo[1]; static integer i__, j, m, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha; static char diags[1]; - extern /* Subroutine */ int dmmch_(); + extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical isame[13]; static char sides[1]; static integer nargs; static logical reset; static char uplos[1]; - extern /* Subroutine */ void dprcn3_(); + extern /* Subroutine */ void dprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen); static integer ia, na, nc, im, in, ms, ns; - extern logical lderes_(); - extern /* Subroutine */ int cdtrmm_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cdtrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); static char tranas[1], transa[1]; - extern /* Subroutine */ int cdtrsm_(); + extern /* Subroutine */ void cdtrsm_(integer*, char*, char*, char*, char*, integer*, integer*, 
doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); static doublereal errmax; static integer laa, icd, lbb, lda, ldb; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static integer ics; static doublereal als; static integer ict, icu; @@ -2165,24 +1967,8 @@ L160: } /* dchk3_ */ -/* Subroutine */ void dprcn3_(nout, nc, sname, iorder, side, uplo, transa, - diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len, - transa_len, diag_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *side, *uplo, *transa, *diag; -integer *m, *n; -doublereal *alpha; -integer *lda, *ldb; -ftnlen sname_len; -ftnlen side_len; -ftnlen uplo_len; -ftnlen transa_len; -ftnlen diag_len; +/* Subroutine */ void dprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char ca[14], cd[14], cs[14], cu[14], crc[14]; @@ -2219,21 +2005,7 @@ ftnlen diag_len; } /* dprcn3_ */ -/* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublereal *alf; -integer *nbet; -doublereal *bet; -integer *nmax; -doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* 
nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -2244,8 +2016,6 @@ ftnlen sname_len; integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; - /* Builtin functions */ - integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); /* Local variables */ static doublereal beta; @@ -2255,23 +2025,23 @@ ftnlen sname_len; static logical tran, null; static char uplo[1]; static integer i__, j, k, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha; - extern /* Subroutine */ int dmmch_(); + extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical isame[13]; static integer nargs; static logical reset; static char trans[1]; static logical upper; static char uplos[1]; - extern /* Subroutine */ void dprcn4_(); + extern /* Subroutine */ void dprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns; - extern logical lderes_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static doublereal errmax; - extern /* Subroutine */ int cdsyrk_(); + extern /* Subroutine */ void cdsyrk_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static char 
transs[1]; static integer laa, lda, lcc, ldc; - extern logical lde_(); + extern logical lde_(doublereal*, doublereal*, integer*); static doublereal als; static integer ict, icu; static doublereal err; @@ -2586,23 +2356,8 @@ L130: } /* dchk4_ */ -/* Subroutine */ void dprcn4_(nout, nc, sname, iorder, uplo, transa, n, k, - alpha, lda, beta, ldc, sname_len, uplo_len, transa_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *uplo, *transa; -integer *n, *k; -doublereal *alpha; -integer *lda; -doublereal *beta; -integer *ldc; -ftnlen sname_len; -ftnlen uplo_len; -ftnlen transa_len; +/* Subroutine */ void dprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char ca[14], cu[14], crc[14]; @@ -2629,21 +2384,7 @@ ftnlen transa_len; } /* dprcn4_ */ -/* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs, - c__, cc, cs, ct, g, w, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublereal *alf; -integer *nbet; -doublereal *bet; -integer *nmax; -doublereal *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen 
sname_len) { /* Initialized data */ @@ -2653,8 +2394,6 @@ ftnlen sname_len; /* System generated locals */ integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; - /* Builtin functions */ - integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); /* Local variables */ static integer jjab; @@ -2665,23 +2404,23 @@ ftnlen sname_len; static logical tran, null; static char uplo[1]; static integer i__, j, k, n; - extern /* Subroutine */ int dmake_(); + extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); static doublereal alpha; - extern /* Subroutine */ int dmmch_(); + extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical isame[13]; static integer nargs; static logical reset; static char trans[1]; static logical upper; static char uplos[1]; - extern /* Subroutine */ void dprcn5_(); + extern /* Subroutine */ void dprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns; - extern logical lderes_(); + extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static doublereal errmax; static char transs[1]; static integer laa, lbb, lda, lcc, ldb, ldc; - extern logical lde_(); - extern /* Subroutine */ int cdsyr2k_(); + extern logical lde_(doublereal*, doublereal*, integer*); + extern /* Subroutine */ void cdsyr2k_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); static doublereal 
als; static integer ict, icu; static doublereal err; @@ -3048,23 +2787,8 @@ L160: } /* dchk5_ */ -/* Subroutine */ void dprcn5_(nout, nc, sname, iorder, uplo, transa, n, k, - alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *uplo, *transa; -integer *n, *k; -doublereal *alpha; -integer *lda, *ldb; -doublereal *beta; -integer *ldc; -ftnlen sname_len; -ftnlen uplo_len; -ftnlen transa_len; +/* Subroutine */ void dprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char ca[14], cu[14], crc[14]; @@ -3091,25 +2815,13 @@ ftnlen transa_len; } /* dprcn5_ */ -/* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset, - transl, type_len, uplo_len, diag_len) -char *type__, *uplo, *diag; -integer *m, *n; -doublereal *a; -integer *nmax; -doublereal *aa; -integer *lda; -logical *reset; -doublereal *transl; -ftnlen type_len; -ftnlen uplo_len; -ftnlen diag_len; +/* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ - extern doublereal dbeg_(); + extern doublereal dbeg_(logical*); static integer ibeg, iend; static logical unit; static integer i__, j; @@ -3241,25 +2953,7 @@ ftnlen diag_len; } /* dmake_ */ -/* Subroutine */ int dmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb, - beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv, - transa_len, transb_len) -char *transa, *transb; -integer *m, *n, *kk; -doublereal *alpha, *a; 
-integer *lda; -doublereal *b; -integer *ldb; -doublereal *beta, *c__; -integer *ldc; -doublereal *ct, *g, *cc; -integer *ldcc; -doublereal *eps, *err; -logical *fatal; -integer *nout; -logical *mv; -ftnlen transa_len; -ftnlen transb_len; +/* Subroutine */ int dmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublereal* alpha, doublereal* a, integer* lda, doublereal* b, integer* ldb, doublereal* beta, doublereal* c__, integer* ldc, doublereal* ct, doublereal* g, doublereal* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, @@ -3267,8 +2961,7 @@ ftnlen transb_len; doublereal d__1, d__2; /* Builtin functions */ - double sqrt(); - integer s_wsfe(), e_wsfe(), do_fio(); + double sqrt(double); /* Local variables */ static doublereal erri; @@ -3432,9 +3125,7 @@ L150: } /* dmmch_ */ -logical lde_(ri, rj, lr) -doublereal *ri, *rj; -integer *lr; +logical lde_(doublereal* ri, doublereal* rj, integer* lr) { /* System generated locals */ integer i__1; @@ -3481,13 +3172,7 @@ L30: } /* lde_ */ -logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) -char *type__, *uplo; -integer *m, *n; -doublereal *aa, *as; -integer *lda; -ftnlen type_len; -ftnlen uplo_len; +logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; @@ -3576,8 +3261,7 @@ L80: } /* lderes_ */ -doublereal dbeg_(reset) -logical *reset; +doublereal dbeg_(logical* reset) { /* System generated locals */ doublereal ret_val; @@ -3629,8 +3313,7 @@ L10: } /* dbeg_ */ -doublereal ddiff_(x, y) -doublereal *x, *y; +doublereal ddiff_(doublereal* x, doublereal* y) { /* System generated locals */ doublereal ret_val; diff --git 
a/ctest/c_sblat1c.c b/ctest/c_sblat1c.c index 57e4707a9..1424e39b4 100644 --- a/ctest/c_sblat1c.c +++ b/ctest/c_sblat1c.c @@ -21,19 +21,6 @@ typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; -#ifdef _MSC_VER -static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} -static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} -static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} -static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} -#else -static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} -static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} -static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} -static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} -#endif -#define pCf(z) (*_pCf(z)) -#define pCd(z) (*_pCd(z)) typedef int logical; typedef short int shortlogical; typedef char logical1; @@ -242,250 +229,6 @@ typedef struct Namelist Namelist; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -#if 0 -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= 
x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -#if 0 -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], 
pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -#if 0 -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, 
x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -/* -- translated by f2c (version 20000121). 
- You must link the resulting object file with the libraries: - -lf2c -lm (in that order) -*/ - /* Common Block Declarations */ @@ -393,7 +270,7 @@ static logical c_true = TRUE_; static integer c__0 = 0; static logical c_false = FALSE_; -/* Main program MAIN__() */ int main() +/* Main program MAIN__() */ int main(void) { /* Initialized data */ @@ -402,26 +279,25 @@ static logical c_false = FALSE_; /* System generated locals */ integer i__1, i__2, i__3; real r__1; - /* Builtin functions */ - integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(), - e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe(); - integer f_clos(); /* Local variables */ static integer nalf, idim[9]; static logical same; static integer nbet, ntra; static logical rewi; - extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(), - schk5_(); + extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); + extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); + extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); + extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); + extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, 
integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); static real c__[4225] /* was [65][65] */, g[65]; static integer i__, j, n; static logical fatal; static real w[130]; - extern doublereal sdiff_(); + extern doublereal sdiff_(real*, real*); static logical trace; static integer nidim; - extern /* Subroutine */ int smmch_(); + extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); static char snaps[32]; static integer isnum; static logical ltest[6]; @@ -433,9 +309,9 @@ static logical c_false = FALSE_; static logical rorder; static integer layout; static logical ltestt, tsterr; - extern /* Subroutine */ int cs3chke_(); + extern /* Subroutine */ void cs3chke_(char*, ftnlen); static real alf[7], bet[7]; - extern logical lse_(); + extern logical lse_(real*, real*, integer*); static real eps, err; char tmpchar; @@ -899,21 +775,7 @@ L230: } /* MAIN__ */ -/* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -real *alf; -integer *nbet; -real *bet; -integer *nmax; -real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len) { /* Initialized 
data */ @@ -923,8 +785,6 @@ ftnlen sname_len; integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; - /* Builtin functions */ - integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); /* Local variables */ static real beta; @@ -936,18 +796,17 @@ ftnlen sname_len; static logical trana, tranb; static integer nargs; static logical reset; - extern /* Subroutine */ void sprcn1_(); - extern /* Subroutine */ int smake_(); - extern /* Subroutine */ int smmch_(); + extern /* Subroutine */ void sprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); static integer ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns; - extern /* Subroutine */ int csgemm_(); + extern /* Subroutine */ void csgemm_(integer*, char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen); static char tranas[1], tranbs[1], transa[1], transb[1]; static real errmax; - extern logical lseres_(); - extern logical lse_(); + extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); + extern logical lse_(real*, real*, integer*); static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc; static real als, bls; - extern logical lse_(); static real err; /* Tests SGEMM. 
*/ @@ -1278,23 +1137,8 @@ L130: -/* Subroutine */ void sprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k, - alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *transa, *transb; -integer *m, *n, *k; -real *alpha; -integer *lda, *ldb; -real *beta; -integer *ldc; -ftnlen sname_len; -ftnlen transa_len; -ftnlen transb_len; +/* Subroutine */ void sprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char crc[14], cta[14], ctb[14]; @@ -1324,21 +1168,7 @@ ftnlen transb_len; } /* sprcn1_ */ -/* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -real *alf; -integer *nbet; -real *bet; -integer *nmax; -real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1349,8 +1179,6 @@ ftnlen sname_len; integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; - /* Builtin functions */ - integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); /* Local variables */ static real beta; @@ -1368,15 +1196,15 
@@ ftnlen sname_len; static char uplos[1]; static integer ia, ib, na, nc, im, in, ms, ns; static real errmax; - extern logical lseres_(); - extern /* Subroutine */ int cssymm_(); - extern void sprcn2_(); - extern int smake_(); - extern int smmch_(); + extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cssymm_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen); + extern void sprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); static integer laa, lbb, lda, lcc, ldb, ldc, ics; static real als, bls; static integer icu; - extern logical lse_(); + extern logical lse_(real*, real*, integer*); static real err; /* Tests SSYMM. 
*/ @@ -1685,23 +1513,8 @@ L120: } /* schk2_ */ -/* Subroutine */ void sprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha, - lda, ldb, beta, ldc, sname_len, side_len, uplo_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *side, *uplo; -integer *m, *n; -real *alpha; -integer *lda, *ldb; -real *beta; -integer *ldc; -ftnlen sname_len; -ftnlen side_len; -ftnlen uplo_len; +/* Subroutine */ void sprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char cs[14], cu[14], crc[14]; @@ -1726,19 +1539,7 @@ ftnlen uplo_len; } /* sprcn2_ */ -/* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__, - iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -real *alf; -integer *nmax; -real *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* ct, real* g, real* c__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1751,8 +1552,6 @@ ftnlen sname_len; integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; - /* Builtin functions */ - integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); /* Local variables */ static char diag[1]; @@ -1769,18 +1568,19 @@ ftnlen sname_len; static integer nargs; static logical reset; static char uplos[1]; - extern /* Subroutine */ void sprcn3_(); + 
extern /* Subroutine */ void sprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, real*, integer*, integer*, ftnlen , ftnlen, ftnlen, ftnlen, ftnlen); static integer ia, na, nc, im, in, ms, ns; static char tranas[1], transa[1]; static real errmax; - extern int smake_(); - extern int smmch_(); - extern logical lseres_(); - extern /* Subroutine */ int cstrmm_(), cstrsm_(); + extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); + extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cstrmm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cstrsm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); static integer laa, icd, lbb, lda, ldb, ics; static real als; static integer ict, icu; - extern logical lse_(); + extern logical lse_(real*, real*, integer*); static real err; /* Tests STRMM and STRSM. 
*/ @@ -2155,24 +1955,8 @@ L160: } /* schk3_ */ -/* Subroutine */ void sprcn3_(nout, nc, sname, iorder, side, uplo, transa, - diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len, - transa_len, diag_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *side, *uplo, *transa, *diag; -integer *m, *n; -real *alpha; -integer *lda, *ldb; -ftnlen sname_len; -ftnlen side_len; -ftnlen uplo_len; -ftnlen transa_len; -ftnlen diag_len; +/* Subroutine */ void sprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char ca[14], cd[14], cs[14], cu[14], crc[14]; @@ -2210,21 +1994,7 @@ ftnlen diag_len; } /* sprcn3_ */ -/* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -real *alf; -integer *nbet; -real *bet; -integer *nmax; -real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -2235,8 +2005,6 @@ ftnlen sname_len; integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; - /* Builtin functions */ - integer f_rew(), s_wsfe(), 
e_wsfe(), do_fio(); /* Local variables */ static real beta; @@ -2253,18 +2021,18 @@ ftnlen sname_len; static char trans[1]; static logical upper; static char uplos[1]; - extern /* Subroutine */ void sprcn4_(); - extern /* Subroutine */ int smake_(); - extern /* Subroutine */ int smmch_(); + extern /* Subroutine */ void sprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns; static real errmax; - extern logical lseres_(); + extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); static char transs[1]; - extern /* Subroutine */ int cssyrk_(); + extern /* Subroutine */ void cssyrk_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen); static integer laa, lda, lcc, ldc; static real als; static integer ict, icu; - extern logical lse_(); + extern logical lse_(real*, real*, integer*); static real err; /* Tests SSYRK. 
*/ @@ -2575,23 +2343,8 @@ L130: } /* schk4_ */ -/* Subroutine */ void sprcn4_(nout, nc, sname, iorder, uplo, transa, n, k, - alpha, lda, beta, ldc, sname_len, uplo_len, transa_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *uplo, *transa; -integer *n, *k; -real *alpha; -integer *lda; -real *beta; -integer *ldc; -ftnlen sname_len; -ftnlen uplo_len; -ftnlen transa_len; +/* Subroutine */ void sprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char ca[14], cu[14], crc[14]; @@ -2619,21 +2372,7 @@ ftnlen transa_len; } /* sprcn4_ */ -/* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs, - c__, cc, cs, ct, g, w, iorder, sname_len) -char *sname; -real *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -real *alf; -integer *nbet; -real *bet; -integer *nmax; -real *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* ab, real* aa, real* as, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, real* w, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -2643,8 +2382,6 @@ ftnlen sname_len; /* System generated locals */ integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; - /* Builtin functions */ - integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); /* Local variables */ static integer jjab; @@ -2663,18 +2400,18 @@ ftnlen sname_len; 
static logical upper; static char uplos[1]; static integer ia, ib; - extern /* Subroutine */ void sprcn5_(); + extern /* Subroutine */ void sprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); static integer jc, ma, na, nc, ik, in, jj, lj, ks, ns; static real errmax; - extern logical lseres_(); - extern int smake_(); + extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); static char transs[1]; static integer laa, lbb, lda, lcc, ldb, ldc; static real als; static integer ict, icu; - extern /* Subroutine */ int cssyr2k_(); - extern logical lse_(); - extern int smmch_(); + extern /* Subroutine */ void cssyr2k_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen); + extern logical lse_(real*, real*, integer*); + extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); static real err; /* Tests SSYR2K. 
*/ @@ -3037,23 +2774,8 @@ L160: } /* schk5_ */ -/* Subroutine */ void sprcn5_(nout, nc, sname, iorder, uplo, transa, n, k, - alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *uplo, *transa; -integer *n, *k; -real *alpha; -integer *lda, *ldb; -real *beta; -integer *ldc; -ftnlen sname_len; -ftnlen uplo_len; -ftnlen transa_len; +/* Subroutine */ void sprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) { - /* Builtin functions */ - integer s_wsfe(), do_fio(), e_wsfe(); /* Local variables */ static char ca[14], cu[14], crc[14]; @@ -3081,19 +2803,7 @@ ftnlen transa_len; } /* sprcn5_ */ -/* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset, - transl, type_len, uplo_len, diag_len) -char *type__, *uplo, *diag; -integer *m, *n; -real *a; -integer *nmax; -real *aa; -integer *lda; -logical *reset; -real *transl; -ftnlen type_len; -ftnlen uplo_len; -ftnlen diag_len; +/* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; @@ -3102,7 +2812,7 @@ ftnlen diag_len; /* Local variables */ static integer ibeg, iend; - extern doublereal sbeg_(); + extern doublereal sbeg_(logical*); static logical unit; static integer i__, j; static logical lower, upper, gen, tri, sym; @@ -3233,25 +2943,7 @@ ftnlen diag_len; } /* smake_ */ -/* Subroutine */ int smmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb, - beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv, - transa_len, transb_len) -char *transa, *transb; -integer *m, *n, *kk; -real *alpha, *a; -integer *lda; -real *b; -integer 
*ldb; -real *beta, *c__; -integer *ldc; -real *ct, *g, *cc; -integer *ldcc; -real *eps, *err; -logical *fatal; -integer *nout; -logical *mv; -ftnlen transa_len; -ftnlen transb_len; +/* Subroutine */ int smmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, real* alpha, real* a, integer* lda, real* b, integer* ldb, real* beta, real* c__, integer* ldc, real* ct, real* g, real* cc, integer* ldcc, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len) { /* System generated locals */ @@ -3260,8 +2952,7 @@ ftnlen transb_len; real r__1, r__2; /* Builtin functions */ - double sqrt(); - integer s_wsfe(), e_wsfe(), do_fio(); + double sqrt(double); /* Local variables */ static real erri; @@ -3426,9 +3117,7 @@ L150: } /* smmch_ */ -logical lse_(ri, rj, lr) -real *ri, *rj; -integer *lr; +logical lse_(real* ri, real* rj, integer* lr) { /* System generated locals */ integer i__1; @@ -3475,13 +3164,7 @@ L30: } /* lse_ */ -logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) -char *type__, *uplo; -integer *m, *n; -real *aa, *as; -integer *lda; -ftnlen type_len; -ftnlen uplo_len; +logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; @@ -3572,8 +3255,7 @@ L80: } /* lseres_ */ -doublereal sbeg_(reset) -logical *reset; +doublereal sbeg_(logical* reset) { /* System generated locals */ real ret_val; @@ -3625,8 +3307,7 @@ L10: } /* sbeg_ */ -doublereal sdiff_(x, y) -real *x, *y; +doublereal sdiff_(real* x, real* y) { /* System generated locals */ real ret_val; diff --git a/ctest/c_zblat1c.c b/ctest/c_zblat1c.c index d5b080633..4761e63d7 100644 --- a/ctest/c_zblat1c.c +++ b/ctest/c_zblat1c.c @@ -242,250 +242,6 @@ typedef struct Namelist Namelist; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef 
__cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -#if 0 -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - 
else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -#if 0 -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 
01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif -/* -- translated by f2c (version 20000121). 
- You must link the resulting object file with the libraries: - -lf2c -lm (in that order) -*/ - /* Common Block Declarations */ @@ -396,7 +273,7 @@ static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; -/* Main program */ int main() +/* Main program */ int main(void) { /* Initialized data */ @@ -414,19 +291,23 @@ static logical c_false = FALSE_; static logical same; static integer ninc, nbet, ntra; static logical rewi; - extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(), - zchk5_(), zchk6_(); + extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); + extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, 
doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); + extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); + extern /* Subroutine */ int zchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); static doublecomplex a[4225] /* was [65][65] */; static doublereal g[65]; static integer i__, j; - extern doublereal ddiff_(); + extern doublereal ddiff_(doublereal*, doublereal*); static integer n; static logical fatal; static doublecomplex x[65], y[65], z__[130]; static logical trace; static integer nidim; static char snaps[32], trans[1]; - extern /* Subroutine */ int zmvch_(); + extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer isnum; static logical ltest[17]; static doublecomplex aa[4225]; @@ -441,12 +322,12 @@ static logical c_false = FALSE_; static logical rorder; static integer layout; static logical ltestt, tsterr; - extern /* Subroutine */ int cz2chke_(); + extern /* Subroutine */ void 
cz2chke_(char*, ftnlen); static doublecomplex alf[7]; static integer inc[7], nkb; static doublecomplex bet[7]; static doublereal eps, err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); char tmpchar; /* Test program for the DOUBLE PRECISION COMPLEX Level 2 Blas. */ @@ -984,22 +865,7 @@ L240: } /* MAIN__ */ -/* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, - incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *nalf; -doublecomplex *alf; -integer *nbet; -doublecomplex *bet; -integer *ninc, *inc, *nmax, *incmax; -doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -doublereal *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1018,27 +884,27 @@ ftnlen sname_len; static integer i__, m, n; static doublecomplex alpha; static logical isame[13]; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; static logical reset; static integer incxs, incys; static char trans[1]; - extern /* Subroutine 
*/ int zmvch_(); + extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer ia, ib, ic; static logical banded; static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns; - extern /* Subroutine */ int czgbmv_(); + extern /* Subroutine */ void czgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); static char ctrans[14]; - extern /* Subroutine */ int czgemv_(); + extern /* Subroutine */ void czgemv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); static doublereal errmax; static doublecomplex transl; - extern logical lzeres_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static char transs[1]; static integer laa, lda; static doublecomplex als, bls; static doublereal err; static integer iku, kls; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); static integer kus; @@ -1451,22 +1317,7 @@ L140: } /* zchk1_ */ -/* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, - incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *nalf; -doublecomplex *alf; -integer *nbet; -doublecomplex *bet; -integer *ninc, *inc, *nmax, *incmax; -doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -doublereal *g; -integer *iorder; -ftnlen sname_len; +/* 
Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1486,27 +1337,28 @@ ftnlen sname_len; static integer i__, k, n; static doublecomplex alpha; static logical isame[13]; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; static integer incxs, incys; - extern /* Subroutine */ int zmvch_(); + extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static char uplos[1]; static integer ia, ib, ic; static logical banded; static integer nc, ik, in; static logical packed; static integer nk, ks, ix, iy, ns, lx, ly; - extern /* Subroutine */ int czhbmv_(), czhemv_(); + extern /* Subroutine */ void czhbmv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); + extern /* Subroutine */ void czhemv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); static 
doublereal errmax; static doublecomplex transl; - extern logical lzeres_(); - extern /* Subroutine */ int czhpmv_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void czhpmv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); static integer laa, lda; static doublecomplex als, bls; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); /* Tests CHEMV, CHBMV and CHPMV. */ @@ -1909,19 +1761,7 @@ L130: } /* zchk2_ */ -/* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, xt, g, z__, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax; -doublecomplex *a, *aa, *as, *x, *xx, *xs, *xt; -doublereal *g; -doublecomplex *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* xt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1942,13 +1782,13 @@ ftnlen sname_len; static integer i__, k, n; static char diags[1]; static logical isame[13]; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer 
nargs; static logical reset; static char cuplo[14]; static integer incxs; static char trans[1]; - extern /* Subroutine */ int zmvch_(); + extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static char uplos[1]; static logical banded; static integer nc, ik, in; @@ -1957,14 +1797,17 @@ ftnlen sname_len; static char ctrans[14]; static doublereal errmax; static doublecomplex transl; - extern logical lzeres_(); - extern /* Subroutine */ int cztbmv_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cztbmv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); static char transs[1]; - extern /* Subroutine */ int cztbsv_(), cztpmv_(), cztrmv_(), cztpsv_(), - cztrsv_(); + extern /* Subroutine */ void cztbsv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cztpmv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cztpsv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cztrmv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cztrsv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); static integer laa, icd, lda, ict, icu; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, 
integer*); @@ -2422,21 +2265,7 @@ L130: } /* zchk3_ */ -/* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublecomplex *alf; -integer *ninc, *inc, *nmax, *incmax; -doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -doublereal *g; -doublecomplex *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; @@ -2450,21 +2279,21 @@ ftnlen sname_len; static integer i__, j, m, n; static doublecomplex alpha, w[1]; static logical isame[13]; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; static logical reset; static integer incxs, incys; - extern /* Subroutine */ int zmvch_(); + extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static integer 
ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly; - extern /* Subroutine */ int czgerc_(); + extern /* Subroutine */ void czgerc_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*); static doublereal errmax; - extern /* Subroutine */ int czgeru_(); + extern /* Subroutine */ void czgeru_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*); static doublecomplex transl; - extern logical lzeres_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static integer laa, lda; static doublecomplex als; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); @@ -2793,21 +2622,7 @@ L150: } /* zchk4_ */ -/* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublecomplex *alf; -integer *ninc, *inc, *nmax, *incmax; -doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -doublereal *g; -doublecomplex *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -2827,13 +2642,14 @@ ftnlen sname_len; static 
integer i__, j, n; static doublecomplex alpha, w[1]; static logical isame[13]; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; - extern /* Subroutine */ int czher_(); + extern /* Subroutine */ void czher_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen); static logical reset; static char cuplo[14]; static integer incxs; - extern /* Subroutine */ int czhpr_(), zmvch_(); + extern /* Subroutine */ void czhpr_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, ftnlen); + extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static logical upper; static char uplos[1]; static integer ia, ja, ic, nc, jj, lj, in; @@ -2841,10 +2657,10 @@ ftnlen sname_len; static integer ix, ns, lx; static doublereal ralpha, errmax; static doublecomplex transl; - extern logical lzeres_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static integer laa, lda; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); /* Tests ZHER and ZHPR. 
*/ @@ -3167,21 +2983,7 @@ L130: } /* zchk5_ */ -/* Subroutine */ int zchk6_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, - xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublecomplex *alf; -integer *ninc, *inc, *nmax, *incmax; -doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; -doublereal *g; -doublecomplex *z__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -3201,25 +3003,26 @@ ftnlen sname_len; static integer i__, j, n; static doublecomplex alpha, w[2]; static logical isame[13]; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; static logical reset; static char cuplo[14]; static integer incxs, incys; - extern /* Subroutine */ int zmvch_(); + extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); static logical upper; static char uplos[1]; - extern /* Subroutine */ int 
czher2_(), czhpr2_(); + extern /* Subroutine */ void czher2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen); + extern /* Subroutine */ void czhpr2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, ftnlen); static integer ia, ja, ic, nc, jj, lj, in; static logical packed; static integer ix, iy, ns, lx, ly; static doublereal errmax; static doublecomplex transl; - extern logical lzeres_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static integer laa, lda; static doublecomplex als; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); /* Tests ZHER2 and ZHPR2. */ @@ -3604,24 +3407,7 @@ L170: } /* zchk6_ */ -/* Subroutine */ int zmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y, - incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len) -char *trans; -integer *m, *n; -doublecomplex *alpha, *a; -integer *nmax; -doublecomplex *x; -integer *incx; -doublecomplex *beta, *y; -integer *incy; -doublecomplex *yt; -doublereal *g; -doublecomplex *yy; -doublereal *eps, *err; -logical *fatal; -integer *nout; -logical *mv; -ftnlen trans_len; +/* Subroutine */ int zmvch_(char* trans, integer* m, integer* n, doublecomplex* alpha, doublecomplex* a, integer* nmax, doublecomplex* x, integer* incx, doublecomplex* beta, doublecomplex* y, integer* incy, doublecomplex* yt, doublereal* g, doublecomplex* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len) { /* System generated locals */ @@ -3819,9 +3605,7 @@ L80: } /* zmvch_ */ -logical lze_(ri, rj, lr) -doublecomplex *ri, *rj; -integer *lr; +logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr) { /* System generated locals */ integer i__1, i__2, i__3; @@ -3868,13 +3652,7 @@ L30: } /* lze_ */ -logical 
lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) -char *type__, *uplo; -integer *m, *n; -doublecomplex *aa, *as; -integer *lda; -ftnlen type_len; -ftnlen uplo_len; +logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex* aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; @@ -3967,9 +3745,7 @@ L80: } /* lzeres_ */ -/* Double Complex */ VOID zbeg_( ret_val, reset) -doublecomplex * ret_val; -logical *reset; +/* Double Complex */ VOID zbeg_( doublecomplex* ret_val, logical* reset) { /* System generated locals */ doublereal d__1, d__2; @@ -4030,8 +3806,7 @@ L10: } /* zbeg_ */ -doublereal ddiff_(x, y) -doublereal *x, *y; +doublereal ddiff_(doublereal* x, doublereal* y) { /* System generated locals */ doublereal ret_val; @@ -4051,19 +3826,7 @@ doublereal *x, *y; } /* ddiff_ */ -/* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl, - ku, reset, transl, type_len, uplo_len, diag_len) -char *type__, *uplo, *diag; -integer *m, *n; -doublecomplex *a; -integer *nmax; -doublecomplex *aa; -integer *lda, *kl, *ku; -logical *reset; -doublecomplex *transl; -ftnlen type_len; -ftnlen uplo_len; -ftnlen diag_len; +/* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -4072,7 +3835,7 @@ ftnlen diag_len; /* Local variables */ static integer ibeg, iend, ioff; - extern /* Double Complex */ VOID zbeg_(); + extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*); static logical unit; static integer i__, j; static logical lower; diff --git a/ctest/c_zblat3c.c b/ctest/c_zblat3c.c index eca2c3ff6..6025c0052 
100644 --- a/ctest/c_zblat3c.c +++ b/ctest/c_zblat3c.c @@ -22,14 +22,11 @@ typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; #ifdef _MSC_VER -static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} -static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} #else static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} -static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} #endif #define pCf(z) (*_pCf(z)) @@ -242,124 +239,7 @@ typedef struct Namelist Namelist; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -#if 0 -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float 
pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -#endif + /* Common Block Declarations */ @@ -388,7 +268,7 @@ static logical c_true = TRUE_; static integer c__0 = 0; static logical c_false = FALSE_; -/* Main program MAIN__() */ int main() +/* Main program MAIN__() */ int main(void) { /* Initialized data */ @@ -400,26 +280,29 @@ static logical c_false = FALSE_; doublereal d__1; /* Builtin functions */ - integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(), - e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe(); + integer s_rsle(void), do_lio(void), e_rsle(void), f_open(void), s_wsfe(void), do_fio(void), + e_wsfe(void), s_wsle(void), e_wsle(void), s_rsfe(void), e_rsfe(void); /* Local variables */ static integer nalf, idim[9]; static logical same; static integer nbet, ntra; static logical rewi; - extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(), - zchk5_(); + extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, 
doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); + extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); + extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); static doublecomplex c__[4225] /* was [65][65] */; static doublereal g[65]; static integer i__, j; - extern doublereal ddiff_(); + extern doublereal ddiff_(doublereal*, doublereal*); static integer n; static logical fatal; static doublecomplex w[130]; static logical trace; static integer nidim; - extern /* Subroutine */ int zmmch_(); + extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); 
static char snaps[32]; static integer isnum; static logical ltest[9]; @@ -431,10 +314,10 @@ static logical c_false = FALSE_; static logical rorder; static integer layout; static logical ltestt, tsterr; - extern /* Subroutine */ int cz3chke_(); + extern /* Subroutine */ int cz3chke_(char*, ftnlen); static doublecomplex alf[7], bet[7]; static doublereal eps, err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); char tmpchar; /* Test program for the COMPLEX*16 Level 3 Blas. */ @@ -924,22 +807,7 @@ L230: } /* MAIN__ */ -/* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublecomplex *alf; -integer *nbet; -doublecomplex *bet; -integer *nmax; -doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct; -doublereal *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -956,21 +824,21 @@ ftnlen sname_len; static integer i__, k, m, n; static doublecomplex alpha; static logical isame[13], trana, tranb; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; - extern /* 
Subroutine */ int zmmch_(); + extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical reset; static integer ia, ib; - extern /* Subroutine */ int zprcn1_(); + extern /* Subroutine */ int zprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); static integer ma, mb, na, nb, nc, ik, im, in, ks, ms, ns; - extern /* Subroutine */ int czgemm_(); + extern /* Subroutine */ void czgemm_(integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static char tranas[1], tranbs[1], transa[1], transb[1]; static doublereal errmax; - extern logical lzeres_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc; static doublecomplex als, bls; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); /* Tests ZGEMM. 
*/ @@ -1313,20 +1181,7 @@ L130: } /* zchk1_ */ -/* Subroutine */ int zprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k, - alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *transa, *transb; -integer *m, *n, *k; -doublecomplex *alpha; -integer *lda, *ldb; -doublecomplex *beta; -integer *ldc; -ftnlen sname_len; -ftnlen transa_len; -ftnlen transb_len; +/* Subroutine */ int zprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len) { /* Local variables */ static char crc[14], cta[14], ctb[14]; @@ -1357,22 +1212,7 @@ return 0; } /* zprcn1_ */ -/* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublecomplex *alf; -integer *nbet; -doublecomplex *bet; -integer *nmax; -doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct; -doublereal *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1394,23 +1234,23 @@ ftnlen sname_len; static doublecomplex alpha; static logical isame[13]; static char sides[1]; - 
extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; - extern /* Subroutine */ int zmmch_(); + extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical reset; static char uplos[1]; static integer ia, ib; - extern /* Subroutine */ int zprcn2_(); + extern /* Subroutine */ int zprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); static integer na, nc, im, in, ms, ns; - extern /* Subroutine */ int czhemm_(); + extern /* Subroutine */ void czhemm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static doublereal errmax; - extern logical lzeres_(); - extern /* Subroutine */ int czsymm_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void czsymm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static integer laa, lbb, lda, lcc, ldb, ldc, ics; static doublecomplex als, bls; static integer icu; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); /* Tests ZHEMM and ZSYMM. 
*/ @@ -1737,20 +1577,7 @@ L120: } /* zchk2_ */ -/* Subroutine */ int zprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha, - lda, ldb, beta, ldc, sname_len, side_len, uplo_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *side, *uplo; -integer *m, *n; -doublecomplex *alpha; -integer *lda, *ldb; -doublecomplex *beta; -integer *ldc; -ftnlen sname_len; -ftnlen side_len; -ftnlen uplo_len; +/* Subroutine */ int zprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len) { /* Local variables */ static char cs[14], cu[14], crc[14]; @@ -1777,21 +1604,7 @@ return 0; } /* zprcn2_ */ -/* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__, - iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublecomplex *alf; -integer *nmax; -doublecomplex *a, *aa, *as, *b, *bb, *bs, *ct; -doublereal *g; -doublecomplex *c__; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* ct, doublereal* g, doublecomplex* c__, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -1817,23 +1630,24 @@ ftnlen sname_len; static char diags[1]; static logical isame[13]; static char sides[1]; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, 
doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; - extern /* Subroutine */ int zmmch_(); + extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static logical reset; static char uplos[1]; static integer ia, na; - extern /* Subroutine */ int zprcn3_(); + extern /* Subroutine */ int zprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen); static integer nc, im, in, ms, ns; static char tranas[1], transa[1]; static doublereal errmax; - extern logical lzeres_(); - extern /* Subroutine */ int cztrmm_(), cztrsm_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); + extern /* Subroutine */ void cztrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ void cztrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); static integer laa, icd, lbb, lda, ldb, ics; static doublecomplex als; static integer ict, icu; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); /* Tests ZTRMM and ZTRSM. 
*/ @@ -2227,21 +2041,7 @@ L160: } /* zchk3_ */ -/* Subroutine */ int zprcn3_(nout, nc, sname, iorder, side, uplo, transa, - diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len, - transa_len, diag_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *side, *uplo, *transa, *diag; -integer *m, *n; -doublecomplex *alpha; -integer *lda, *ldb; -ftnlen sname_len; -ftnlen side_len; -ftnlen uplo_len; -ftnlen transa_len; -ftnlen diag_len; +/* Subroutine */ int zprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) { /* Local variables */ @@ -2281,22 +2081,7 @@ return 0; } /* zprcn3_ */ -/* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, - c__, cc, cs, ct, g, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublecomplex *alf; -integer *nbet; -doublecomplex *bet; -integer *nmax; -doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct; -doublereal *g; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -2320,30 +2105,30 @@ ftnlen sname_len; static doublecomplex alpha; static doublereal rbeta; static logical isame[13]; - extern /* 
Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; - extern /* Subroutine */ int zmmch_(); + extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static doublereal rbets; static logical reset; static char trans[1]; static logical upper; static char uplos[1]; static integer ia, ib, jc, ma, na; - extern /* Subroutine */ int zprcn4_(); + extern /* Subroutine */ int zprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); static integer nc; - extern /* Subroutine */ int zprcn6_(); + extern /* Subroutine */ int zprcn6_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static integer ik, in, jj, lj, ks, ns; static doublereal ralpha; - extern /* Subroutine */ int czherk_(); + extern /* Subroutine */ int czherk_(integer*, char*, char*, integer*, integer*, doublereal*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen); static doublereal errmax; - extern logical lzeres_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static char transs[1], transt[1]; - extern /* Subroutine */ int czsyrk_(); + extern /* Subroutine */ int czsyrk_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static integer laa, lda, lcc, ldc; static doublecomplex als; static integer ict, 
icu; static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); /* Tests ZHERK and ZSYRK. */ @@ -2732,20 +2517,7 @@ L130: } /* zchk4_ */ -/* Subroutine */ int zprcn4_(nout, nc, sname, iorder, uplo, transa, n, k, - alpha, lda, beta, ldc, sname_len, uplo_len, transa_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *uplo, *transa; -integer *n, *k; -doublecomplex *alpha; -integer *lda; -doublecomplex *beta; -integer *ldc; -ftnlen sname_len; -ftnlen uplo_len; -ftnlen transa_len; +/* Subroutine */ int zprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) { /* Local variables */ static char ca[14], cu[14], crc[14]; @@ -2775,20 +2547,7 @@ return 0; -/* Subroutine */ int zprcn6_(nout, nc, sname, iorder, uplo, transa, n, k, - alpha, lda, beta, ldc, sname_len, uplo_len, transa_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *uplo, *transa; -integer *n, *k; -doublereal *alpha; -integer *lda; -doublereal *beta; -integer *ldc; -ftnlen sname_len; -ftnlen uplo_len; -ftnlen transa_len; +/* Subroutine */ int zprcn6_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) { /* Local variables */ @@ -2818,23 +2577,7 @@ return 0; } /* zprcn6_ */ -/* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi, - fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs, - c__, cc, cs, ct, g, w, iorder, sname_len) -char *sname; -doublereal *eps, *thresh; -integer *nout, *ntra; -logical *trace, *rewi, *fatal; -integer *nidim, *idim, *nalf; -doublecomplex *alf; -integer *nbet; -doublecomplex *bet; -integer *nmax; -doublecomplex *ab, *aa, 
*as, *bb, *bs, *c__, *cc, *cs, *ct; -doublereal *g; -doublecomplex *w; -integer *iorder; -ftnlen sname_len; +/* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* ab, doublecomplex* aa, doublecomplex* as, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, doublecomplex* w, integer* iorder, ftnlen sname_len) { /* Initialized data */ @@ -2857,27 +2600,28 @@ ftnlen sname_len; static doublecomplex alpha; static doublereal rbeta; static logical isame[13]; - extern /* Subroutine */ int zmake_(); + extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); static integer nargs; - extern /* Subroutine */ int zmmch_(); + extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); static doublereal rbets; static logical reset; static char trans[1]; static logical upper; static char uplos[1]; static integer ia, ib, jc, ma, na, nc; - extern /* Subroutine */ int zprcn5_(), zprcn7_(); + extern /* Subroutine */ int zprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); + extern /* Subroutine */ int zprcn7_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); static integer ik, in, jj, lj, ks, ns; 
static doublereal errmax; - extern logical lzeres_(); + extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static char transs[1], transt[1]; - extern /* Subroutine */ int czher2k_(); + extern /* Subroutine */ int czher2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen); static integer laa, lbb, lda, lcc, ldb, ldc; static doublecomplex als; static integer ict, icu; - extern /* Subroutine */ int czsyr2k_(); + extern /* Subroutine */ int czsyr2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); static doublereal err; - extern logical lze_(); + extern logical lze_(doublecomplex*, doublecomplex*, integer*); /* Tests ZHER2K and ZSYR2K. */ @@ -3349,20 +3093,7 @@ L160: } /* zchk5_ */ -/* Subroutine */ int zprcn5_(nout, nc, sname, iorder, uplo, transa, n, k, - alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *uplo, *transa; -integer *n, *k; -doublecomplex *alpha; -integer *lda, *ldb; -doublecomplex *beta; -integer *ldc; -ftnlen sname_len; -ftnlen uplo_len; -ftnlen transa_len; +/* Subroutine */ int zprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) { /* Local variables */ static char ca[14], cu[14], crc[14]; @@ -3392,20 +3123,7 @@ return 0; -/* Subroutine */ int zprcn7_(nout, nc, sname, iorder, uplo, transa, n, k, - alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len) -integer *nout, *nc; -char *sname; -integer *iorder; -char *uplo, *transa; -integer *n, *k; -doublecomplex *alpha; -integer *lda, 
*ldb; -doublereal *beta; -integer *ldc; -ftnlen sname_len; -ftnlen uplo_len; -ftnlen transa_len; +/* Subroutine */ int zprcn7_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) { /* Local variables */ @@ -3435,19 +3153,7 @@ return 0; } /* zprcn7_ */ -/* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset, - transl, type_len, uplo_len, diag_len) -char *type__, *uplo, *diag; -integer *m, *n; -doublecomplex *a; -integer *nmax; -doublecomplex *aa; -integer *lda; -logical *reset; -doublecomplex *transl; -ftnlen type_len; -ftnlen uplo_len; -ftnlen diag_len; +/* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -3456,7 +3162,7 @@ ftnlen diag_len; /* Local variables */ static integer ibeg, iend; - extern /* Double Complex */ VOID zbeg_(); + extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*); static logical unit; static integer i__, j; static logical lower, upper; @@ -3629,27 +3335,7 @@ ftnlen diag_len; } /* zmake_ */ -/* Subroutine */ int zmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb, - beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv, - transa_len, transb_len) -char *transa, *transb; -integer *m, *n, *kk; -doublecomplex *alpha, *a; -integer *lda; -doublecomplex *b; -integer *ldb; -doublecomplex *beta, *c__; -integer *ldc; -doublecomplex *ct; -doublereal *g; -doublecomplex *cc; -integer *ldcc; -doublereal *eps, *err; -logical *fatal; -integer *nout; -logical *mv; -ftnlen transa_len; -ftnlen transb_len; +/* Subroutine */ int zmmch_(char* transa, char* 
transb, integer* m, integer* n, integer* kk, doublecomplex* alpha, doublecomplex* a, integer* lda, doublecomplex* b, integer* ldb, doublecomplex* beta, doublecomplex* c__, integer* ldc, doublecomplex* ct, doublereal* g, doublecomplex* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len) { /* System generated locals */ @@ -3658,7 +3344,7 @@ ftnlen transb_len; doublereal d__1, d__2, d__3, d__4, d__5, d__6; doublecomplex z__1, z__2, z__3, z__4; - double sqrt(); + double sqrt(double); /* Local variables */ static doublereal erri; static integer i__, j, k; @@ -4031,9 +3717,7 @@ L250: } /* zmmch_ */ -logical lze_(ri, rj, lr) -doublecomplex *ri, *rj; -integer *lr; +logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr) { /* System generated locals */ integer i__1, i__2, i__3; @@ -4082,13 +3766,7 @@ L30: } /* lze_ */ -logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) -char *type__, *uplo; -integer *m, *n; -doublecomplex *aa, *as; -integer *lda; -ftnlen type_len; -ftnlen uplo_len; +logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex *aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; @@ -4184,9 +3862,7 @@ L80: } /* lzeres_ */ -/* Double Complex */ VOID zbeg_( ret_val, reset) -doublecomplex * ret_val; -logical *reset; +/* Double Complex */ VOID zbeg_(doublecomplex* ret_val, logical* reset) { /* System generated locals */ doublereal d__1, d__2; @@ -4249,8 +3925,7 @@ L10: } /* zbeg_ */ -doublereal ddiff_(x, y) -doublereal *x, *y; +doublereal ddiff_(doublereal* x, doublereal* y) { /* System generated locals */ doublereal ret_val; From 4041b7fb42dcba67d99a0e00e1e820b6cc29f7fb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 7 Oct 2023 22:33:08 +0200 Subject: [PATCH 061/125] fix function prototypes in f2c-converted 
files --- lapack-netlib/INSTALL/dlamch.c | 230 +-------------------- lapack-netlib/INSTALL/droundup_lwork.c | 141 ------------- lapack-netlib/INSTALL/dsecnd_INT_ETIME.c | 144 +------------- lapack-netlib/INSTALL/ilaver.c | 243 +---------------------- lapack-netlib/INSTALL/second_INT_ETIME.c | 144 +------------- lapack-netlib/INSTALL/slamch.c | 229 --------------------- lapack-netlib/INSTALL/sroundup_lwork.c | 141 ------------- 7 files changed, 6 insertions(+), 1266 deletions(-) diff --git a/lapack-netlib/INSTALL/dlamch.c b/lapack-netlib/INSTALL/dlamch.c index 744130a87..ce6b76a32 100644 --- a/lapack-netlib/INSTALL/dlamch.c +++ b/lapack-netlib/INSTALL/dlamch.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,24 +260,7 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} static double dpow_ui(double x, integer n) { double pow=1.0; unsigned long int u; if(n != 0) { @@ -291,217 +273,7 @@ static double dpow_ui(double x, integer n) { } return pow; } -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) 
x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; - _Complex float zdotc = 0.0; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; - _Complex float zdotc = 0.0; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 
1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static _Complex double zpow_ui(_Complex double x, 
integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; - _Complex float zdotc = 0.0; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, 
x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; 
) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; - _Complex float zdotc = 0.0; - if (incx == 1 && incy == 1) { - for (i=0;i Date: Sat, 7 Oct 2023 22:36:29 +0200 Subject: [PATCH 062/125] fix function prototypes in f2c-converted files --- lapack-netlib/TESTING/MATGEN/clagge.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/claghe.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clagsy.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clahilb.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clakf2.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clarge.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clarnd.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/claror.c | 248 ------------------------ 
lapack-netlib/TESTING/MATGEN/clarot.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clatm1.c | 235 ----------------------- lapack-netlib/TESTING/MATGEN/clatm2.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clatm3.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clatm5.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clatm6.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clatme.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clatmr.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clatms.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/clatmt.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlagge.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlagsy.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlahilb.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlakf2.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlaran.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlarge.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlarnd.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlaror.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlarot.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlatm1.c | 235 ----------------------- lapack-netlib/TESTING/MATGEN/dlatm2.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlatm3.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlatm5.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlatm6.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlatm7.c | 235 ----------------------- lapack-netlib/TESTING/MATGEN/dlatme.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlatmr.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlatms.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/dlatmt.c | 248 ------------------------ 
lapack-netlib/TESTING/MATGEN/slagge.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slagsy.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slahilb.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slakf2.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slaran.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slarge.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slarnd.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slaror.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slarot.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slatm1.c | 235 ----------------------- lapack-netlib/TESTING/MATGEN/slatm2.c | 247 ------------------------ lapack-netlib/TESTING/MATGEN/slatm3.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slatm5.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slatm6.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slatm7.c | 235 ----------------------- lapack-netlib/TESTING/MATGEN/slatme.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slatmr.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slatms.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/slatmt.c | 248 ------------------------ lapack-netlib/TESTING/MATGEN/zlagge.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlaghe.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlagsy.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlahilb.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlakf2.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlarge.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlarnd.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlaror.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlarot.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlatm1.c | 236 
----------------------- lapack-netlib/TESTING/MATGEN/zlatm2.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlatm3.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlatm5.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlatm6.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlatme.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlatmr.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlatms.c | 249 ------------------------- lapack-netlib/TESTING/MATGEN/zlatmt.c | 249 ------------------------- 74 files changed, 18291 deletions(-) diff --git a/lapack-netlib/TESTING/MATGEN/clagge.c b/lapack-netlib/TESTING/MATGEN/clagge.c index f05905bd7..62c33d01e 100644 --- a/lapack-netlib/TESTING/MATGEN/clagge.c +++ b/lapack-netlib/TESTING/MATGEN/clagge.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER 
-static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b CLARND */ diff --git a/lapack-netlib/TESTING/MATGEN/claror.c b/lapack-netlib/TESTING/MATGEN/claror.c index cd0d15300..b0d73f37c 100644 --- a/lapack-netlib/TESTING/MATGEN/claror.c +++ b/lapack-netlib/TESTING/MATGEN/claror.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - 
if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 
01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b CLATM2 */ diff --git a/lapack-netlib/TESTING/MATGEN/clatm3.c b/lapack-netlib/TESTING/MATGEN/clatm3.c index 58cd4e551..fcd8dbfcb 100644 --- a/lapack-netlib/TESTING/MATGEN/clatm3.c +++ b/lapack-netlib/TESTING/MATGEN/clatm3.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b CLATM3 */ diff --git a/lapack-netlib/TESTING/MATGEN/clatm5.c b/lapack-netlib/TESTING/MATGEN/clatm5.c index c2b81ccf3..8fbc1c0a6 100644 --- a/lapack-netlib/TESTING/MATGEN/clatm5.c +++ b/lapack-netlib/TESTING/MATGEN/clatm5.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b DLARAN */ diff --git a/lapack-netlib/TESTING/MATGEN/dlarge.c b/lapack-netlib/TESTING/MATGEN/dlarge.c index 5d8a81387..5cc7fbce8 100644 --- a/lapack-netlib/TESTING/MATGEN/dlarge.c +++ b/lapack-netlib/TESTING/MATGEN/dlarge.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b DLARND */ diff --git a/lapack-netlib/TESTING/MATGEN/dlaror.c b/lapack-netlib/TESTING/MATGEN/dlaror.c index d9e2e46ae..fdd126174 100644 --- a/lapack-netlib/TESTING/MATGEN/dlaror.c +++ b/lapack-netlib/TESTING/MATGEN/dlaror.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) 
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 
1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} 
-#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} static double dpow_ui(double x, integer n) { double pow=1.0; unsigned long int u; if(n != 0) { @@ -291,223 +273,6 @@ static double dpow_ui(double x, integer n) { } return pow; } -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer 
n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b DLATM1 */ diff --git a/lapack-netlib/TESTING/MATGEN/dlatm2.c b/lapack-netlib/TESTING/MATGEN/dlatm2.c index d74bc9168..7491e9829 100644 --- a/lapack-netlib/TESTING/MATGEN/dlatm2.c +++ b/lapack-netlib/TESTING/MATGEN/dlatm2.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b DLATM2 */ diff --git a/lapack-netlib/TESTING/MATGEN/dlatm3.c b/lapack-netlib/TESTING/MATGEN/dlatm3.c index 86f964ced..a9d26c7fc 100644 --- a/lapack-netlib/TESTING/MATGEN/dlatm3.c +++ b/lapack-netlib/TESTING/MATGEN/dlatm3.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b DLATM3 */ diff --git a/lapack-netlib/TESTING/MATGEN/dlatm5.c b/lapack-netlib/TESTING/MATGEN/dlatm5.c index 94b49d6e3..7f1c36428 100644 --- a/lapack-netlib/TESTING/MATGEN/dlatm5.c +++ b/lapack-netlib/TESTING/MATGEN/dlatm5.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} static double dpow_ui(double x, integer n) { double pow=1.0; unsigned long int u; if(n != 0) { @@ -291,223 +273,6 @@ static double dpow_ui(double x, integer n) { } return pow; } -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else 
-static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b DLATM7 */ diff --git a/lapack-netlib/TESTING/MATGEN/dlatme.c b/lapack-netlib/TESTING/MATGEN/dlatme.c index a92c70ef2..e29df164c 100644 --- a/lapack-netlib/TESTING/MATGEN/dlatme.c +++ b/lapack-netlib/TESTING/MATGEN/dlatme.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b SLARAN */ diff --git a/lapack-netlib/TESTING/MATGEN/slarge.c b/lapack-netlib/TESTING/MATGEN/slarge.c index 6b37e9400..d5fbd541c 100644 --- a/lapack-netlib/TESTING/MATGEN/slarge.c +++ b/lapack-netlib/TESTING/MATGEN/slarge.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b SLARND */ diff --git a/lapack-netlib/TESTING/MATGEN/slaror.c b/lapack-netlib/TESTING/MATGEN/slaror.c index 48b532dfd..7e3065432 100644 --- a/lapack-netlib/TESTING/MATGEN/slaror.c +++ b/lapack-netlib/TESTING/MATGEN/slaror.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) 
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 
1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} 
-#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u 
>>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b SLATM1 */ diff --git a/lapack-netlib/TESTING/MATGEN/slatm2.c b/lapack-netlib/TESTING/MATGEN/slatm2.c index e7b72006f..833ee5dea 100644 --- a/lapack-netlib/TESTING/MATGEN/slatm2.c +++ b/lapack-netlib/TESTING/MATGEN/slatm2.c @@ -261,253 +261,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i 
*= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b SLATM2 */ diff --git a/lapack-netlib/TESTING/MATGEN/slatm3.c b/lapack-netlib/TESTING/MATGEN/slatm3.c index 4f9f5fee2..cdf96ef51 100644 --- a/lapack-netlib/TESTING/MATGEN/slatm3.c +++ b/lapack-netlib/TESTING/MATGEN/slatm3.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b SLATM3 */ diff --git a/lapack-netlib/TESTING/MATGEN/slatm5.c b/lapack-netlib/TESTING/MATGEN/slatm5.c index 24ee0915d..9122bc041 100644 --- a/lapack-netlib/TESTING/MATGEN/slatm5.c +++ b/lapack-netlib/TESTING/MATGEN/slatm5.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double 
dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - 
if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b SLATM7 */ diff --git a/lapack-netlib/TESTING/MATGEN/slatme.c b/lapack-netlib/TESTING/MATGEN/slatme.c index a8a6b39a3..126c42121 100644 --- a/lapack-netlib/TESTING/MATGEN/slatme.c +++ 
b/lapack-netlib/TESTING/MATGEN/slatme.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,253 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u 
= n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i 
*= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b ZLARND */ diff --git a/lapack-netlib/TESTING/MATGEN/zlaror.c b/lapack-netlib/TESTING/MATGEN/zlaror.c index 6ada57b8a..c8a84f215 100644 --- a/lapack-netlib/TESTING/MATGEN/zlaror.c +++ b/lapack-netlib/TESTING/MATGEN/zlaror.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) 
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,254 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 
1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} 
-#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} static double dpow_ui(double x, integer n) { double pow=1.0; unsigned long int u; if(n != 0) { @@ -291,224 +273,6 @@ static double dpow_ui(double x, integer n) { } return pow; } -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer 
n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b ZLATM2 */ diff --git a/lapack-netlib/TESTING/MATGEN/zlatm3.c b/lapack-netlib/TESTING/MATGEN/zlatm3.c index c35ffe4d9..6370a9d39 100644 --- a/lapack-netlib/TESTING/MATGEN/zlatm3.c +++ b/lapack-netlib/TESTING/MATGEN/zlatm3.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) 
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,254 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 
1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i \brief \b ZLATM3 */ diff --git a/lapack-netlib/TESTING/MATGEN/zlatm5.c b/lapack-netlib/TESTING/MATGEN/zlatm5.c index 753ee0ce6..5ee6cc8ce 100644 --- a/lapack-netlib/TESTING/MATGEN/zlatm5.c +++ b/lapack-netlib/TESTING/MATGEN/zlatm5.c @@ -248,7 +248,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -262,254 +261,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef 
logical (*L_fp)(); -#endif - -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i Date: Sat, 7 Oct 2023 22:38:30 +0200 Subject: [PATCH 063/125] fix function prototypes in f2c-converted files --- lapack-netlib/SRC/DEPRECATED/cgegs.c | 241 ------------------------- lapack-netlib/SRC/DEPRECATED/cgegv.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/cgelsx.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/cgeqpf.c | 241 ------------------------- lapack-netlib/SRC/DEPRECATED/cggsvd.c | 243 +------------------------ lapack-netlib/SRC/DEPRECATED/cggsvp.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/clahrd.c | 
248 -------------------------- lapack-netlib/SRC/DEPRECATED/clatzm.c | 248 -------------------------- lapack-netlib/SRC/DEPRECATED/ctzrqf.c | 247 ------------------------- lapack-netlib/SRC/DEPRECATED/dgegs.c | 242 +------------------------ lapack-netlib/SRC/DEPRECATED/dgegv.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/dgelsx.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/dgeqpf.c | 243 +------------------------ lapack-netlib/SRC/DEPRECATED/dggsvd.c | 241 ------------------------- lapack-netlib/SRC/DEPRECATED/dggsvp.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/dlahrd.c | 246 ------------------------- lapack-netlib/SRC/DEPRECATED/dlatzm.c | 248 -------------------------- lapack-netlib/SRC/DEPRECATED/dtzrqf.c | 247 ------------------------- lapack-netlib/SRC/DEPRECATED/sgegs.c | 241 ------------------------- lapack-netlib/SRC/DEPRECATED/sgegv.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/sgelsx.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/sgeqpf.c | 243 +------------------------ lapack-netlib/SRC/DEPRECATED/sggsvd.c | 241 ------------------------- lapack-netlib/SRC/DEPRECATED/sggsvp.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/slahrd.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/slatzm.c | 247 ------------------------- lapack-netlib/SRC/DEPRECATED/stzrqf.c | 248 -------------------------- lapack-netlib/SRC/DEPRECATED/zgegs.c | 241 ------------------------- lapack-netlib/SRC/DEPRECATED/zgegv.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/zgelsx.c | 242 ------------------------- lapack-netlib/SRC/DEPRECATED/zgeqpf.c | 243 +------------------------ lapack-netlib/SRC/DEPRECATED/zggsvd.c | 243 +------------------------ lapack-netlib/SRC/DEPRECATED/zggsvp.c | 244 +------------------------ lapack-netlib/SRC/DEPRECATED/zlahrd.c | 248 -------------------------- lapack-netlib/SRC/DEPRECATED/zlatzm.c | 248 -------------------------- 
lapack-netlib/SRC/DEPRECATED/ztzrqf.c | 248 -------------------------- 36 files changed, 7 insertions(+), 8767 deletions(-) diff --git a/lapack-netlib/SRC/DEPRECATED/cgegs.c b/lapack-netlib/SRC/DEPRECATED/cgegs.c index 35b59e683..270a05ebd 100644 --- a/lapack-netlib/SRC/DEPRECATED/cgegs.c +++ b/lapack-netlib/SRC/DEPRECATED/cgegs.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -261,247 +260,7 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 -#ifdef __cplusplus -typedef logical (*L_fp)(...); -#else -typedef logical (*L_fp)(); -#endif -static float spow_ui(float x, integer n) { - float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 
01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow 
*= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - } - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= 
x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i>= 1) x *= x; - else break; - } - } - return pow; -} -static double dpow_ui(double x, integer n) { - double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#ifdef _MSC_VER -static _Fcomplex cpow_ui(complex x, integer n) { - complex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; - for(u = n; ; ) { - if(u & 01) pow.r *= x.r, pow.i *= x.i; - if(u >>= 1) x.r *= x.r, x.i *= x.i; - else break; - } - 
} - _Fcomplex p={pow.r, pow.i}; - return p; -} -#else -static _Complex float cpow_ui(_Complex float x, integer n) { - _Complex float pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -#ifdef _MSC_VER -static _Dcomplex zpow_ui(_Dcomplex x, integer n) { - _Dcomplex pow={1.0,0.0}; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; - for(u = n; ; ) { - if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; - if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; - else break; - } - } - _Dcomplex p = {pow._Val[0], pow._Val[1]}; - return p; -} -#else -static _Complex double zpow_ui(_Complex double x, integer n) { - _Complex double pow=1.0; unsigned long int u; - if(n != 0) { - if(n < 0) n = -n, x = 1/x; - for(u = n; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -#endif -static integer pow_ii(integer x, integer n) { - integer pow; unsigned long int u; - if (n <= 0) { - if (n == 0 || x == 1) pow = 1; - else if (x != -1) pow = x == 0 ? 
1/x : 0; - else n = -n; - } - if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { - u = n; - for(pow = 1; ; ) { - if(u & 01) pow *= x; - if(u >>= 1) x *= x; - else break; - } - } - return pow; -} -static integer dmaxloc_(double *w, integer s, integer e, integer *n) -{ - double m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static integer smaxloc_(float *w, integer s, integer e, integer *n) -{ - float m; integer i, mi; - for(m=w[s-1], mi=s, i=s+1; i<=e; i++) - if (w[i-1]>m) mi=i ,m=w[i-1]; - return mi-s+1; -} -static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { - integer n = *n_, incx = *incx_, incy = *incy_, i; -#ifdef _MSC_VER - _Fcomplex zdotc = {0.0, 0.0}; - if (incx == 1 && incy == 1) { - for (i=0;i Date: Sun, 8 Oct 2023 11:36:06 +0200 Subject: [PATCH 064/125] fix prototype of itest1 for INTERFACE64 --- ctest/c_sblat1c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctest/c_sblat1c.c b/ctest/c_sblat1c.c index 1424e39b4..7c049b796 100644 --- a/ctest/c_sblat1c.c +++ b/ctest/c_sblat1c.c @@ -437,7 +437,7 @@ L40: static real stemp[1], strue[8]; extern /* Subroutine */ int stest_(int*, real*,real*,real*,real*), sscaltest_(int*,real*,real*,int*); extern real sasumtest_(int*,real*,int*); - extern /* Subroutine */ int itest1_(int*,int*), stest1_(real*,real*,real*,real*); + extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(real*,real*,real*,real*); static real sx[8]; static integer np1; extern integer isamaxtest_(int*,real*,int*); From 2b865da7304f930d44194681bd7ef86cf8edbb4c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 8 Oct 2023 11:55:10 +0200 Subject: [PATCH 065/125] fix prototypes of stest and ctest for INTERFACE64 --- ctest/c_zblat1c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ctest/c_zblat1c.c b/ctest/c_zblat1c.c index 4761e63d7..d8cff5dee 100644 --- a/ctest/c_zblat1c.c +++ 
b/ctest/c_zblat1c.c @@ -378,7 +378,7 @@ static doublereal c_b43 = 1.; /* Local variables */ static integer i__; - extern /* Subroutine */ int ctest_(int*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*); + extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*); static doublecomplex mwpcs[5], mwpct[5]; extern /* Subroutine */ int zscaltest_(int*, doublereal*, doublecomplex*, int*), itest1_(int*, int*), stest1_(doublereal*, doublereal*, doublereal*, doublereal*); static doublecomplex cx[8]; @@ -588,7 +588,7 @@ static doublereal c_b43 = 1.; /* Local variables */ static doublecomplex cdot[1]; static integer lenx, leny, i__; - extern /* Subroutine */ int ctest_(int*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*); + extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*); static integer ksize; static doublecomplex ztemp; extern /* Subroutine */ int zdotctest_(int*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*), zcopytest_(int*, doublecomplex*, int*, doublecomplex*, int*); @@ -747,7 +747,7 @@ L40: /* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac) { static doublereal scomp[1], strue[1]; - extern /* Subroutine */ int stest_(int*,doublereal*, doublereal*, doublereal*, doublereal*); + extern /* Subroutine */ integer stest_(int*,doublereal*, doublereal*, doublereal*, doublereal*); /* ************************* STEST1 ***************************** */ From c30b53087860186287b5db0ba455499fbe7d20e5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 8 Oct 2023 11:59:19 +0200 Subject: [PATCH 066/125] fix prototypes of ctest and itest for INTERFACE64 --- ctest/c_cblat1c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ctest/c_cblat1c.c b/ctest/c_cblat1c.c index 2f84da43b..fa97cbf92 100644 --- a/ctest/c_cblat1c.c +++ b/ctest/c_cblat1c.c @@ -437,9 +437,9 @@ 
static real c_b43 = (float)1.; /* Local variables */ static integer i__; - extern /* Subroutine */ int ctest_(int*, complex*, complex*, complex*, real*); + extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*); static complex mwpcs[5], mwpct[5]; - extern /* Subroutine */ int itest1_(int*, int*), stest1_(real*,real*,real*,real*); + extern /* Subroutine */ int itest1_(integer*, integer*), stest1_(real*,real*,real*,real*); static complex cx[8]; extern real scnrm2test_(int*, complex*, int*); static integer np1; @@ -734,7 +734,7 @@ static real c_b43 = (float)1.; static complex cdot[1]; static integer lenx, leny, i__; static complex ctemp; - extern /* Subroutine */ int ctest_(int*, complex*, complex*, complex*, real*); + extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*); static integer ksize; extern /* Subroutine */ int cdotctest_(int*, complex*, int*, complex*, int*,complex*), ccopytest_(int*, complex*, int*, complex*, int*), cdotutest_(int*, complex*, int*, complex*, int*, complex*), cswaptest_(int*, complex*, int*, complex*, int*), caxpytest_(int*, complex*, complex*, int*, complex*, int*); @@ -939,7 +939,7 @@ doublereal sdiff_(real* sa, real* sb) /* Local variables */ static integer i__; static real scomp[20], ssize[20], strue[20]; - extern /* Subroutine */ int stest_(int*, real*,real*,real*,real*); + extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*); /* **************************** CTEST ***************************** */ From 769a58e9d18029cefab7647916fec3bd635b604e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 8 Oct 2023 12:51:41 +0200 Subject: [PATCH 067/125] fix prototypes of stest and itest1 for INTERFACE64 --- ctest/c_dblat1c.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ctest/c_dblat1c.c b/ctest/c_dblat1c.c index f0141f2a5..d26cd9924 100644 --- a/ctest/c_dblat1c.c +++ b/ctest/c_dblat1c.c @@ -404,9 +404,9 @@ L40: static integer i__; 
extern doublereal dnrm2test_(int*, doublereal*, int*); static doublereal stemp[1], strue[8]; - extern /* Subroutine */ int stest_(int*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(int*,doublereal*,doublereal*,int*); + extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(int*,doublereal*,doublereal*,int*); extern doublereal dasumtest_(int*,doublereal*,int*); - extern /* Subroutine */ int itest1_(int*,int*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); + extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); static doublereal sx[8]; static integer np1; extern integer idamaxtest_(int*,doublereal*,int*); @@ -517,7 +517,7 @@ L40: static integer lenx, leny; extern doublereal ddottest_(int*,doublereal*,int*,doublereal*,int*); static integer i__, j, ksize; - extern /* Subroutine */ int stest_(int*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(int*,doublereal*,int*,doublereal*,int*), dswaptest_(int*,doublereal*,int*,doublereal*,int*), + extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(int*,doublereal*,int*,doublereal*,int*), dswaptest_(int*,doublereal*,int*,doublereal*,int*), daxpytest_(int*,doublereal*,doublereal*,int*,doublereal*,int*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); static integer ki, kn, mx, my; static doublereal sx[7], sy[7], stx[7], sty[7]; @@ -620,7 +620,7 @@ L40: /* Local variables */ extern /* Subroutine */ int drottest_(int*,doublereal*,int*,doublereal*,int*,doublereal*,doublereal*); static integer i__, k, ksize; - extern /* Subroutine */int stest_(int*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(int*,doublereal*,int*,doublereal*,int*,doublereal*); + extern /* Subroutine */int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(int*,doublereal*,int*,doublereal*,int*,doublereal*); static 
integer ki, kn; static doublereal dparam[5], sx[10], sy[10], stx[10], sty[10]; @@ -691,7 +691,7 @@ L40: return 0; } /* check3_ */ -/* Subroutine */ int stest_(int* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac) +/* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac) { /* System generated locals */ integer i__1; @@ -758,7 +758,7 @@ L40: /* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac) { static doublereal scomp[1], strue[1]; - extern /* Subroutine */ int stest_(int*, doublereal*, doublereal*, doublereal*, doublereal*); + extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*); /* ************************* STEST1 ***************************** */ From d8126c76e770efe8b7394d856de58ada75e5f3b1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 8 Oct 2023 13:38:39 +0200 Subject: [PATCH 068/125] fix prototype --- ctest/c_cblat1c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctest/c_cblat1c.c b/ctest/c_cblat1c.c index fa97cbf92..929f3eaf3 100644 --- a/ctest/c_cblat1c.c +++ b/ctest/c_cblat1c.c @@ -887,7 +887,7 @@ L40: /* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac) { static real scomp[1], strue[1]; - extern /* Subroutine */ int stest_(int*, real*, real*, real*, real*); + extern /* Subroutine */ int stest_(integer*, real*, real*, real*, real*); /* ************************* STEST1 ***************************** */ From c5e7339c9eb441c56264f30d0b998ff87335cbef Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 8 Oct 2023 16:13:37 +0200 Subject: [PATCH 069/125] correct prototypes for INTERFACE64 builds --- ctest/c_cblat1c.c | 12 ++++++------ ctest/c_dblat1c.c | 20 ++++++++++---------- ctest/c_sblat1c.c | 22 +++++++++++----------- ctest/c_zblat1c.c | 10 +++++----- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git 
a/ctest/c_cblat1c.c b/ctest/c_cblat1c.c index 929f3eaf3..b4c512436 100644 --- a/ctest/c_cblat1c.c +++ b/ctest/c_cblat1c.c @@ -441,11 +441,11 @@ static real c_b43 = (float)1.; static complex mwpcs[5], mwpct[5]; extern /* Subroutine */ int itest1_(integer*, integer*), stest1_(real*,real*,real*,real*); static complex cx[8]; - extern real scnrm2test_(int*, complex*, int*); + extern real scnrm2test_(integer*, complex*, integer*); static integer np1; - extern integer icamaxtest_(int*, complex*, int*); - extern /* Subroutine */ int csscaltest_(int*, real*, complex*, int*); - extern real scasumtest_(int*, complex*, int*); + extern integer icamaxtest_(integer*, complex*, integer*); + extern /* Subroutine */ int csscaltest_(integer*, real*, complex*, integer*); + extern real scasumtest_(integer*, complex*, integer*); static integer len; /* .. Parameters .. */ @@ -736,8 +736,8 @@ static real c_b43 = (float)1.; static complex ctemp; extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*); static integer ksize; - extern /* Subroutine */ int cdotctest_(int*, complex*, int*, complex*, int*,complex*), ccopytest_(int*, complex*, int*, complex*, int*), cdotutest_(int*, complex*, int*, complex*, int*, complex*), - cswaptest_(int*, complex*, int*, complex*, int*), caxpytest_(int*, complex*, complex*, int*, complex*, int*); + extern /* Subroutine */ int cdotctest_(integer*, complex*, integer*, complex*, integer*,complex*), ccopytest_(integer*, complex*, integer*, complex*, integer*), cdotutest_(integer*, complex*, integer*, complex*, integer*, complex*), + cswaptest_(integer*, complex*, integer*, complex*, integer*), caxpytest_(integer*, complex*, complex*, integer*, complex*, integer*); static integer ki, kn; static complex cx[7], cy[7]; static integer mx, my; diff --git a/ctest/c_dblat1c.c b/ctest/c_dblat1c.c index d26cd9924..089dca4da 100644 --- a/ctest/c_dblat1c.c +++ b/ctest/c_dblat1c.c @@ -402,14 +402,14 @@ L40: /* Local variables */ static integer i__; 
- extern doublereal dnrm2test_(int*, doublereal*, int*); + extern doublereal dnrm2test_(integer*, doublereal*, integer*); static doublereal stemp[1], strue[8]; - extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(int*,doublereal*,doublereal*,int*); - extern doublereal dasumtest_(int*,doublereal*,int*); + extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(integer*,doublereal*,doublereal*,integer*); + extern doublereal dasumtest_(integer*,doublereal*,integer*); extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); static doublereal sx[8]; static integer np1; - extern integer idamaxtest_(int*,doublereal*,int*); + extern integer idamaxtest_(integer*,doublereal*,integer*); static integer len; /* .. Parameters .. */ @@ -515,10 +515,10 @@ L40: /* Local variables */ static integer lenx, leny; - extern doublereal ddottest_(int*,doublereal*,int*,doublereal*,int*); + extern doublereal ddottest_(integer*,doublereal*,integer*,doublereal*,integer*); static integer i__, j, ksize; - extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(int*,doublereal*,int*,doublereal*,int*), dswaptest_(int*,doublereal*,int*,doublereal*,int*), - daxpytest_(int*,doublereal*,doublereal*,int*,doublereal*,int*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); + extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(integer*,doublereal*,integer*,doublereal*,integer*), dswaptest_(integer*,doublereal*,integer*,doublereal*,integer*), + daxpytest_(integer*,doublereal*,doublereal*,integer*,doublereal*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); static integer ki, kn, mx, my; static doublereal sx[7], sy[7], stx[7], sty[7]; @@ -618,9 +618,9 @@ L40: ; /* Local variables */ - extern /* Subroutine */ int 
drottest_(int*,doublereal*,int*,doublereal*,int*,doublereal*,doublereal*); + extern /* Subroutine */ int drottest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*,doublereal*); static integer i__, k, ksize; - extern /* Subroutine */int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(int*,doublereal*,int*,doublereal*,int*,doublereal*); + extern /* Subroutine */int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*); static integer ki, kn; static doublereal dparam[5], sx[10], sy[10], stx[10], sty[10]; @@ -799,7 +799,7 @@ doublereal sdiff_(doublereal* sa, doublereal* sb) return ret_val; } /* sdiff_ */ -/* Subroutine */ int itest1_(int* icomp, int* itrue) +/* Subroutine */ int itest1_(integer* icomp, integer* itrue) { /* Local variables */ static integer id; diff --git a/ctest/c_sblat1c.c b/ctest/c_sblat1c.c index 7c049b796..7a81e04c1 100644 --- a/ctest/c_sblat1c.c +++ b/ctest/c_sblat1c.c @@ -433,14 +433,14 @@ L40: /* Local variables */ static integer i__; - extern real snrm2test_(int*,real*,int*); + extern real snrm2test_(integer*,real*,integer*); static real stemp[1], strue[8]; - extern /* Subroutine */ int stest_(int*, real*,real*,real*,real*), sscaltest_(int*,real*,real*,int*); - extern real sasumtest_(int*,real*,int*); + extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*), sscaltest_(integer*,real*,real*,integer*); + extern real sasumtest_(integer*,real*,integer*); extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(real*,real*,real*,real*); static real sx[8]; static integer np1; - extern integer isamaxtest_(int*,real*,int*); + extern integer isamaxtest_(integer*,real*,integer*); static integer len; @@ -590,10 +590,10 @@ L40: /* Local variables */ static integer lenx, leny; - extern real sdottest_(int*,real*,int*,real*,int*); + extern real sdottest_(integer*,real*,integer*,real*,integer*); static integer 
i__, j, ksize; - extern /* Subroutine */ int stest_(int*,real*,real*,real*,real*), scopytest_(int*,real*,int*,real*,int*), sswaptest_(int*,real*,int*,real*,int*), - saxpytest_(int*,real*,real*,int*,real*,int*); + extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), scopytest_(integer*,real*,integer*,real*,integer*), sswaptest_(integer*,real*,integer*,real*,integer*), + saxpytest_(integer*,real*,real*,integer*,real*,integer*); static integer ki; extern /* Subroutine */ int stest1_(real*,real*,real*,real*); static integer kn, mx, my; @@ -708,9 +708,9 @@ L40: 1.17 }; /* Local variables */ - extern /* Subroutine */ void srottest_(int*,real*,int*,real*,int*,real*,real*); + extern /* Subroutine */ void srottest_(integer*,real*,integer*,real*,integer*,real*,real*); static integer i__, k, ksize; - extern /* Subroutine */ int stest_(int*,real*,real*,real*,real*), srotmtest_(int*,real*,int*,real*,int*,real*); + extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), srotmtest_(integer*,real*,integer*,real*,integer*,real*); static integer ki, kn; static real sx[19], sy[19], sparam[5], stx[19], sty[19]; @@ -781,7 +781,7 @@ L40: return 0; } /* check3_ */ -/* Subroutine */ int stest_(int* len, real* scomp, real* strue, real* ssize, real* sfac) +/* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize, real* sfac) { integer i__1; real r__1, r__2, r__3, r__4, r__5; @@ -847,7 +847,7 @@ L40: /* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac) { static real scomp[1], strue[1]; - extern /* Subroutine */ int stest_(int*,real*,real*,real*,real*); + extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*); /* ************************* STEST1 ***************************** */ diff --git a/ctest/c_zblat1c.c b/ctest/c_zblat1c.c index d8cff5dee..f7c0515fc 100644 --- a/ctest/c_zblat1c.c +++ b/ctest/c_zblat1c.c @@ -380,7 +380,7 @@ static doublereal c_b43 = 1.; static integer i__; extern /* Subroutine 
*/ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*); static doublecomplex mwpcs[5], mwpct[5]; - extern /* Subroutine */ int zscaltest_(int*, doublereal*, doublecomplex*, int*), itest1_(int*, int*), stest1_(doublereal*, doublereal*, doublereal*, doublereal*); + extern /* Subroutine */ int zscaltest_(integer*, doublereal*, doublecomplex*, integer*), itest1_(integer*, integer*), stest1_(doublereal*, doublereal*, doublereal*, doublereal*); static doublecomplex cx[8]; extern doublereal dznrm2test_(integer*, doublecomplex*, integer*); static integer np1; @@ -591,11 +591,11 @@ static doublereal c_b43 = 1.; extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*); static integer ksize; static doublecomplex ztemp; - extern /* Subroutine */ int zdotctest_(int*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*), zcopytest_(int*, doublecomplex*, int*, doublecomplex*, int*); + extern /* Subroutine */ int zdotctest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zcopytest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*); static integer ki; - extern /* Subroutine */ int zdotutest_(int*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*), zswaptest_(int*, doublecomplex*, int*, doublecomplex*, int*); + extern /* Subroutine */ int zdotutest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zswaptest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*); static integer kn; - extern /* Subroutine */ int zaxpytest_(int*, doublereal*, doublecomplex*, int*, doublecomplex*, int*); + extern /* Subroutine */ int zaxpytest_(integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*); static doublecomplex cx[7], cy[7]; static integer mx, my; @@ -747,7 +747,7 @@ L40: /* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac) { static doublereal 
scomp[1], strue[1]; - extern /* Subroutine */ integer stest_(int*,doublereal*, doublereal*, doublereal*, doublereal*); + extern /* Subroutine */ int stest_(integer*,doublereal*, doublereal*, doublereal*, doublereal*); /* ************************* STEST1 ***************************** */ From 103d6f4e42fbe532ae4ea48e8d90d7d792bc93d2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 10 Oct 2023 16:15:52 +0200 Subject: [PATCH 070/125] Require "classic ld" with XCODE 15.x on Mac --- Makefile.system | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile.system b/Makefile.system index b1a357fdf..77c36c870 100644 --- a/Makefile.system +++ b/Makefile.system @@ -405,6 +405,13 @@ export MACOSX_DEPLOYMENT_TARGET=10.8 endif endif MD5SUM = md5 -r +XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.Xcode |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.) +ifeq (x$(XCVER)x,xx) +XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.) +endif +ifeq (x$(XCVER), x 15) +CCOMMON_OPT += -Wl,-ld_classic +endif endif ifneq (,$(findstring $(OSNAME), FreeBSD OpenBSD DragonFly)) From 82fc29a57a403c2e6567d06e7dbaf58ae1fe30a5 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Wed, 11 Oct 2023 17:04:42 -0500 Subject: [PATCH 071/125] POWER10: Fallback to POWER8 functions As cgemm and zgemm kernels are not optimized for big endian falling back to POWER8 versions. Tested on AIX using gcc and Open XL C. 
--- kernel/power/KERNEL.POWER10 | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/kernel/power/KERNEL.POWER10 b/kernel/power/KERNEL.POWER10 index 79d889fe0..58f865322 100644 --- a/kernel/power/KERNEL.POWER10 +++ b/kernel/power/KERNEL.POWER10 @@ -19,8 +19,13 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) STRMMKERNEL = sgemm_kernel_power10.c DTRMMKERNEL = dgemm_kernel_power10.c +ifeq ($(OSNAME), AIX) +CTRMMKERNEL = ctrmm_kernel_8x4_power8.S +ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S +else CTRMMKERNEL = cgemm_kernel_power10.S ZTRMMKERNEL = zgemm_kernel_power10.S +endif SGEMMKERNEL = sgemm_kernel_power10.c SGEMMINCOPY = ../generic/gemm_ncopy_16.c @@ -62,10 +67,18 @@ DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_power10.c DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c +ifeq ($(OSNAME), AIX) +CGEMMKERNEL = cgemm_kernel_8x4_power8.S +else CGEMMKERNEL = cgemm_kernel_power10.S +endif #CGEMMKERNEL = cgemm_kernel_8x4_power8.S CGEMMINCOPY = ../generic/zgemm_ncopy_8.c +ifeq ($(OSNAME), AIX) +CGEMMITCOPY = cgemm_tcopy_8_power8.S +else CGEMMITCOPY = ../generic/zgemm_tcopy_8.c +endif CGEMMONCOPY = ../generic/zgemm_ncopy_4.c CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) @@ -73,7 +86,11 @@ CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) +ifeq ($(OSNAME), AIX) +ZGEMMKERNEL = zgemm_kernel_8x2_power8.S +else ZGEMMKERNEL = zgemm_kernel_power10.S +endif ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c @@ -124,6 +141,7 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c #SMINKERNEL = ../arm/min.c #DMINKERNEL = ../arm/min.c # +ifeq ($(C_COMPILER), GCC) ifneq ($(GCCVERSIONGTEQ9),1) ISAMAXKERNEL = isamax_power9.S else @@ -148,6 +166,15 @@ ICAMINKERNEL = icamin_power9.S else ICAMINKERNEL = 
icamin.c endif +else +ISAMAXKERNEL = isamax.c +IDAMAXKERNEL = idamax.c +ICAMAXKERNEL = icamax.c +IZAMAXKERNEL = izamax.c +ISAMINKERNEL = isamin.c +IDAMINKERNEL = idamin.c +ICAMINKERNEL = icamin.c +endif IZAMINKERNEL = izamin.c # #ISMAXKERNEL = ../arm/imax.c From 71d733e5f735d18f93ab40d15d85b5129980ec91 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Wed, 11 Oct 2023 17:18:42 -0500 Subject: [PATCH 072/125] POWER: Avoid m4 conversions for C files This patch removes intermediate m4 conversions used in sbgemm compilation as it is not needed for .c files. Tested on AIX with gcc and IBM Open XL C. --- kernel/Makefile.L3 | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 174a1d41b..7db9d7907 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -634,15 +634,7 @@ $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY) $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY) - -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s - m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@ - rm sbgemmotcopy.s sbgemmotcopy_nomacros.s -else $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ -endif ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) @@ -650,14 +642,7 @@ $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s - m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@ - rm sbgemmitcopy.s sbgemmitcopy_nomacros.s -else $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ -endif endif endif @@ -829,15 +814,8 @@ endif ifeq 
($(BUILD_BFLOAT16), 1) $(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s - m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@ - rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s -else $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ endif -endif $(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) ifeq ($(OS), AIX) From 97a61d0577bcfefe92df0515ece079234400e7eb Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Wed, 11 Oct 2023 17:36:43 -0500 Subject: [PATCH 073/125] Fix bfloat16_bits union so that it is always the sizeof unsigned short. --- test/compare_sgemm_sbgemm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/compare_sgemm_sbgemm.c b/test/compare_sgemm_sbgemm.c index 276fecae9..57d416c94 100644 --- a/test/compare_sgemm_sbgemm.c +++ b/test/compare_sgemm_sbgemm.c @@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. typedef union { unsigned short v; - struct + struct __attribute__((packed)) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ unsigned short s:1; From e98e3c4783fe9250c9bda11cada6e82ef627854e Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Wed, 11 Oct 2023 18:05:55 -0500 Subject: [PATCH 074/125] Fix float32_bits union so that it is always the sizeof float. 
--- test/compare_sgemm_sbgemm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/compare_sgemm_sbgemm.c b/test/compare_sgemm_sbgemm.c index 57d416c94..b723b6a3b 100644 --- a/test/compare_sgemm_sbgemm.c +++ b/test/compare_sgemm_sbgemm.c @@ -49,7 +49,7 @@ typedef union typedef union { float v; - struct + struct __attribute__((packed)) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ uint32_t s:1; From e7d05402e02c6cb3e9cca60cff976927cbd4f506 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 12 Oct 2023 14:24:53 +0200 Subject: [PATCH 075/125] Fix up S/D GEMM copy function definitions after #4009 --- kernel/arm64/KERNEL.A64FX | 90 +++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 50 deletions(-) diff --git a/kernel/arm64/KERNEL.A64FX b/kernel/arm64/KERNEL.A64FX index bd25f7cd8..ccbce27e1 100644 --- a/kernel/arm64/KERNEL.A64FX +++ b/kernel/arm64/KERNEL.A64FX @@ -57,7 +57,7 @@ CAMAXKERNEL = zamax.S ZAMAXKERNEL = zamax.S SAXPYKERNEL = axpy.S -DAXPYKERNEL = axpy.S +DAXPYKERNEL = daxpy_thunderx2t99.S CAXPYKERNEL = zaxpy.S ZAXPYKERNEL = zaxpy.S @@ -81,45 +81,35 @@ DGEMVTKERNEL = gemv_t.S CGEMVTKERNEL = zgemv_t.S ZGEMVTKERNEL = zgemv_t.S - -SASUMKERNEL = asum.S -DASUMKERNEL = asum.S -CASUMKERNEL = casum.S -ZASUMKERNEL = zasum.S - -SCOPYKERNEL = copy.S -DCOPYKERNEL = copy.S -CCOPYKERNEL = copy.S -ZCOPYKERNEL = copy.S - -SSWAPKERNEL = swap.S -DSWAPKERNEL = swap.S -CSWAPKERNEL = swap.S -ZSWAPKERNEL = swap.S - -ISAMAXKERNEL = iamax.S -IDAMAXKERNEL = iamax.S -ICAMAXKERNEL = izamax.S -IZAMAXKERNEL = izamax.S - -SNRM2KERNEL = nrm2.S -DNRM2KERNEL = nrm2.S -CNRM2KERNEL = znrm2.S -ZNRM2KERNEL = znrm2.S - -DDOTKERNEL = dot.S -ifneq ($(C_COMPILER), PGI) -SDOTKERNEL = ../generic/dot.c -else -SDOTKERNEL = dot.S -endif -ifneq ($(C_COMPILER), PGI) -CDOTKERNEL = zdot.S -ZDOTKERNEL = zdot.S -else -CDOTKERNEL = ../arm/zdot.c -ZDOTKERNEL = ../arm/zdot.c -endif +SASUMKERNEL = sasum_thunderx2t99.c +DASUMKERNEL = dasum_thunderx2t99.c +CASUMKERNEL = 
casum_thunderx2t99.c +ZASUMKERNEL = zasum_thunderx2t99.c + +SCOPYKERNEL = copy_thunderx2t99.c +DCOPYKERNEL = copy_thunderx2t99.c +CCOPYKERNEL = copy_thunderx2t99.c +ZCOPYKERNEL = copy_thunderx2t99.c + +SSWAPKERNEL = swap_thunderx2t99.S +DSWAPKERNEL = swap_thunderx2t99.S +CSWAPKERNEL = swap_thunderx2t99.S +ZSWAPKERNEL = swap_thunderx2t99.S + +ISAMAXKERNEL = iamax_thunderx2t99.c +IDAMAXKERNEL = iamax_thunderx2t99.c +ICAMAXKERNEL = izamax_thunderx2t99.c +IZAMAXKERNEL = izamax_thunderx2t99.c + +SNRM2KERNEL = scnrm2_thunderx2t99.c +DNRM2KERNEL = dznrm2_thunderx2t99.c +CNRM2KERNEL = scnrm2_thunderx2t99.c +ZNRM2KERNEL = dznrm2_thunderx2t99.c + +DDOTKERNEL = dot.c +SDOTKERNEL = dot.c +CDOTKERNEL = zdot_thunderx2t99.c +ZDOTKERNEL = zdot_thunderx2t99.c DSDOTKERNEL = dot.S DGEMM_BETA = dgemm_beta.S @@ -128,10 +118,10 @@ SGEMM_BETA = sgemm_beta.S SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S -SGEMMINCOPY = sgemm_ncopy_sve_v1.c -SGEMMITCOPY = sgemm_tcopy_sve_v1.c -SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S -SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S +SGEMMINCOPY = gemm_ncopy_sve_v1x$(SGEMM_UNROLL_N).c +SGEMMITCOPY = gemm_tcopy_sve_v1x$(SGEMM_UNROLL_N).c +SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S +SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) @@ -149,8 +139,8 @@ SSYMMLCOPY_M = symm_lcopy_sve.c DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S -DGEMMINCOPY = dgemm_ncopy_sve_v1.c -DGEMMITCOPY = dgemm_tcopy_sve_v1.c +DGEMMINCOPY = gemm_ncopy_sve_v1x$(DGEMM_UNROLL_N).c +DGEMMITCOPY = gemm_tcopy_sve_v1x$(DGEMM_UNROLL_N).c DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S @@ -170,8 +160,8 @@ DSYMMLCOPY_M = symm_lcopy_sve.c CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S CTRMMKERNEL = 
ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S -CGEMMINCOPY = cgemm_ncopy_sve_v1.c -CGEMMITCOPY = cgemm_tcopy_sve_v1.c +CGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c +CGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c @@ -194,8 +184,8 @@ CSYMMLCOPY_M = zsymm_lcopy_sve.c ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S -ZGEMMINCOPY = zgemm_ncopy_sve_v1.c -ZGEMMITCOPY = zgemm_tcopy_sve_v1.c +ZGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c +ZGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c From d46eba06a7a95a61649cf25a1fd350f46d8050b6 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Thu, 12 Oct 2023 09:41:33 -0500 Subject: [PATCH 076/125] Pack structure only on AIX. --- test/compare_sgemm_sbgemm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/compare_sgemm_sbgemm.c b/test/compare_sgemm_sbgemm.c index b723b6a3b..cf808b56d 100644 --- a/test/compare_sgemm_sbgemm.c +++ b/test/compare_sgemm_sbgemm.c @@ -32,7 +32,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
typedef union { unsigned short v; +#if defined(_AIX) struct __attribute__((packed)) +#else + struct +#endif { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ unsigned short s:1; @@ -49,7 +53,11 @@ typedef union typedef union { float v; +#if defined(_AIX) struct __attribute__((packed)) +#else + struct +#endif { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ uint32_t s:1; From fe75c88a2c65240f4b7265be3dbc4b93132ec24f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 12 Oct 2023 18:20:09 +0200 Subject: [PATCH 077/125] AzureCI: move OSX-Clang jobs to macOS-12 to resolve setup/build timeouts --- azure-pipelines.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ff56ad00b..317bc504a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -167,11 +167,10 @@ jobs: - job: OSX_OpenMP_Clang pool: - vmImage: 'macOS-11' + vmImage: 'macOS-latest' variables: LD_LIBRARY_PATH: /usr/local/opt/llvm/lib LIBRARY_PATH: /usr/local/opt/llvm/lib - MACOSX_DEPLOYMENT_TARGET: 11.0 steps: - script: | brew update @@ -180,7 +179,7 @@ jobs: - job: OSX_OpenMP_Clang_cmake pool: - vmImage: 'macOS-11' + vmImage: 'macOS-latest' variables: LD_LIBRARY_PATH: /usr/local/opt/llvm/lib LIBRARY_PATH: /usr/local/opt/llvm/lib @@ -210,7 +209,7 @@ jobs: - job: OSX_Ifort_Clang pool: - vmImage: 'macOS-11' + vmImage: 'macOS-latest' variables: LD_LIBRARY_PATH: /usr/local/opt/llvm/lib MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg From 9f42570e33db010665d65bc50f803a673fcc5af0 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Thu, 12 Oct 2023 12:37:40 -0500 Subject: [PATCH 078/125] POWER: Increase macro size limit for AIX This patch increases the macro size limit from 4096 to 16384 to allow compiling larger assembly files in AIX. Tested with GCC and IBM Open XL C. 
--- kernel/Makefile.L3 | 97 +++++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 44 deletions(-) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 7db9d7907..448e096a3 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -61,6 +61,15 @@ ifeq ($(CORE), ZEN) USE_TRMM = 1 endif +ifeq ($(OS), AIX) +M4VERSION := $(shell m4 --version < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?) +ifeq ($(M4VERSION), 0) +M4_AIX := m4 -l16384 +else +M4_AIX := m4 -B16384 +endif +$(info $$var is [${$(M4_AIX)}]) +endif ifeq ($(CORE), POWER8) ifeq ($(BINARY64),1) USE_TRMM = 1 @@ -653,7 +662,7 @@ $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) $(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s - m4 sgemmotcopy.s > sgemmotcopy_nomacros.s + $(M4_AIX) sgemmotcopy.s > sgemmotcopy_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@ rm sgemmotcopy.s sgemmotcopy_nomacros.s else @@ -669,7 +678,7 @@ $(KDIR)$(SGEMMINCOPYOBJ) : $(KERNELDIR)/$(SGEMMINCOPY) $(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s - m4 sgemmitcopy.s > sgemmitcopy_nomacros.s + $(M4_AIX) sgemmitcopy.s > sgemmitcopy_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@ rm sgemmitcopy.s sgemmitcopy_nomacros.s else @@ -681,7 +690,7 @@ endif $(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s - m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s + $(M4_AIX) dgemm_ncopy.s > dgemm_ncopy_nomacros.s $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@ rm dgemm_ncopy.s dgemm_ncopy_nomacros.s else @@ -699,7 +708,7 @@ $(KDIR)$(DGEMMINCOPYOBJ) : $(KERNELDIR)/$(DGEMMINCOPY) $(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< 
-o - > dgemm_itcopy.s - m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s + $(M4_AIX) dgemm_itcopy.s > dgemm_itcopy_nomacros.s $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@ rm dgemm_itcopy.s dgemm_itcopy_nomacros.s else @@ -742,7 +751,7 @@ $(KDIR)$(CGEMMINCOPYOBJ) : $(KERNELDIR)/$(CGEMMINCOPY) $(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -S $< -o - > cgemm_itcopy.s - m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s + $(M4_AIX) cgemm_itcopy.s > cgemm_itcopy_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@ rm cgemm_itcopy.s cgemm_itcopy_nomacros.s else @@ -765,7 +774,7 @@ $(KDIR)$(ZGEMMINCOPYOBJ) : $(KERNELDIR)/$(ZGEMMINCOPY) $(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s - m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s + $(M4_AIX) zgemm_itcopy.s > zgemm_itcopy_nomacros.s $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@ rm zgemm_itcopy.s zgemm_itcopy_nomacros.s else @@ -797,7 +806,7 @@ endif $(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s - m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s + $(M4_AIX) sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@ rm sgemm_kernel$(TSUFFIX).s sgemm_kernel$(TSUFFIX)_nomacros.s else @@ -820,7 +829,7 @@ endif $(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s - m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s + $(M4_AIX) dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@ rm 
dgemm_kernel$(TSUFFIX).s dgemm_kernel$(TSUFFIX)_nomacros.s else @@ -833,7 +842,7 @@ $(KDIR)qgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEP $(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s - m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s + $(M4_AIX) cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@ rm cgemm_kernel_n.s cgemm_kernel_n_nomacros.s else @@ -843,7 +852,7 @@ endif $(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s - m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s + $(M4_AIX) cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@ rm cgemm_kernel_l.s cgemm_kernel_l_nomacros.s else @@ -853,7 +862,7 @@ endif $(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s - m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s + $(M4_AIX) cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s else @@ -863,7 +872,7 @@ endif $(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s - m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s + $(M4_AIX) cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@ rm cgemm_kernel_b.s cgemm_kernel_b_nomacros.s else @@ -873,7 +882,7 @@ endif $(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) ifeq 
($(OS), AIX) $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s - m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s + $(M4_AIX) zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@ rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s else ifeq ($(CORE),SANDYBRIDGE) @@ -885,7 +894,7 @@ endif $(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s - m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s + $(M4_AIX) zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@ rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s else ifeq ($(CORE),SANDYBRIDGE) @@ -897,7 +906,7 @@ endif $(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s - m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s + $(M4_AIX) zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@ rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s else ifeq ($(CORE),SANDYBRIDGE) @@ -909,7 +918,7 @@ endif $(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s - m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s + $(M4_AIX) zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@ rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s else ifeq ($(CORE),SANDYBRIDGE) @@ -935,7 +944,7 @@ ifdef USE_TRMM $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s - m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s + 
$(M4_AIX) strmmkernel_ln.s > strmmkernel_ln_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@ rm strmmkernel_ln.s strmmkernel_ln_nomacros.s else @@ -945,7 +954,7 @@ endif $(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s - m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s + $(M4_AIX) strmmkernel_lt.s > strmmkernel_lt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@ rm strmmkernel_lt.s strmmkernel_lt_nomacros.s else @@ -955,7 +964,7 @@ endif $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s - m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s + $(M4_AIX) strmmkernel_rn.s > strmmkernel_rn_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@ rm strmmkernel_rn.s strmmkernel_rn_nomacros.s else @@ -965,7 +974,7 @@ endif $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s - m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s + $(M4_AIX) strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s else @@ -975,7 +984,7 @@ endif $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s - m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s + $(M4_AIX) dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT 
-UTRANSA dtrmm_kernel_ln_nomacros.s -o $@ rm dtrmm_kernel_ln.s dtrmm_kernel_ln_nomacros.s else @@ -985,7 +994,7 @@ endif $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s - m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s + $(M4_AIX) dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@ rm dtrmm_kernel_lt.s dtrmm_kernel_lt_nomacros.s else @@ -995,7 +1004,7 @@ endif $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s - m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s + $(M4_AIX) dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@ rm dtrmm_kernel_rn.s dtrmm_kernel_rn_nomacros.s else @@ -1005,7 +1014,7 @@ endif $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s - m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s + $(M4_AIX) dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@ rm dtrmm_kernel_rt.s dtrmm_kernel_rt_nomacros.s else @@ -1027,7 +1036,7 @@ $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s - m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s + $(M4_AIX) ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA 
-UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@ rm ctrmm_kernel_ln.s ctrmm_kernel_ln_nomacros.s else @@ -1037,7 +1046,7 @@ endif $(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s - m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s + $(M4_AIX) ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@ rm ctrmm_kernel_lt.s ctrmm_kernel_lt_nomacros.s else @@ -1047,7 +1056,7 @@ endif $(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s - m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s + $(M4_AIX) ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@ rm ctrmm_kernel_lr.s ctrmm_kernel_lr_nomacros.s else @@ -1057,7 +1066,7 @@ endif $(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s - m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s + $(M4_AIX) ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@ rm ctrmm_kernel_lc_nomacros.s ctrmm_kernel_lc.s else @@ -1067,7 +1076,7 @@ endif $(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s - m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s + $(M4_AIX) ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT 
-UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@ rm ctrmm_kernel_rn.s ctrmm_kernel_rn_nomacros.s else @@ -1077,7 +1086,7 @@ endif $(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s - m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s + $(M4_AIX) ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@ rm ctrmm_kernel_rt.s ctrmm_kernel_rt_nomacros.s else @@ -1087,7 +1096,7 @@ endif $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s - m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s + $(M4_AIX) ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@ rm ctrmm_kernel_rr.s ctrmm_kernel_rr_nomacros.s else @@ -1097,7 +1106,7 @@ endif $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s - m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s + $(M4_AIX) ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@ rm ctrmm_kernel_RC.s ctrmm_kernel_RC_nomacros.s else @@ -1107,7 +1116,7 @@ endif $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s - m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s + $(M4_AIX) ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE 
-DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@ rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s else ifeq ($(CORE), SANDYBRIDGE) @@ -1119,7 +1128,7 @@ endif $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s - m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s + $(M4_AIX) ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@ rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s else ifeq ($(CORE), SANDYBRIDGE) @@ -1131,7 +1140,7 @@ endif $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s - m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s + $(M4_AIX) ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@ rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s else ifeq ($(CORE), SANDYBRIDGE) @@ -1143,7 +1152,7 @@ endif $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s - m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s + $(M4_AIX) ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@ rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s else ifeq ($(CORE), SANDYBRIDGE) @@ -1155,7 +1164,7 @@ endif $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s - m4 ztrmm_kernel_rn.s > 
ztrmm_kernel_rn_nomacros.s + $(M4_AIX) ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@ rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s else ifeq ($(CORE), SANDYBRIDGE) @@ -1167,7 +1176,7 @@ endif $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s - m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s + $(M4_AIX) ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@ rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s else ifeq ($(CORE), SANDYBRIDGE) @@ -1179,7 +1188,7 @@ endif $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s - m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s + $(M4_AIX) ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@ rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s else ifeq ($(CORE), SANDYBRIDGE) @@ -1191,7 +1200,7 @@ endif $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s - m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s + $(M4_AIX) ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@ rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s else ifeq ($(CORE), SANDYBRIDGE) @@ -1213,7 +1222,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) 
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s - m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s + $(M4_AIX) strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s else @@ -1373,7 +1382,7 @@ $(KDIR)dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LN) $(DT $(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s - m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s + $(M4_AIX) dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@ rm dtrsm_kernel_lt.s dtrsm_kernel_lt_nomacros.s else @@ -2965,7 +2974,7 @@ $(KDIR)cgemm_kernel_l$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMM $(KDIR)cgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) ifeq ($(OS), AIX) $(CC) $(PFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s - m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s + $(M4_AIX) cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s else @@ -3011,7 +3020,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) ifeq ($(OS), AIX) $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s - m4 strmmkernel_rn.s > strmm_kernel_rt_nomacros.s + $(M4_AIX) strmmkernel_rn.s > strmm_kernel_rt_nomacros.s $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA 
strmm_kernel_rt_nomacros.s -o $@ rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s else From 301e2ecc49aa9207573a062fe272f902161c48a7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 16 Oct 2023 22:15:46 +0200 Subject: [PATCH 079/125] Cray Fortran uses -O in combinations like -O omp so don't filter that out --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 299970c67..8621a8b3f 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,11 @@ export NO_LAPACK export C_LAPACK endif +ifeq ($(F_COMPILER),CRAY) +LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS)) +else LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS)) +endif SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test From b41cab08756563819e0cbc7ab005ab746fa4721b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 16 Oct 2023 22:20:59 +0200 Subject: [PATCH 080/125] Need to use override to actually strip down the already defined FFLAGS for NAG and CCE Fortran --- Makefile.system | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.system b/Makefile.system index 77c36c870..868cca4f9 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1642,11 +1642,11 @@ endif ifeq ($(F_COMPILER),NAG) LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) -FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) +override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) endif ifeq ($(F_COMPILER),CRAY) LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) -FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) +override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) endif LAPACK_CFLAGS = $(CFLAGS) From 
f8c230c21c0bb20dc61d14988069ae60df6e0423 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 18 Oct 2023 11:58:54 +0200 Subject: [PATCH 081/125] Switch MINGW-W64 jobs to UCRT --- .github/workflows/dynamic_arch.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml index 4fe6e63fc..0c39bfddf 100644 --- a/.github/workflows/dynamic_arch.yml +++ b/.github/workflows/dynamic_arch.yml @@ -151,13 +151,13 @@ jobs: strategy: fail-fast: false matrix: - msystem: [MINGW64, MINGW32, CLANG64, CLANG32] + msystem: [UCRT64, MINGW32, CLANG64, CLANG32] idx: [int32, int64] build-type: [Release] include: - - msystem: MINGW64 + - msystem: UCRT64 idx: int32 - target-prefix: mingw-w64-x86_64 + target-prefix: mingw-w64-ucrt-x86_64 fc-pkg: fc - msystem: MINGW32 idx: int32 @@ -175,10 +175,10 @@ jobs: target-prefix: mingw-w64-clang-i686 fc-pkg: cc c-lapack-flags: -DC_LAPACK=ON - - msystem: MINGW64 + - msystem: UCRT64 idx: int64 idx64-flags: -DBINARY=64 -DINTERFACE64=1 - target-prefix: mingw-w64-x86_64 + target-prefix: mingw-w64-ucrt-x86_64 fc-pkg: fc - msystem: CLANG64 idx: int64 @@ -188,9 +188,9 @@ jobs: # Compiling with Flang 16 seems to cause test errors on machines # with AVX512 instructions. Revisit after MSYS2 distributes Flang 17. 
no-avx512-flags: -DNO_AVX512=1 - - msystem: MINGW64 + - msystem: UCRT64 idx: int32 - target-prefix: mingw-w64-x86_64 + target-prefix: mingw-w64-ucrt-x86_64 fc-pkg: fc build-type: None exclude: From e12aaed13d39a77bb089d5c39478ed203160f196 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 18 Oct 2023 16:28:54 +0200 Subject: [PATCH 082/125] Fix unwanted fallthrough from Intel Family 6 to 15 in case of identification failure --- driver/others/dynamic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 8e0f53f74..69a473060 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -805,7 +805,8 @@ static gotoblas_t *get_coretype(void){ } return NULL; } - case 0xf: + break; + case 0xf: if (model <= 0x2) return &gotoblas_NORTHWOOD; return &gotoblas_PRESCOTT; } From 6b8379d6d998f94ecb9a6adccc6cbd3f1d23f1c7 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 19 Oct 2023 11:38:26 +0200 Subject: [PATCH 083/125] Run nightly Homebrew cron job only on the main repo, not on forks I noticed this because GitHub emailed me that it would disable the nightly job because it hadn't changed for 3 months. It currently takes 30-50 minutes daily, and by default runs on all forks of the main repository that have the relevant workflow yaml file. That serves little purpose and wastes quite a bit of energy - so disable the runs outside of the main repo. This will not disable the runs on forks already made in the past that contain this workflow file, but it does save 3 months worth of runs on every new fork that is created. 
[skip ci] --- .github/workflows/nightly-Homebrew-build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nightly-Homebrew-build.yml b/.github/workflows/nightly-Homebrew-build.yml index 96063565d..eb315f1d4 100644 --- a/.github/workflows/nightly-Homebrew-build.yml +++ b/.github/workflows/nightly-Homebrew-build.yml @@ -23,6 +23,7 @@ permissions: jobs: build-OpenBLAS-with-Homebrew: + if: "github.repository == 'OpenMathLib/OpenBLAS'" runs-on: macos-latest env: DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer From 980f702f72c20d01f4110e40fc44e35352812b8b Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Thu, 19 Oct 2023 18:48:19 -0500 Subject: [PATCH 084/125] POWER: AIX: Make use of power10 optimization POWER10 optimizations are disabled when using default AIX assembler. As we have fixed many issues recently, enabling optimization path for default assembler. --- Makefile.system | 8 ++++---- kernel/power/KERNEL.POWER10 | 4 ---- param.h | 5 ----- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/Makefile.system b/Makefile.system index 868cca4f9..30b0ddec2 100644 --- a/Makefile.system +++ b/Makefile.system @@ -277,10 +277,6 @@ endif ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 -# Determine if the assembler is GNU Assembler -HAVE_GAS := $(shell $(AS) -v < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?) -GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS) - # Generating Makefile.conf and config.h DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all) @@ -760,7 +756,11 @@ DYNAMIC_CORE += POWER9 else $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) 
endif +ifeq ($(OSNAME), AIX) +LDVERSIONGTEQ35 := 1 +else LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35) +endif ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11) DYNAMIC_CORE += POWER10 CCOMMON_OPT += -DHAVE_P10_SUPPORT diff --git a/kernel/power/KERNEL.POWER10 b/kernel/power/KERNEL.POWER10 index 58f865322..9047c714c 100644 --- a/kernel/power/KERNEL.POWER10 +++ b/kernel/power/KERNEL.POWER10 @@ -1,6 +1,3 @@ -ifeq ($(HAVE_GAS), 1) -include $(KERNELDIR)/KERNEL.POWER8 -else #SGEMM_BETA = ../generic/gemm_beta.c #DGEMM_BETA = ../generic/gemm_beta.c #CGEMM_BETA = ../generic/zgemm_beta.c @@ -265,4 +262,3 @@ QCABS_KERNEL = ../generic/cabs.c #Dump kernel CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c -endif diff --git a/param.h b/param.h index 03bf3624f..ee4640f57 100644 --- a/param.h +++ b/param.h @@ -2600,13 +2600,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 8 -#if defined(HAVE_GAS) && (HAVE_GAS == 1) -#define DGEMM_DEFAULT_UNROLL_M 16 -#define DGEMM_DEFAULT_UNROLL_N 4 -#else #define DGEMM_DEFAULT_UNROLL_M 8 #define DGEMM_DEFAULT_UNROLL_N 8 -#endif #define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_M 8 From a7f73c764cee8fada4f7f359ae4a8be6b9810ada Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 27 Oct 2023 16:48:47 +0200 Subject: [PATCH 085/125] Clarify "make" options and the need to repeat them in the install step --- README.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 081d45870..f57cec831 100644 --- a/README.md +++ b/README.md @@ -54,10 +54,15 @@ Building OpenBLAS requires the following to be installed: Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically. To set a specific target CPU, use `make TARGET=xxx`, e.g. 
`make TARGET=NEHALEM`. -The full target list is in the file `TargetList.txt`. For building with `cmake`, the -usual conventions apply, i.e. create a build directory either underneath the toplevel -OpenBLAS source directory or separate from it, and invoke `cmake` there with the path -to the source tree and any build options you plan to set. +The full target list is in the file `TargetList.txt`, other build options are documented in Makefile.rule and +can either be set there (typically by removing the comment character from the respective line), or used on the +`make` command line. +Note that when you run `make install` after building, you need to repeat all command line options you provided to `make` +in the build step, as some settings like the supported maximum number of threads are automatically derived from the +build host by default, which might not be what you want. +For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel +OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any +build options you plan to set. ### Cross compile From f5e1f20f4db408d826cb89638175f1987304cf5b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 27 Oct 2023 17:10:37 +0200 Subject: [PATCH 086/125] Update target list --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f57cec831..3c4e38f18 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ Use `PREFIX=` when invoking `make`, for example ```sh make install PREFIX=your_installation_directory ``` - +(along with all options you added on the `make` command line in the preceding build step) The default installation directory is `/opt/OpenBLAS`. ## Supported CPUs and Operating Systems @@ -142,7 +142,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th - **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels.
(Thanks to Werner Saar) - **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations. - **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations. -- **AMD ZEN**: Uses Haswell codes with some optimizations. +- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4) #### MIPS32 @@ -174,13 +174,16 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th - **TSV110**: Optimized some Level-3 helper functions - **EMAG 8180**: preliminary support based on A57 - **Neoverse N1**: (AWS Graviton2) preliminary support -- **Apple Vortex**: preliminary support based on ARMV8 +- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS +- **Apple Vortex**: preliminary support based on ThunderX2/3 +- **A64FX**: preliminary support, optimized Level-3 BLAS +- **ARMV8SVE**: any ARMV8 cpu with SVE extensions #### PPC/PPC64 - **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1` - **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only. -- **POWER10**: +- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2. 
#### IBM zEnterprise System From 1cec1c0fc7509a949b65ce5bb50696c18838046e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 28 Oct 2023 14:43:19 +0200 Subject: [PATCH 087/125] Add FreeBSD clang/gfortran build with OpenMP --- .cirrus.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.cirrus.yml b/.cirrus.yml index 02cd40997..c405b958d 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -148,6 +148,15 @@ FreeBSD_task: - ls -l /usr/local/lib - gmake CC=gcc INTERFACE64=1 +FreeBSD_task: + name: FreeBSD-clang-openmp + freebsd_instance: + image_family: freebsd-13-2 + install_script: + - pkg update -f && pkg upgrade -y && pkg install -y gmake gcc + compile_script: + - gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1 + #task: # name: Windows/LLVM16 --- too slow --- # windows_container: From 289a5f6d9b8570de6fa5c2bf2789e04abce494ea Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 28 Oct 2023 18:44:58 +0200 Subject: [PATCH 088/125] work around libgfortran install issue on FreeBSD --- .cirrus.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cirrus.yml b/.cirrus.yml index c405b958d..5a1f2cfda 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -154,6 +154,7 @@ FreeBSD_task: image_family: freebsd-13-2 install_script: - pkg update -f && pkg upgrade -y && pkg install -y gmake gcc + - ln -s /usr/local/gcc12/lib/libgfortran.so.5.0.0 /usr/lib/libgfortran.so compile_script: - gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1 From dc1c880782e33307aaa2b04467b110003f3305e1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 28 Oct 2023 23:14:36 +0200 Subject: [PATCH 089/125] fix libgfortran path on bsd --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 5a1f2cfda..6c2baf8a0 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -154,7 +154,7 @@ FreeBSD_task: image_family: freebsd-13-2 install_script: - pkg update -f && pkg upgrade -y && pkg install -y gmake gcc - - ln -s 
/usr/local/gcc12/lib/libgfortran.so.5.0.0 /usr/lib/libgfortran.so + - ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so compile_script: - gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1 From d003ad630b1792f169373b8ab35c5ea7a6dfdccd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 31 Oct 2023 10:26:38 +0100 Subject: [PATCH 090/125] Increase the default GEMM buffer size on modern ARM server cpus --- common_arm64.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/common_arm64.h b/common_arm64.h index 436ccb8f5..1e593c66f 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -162,7 +162,11 @@ REALNAME: #define HUGE_PAGESIZE ( 4 << 20) #ifndef BUFFERSIZE +if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE) +#define BUFFER_SIZE (32 << 22) +else #define BUFFER_SIZE (32 << 20) +#endif #else #define BUFFER_SIZE (32 << BUFFERSIZE) #endif From 728788f6676bb5e999cdf4fbcda9e2c7b8b9cd53 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 31 Oct 2023 11:08:22 +0100 Subject: [PATCH 091/125] typo fix --- common_arm64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common_arm64.h b/common_arm64.h index 1e593c66f..6ae6a35a3 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -162,9 +162,9 @@ REALNAME: #define HUGE_PAGESIZE ( 4 << 20) #ifndef BUFFERSIZE -if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE) +#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE) #define BUFFER_SIZE (32 << 22) -else +#else #define BUFFER_SIZE (32 << 20) #endif #else From c8882bd9d890c332adaf992a0b9da6be8384bb97 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Wed, 1 Nov 2023 14:53:55 -0500 Subject: [PATCH 092/125] Remove POWER7 from cpu list. 
--- driver/others/dynamic_power.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index db04e635f..b4a1cc6be 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -39,7 +39,6 @@ char *gotoblas_corename(void) { #define CPU_UNKNOWN 0 #define CPU_POWER5 5 #define CPU_POWER6 6 -#define CPU_POWER7 7 #define CPU_POWER8 8 #define CPU_POWER9 9 #define CPU_POWER10 10 @@ -53,9 +52,6 @@ static int cpuid(void) #ifdef POWER_6 if (arch == POWER_6) return CPU_POWER6; #endif -#ifdef POWER_7 - else if (arch == POWER_7) return CPU_POWER7; -#endif #ifdef POWER_8 else if (arch == POWER_8) return CPU_POWER8; #endif From 7dcb2d67f23caa8b70df4ea37c05a12ff8c15898 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Wed, 1 Nov 2023 15:23:28 -0500 Subject: [PATCH 093/125] Have POWER7 return arch=POWER6. --- driver/others/dynamic_power.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index b4a1cc6be..10a5d64b3 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -52,6 +52,9 @@ static int cpuid(void) #ifdef POWER_6 if (arch == POWER_6) return CPU_POWER6; #endif +#ifdef POWER_7 + else if (arch == POWER_7) return CPU_POWER6; +#endif #ifdef POWER_8 else if (arch == POWER_8) return CPU_POWER8; #endif From 3bfa4d4dccf8616ab330387a7be1ebd709a3214c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 3 Nov 2023 14:55:31 +0100 Subject: [PATCH 094/125] Fix outdated SVE kernel definitions for Cortex cpus by aliasing to ARMV8SVE --- kernel/arm64/KERNEL.CORTEXA510 | 217 +-------------------------------- kernel/arm64/KERNEL.CORTEXA710 | 217 +-------------------------------- kernel/arm64/KERNEL.CORTEXX2 | 217 +-------------------------------- 3 files changed, 3 insertions(+), 648 deletions(-) diff --git a/kernel/arm64/KERNEL.CORTEXA510 b/kernel/arm64/KERNEL.CORTEXA510 index bd25f7cd8..bc5999097 100644 --- 
a/kernel/arm64/KERNEL.CORTEXA510 +++ b/kernel/arm64/KERNEL.CORTEXA510 @@ -1,216 +1 @@ -SAMINKERNEL = ../arm/amin.c -DAMINKERNEL = ../arm/amin.c -CAMINKERNEL = ../arm/zamin.c -ZAMINKERNEL = ../arm/zamin.c - -SMAXKERNEL = ../arm/max.c -DMAXKERNEL = ../arm/max.c - -SMINKERNEL = ../arm/min.c -DMINKERNEL = ../arm/min.c - -ISAMINKERNEL = ../arm/iamin.c -IDAMINKERNEL = ../arm/iamin.c -ICAMINKERNEL = ../arm/izamin.c -IZAMINKERNEL = ../arm/izamin.c - -ISMAXKERNEL = ../arm/imax.c -IDMAXKERNEL = ../arm/imax.c - -ISMINKERNEL = ../arm/imin.c -IDMINKERNEL = ../arm/imin.c - -STRSMKERNEL_LN = trsm_kernel_LN_sve.c -STRSMKERNEL_LT = trsm_kernel_LT_sve.c -STRSMKERNEL_RN = trsm_kernel_RN_sve.c -STRSMKERNEL_RT = trsm_kernel_RT_sve.c - -DTRSMKERNEL_LN = trsm_kernel_LN_sve.c -DTRSMKERNEL_LT = trsm_kernel_LT_sve.c -DTRSMKERNEL_RN = trsm_kernel_RN_sve.c -DTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -TRSMCOPYLN_M = trsm_lncopy_sve.c -TRSMCOPYLT_M = trsm_ltcopy_sve.c -TRSMCOPYUN_M = trsm_uncopy_sve.c -TRSMCOPYUT_M = trsm_utcopy_sve.c - -CTRSMKERNEL_LN = trsm_kernel_LN_sve.c -CTRSMKERNEL_LT = trsm_kernel_LT_sve.c -CTRSMKERNEL_RN = trsm_kernel_RN_sve.c -CTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c -ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c -ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c -ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c -ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c -ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c -ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c - - -SAMAXKERNEL = amax.S -DAMAXKERNEL = amax.S -CAMAXKERNEL = zamax.S -ZAMAXKERNEL = zamax.S - -SAXPYKERNEL = axpy.S -DAXPYKERNEL = axpy.S -CAXPYKERNEL = zaxpy.S -ZAXPYKERNEL = zaxpy.S - -SROTKERNEL = rot.S -DROTKERNEL = rot.S -CROTKERNEL = zrot.S -ZROTKERNEL = zrot.S - -SSCALKERNEL = scal.S -DSCALKERNEL = scal.S -CSCALKERNEL = zscal.S -ZSCALKERNEL = zscal.S - -SGEMVNKERNEL = gemv_n.S -DGEMVNKERNEL = gemv_n.S -CGEMVNKERNEL = zgemv_n.S -ZGEMVNKERNEL = zgemv_n.S - -SGEMVTKERNEL = gemv_t.S -DGEMVTKERNEL = gemv_t.S 
-CGEMVTKERNEL = zgemv_t.S -ZGEMVTKERNEL = zgemv_t.S - - -SASUMKERNEL = asum.S -DASUMKERNEL = asum.S -CASUMKERNEL = casum.S -ZASUMKERNEL = zasum.S - -SCOPYKERNEL = copy.S -DCOPYKERNEL = copy.S -CCOPYKERNEL = copy.S -ZCOPYKERNEL = copy.S - -SSWAPKERNEL = swap.S -DSWAPKERNEL = swap.S -CSWAPKERNEL = swap.S -ZSWAPKERNEL = swap.S - -ISAMAXKERNEL = iamax.S -IDAMAXKERNEL = iamax.S -ICAMAXKERNEL = izamax.S -IZAMAXKERNEL = izamax.S - -SNRM2KERNEL = nrm2.S -DNRM2KERNEL = nrm2.S -CNRM2KERNEL = znrm2.S -ZNRM2KERNEL = znrm2.S - -DDOTKERNEL = dot.S -ifneq ($(C_COMPILER), PGI) -SDOTKERNEL = ../generic/dot.c -else -SDOTKERNEL = dot.S -endif -ifneq ($(C_COMPILER), PGI) -CDOTKERNEL = zdot.S -ZDOTKERNEL = zdot.S -else -CDOTKERNEL = ../arm/zdot.c -ZDOTKERNEL = ../arm/zdot.c -endif -DSDOTKERNEL = dot.S - -DGEMM_BETA = dgemm_beta.S -SGEMM_BETA = sgemm_beta.S - -SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S -STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S - -SGEMMINCOPY = sgemm_ncopy_sve_v1.c -SGEMMITCOPY = sgemm_tcopy_sve_v1.c -SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S -SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S - -SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) -SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) -SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) -SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) - -STRMMUNCOPY_M = trmm_uncopy_sve_v1.c -STRMMLNCOPY_M = trmm_lncopy_sve_v1.c -STRMMUTCOPY_M = trmm_utcopy_sve_v1.c -STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c - -SSYMMUCOPY_M = symm_ucopy_sve.c -SSYMMLCOPY_M = symm_lcopy_sve.c - -DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S -DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S - -DGEMMINCOPY = dgemm_ncopy_sve_v1.c -DGEMMITCOPY = dgemm_tcopy_sve_v1.c -DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S -DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S - -DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) -DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) -DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) 
-DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) - -DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c -DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c -DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c -DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c - -DSYMMUCOPY_M = symm_ucopy_sve.c -DSYMMLCOPY_M = symm_lcopy_sve.c - -CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S -CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S - -CGEMMINCOPY = cgemm_ncopy_sve_v1.c -CGEMMITCOPY = cgemm_tcopy_sve_v1.c -CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c -CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c - -CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) -CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) -CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) -CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) - -CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c -CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c -CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c -CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c - -CHEMMLTCOPY_M = zhemm_ltcopy_sve.c -CHEMMUTCOPY_M = zhemm_utcopy_sve.c - -CSYMMUCOPY_M = zsymm_ucopy_sve.c -CSYMMLCOPY_M = zsymm_lcopy_sve.c - -ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S -ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S - -ZGEMMINCOPY = zgemm_ncopy_sve_v1.c -ZGEMMITCOPY = zgemm_tcopy_sve_v1.c -ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c -ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c - -ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) -ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) -ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) -ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) - -ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c -ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c -ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c -ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c - -ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c -ZHEMMUTCOPY_M = zhemm_utcopy_sve.c - -ZSYMMUCOPY_M = zsymm_ucopy_sve.c -ZSYMMLCOPY_M = zsymm_lcopy_sve.c +include $(KERNELDIR)/KERNEL.ARMV8SVE diff --git a/kernel/arm64/KERNEL.CORTEXA710 
b/kernel/arm64/KERNEL.CORTEXA710 index bd25f7cd8..bc5999097 100644 --- a/kernel/arm64/KERNEL.CORTEXA710 +++ b/kernel/arm64/KERNEL.CORTEXA710 @@ -1,216 +1 @@ -SAMINKERNEL = ../arm/amin.c -DAMINKERNEL = ../arm/amin.c -CAMINKERNEL = ../arm/zamin.c -ZAMINKERNEL = ../arm/zamin.c - -SMAXKERNEL = ../arm/max.c -DMAXKERNEL = ../arm/max.c - -SMINKERNEL = ../arm/min.c -DMINKERNEL = ../arm/min.c - -ISAMINKERNEL = ../arm/iamin.c -IDAMINKERNEL = ../arm/iamin.c -ICAMINKERNEL = ../arm/izamin.c -IZAMINKERNEL = ../arm/izamin.c - -ISMAXKERNEL = ../arm/imax.c -IDMAXKERNEL = ../arm/imax.c - -ISMINKERNEL = ../arm/imin.c -IDMINKERNEL = ../arm/imin.c - -STRSMKERNEL_LN = trsm_kernel_LN_sve.c -STRSMKERNEL_LT = trsm_kernel_LT_sve.c -STRSMKERNEL_RN = trsm_kernel_RN_sve.c -STRSMKERNEL_RT = trsm_kernel_RT_sve.c - -DTRSMKERNEL_LN = trsm_kernel_LN_sve.c -DTRSMKERNEL_LT = trsm_kernel_LT_sve.c -DTRSMKERNEL_RN = trsm_kernel_RN_sve.c -DTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -TRSMCOPYLN_M = trsm_lncopy_sve.c -TRSMCOPYLT_M = trsm_ltcopy_sve.c -TRSMCOPYUN_M = trsm_uncopy_sve.c -TRSMCOPYUT_M = trsm_utcopy_sve.c - -CTRSMKERNEL_LN = trsm_kernel_LN_sve.c -CTRSMKERNEL_LT = trsm_kernel_LT_sve.c -CTRSMKERNEL_RN = trsm_kernel_RN_sve.c -CTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c -ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c -ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c -ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c -ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c -ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c -ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c - - -SAMAXKERNEL = amax.S -DAMAXKERNEL = amax.S -CAMAXKERNEL = zamax.S -ZAMAXKERNEL = zamax.S - -SAXPYKERNEL = axpy.S -DAXPYKERNEL = axpy.S -CAXPYKERNEL = zaxpy.S -ZAXPYKERNEL = zaxpy.S - -SROTKERNEL = rot.S -DROTKERNEL = rot.S -CROTKERNEL = zrot.S -ZROTKERNEL = zrot.S - -SSCALKERNEL = scal.S -DSCALKERNEL = scal.S -CSCALKERNEL = zscal.S -ZSCALKERNEL = zscal.S - -SGEMVNKERNEL = gemv_n.S -DGEMVNKERNEL = gemv_n.S -CGEMVNKERNEL = zgemv_n.S 
-ZGEMVNKERNEL = zgemv_n.S - -SGEMVTKERNEL = gemv_t.S -DGEMVTKERNEL = gemv_t.S -CGEMVTKERNEL = zgemv_t.S -ZGEMVTKERNEL = zgemv_t.S - - -SASUMKERNEL = asum.S -DASUMKERNEL = asum.S -CASUMKERNEL = casum.S -ZASUMKERNEL = zasum.S - -SCOPYKERNEL = copy.S -DCOPYKERNEL = copy.S -CCOPYKERNEL = copy.S -ZCOPYKERNEL = copy.S - -SSWAPKERNEL = swap.S -DSWAPKERNEL = swap.S -CSWAPKERNEL = swap.S -ZSWAPKERNEL = swap.S - -ISAMAXKERNEL = iamax.S -IDAMAXKERNEL = iamax.S -ICAMAXKERNEL = izamax.S -IZAMAXKERNEL = izamax.S - -SNRM2KERNEL = nrm2.S -DNRM2KERNEL = nrm2.S -CNRM2KERNEL = znrm2.S -ZNRM2KERNEL = znrm2.S - -DDOTKERNEL = dot.S -ifneq ($(C_COMPILER), PGI) -SDOTKERNEL = ../generic/dot.c -else -SDOTKERNEL = dot.S -endif -ifneq ($(C_COMPILER), PGI) -CDOTKERNEL = zdot.S -ZDOTKERNEL = zdot.S -else -CDOTKERNEL = ../arm/zdot.c -ZDOTKERNEL = ../arm/zdot.c -endif -DSDOTKERNEL = dot.S - -DGEMM_BETA = dgemm_beta.S -SGEMM_BETA = sgemm_beta.S - -SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S -STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S - -SGEMMINCOPY = sgemm_ncopy_sve_v1.c -SGEMMITCOPY = sgemm_tcopy_sve_v1.c -SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S -SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S - -SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) -SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) -SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) -SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) - -STRMMUNCOPY_M = trmm_uncopy_sve_v1.c -STRMMLNCOPY_M = trmm_lncopy_sve_v1.c -STRMMUTCOPY_M = trmm_utcopy_sve_v1.c -STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c - -SSYMMUCOPY_M = symm_ucopy_sve.c -SSYMMLCOPY_M = symm_lcopy_sve.c - -DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S -DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S - -DGEMMINCOPY = dgemm_ncopy_sve_v1.c -DGEMMITCOPY = dgemm_tcopy_sve_v1.c -DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S -DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S - -DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) -DGEMMITCOPYOBJ = 
dgemm_itcopy$(TSUFFIX).$(SUFFIX) -DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) -DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) - -DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c -DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c -DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c -DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c - -DSYMMUCOPY_M = symm_ucopy_sve.c -DSYMMLCOPY_M = symm_lcopy_sve.c - -CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S -CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S - -CGEMMINCOPY = cgemm_ncopy_sve_v1.c -CGEMMITCOPY = cgemm_tcopy_sve_v1.c -CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c -CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c - -CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) -CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) -CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) -CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) - -CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c -CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c -CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c -CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c - -CHEMMLTCOPY_M = zhemm_ltcopy_sve.c -CHEMMUTCOPY_M = zhemm_utcopy_sve.c - -CSYMMUCOPY_M = zsymm_ucopy_sve.c -CSYMMLCOPY_M = zsymm_lcopy_sve.c - -ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S -ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S - -ZGEMMINCOPY = zgemm_ncopy_sve_v1.c -ZGEMMITCOPY = zgemm_tcopy_sve_v1.c -ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c -ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c - -ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) -ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) -ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) -ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) - -ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c -ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c -ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c -ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c - -ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c -ZHEMMUTCOPY_M = zhemm_utcopy_sve.c - -ZSYMMUCOPY_M = zsymm_ucopy_sve.c -ZSYMMLCOPY_M = zsymm_lcopy_sve.c +include 
$(KERNELDIR)/KERNEL.ARMV8SVE diff --git a/kernel/arm64/KERNEL.CORTEXX2 b/kernel/arm64/KERNEL.CORTEXX2 index bd25f7cd8..bc5999097 100644 --- a/kernel/arm64/KERNEL.CORTEXX2 +++ b/kernel/arm64/KERNEL.CORTEXX2 @@ -1,216 +1 @@ -SAMINKERNEL = ../arm/amin.c -DAMINKERNEL = ../arm/amin.c -CAMINKERNEL = ../arm/zamin.c -ZAMINKERNEL = ../arm/zamin.c - -SMAXKERNEL = ../arm/max.c -DMAXKERNEL = ../arm/max.c - -SMINKERNEL = ../arm/min.c -DMINKERNEL = ../arm/min.c - -ISAMINKERNEL = ../arm/iamin.c -IDAMINKERNEL = ../arm/iamin.c -ICAMINKERNEL = ../arm/izamin.c -IZAMINKERNEL = ../arm/izamin.c - -ISMAXKERNEL = ../arm/imax.c -IDMAXKERNEL = ../arm/imax.c - -ISMINKERNEL = ../arm/imin.c -IDMINKERNEL = ../arm/imin.c - -STRSMKERNEL_LN = trsm_kernel_LN_sve.c -STRSMKERNEL_LT = trsm_kernel_LT_sve.c -STRSMKERNEL_RN = trsm_kernel_RN_sve.c -STRSMKERNEL_RT = trsm_kernel_RT_sve.c - -DTRSMKERNEL_LN = trsm_kernel_LN_sve.c -DTRSMKERNEL_LT = trsm_kernel_LT_sve.c -DTRSMKERNEL_RN = trsm_kernel_RN_sve.c -DTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -TRSMCOPYLN_M = trsm_lncopy_sve.c -TRSMCOPYLT_M = trsm_ltcopy_sve.c -TRSMCOPYUN_M = trsm_uncopy_sve.c -TRSMCOPYUT_M = trsm_utcopy_sve.c - -CTRSMKERNEL_LN = trsm_kernel_LN_sve.c -CTRSMKERNEL_LT = trsm_kernel_LT_sve.c -CTRSMKERNEL_RN = trsm_kernel_RN_sve.c -CTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c -ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c -ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c -ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c - -ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c -ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c -ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c -ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c - - -SAMAXKERNEL = amax.S -DAMAXKERNEL = amax.S -CAMAXKERNEL = zamax.S -ZAMAXKERNEL = zamax.S - -SAXPYKERNEL = axpy.S -DAXPYKERNEL = axpy.S -CAXPYKERNEL = zaxpy.S -ZAXPYKERNEL = zaxpy.S - -SROTKERNEL = rot.S -DROTKERNEL = rot.S -CROTKERNEL = zrot.S -ZROTKERNEL = zrot.S - -SSCALKERNEL = scal.S -DSCALKERNEL = scal.S -CSCALKERNEL = zscal.S -ZSCALKERNEL = zscal.S - -SGEMVNKERNEL = 
gemv_n.S -DGEMVNKERNEL = gemv_n.S -CGEMVNKERNEL = zgemv_n.S -ZGEMVNKERNEL = zgemv_n.S - -SGEMVTKERNEL = gemv_t.S -DGEMVTKERNEL = gemv_t.S -CGEMVTKERNEL = zgemv_t.S -ZGEMVTKERNEL = zgemv_t.S - - -SASUMKERNEL = asum.S -DASUMKERNEL = asum.S -CASUMKERNEL = casum.S -ZASUMKERNEL = zasum.S - -SCOPYKERNEL = copy.S -DCOPYKERNEL = copy.S -CCOPYKERNEL = copy.S -ZCOPYKERNEL = copy.S - -SSWAPKERNEL = swap.S -DSWAPKERNEL = swap.S -CSWAPKERNEL = swap.S -ZSWAPKERNEL = swap.S - -ISAMAXKERNEL = iamax.S -IDAMAXKERNEL = iamax.S -ICAMAXKERNEL = izamax.S -IZAMAXKERNEL = izamax.S - -SNRM2KERNEL = nrm2.S -DNRM2KERNEL = nrm2.S -CNRM2KERNEL = znrm2.S -ZNRM2KERNEL = znrm2.S - -DDOTKERNEL = dot.S -ifneq ($(C_COMPILER), PGI) -SDOTKERNEL = ../generic/dot.c -else -SDOTKERNEL = dot.S -endif -ifneq ($(C_COMPILER), PGI) -CDOTKERNEL = zdot.S -ZDOTKERNEL = zdot.S -else -CDOTKERNEL = ../arm/zdot.c -ZDOTKERNEL = ../arm/zdot.c -endif -DSDOTKERNEL = dot.S - -DGEMM_BETA = dgemm_beta.S -SGEMM_BETA = sgemm_beta.S - -SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S -STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S - -SGEMMINCOPY = sgemm_ncopy_sve_v1.c -SGEMMITCOPY = sgemm_tcopy_sve_v1.c -SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S -SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S - -SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) -SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) -SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) -SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) - -STRMMUNCOPY_M = trmm_uncopy_sve_v1.c -STRMMLNCOPY_M = trmm_lncopy_sve_v1.c -STRMMUTCOPY_M = trmm_utcopy_sve_v1.c -STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c - -SSYMMUCOPY_M = symm_ucopy_sve.c -SSYMMLCOPY_M = symm_lcopy_sve.c - -DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S -DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S - -DGEMMINCOPY = dgemm_ncopy_sve_v1.c -DGEMMITCOPY = dgemm_tcopy_sve_v1.c -DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S -DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S - -DGEMMINCOPYOBJ = 
dgemm_incopy$(TSUFFIX).$(SUFFIX) -DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) -DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) -DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) - -DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c -DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c -DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c -DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c - -DSYMMUCOPY_M = symm_ucopy_sve.c -DSYMMLCOPY_M = symm_lcopy_sve.c - -CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S -CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S - -CGEMMINCOPY = cgemm_ncopy_sve_v1.c -CGEMMITCOPY = cgemm_tcopy_sve_v1.c -CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c -CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c - -CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) -CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) -CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) -CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) - -CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c -CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c -CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c -CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c - -CHEMMLTCOPY_M = zhemm_ltcopy_sve.c -CHEMMUTCOPY_M = zhemm_utcopy_sve.c - -CSYMMUCOPY_M = zsymm_ucopy_sve.c -CSYMMLCOPY_M = zsymm_lcopy_sve.c - -ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S -ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S - -ZGEMMINCOPY = zgemm_ncopy_sve_v1.c -ZGEMMITCOPY = zgemm_tcopy_sve_v1.c -ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c -ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c - -ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) -ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) -ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) -ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) - -ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c -ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c -ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c -ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c - -ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c -ZHEMMUTCOPY_M = zhemm_utcopy_sve.c - -ZSYMMUCOPY_M = zsymm_ucopy_sve.c 
-ZSYMMLCOPY_M = zsymm_lcopy_sve.c +include $(KERNELDIR)/KERNEL.ARMV8SVE From 9019bc494514a74c2042152cdca0a36adea7b42f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 4 Nov 2023 22:10:06 +0100 Subject: [PATCH 095/125] Use SkylakeX ?ASUM microkernel for Cooperlake/Sapphirerapids as well --- kernel/x86_64/casum.c | 2 +- kernel/x86_64/dasum.c | 2 +- kernel/x86_64/sasum.c | 2 +- kernel/x86_64/zasum.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/x86_64/casum.c b/kernel/x86_64/casum.c index 60feec0ce..e4d054311 100644 --- a/kernel/x86_64/casum.c +++ b/kernel/x86_64/casum.c @@ -4,7 +4,7 @@ #define ABS_K(a) ((a) > 0 ? (a) : (-(a))) #endif -#if defined(SKYLAKEX) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) #include "casum_microk_skylakex-2.c" #endif diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c index a9c40f38f..0147c6978 100644 --- a/kernel/x86_64/dasum.c +++ b/kernel/x86_64/dasum.c @@ -4,7 +4,7 @@ #define ABS_K(a) ((a) > 0 ? (a) : (-(a))) #endif -#if defined(SKYLAKEX) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) #include "dasum_microk_skylakex-2.c" #elif defined(HASWELL) || defined(ZEN) #include "dasum_microk_haswell-2.c" diff --git a/kernel/x86_64/sasum.c b/kernel/x86_64/sasum.c index 37a92468f..3f22cb97a 100644 --- a/kernel/x86_64/sasum.c +++ b/kernel/x86_64/sasum.c @@ -9,7 +9,7 @@ #endif -#if defined(SKYLAKEX) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) #include "sasum_microk_skylakex-2.c" #elif defined(HASWELL) || defined(ZEN) #include "sasum_microk_haswell-2.c" diff --git a/kernel/x86_64/zasum.c b/kernel/x86_64/zasum.c index 80e95a2c8..3f17ab1cf 100644 --- a/kernel/x86_64/zasum.c +++ b/kernel/x86_64/zasum.c @@ -4,7 +4,7 @@ #define ABS_K(a) ((a) > 0 ? 
(a) : (-(a))) #endif -#if defined(SKYLAKEX) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) #include "zasum_microk_skylakex-2.c" #endif From 04bc801999e8d6e6ed101a1ab8ec9720f271ad2c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 4 Nov 2023 23:48:59 +0100 Subject: [PATCH 096/125] (Re)apply fixes for supporting only a subset of precision types from PR 3915 --- kernel/Makefile.L3 | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 448e096a3..863f376e9 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -182,7 +182,7 @@ ifeq ($(BUILD_BFLOAT16),1) SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) endif -ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" SBLASOBJS += \ sgemm_beta$(TSUFFIX).$(SUFFIX) \ strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ @@ -191,7 +191,7 @@ SBLASOBJS += \ strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) endif -ifeq ($(BUILD_DOUBLE),1) +ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" DBLASOBJS += \ dgemm_beta$(TSUFFIX).$(SUFFIX) \ dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ @@ -207,7 +207,7 @@ QBLASOBJS += \ qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) -ifeq ($(BUILD_COMPLEX),1) +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" CBLASOBJS += \ ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ @@ -255,7 +255,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX) endif -ifeq ($(BUILD_SINGLE),1) +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" SBLASOBJS += \ strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \ strmm_ilnucopy$(TSUFFIX).$(SUFFIX) 
strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ @@ -264,10 +264,7 @@ SBLASOBJS += \ strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \ strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \ strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \ - strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) -endif -ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" -SBLASOBJS += \ + strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \ strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \ strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \ @@ -275,10 +272,7 @@ SBLASOBJS += \ strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \ strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \ strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \ - strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) -endif -ifeq ($(BUILD_SINGLE),1) -SBLASOBJS += \ + strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \ ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \ ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX) endif @@ -400,7 +394,7 @@ XBLASOBJS += \ ifeq ($(USE_GEMM3M), 1) -ifeq ($(BUILD_COMPLEX),1) +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" CBLASOBJS += \ cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ From 3a86fde5a7f762fe94196c6e090377721bc551b5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 4 Nov 2023 23:52:46 +0100 Subject: [PATCH 097/125] Delete misplaced file from PR 3915 after applying in the correct place --- Makefile.L3 | 5089 --------------------------------------------------- 1 file changed, 5089 deletions(-) delete mode 100644 Makefile.L3 diff --git 
a/Makefile.L3 b/Makefile.L3 deleted file mode 100644 index 76586d826..000000000 --- a/Makefile.L3 +++ /dev/null @@ -1,5089 +0,0 @@ -USE_GEMM3M = 0 -OS := $(shell uname) - -ifeq ($(ARCH), x86) -USE_GEMM3M = 1 -endif - -ifeq ($(ARCH), x86_64) -USE_GEMM3M = 1 -endif - -ifeq ($(ARCH), x86_64) -USE_DIRECT_SGEMM = 1 -endif - -ifeq ($(ARCH), ia64) -USE_GEMM3M = 1 -endif - -ifeq ($(ARCH), arm) -USE_TRMM = 1 -endif - -ifeq ($(ARCH), arm64) -USE_TRMM = 1 -endif - -ifeq ($(ARCH), riscv64) -USE_TRMM = 1 -endif - -ifneq ($(DYNAMIC_ARCH), 1) -ifeq ($(TARGET), GENERIC) -USE_TRMM = 1 -endif -endif - -ifeq ($(CORE), HASWELL) -USE_TRMM = 1 -endif - -ifeq ($(CORE), SKYLAKEX) -USE_TRMM = 1 -endif - -ifeq ($(CORE), COOPERLAKE) -USE_TRMM = 1 -endif - -ifeq ($(CORE), SAPPHIRERAPIDS) -USE_TRMM = 1 -endif - -ifeq ($(CORE), ZEN) -USE_TRMM = 1 -endif - -ifeq ($(CORE), POWER8) -ifeq ($(BINARY64),1) -USE_TRMM = 1 -endif -endif - -ifeq ($(CORE), POWER9) -USE_TRMM = 1 -endif - -ifeq ($(CORE), POWER10) -USE_TRMM = 1 -endif - -ifeq ($(ARCH), zarch) -USE_TRMM = 1 -endif - -ifeq ($(CORE), Z14) -USE_TRMM = 1 -endif - -ifdef USE_DIRECT_SGEMM -ifndef SGEMMDIRECTKERNEL -SGEMMDIRECTKERNEL = sgemm_direct_skylakex.c -SGEMMDIRECTPERFORMANT = sgemm_direct_performant.c -endif -endif - -ifeq ($(BUILD_BFLOAT16), 1) -ifndef SBGEMMKERNEL -SBGEMM_BETA = ../generic/gemm_beta.c -SBGEMMKERNEL = ../generic/gemmkernel_2x2.c -SBGEMMINCOPY = ../generic/gemm_ncopy_2.c -SBGEMMITCOPY = ../generic/gemm_tcopy_2.c -SBGEMMONCOPY = ../generic/gemm_ncopy_2.c -SBGEMMOTCOPY = ../generic/gemm_tcopy_2.c -SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX) -SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX) -SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX) -SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) -endif - -SBKERNELOBJS += \ - sbgemm_kernel$(TSUFFIX).$(SUFFIX) \ - $(SBGEMMINCOPYOBJ) $(SBGEMMITCOPYOBJ) \ - $(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ) -endif - -ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" 
-SKERNELOBJS += \ - sgemm_kernel$(TSUFFIX).$(SUFFIX) \ - sgemm_beta$(TSUFFIX).$(SUFFIX) \ - $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ - $(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ) - -ifdef USE_DIRECT_SGEMM -SKERNELOBJS += \ - sgemm_direct$(TSUFFIX).$(SUFFIX) \ - sgemm_direct_performant$(TSUFFIX).$(SUFFIX) -endif -endif - -ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" -DKERNELOBJS += \ - dgemm_beta$(TSUFFIX).$(SUFFIX) \ - dgemm_kernel$(TSUFFIX).$(SUFFIX) \ - $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ - $(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ) -endif - -QKERNELOBJS += \ - qgemm_kernel$(TSUFFIX).$(SUFFIX) \ - $(QGEMMINCOPYOBJ) $(QGEMMITCOPYOBJ) \ - $(QGEMMONCOPYOBJ) $(QGEMMOTCOPYOBJ) - -ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" -CKERNELOBJS += \ - cgemm_kernel_n$(TSUFFIX).$(SUFFIX) cgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ - cgemm_kernel_l$(TSUFFIX).$(SUFFIX) cgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ - $(CGEMMINCOPYOBJ) $(CGEMMITCOPYOBJ) \ - $(CGEMMONCOPYOBJ) $(CGEMMOTCOPYOBJ) -endif - -ifeq ($(BUILD_COMPLEX16),1) -ZKERNELOBJS += \ - zgemm_kernel_n$(TSUFFIX).$(SUFFIX) zgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ - zgemm_kernel_l$(TSUFFIX).$(SUFFIX) zgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ - $(ZGEMMINCOPYOBJ) $(ZGEMMITCOPYOBJ) \ - $(ZGEMMONCOPYOBJ) $(ZGEMMOTCOPYOBJ) -endif - -XKERNELOBJS += \ - xgemm_kernel_n$(TSUFFIX).$(SUFFIX) xgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ - xgemm_kernel_l$(TSUFFIX).$(SUFFIX) xgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ - $(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \ - $(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ) - -ifeq ($(BUILD_BFLOAT16),1) -SBBLASOBJS += $(SBKERNELOBJS) -endif -SBLASOBJS += $(SKERNELOBJS) -DBLASOBJS += $(DKERNELOBJS) -QBLASOBJS += $(QKERNELOBJS) -CBLASOBJS += $(CKERNELOBJS) -ZBLASOBJS += $(ZKERNELOBJS) -XBLASOBJS += $(XKERNELOBJS) - -ifeq ($(BUILD_BFLOAT16),1) -SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) -endif - -ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" -SBLASOBJS += \ - sgemm_beta$(TSUFFIX).$(SUFFIX) \ - strmm_kernel_LN$(TSUFFIX).$(SUFFIX) 
strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - strmm_kernel_RN$(TSUFFIX).$(SUFFIX) strmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - strsm_kernel_LN$(TSUFFIX).$(SUFFIX) strsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) -endif - -ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" -DBLASOBJS += \ - dgemm_beta$(TSUFFIX).$(SUFFIX) \ - dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) -endif - -QBLASOBJS += \ - qgemm_beta$(TSUFFIX).$(SUFFIX) \ - qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) - -ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" -CBLASOBJS += \ - ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ - ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) -endif -ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" -CBLASOBJS += \ - cgemm_beta$(TSUFFIX).$(SUFFIX) \ - ctrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - ctrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ - ctrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) -endif - -ifeq ($(BUILD_COMPLEX16),1) -ZBLASOBJS += \ - zgemm_beta$(TSUFFIX).$(SUFFIX) \ - ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - 
ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ - ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) \ - ztrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - ztrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ - ztrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) -endif - -XBLASOBJS += \ - xgemm_beta$(TSUFFIX).$(SUFFIX) \ - xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) xtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - xtrmm_kernel_LR$(TSUFFIX).$(SUFFIX) xtrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ - xtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - xtrmm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrmm_kernel_RC$(TSUFFIX).$(SUFFIX) \ - xtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - xtrsm_kernel_LR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ - xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) - -ifeq ($(USE_GEMM3M), 1) - -CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX) -ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX) -XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX) - -endif - -ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" -SBLASOBJS += \ - strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \ - strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - strmm_iutucopy$(TSUFFIX).$(SUFFIX) strmm_iutncopy$(TSUFFIX).$(SUFFIX) \ - strmm_iltucopy$(TSUFFIX).$(SUFFIX) strmm_iltncopy$(TSUFFIX).$(SUFFIX) \ - strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \ - strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \ - strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \ - 
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \ - strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \ - strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \ - strsm_iltucopy$(TSUFFIX).$(SUFFIX) strsm_iltncopy$(TSUFFIX).$(SUFFIX) \ - strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \ - strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \ - strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \ - strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \ - ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \ - ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX) -endif - -ifeq ($(BUILD_DOUBLE),1) -DBLASOBJS += \ - dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ - dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - dtrmm_iutucopy$(TSUFFIX).$(SUFFIX) dtrmm_iutncopy$(TSUFFIX).$(SUFFIX) \ - dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) \ - dtrmm_ounucopy$(TSUFFIX).$(SUFFIX) dtrmm_ounncopy$(TSUFFIX).$(SUFFIX) \ - dtrmm_olnucopy$(TSUFFIX).$(SUFFIX) dtrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ - dtrmm_outucopy$(TSUFFIX).$(SUFFIX) dtrmm_outncopy$(TSUFFIX).$(SUFFIX) \ - dtrmm_oltucopy$(TSUFFIX).$(SUFFIX) dtrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ - dtrsm_iunucopy$(TSUFFIX).$(SUFFIX) dtrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ - dtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - dtrsm_iutucopy$(TSUFFIX).$(SUFFIX) dtrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ - dtrsm_iltucopy$(TSUFFIX).$(SUFFIX) dtrsm_iltncopy$(TSUFFIX).$(SUFFIX) \ - dtrsm_ounucopy$(TSUFFIX).$(SUFFIX) dtrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ - dtrsm_olnucopy$(TSUFFIX).$(SUFFIX) dtrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ - dtrsm_outucopy$(TSUFFIX).$(SUFFIX) dtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ - 
dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ - dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ - dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX) -endif - -QBLASOBJS += \ - qtrmm_iunucopy$(TSUFFIX).$(SUFFIX) qtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ - qtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) qtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - qtrmm_iutucopy$(TSUFFIX).$(SUFFIX) qtrmm_iutncopy$(TSUFFIX).$(SUFFIX) \ - qtrmm_iltucopy$(TSUFFIX).$(SUFFIX) qtrmm_iltncopy$(TSUFFIX).$(SUFFIX) \ - qtrmm_ounucopy$(TSUFFIX).$(SUFFIX) qtrmm_ounncopy$(TSUFFIX).$(SUFFIX) \ - qtrmm_olnucopy$(TSUFFIX).$(SUFFIX) qtrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ - qtrmm_outucopy$(TSUFFIX).$(SUFFIX) qtrmm_outncopy$(TSUFFIX).$(SUFFIX) \ - qtrmm_oltucopy$(TSUFFIX).$(SUFFIX) qtrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ - qtrsm_iunucopy$(TSUFFIX).$(SUFFIX) qtrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ - qtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) qtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - qtrsm_iutucopy$(TSUFFIX).$(SUFFIX) qtrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ - qtrsm_iltucopy$(TSUFFIX).$(SUFFIX) qtrsm_iltncopy$(TSUFFIX).$(SUFFIX) \ - qtrsm_ounucopy$(TSUFFIX).$(SUFFIX) qtrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ - qtrsm_olnucopy$(TSUFFIX).$(SUFFIX) qtrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ - qtrsm_outucopy$(TSUFFIX).$(SUFFIX) qtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ - qtrsm_oltucopy$(TSUFFIX).$(SUFFIX) qtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ - qsymm_iutcopy$(TSUFFIX).$(SUFFIX) qsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ - qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) - -ifeq ($(BUILD_COMPLEX),1) -CBLASOBJS += \ - ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ - ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - ctrmm_iutucopy$(TSUFFIX).$(SUFFIX) ctrmm_iutncopy$(TSUFFIX).$(SUFFIX) \ - ctrmm_iltucopy$(TSUFFIX).$(SUFFIX) ctrmm_iltncopy$(TSUFFIX).$(SUFFIX) \ - ctrmm_ounucopy$(TSUFFIX).$(SUFFIX) ctrmm_ounncopy$(TSUFFIX).$(SUFFIX) \ - 
ctrmm_olnucopy$(TSUFFIX).$(SUFFIX) ctrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ - ctrmm_outucopy$(TSUFFIX).$(SUFFIX) ctrmm_outncopy$(TSUFFIX).$(SUFFIX) \ - ctrmm_oltucopy$(TSUFFIX).$(SUFFIX) ctrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ - csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \ - csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \ - chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \ - chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX) -endif -ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" -CBLASOBJS += \ - ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ - ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ - ctrsm_iltucopy$(TSUFFIX).$(SUFFIX) ctrsm_iltncopy$(TSUFFIX).$(SUFFIX) \ - ctrsm_ounucopy$(TSUFFIX).$(SUFFIX) ctrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ - ctrsm_olnucopy$(TSUFFIX).$(SUFFIX) ctrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ - ctrsm_outucopy$(TSUFFIX).$(SUFFIX) ctrsm_outncopy$(TSUFFIX).$(SUFFIX) \ - ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) -endif - -ifeq ($(BUILD_COMPLEX16),1) -ZBLASOBJS += \ - ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ - ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - ztrmm_iutucopy$(TSUFFIX).$(SUFFIX) ztrmm_iutncopy$(TSUFFIX).$(SUFFIX) \ - ztrmm_iltucopy$(TSUFFIX).$(SUFFIX) ztrmm_iltncopy$(TSUFFIX).$(SUFFIX) \ - ztrmm_ounucopy$(TSUFFIX).$(SUFFIX) ztrmm_ounncopy$(TSUFFIX).$(SUFFIX) \ - ztrmm_olnucopy$(TSUFFIX).$(SUFFIX) ztrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ - ztrmm_outucopy$(TSUFFIX).$(SUFFIX) ztrmm_outncopy$(TSUFFIX).$(SUFFIX) \ - ztrmm_oltucopy$(TSUFFIX).$(SUFFIX) ztrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ - ztrsm_iunucopy$(TSUFFIX).$(SUFFIX) ztrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ - ztrsm_ilnucopy$(TSUFFIX).$(SUFFIX) ztrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - 
ztrsm_iutucopy$(TSUFFIX).$(SUFFIX) ztrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ - ztrsm_iltucopy$(TSUFFIX).$(SUFFIX) ztrsm_iltncopy$(TSUFFIX).$(SUFFIX) \ - ztrsm_ounucopy$(TSUFFIX).$(SUFFIX) ztrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ - ztrsm_olnucopy$(TSUFFIX).$(SUFFIX) ztrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ - ztrsm_outucopy$(TSUFFIX).$(SUFFIX) ztrsm_outncopy$(TSUFFIX).$(SUFFIX) \ - ztrsm_oltucopy$(TSUFFIX).$(SUFFIX) ztrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ - zsymm_iutcopy$(TSUFFIX).$(SUFFIX) zsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ - zsymm_outcopy$(TSUFFIX).$(SUFFIX) zsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ - zhemm_iutcopy$(TSUFFIX).$(SUFFIX) zhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ - zhemm_outcopy$(TSUFFIX).$(SUFFIX) zhemm_oltcopy$(TSUFFIX).$(SUFFIX) -endif - -XBLASOBJS += \ - xtrmm_iunucopy$(TSUFFIX).$(SUFFIX) xtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ - xtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) xtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - xtrmm_iutucopy$(TSUFFIX).$(SUFFIX) xtrmm_iutncopy$(TSUFFIX).$(SUFFIX) \ - xtrmm_iltucopy$(TSUFFIX).$(SUFFIX) xtrmm_iltncopy$(TSUFFIX).$(SUFFIX) \ - xtrmm_ounucopy$(TSUFFIX).$(SUFFIX) xtrmm_ounncopy$(TSUFFIX).$(SUFFIX) \ - xtrmm_olnucopy$(TSUFFIX).$(SUFFIX) xtrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ - xtrmm_outucopy$(TSUFFIX).$(SUFFIX) xtrmm_outncopy$(TSUFFIX).$(SUFFIX) \ - xtrmm_oltucopy$(TSUFFIX).$(SUFFIX) xtrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ - xtrsm_iunucopy$(TSUFFIX).$(SUFFIX) xtrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ - xtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) xtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ - xtrsm_iutucopy$(TSUFFIX).$(SUFFIX) xtrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ - xtrsm_iltucopy$(TSUFFIX).$(SUFFIX) xtrsm_iltncopy$(TSUFFIX).$(SUFFIX) \ - xtrsm_ounucopy$(TSUFFIX).$(SUFFIX) xtrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ - xtrsm_olnucopy$(TSUFFIX).$(SUFFIX) xtrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ - xtrsm_outucopy$(TSUFFIX).$(SUFFIX) xtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ - xtrsm_oltucopy$(TSUFFIX).$(SUFFIX) xtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ - xsymm_iutcopy$(TSUFFIX).$(SUFFIX) 
xsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ - xsymm_outcopy$(TSUFFIX).$(SUFFIX) xsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ - xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ - xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX) - -ifeq ($(USE_GEMM3M), 1) - -ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" -CBLASOBJS += \ - cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ - cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ - cgemm3m_incopyi$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) \ - cgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) cgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) \ - cgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) cgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) \ - cgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) cgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) \ - csymm3m_iucopyb$(TSUFFIX).$(SUFFIX) csymm3m_oucopyb$(TSUFFIX).$(SUFFIX) \ - csymm3m_iucopyr$(TSUFFIX).$(SUFFIX) csymm3m_oucopyr$(TSUFFIX).$(SUFFIX) \ - csymm3m_iucopyi$(TSUFFIX).$(SUFFIX) csymm3m_oucopyi$(TSUFFIX).$(SUFFIX) \ - csymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) csymm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ - csymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) csymm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ - csymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) csymm3m_olcopyi$(TSUFFIX).$(SUFFIX) \ - chemm3m_iucopyb$(TSUFFIX).$(SUFFIX) chemm3m_oucopyb$(TSUFFIX).$(SUFFIX) \ - chemm3m_iucopyr$(TSUFFIX).$(SUFFIX) chemm3m_oucopyr$(TSUFFIX).$(SUFFIX) \ - chemm3m_iucopyi$(TSUFFIX).$(SUFFIX) chemm3m_oucopyi$(TSUFFIX).$(SUFFIX) \ - chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ - chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ - chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) -endif - -ifeq ($(BUILD_COMPLEX16),1) -ZBLASOBJS += \ - zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ - zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ - zgemm3m_incopyi$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) \ - 
zgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) zgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) \ - zgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) zgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) \ - zgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) zgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) \ - zsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) zsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) \ - zsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) zsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) \ - zsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) zsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) \ - zsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) zsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ - zsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) zsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ - zsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) zsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) \ - zhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) zhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) \ - zhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) zhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) \ - zhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) zhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) \ - zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ - zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ - zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) -endif - -XBLASOBJS += \ - xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ - xgemm3m_incopyr$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ - xgemm3m_incopyi$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) \ - xgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) xgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) \ - xgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) xgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) \ - xgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) xgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) \ - xsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) xsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) \ - xsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) xsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) \ - xsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) xsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) \ - xsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) xsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ - xsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) xsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ - 
xsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) xsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) \ - xhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) xhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) \ - xhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) xhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) \ - xhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) xhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) \ - xhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) xhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ - xhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) xhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ - xhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) xhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) - -endif - -###### BLAS small matrix optimization ##### -ifeq ($(SMALL_MATRIX_OPT), 1) - -ifeq ($(BUILD_BFLOAT16),1) -SBBLASOBJS += \ - sbgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) \ - sbgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) sbgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) \ - sbgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) sbgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) \ - sbgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) sbgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) \ - sbgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) sbgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) -endif - -SBLASOBJS += \ - sgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) \ - sgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) sgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) \ - sgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) sgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) \ - sgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) sgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) \ - sgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) sgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) - -DBLASOBJS += \ - dgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) \ - dgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) dgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) \ - dgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) dgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) \ - dgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) dgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) \ - dgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) dgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) - -CBLASOBJS += \ - 
cgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_nr$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_nc$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_tr$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_tc$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_rn$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_rt$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_rr$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_rc$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_cn$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_ct$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_cr$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_cc$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_b0_nr$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_b0_nc$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_b0_tr$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_b0_tc$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_b0_rn$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_b0_rt$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_b0_rr$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_b0_rc$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_b0_cn$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_b0_ct$(TSUFFIX).$(SUFFIX) \ - cgemm_small_kernel_b0_cr$(TSUFFIX).$(SUFFIX) cgemm_small_kernel_b0_cc$(TSUFFIX).$(SUFFIX) - -ZBLASOBJS += \ - zgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_nr$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_nc$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_tr$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_tc$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_rn$(TSUFFIX).$(SUFFIX) 
zgemm_small_kernel_rt$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_rr$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_rc$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_cn$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_ct$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_cr$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_cc$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_b0_nr$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_b0_nc$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_b0_tr$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_b0_tc$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_b0_rn$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_b0_rt$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_b0_rr$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_b0_rc$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_b0_cn$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_b0_ct$(TSUFFIX).$(SUFFIX) \ - zgemm_small_kernel_b0_cr$(TSUFFIX).$(SUFFIX) zgemm_small_kernel_b0_cc$(TSUFFIX).$(SUFFIX) - -endif - -###### BLAS extensions ##### - -ifeq ($(BUILD_SINGLE),1) -SBLASOBJS += \ - somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ - simatcopy_k_cn$(TSUFFIX).$(SUFFIX) simatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - simatcopy_k_ct$(TSUFFIX).$(SUFFIX) simatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ - sgeadd_k$(TSUFFIX).$(SUFFIX) -endif -ifeq ($(BUILD_DOUBLE),1) -DBLASOBJS += \ - domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ - dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ - dgeadd_k$(TSUFFIX).$(SUFFIX) -endif - -ifeq ($(BUILD_COMPLEX),1) -CBLASOBJS += \ - comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - 
comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ - comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ - comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ - cimatcopy_k_cn$(TSUFFIX).$(SUFFIX) cimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - cimatcopy_k_ct$(TSUFFIX).$(SUFFIX) cimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ - cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ - cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ - cgeadd_k$(TSUFFIX).$(SUFFIX) -endif - -ifeq ($(BUILD_COMPLEX16),1) -ZBLASOBJS += \ - zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ - zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ - zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ - zimatcopy_k_cn$(TSUFFIX).$(SUFFIX) zimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - zimatcopy_k_ct$(TSUFFIX).$(SUFFIX) zimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ - zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ - zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ - zgeadd_k$(TSUFFIX).$(SUFFIX) -endif - -ifeq ($(BUILD_BFLOAT16), 1) -SBGEMMINCOPYOBJ_P = $(SBGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -SBGEMMITCOPYOBJ_P = $(SBGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -SBGEMMONCOPYOBJ_P = $(SBGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -SBGEMMOTCOPYOBJ_P = $(SBGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -endif - -SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -SGEMMOTCOPYOBJ_P = $(SGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -DGEMMINCOPYOBJ_P = $(DGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -DGEMMITCOPYOBJ_P = $(DGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -DGEMMONCOPYOBJ_P = $(DGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -DGEMMOTCOPYOBJ_P = 
$(DGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -QGEMMINCOPYOBJ_P = $(QGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -QGEMMITCOPYOBJ_P = $(QGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -QGEMMONCOPYOBJ_P = $(QGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -QGEMMOTCOPYOBJ_P = $(QGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -CGEMMINCOPYOBJ_P = $(CGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -CGEMMITCOPYOBJ_P = $(CGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -CGEMMONCOPYOBJ_P = $(CGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -CGEMMOTCOPYOBJ_P = $(CGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -ZGEMMINCOPYOBJ_P = $(ZGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -ZGEMMITCOPYOBJ_P = $(ZGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -ZGEMMONCOPYOBJ_P = $(ZGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -ZGEMMOTCOPYOBJ_P = $(ZGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -XGEMMINCOPYOBJ_P = $(XGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -XGEMMITCOPYOBJ_P = $(XGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) -XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) - -ifeq ($(BUILD_BFLOAT16),1) -$(KDIR)sbgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ -endif - -$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)dgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)qgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMM_BETA) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)cgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_BETA) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX $< -o $@ - -$(KDIR)zgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_BETA) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX $< -o $@ - -$(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA) - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@ - -ifeq ($(ARCH), E2K) -USE_TRMM = 1 -endif - - -ifeq ($(BUILD_BFLOAT16), 1) - 
-$(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY) - -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s - m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@ - rm sbgemmotcopy.s sbgemmotcopy_nomacros.s -else - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ -endif - -ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) - -$(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s - m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@ - rm sbgemmitcopy.s sbgemmitcopy_nomacros.s -else - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ -endif - -endif -endif - -$(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s - m4 sgemmotcopy.s > sgemmotcopy_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@ - rm sgemmotcopy.s sgemmotcopy_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ -endif - - -ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) - -$(KDIR)$(SGEMMINCOPYOBJ) : $(KERNELDIR)/$(SGEMMINCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s - m4 sgemmitcopy.s > sgemmitcopy_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@ - rm sgemmitcopy.s 
sgemmitcopy_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ -endif - -endif - -$(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s - m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@ - rm dgemm_ncopy.s dgemm_ncopy_nomacros.s -else - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ -endif - -$(KDIR)$(DGEMMOTCOPYOBJ) : $(KERNELDIR)/$(DGEMMOTCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) - -$(KDIR)$(DGEMMINCOPYOBJ) : $(KERNELDIR)/$(DGEMMINCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s - m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@ - rm dgemm_itcopy.s dgemm_itcopy_nomacros.s -else - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ -endif - -endif - -ifdef EXPRECISION - -$(KDIR)$(QGEMMONCOPYOBJ) : $(KERNELDIR)/$(QGEMMONCOPY) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(QGEMMOTCOPYOBJ) : $(KERNELDIR)/$(QGEMMOTCOPY) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(QGEMM_UNROLL_M), $(QGEMM_UNROLL_N)) - -$(KDIR)$(QGEMMINCOPYOBJ) : $(KERNELDIR)/$(QGEMMINCOPY) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(QGEMMITCOPYOBJ) : $(KERNELDIR)/$(QGEMMITCOPY) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -endif - -endif - -$(KDIR)$(CGEMMONCOPYOBJ) : $(KERNELDIR)/$(CGEMMONCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(CGEMMOTCOPYOBJ) : $(KERNELDIR)/$(CGEMMOTCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) - -$(KDIR)$(CGEMMINCOPYOBJ) : $(KERNELDIR)/$(CGEMMINCOPY) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - 
-$(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -S $< -o - > cgemm_itcopy.s - m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@ - rm cgemm_itcopy.s cgemm_itcopy_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ -endif - -endif - -$(KDIR)$(ZGEMMONCOPYOBJ) : $(KERNELDIR)/$(ZGEMMONCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(ZGEMMOTCOPYOBJ) : $(KERNELDIR)/$(ZGEMMOTCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) - -$(KDIR)$(ZGEMMINCOPYOBJ) : $(KERNELDIR)/$(ZGEMMINCOPY) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s - m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@ - rm zgemm_itcopy.s zgemm_itcopy_nomacros.s -else - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ -endif - -endif - -ifdef EXPRECISION - -$(KDIR)$(XGEMMONCOPYOBJ) : $(KERNELDIR)/$(XGEMMONCOPY) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(XGEMMOTCOPYOBJ) : $(KERNELDIR)/$(XGEMMOTCOPY) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(XGEMM_UNROLL_M), $(XGEMM_UNROLL_N)) - -$(KDIR)$(XGEMMINCOPYOBJ) : $(KERNELDIR)/$(XGEMMINCOPY) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)$(XGEMMITCOPYOBJ) : $(KERNELDIR)/$(XGEMMITCOPY) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -endif - -endif - -$(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s - m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@ - rm sgemm_kernel$(TSUFFIX).s 
sgemm_kernel$(TSUFFIX)_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ -endif - -ifdef USE_DIRECT_SGEMM -$(KDIR)sgemm_direct_performant$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTPERFORMANT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ -$(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ -endif - -ifeq ($(BUILD_BFLOAT16), 1) - -$(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s - m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@ - rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s -else - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ -endif -endif - -$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s - m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@ - rm dgemm_kernel$(TSUFFIX).s dgemm_kernel$(TSUFFIX)_nomacros.s -else - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ -endif - -$(KDIR)qgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEPEND) - $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s - m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@ - rm cgemm_kernel_n.s cgemm_kernel_n_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ -endif - -$(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s - m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@ - rm cgemm_kernel_l.s cgemm_kernel_l_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN $< -o $@ -endif - -$(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s - m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ - rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $@ -endif - -$(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s - m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@ - rm cgemm_kernel_b.s cgemm_kernel_b_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@ -endif - -$(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s - m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@ - rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s -else ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ -endif - -$(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s - m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s - $(CC) 
$(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@ - rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s -else ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@ -else - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@ -endif - -$(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s - m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@ - rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s -else ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@ -else - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@ -endif - -$(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s - m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@ - rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s -else ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@ -else - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@ -endif - -$(KDIR)xgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)xgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCN $< -o $@ - -$(KDIR)xgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DNC $< -o $@ - -$(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@ - - -ifdef USE_TRMM -$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(STRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s - m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@ - rm strmmkernel_ln.s strmmkernel_ln_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ -endif - -$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s - m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@ - rm strmmkernel_lt.s strmmkernel_lt_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ -endif - -$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s - m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@ - rm strmmkernel_rn.s strmmkernel_rn_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ -endif - -$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s - m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ - rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -endif - -$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S 
-DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s - m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@ - rm dtrmm_kernel_ln.s dtrmm_kernel_ln_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ -endif - -$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s - m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@ - rm dtrmm_kernel_lt.s dtrmm_kernel_lt_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ -endif - -$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s - m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@ - rm dtrmm_kernel_rn.s dtrmm_kernel_rn_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ -endif - -$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s - m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@ - rm dtrmm_kernel_rt.s dtrmm_kernel_rt_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -endif - -$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - 
-$(KDIR)qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ - -$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s - m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@ - rm ctrmm_kernel_ln.s ctrmm_kernel_ln_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ -endif - -$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s - m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@ - rm ctrmm_kernel_lt.s ctrmm_kernel_lt_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ -endif - -$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s - m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@ - rm ctrmm_kernel_lr.s ctrmm_kernel_lr_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT 
-UTRANSA -DCONJ -DCN $< -o $@ -endif - -$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s - m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@ - rm ctrmm_kernel_lc_nomacros.s ctrmm_kernel_lc.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ -endif - -$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s - m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@ - rm ctrmm_kernel_rn.s ctrmm_kernel_rn_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ -endif - -$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s - m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@ - rm ctrmm_kernel_rt.s ctrmm_kernel_rt_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ -endif - -$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s - m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@ - rm ctrmm_kernel_rr.s 
ctrmm_kernel_rr_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ -endif - -$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s - m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@ - rm ctrmm_kernel_RC.s ctrmm_kernel_RC_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ -endif - -$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s - m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@ - rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s -else ifeq ($(CORE), SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ -endif - -$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s - m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@ - rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s -else ifeq ($(CORE), SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ -endif - 
-$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s - m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@ - rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s -else ifeq ($(CORE), SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ -endif - -$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s - m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@ - rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s -else ifeq ($(CORE), SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ -endif - -$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s - m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@ - rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s -else ifeq ($(CORE), SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ -endif - 
-$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s - m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@ - rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s -else ifeq ($(CORE), SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ -endif - -$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s - m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@ - rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s -else ifeq ($(CORE), SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ -endif - -$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s - m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@ - rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s -else ifeq ($(CORE), SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ -endif 
- -else -$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - -$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - -$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s - m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ - rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -endif - -$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - -$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - -$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL 
-DXDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ - -$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) -ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ -endif - -$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) -ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c 
-DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ -endif -$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) -ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ -endif -$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) -ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ -endif -$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) -ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ -endif -$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) -ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ -endif -$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) -ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ -endif -$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) -ifeq ($(CORE),SANDYBRIDGE) - $(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL 
-DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ -endif -endif - - - - -$(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)xtrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)xtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)xtrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)cgemm3m_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM3MKERNEL) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)zgemm3m_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM3MKERNEL) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)xgemm3m_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM3MKERNEL) - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)strsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_LN) $(STRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - 
-$(KDIR)strsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_LT) $(STRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)strsm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_RN) $(STRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)strsm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_RT) $(STRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LN) $(DTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s - m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@ - rm dtrsm_kernel_lt.s dtrsm_kernel_lt_nomacros.s -else - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o $@ -endif - -$(KDIR)dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_RN) $(DTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_RT) $(DTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QTRSMKERNEL_LN) $(QTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DXDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QTRSMKERNEL_LT) $(QTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DXDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(QTRSMKERNEL_RN) $(QTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DXDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QTRSMKERNEL_RT) $(QTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DXDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_LN) $(CTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_LT) $(CTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_LN) $(CTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -DUPPER -DLN -DCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_LT) $(CTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -UUPPER -DLT -DCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_RN) $(CTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_RT) $(CTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_RN) $(CTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -DUPPER -DRN -DCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_RT) $(CTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -UUPPER -DRT -DCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_LN) $(ZTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_LT) $(ZTRSMDEPEND) - $(CC) -c 
$(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_LN) $(ZTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -DUPPER -DLN -DCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_LT) $(ZTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -UUPPER -DLT -DCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_RN) $(ZTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_RT) $(ZTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_RN) $(ZTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -DUPPER -DRN -DCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_RT) $(ZTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -UUPPER -DRT -DCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_LN) $(XTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_LT) $(XTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_LN) $(XTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -DUPPER -DLN -DCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_LT) $(XTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DLT -DCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RN) $(XTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -DUPPER -DRN 
-UCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RT) $(XTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RN) $(XTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -DUPPER -DRN -DCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RT) $(XTRSMDEPEND) - $(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DRT -DCONJ $< -o $@ - - -ifdef STRMMUNCOPY_M -$(KDIR)strmm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMUNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMUNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)strmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef STRMMLNCOPY_M -$(KDIR)strmm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMLNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_ilnncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMLNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)strmm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE 
-UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -ifdef STRMMUTCOPY_M -$(KDIR)strmm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)strmm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef STRMMLTCOPY_M -$(KDIR)strmm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)strmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -$(KDIR)strmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strmm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_N).c 
- $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)strmm_outucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_outncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strmm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -ifdef DTRMMUNCOPY_M -$(KDIR)dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMUNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMUNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef DTRMMLNCOPY_M -$(KDIR)dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMLNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - 
-$(KDIR)dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMLNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -ifdef DTRMMUTCOPY_M -$(KDIR)dtrmm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)dtrmm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef DTRMMLTCOPY_M -$(KDIR)dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE 
-UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -$(KDIR)dtrmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_outucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_outncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) 
$(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_outucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_outncopy$(TSUFFIX).$(SUFFIX) : 
generic/trmm_utcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -ifdef CTRMMUNCOPY_M -$(KDIR)ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMUNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMUNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef CTRMMLNCOPY_M -$(KDIR)ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMLNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMLNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o 
$@ -endif - -ifdef CTRMMUTCOPY_M -$(KDIR)ctrmm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)ctrmm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef CTRMMLTCOPY_M -$(KDIR)ctrmm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)ctrmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -$(KDIR)ctrmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) 
$(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_outucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_outncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -ifdef ZTRMMUNCOPY_M -$(KDIR)ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMUNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMUNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef ZTRMMLNCOPY_M -$(KDIR)ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMLNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(ZTRMMLNCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -ifdef ZTRMMUTCOPY_M -$(KDIR)ztrmm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)ztrmm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef ZTRMMLTCOPY_M -$(KDIR)ztrmm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)ztrmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o 
$@ -endif - -$(KDIR)ztrmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_outucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_outncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) 
-DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_uncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_lncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_outucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_outncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_utcopy_$(XGEMM_UNROLL_N).c 
- $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrmm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ssymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)ssymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@ - -ifdef SSYMMUCOPY_M -$(KDIR)ssymm_iutcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYMMUCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@ -else -$(KDIR)ssymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@ -endif - -ifdef SSYMMLCOPY_M -$(KDIR)ssymm_iltcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYMMLCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@ -else -$(KDIR)ssymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@ -endif - -$(KDIR)dsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)dsymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@ - -ifdef DSYMMUCOPY_M -$(KDIR)dsymm_iutcopy$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(DSYMMUCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@ -else -$(KDIR)dsymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@ -endif - -ifdef DSYMMLCOPY_M -$(KDIR)dsymm_iltcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DSYMMLCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@ -else -$(KDIR)dsymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@ -endif - -$(KDIR)qsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)qsymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@ - -$(KDIR)qsymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@ - -$(KDIR)qsymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@ - -$(KDIR)csymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_ucopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)csymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_lcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER $< -o $@ - -ifdef CSYMMUCOPY_M -$(KDIR)csymm_iutcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CSYMMUCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER $< -o $@ -else -$(KDIR)csymm_iutcopy$(TSUFFIX).$(SUFFIX) : 
generic/zsymm_ucopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER $< -o $@ -endif - -ifdef CSYMMLCOPY_M -$(KDIR)csymm_iltcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CSYMMLCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER $< -o $@ -else -$(KDIR)csymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_lcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER $< -o $@ -endif - -$(KDIR)zsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_ucopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)zsymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_lcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER $< -o $@ - -ifdef ZSYMMUCOPY_M -$(KDIR)zsymm_iutcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZSYMMUCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER $< -o $@ -else -$(KDIR)zsymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_ucopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER $< -o $@ -endif - -ifdef ZSYMMLCOPY_M -$(KDIR)zsymm_iltcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZSYMMLCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER $< -o $@ -else -$(KDIR)zsymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_lcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER $< -o $@ -endif - -$(KDIR)xsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_ucopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)xsymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_lcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER $< -o $@ - 
-$(KDIR)xsymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_ucopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER $< -o $@ - -$(KDIR)xsymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/zsymm_lcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER $< -o $@ - -$(KDIR)chemm_outcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER $< -ULOWER -o $@ - -$(KDIR)chemm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER $< -DLOWER -o $@ - -ifdef CHEMMUTCOPY_M -$(KDIR)chemm_iutcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CHEMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER $< -ULOWER -o $@ -else -$(KDIR)chemm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER $< -ULOWER -o $@ -endif - -ifdef CHEMMLTCOPY_M -$(KDIR)chemm_iltcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CHEMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@ -else -$(KDIR)chemm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@ -endif - -$(KDIR)zhemm_outcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER $< -ULOWER -o $@ - -$(KDIR)zhemm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER $< -DLOWER -o $@ - -ifdef ZHEMMUTCOPY_M -$(KDIR)zhemm_iutcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZHEMMUTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER $< -ULOWER -o $@ 
-else -$(KDIR)zhemm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER $< -ULOWER -o $@ -endif - -ifdef ZHEMMLTCOPY_M -$(KDIR)zhemm_iltcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZHEMMLTCOPY_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@ -else -$(KDIR)zhemm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@ -endif - -$(KDIR)xhemm_outcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER $< -ULOWER -o $@ - -$(KDIR)xhemm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER $< -DLOWER -o $@ - -$(KDIR)xhemm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER $< -ULOWER -o $@ - -$(KDIR)xhemm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@ - -$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : 
generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : 
generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : 
generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE 
-DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) 
$(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - 
-$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) 
$(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : 
generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - 
-$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) 
$(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -ifdef TRSMCOPYUN_M -$(KDIR)strsm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYUN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYUN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)strsm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef TRSMCOPYLN_M -$(KDIR)strsm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYLN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_ilnncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYLN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)strsm_ilnucopy$(TSUFFIX).$(SUFFIX) : 
generic/trsm_lncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -ifdef TRSMCOPYUT_M -$(KDIR)strsm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYUT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYUT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)strsm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef TRSMCOPYLT_M -$(KDIR)strsm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYLT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYLT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)strsm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -$(KDIR)strsm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o 
$@ - -$(KDIR)strsm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strsm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)strsm_outucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_outncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strsm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -ifdef TRSMCOPYUN_M -$(KDIR)dtrsm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYUN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYUN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)dtrsm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX 
-UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef TRSMCOPYLN_M -$(KDIR)dtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYLN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYLN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)dtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -ifdef TRSMCOPYUT_M -$(KDIR)dtrsm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYUT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYUT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)dtrsm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef TRSMCOPYLT_M -$(KDIR)dtrsm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYLT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TRSMCOPYLT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)dtrsm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(DGEMM_UNROLL_M).c - $(CC) -c 
$(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -$(KDIR)dtrsm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_outucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_outncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_iunncopy$(TSUFFIX).$(SUFFIX) : 
generic/trsm_uncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_lncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - 
-$(KDIR)qtrsm_outucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_outncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_utcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trsm_ltcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -ifdef ZTRSMCOPYUN_M -$(KDIR)ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYUN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYUN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef ZTRSMCOPYLN_M -$(KDIR)ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYLN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYLN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) 
-UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -ifdef ZTRSMCOPYUT_M -$(KDIR)ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYUT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYUT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef ZTRSMCOPYLT_M -$(KDIR)ctrsm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYLT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYLT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)ctrsm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -$(KDIR)ctrsm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_ounncopy$(TSUFFIX).$(SUFFIX) : 
generic/ztrsm_uncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_outucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_outncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -ifdef ZTRSMCOPYUN_M -$(KDIR)ztrsm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYUN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYUN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)ztrsm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - 
-ifdef ZTRSMCOPYLN_M -$(KDIR)ztrsm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYLN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_ilnncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYLN_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)ztrsm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -ifdef ZTRSMCOPYUT_M -$(KDIR)ztrsm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYUT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYUT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -else -$(KDIR)ztrsm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ -endif - -ifdef ZTRSMCOPYLT_M -$(KDIR)ztrsm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYLT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRSMCOPYLT_M) - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -else -$(KDIR)ztrsm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) 
$(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ -endif - -$(KDIR)ztrsm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_outucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_outncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_iunncopy$(TSUFFIX).$(SUFFIX) : 
generic/ztrsm_uncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_ounncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_uncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_olnucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_olnncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_lncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ 
- -$(KDIR)xtrsm_outucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_outncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - - -$(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -ifeq ($(BUILD_BFLOAT16),1) -$(KDIR)sbgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA) - $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ -endif - -$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)qgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMM_BETA) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)cgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMM_BETA) - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX $< -o $@ - -$(KDIR)zgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMM_BETA) - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX $< -o $@ - -$(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA) - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@ - - -ifeq ($(BUILD_BFLOAT16), 1) -$(SBGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMONCOPY) - $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -$(SBGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMOTCOPY) - $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) -$(SBGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMINCOPY) 
- $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -$(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY) - $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -endif -endif - -$(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(SGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMOTCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) - -$(SGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMINCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(SGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMITCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -endif - -$(DGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(DGEMMONCOPY) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(DGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(DGEMMOTCOPY) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) - -$(DGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(DGEMMINCOPY) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(DGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(DGEMMITCOPY) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -endif - -ifdef EXPRECISION - -$(QGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(QGEMMONCOPY) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(QGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(QGEMMOTCOPY) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(QGEMM_UNROLL_M), $(QGEMM_UNROLL_N)) - -$(QGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(QGEMMINCOPY) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(QGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(QGEMMITCOPY) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -endif - -endif - -$(CGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(CGEMMONCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(CGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(CGEMMOTCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) - -$(CGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(CGEMMINCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(CGEMMITCOPYOBJ_P) : 
$(KERNELDIR)/$(CGEMMITCOPY) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -endif - -$(ZGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(ZGEMMONCOPY) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(ZGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(ZGEMMOTCOPY) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) - -$(ZGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(ZGEMMINCOPY) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(ZGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(ZGEMMITCOPY) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -endif - -ifdef EXPRECISION - -$(XGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(XGEMMONCOPY) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(XGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(XGEMMOTCOPY) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -ifneq ($(XGEMM_UNROLL_M), $(XGEMM_UNROLL_N)) - -$(XGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(XGEMMINCOPY) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(XGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(XGEMMITCOPY) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -endif - -endif - - -ifeq ($(BUILD_BFLOAT16), 1) -$(KDIR)sbgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND) - $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ -endif - -$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) - $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)dgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) - $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)qgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEPEND) - $(CC) $(PFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)cgemm_kernel_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)cgemm_kernel_l$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DCN $< -o $@ - -$(KDIR)cgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : 
$(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) -ifeq ($(OS), AIX) - $(CC) $(PFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s - m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ - rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s -else - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $@ -endif - -$(KDIR)cgemm_kernel_b$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@ - -$(KDIR)zgemm_kernel_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)zgemm_kernel_l$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@ - -$(KDIR)zgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@ - -$(KDIR)zgemm_kernel_b$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@ - -$(KDIR)xgemm_kernel_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)xgemm_kernel_l$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DCN $< -o $@ - -$(KDIR)xgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DNC $< -o $@ - -$(KDIR)xgemm_kernel_b$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@ - -$(KDIR)strmm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - -$(KDIR)strmm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - 
-$(KDIR)strmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) -ifeq ($(OS), AIX) - $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s - m4 strmmkernel_rn.s > strmm_kernel_rt_nomacros.s - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ - rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s -else - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -endif - -$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - -$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - -$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ - -$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL 
-UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) - $(CC) $(PFLAGS) -c 
-DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_LR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)xtrmm_kernel_LC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)xtrmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_RR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)xtrmm_kernel_RC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) - $(CC) $(PFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)cgemm3m_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMM3MKERNEL) - $(CC) 
$(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)zgemm3m_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMM3MKERNEL) - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)xgemm3m_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM3MKERNEL) - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)strsm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_LN) $(STRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)strsm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_LT) $(STRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)strsm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_RN) $(STRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)strsm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(STRSMKERNEL_RT) $(STRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -UDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)dtrsm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LN) $(DTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)dtrsm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_RN) $(DTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)dtrsm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_RT) $(DTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)qtrsm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QTRSMKERNEL_LN) $(QTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -DXDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)qtrsm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QTRSMKERNEL_LT) $(QTRSMDEPEND) 
- $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -DXDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)qtrsm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QTRSMKERNEL_RN) $(QTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -DXDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)qtrsm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QTRSMKERNEL_RT) $(QTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -UCOMPLEX -DXDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_LN) $(CTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_LT) $(CTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_LR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_LN) $(CTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -DUPPER -DLN -DCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_LC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_LT) $(CTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -UUPPER -DLT -DCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_RN) $(CTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_RT) $(CTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_RR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_RN) $(CTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -DUPPER -DRN -DCONJ $< -o $@ - -$(KDIR)ctrsm_kernel_RC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CTRSMKERNEL_RT) $(CTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -UDOUBLE -UUPPER -DRT -DCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_LN) $(ZTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX 
-DDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_LT) $(ZTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_LR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_LN) $(ZTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -DUPPER -DLN -DCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_LC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_LT) $(ZTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -UUPPER -DLT -DCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_RN) $(ZTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_RT) $(ZTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_RR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_RN) $(ZTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -DUPPER -DRN -DCONJ $< -o $@ - -$(KDIR)ztrsm_kernel_RC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZTRSMKERNEL_RT) $(ZTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DDOUBLE -UUPPER -DRT -DCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_LN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_LN) $(XTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -DUPPER -DLN -UCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_LT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_LT) $(XTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DLT -UCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_LR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_LN) $(XTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -DUPPER -DLN -DCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_LC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_LT) $(XTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DLT -DCONJ $< -o $@ - 
-$(KDIR)xtrsm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RN) $(XTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -DUPPER -DRN -UCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RT) $(XTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DRT -UCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_RR$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RN) $(XTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -DUPPER -DRN -DCONJ $< -o $@ - -$(KDIR)xtrsm_kernel_RC$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RT) $(XTRSMDEPEND) - $(CC) -c $(PFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DRT -DCONJ $< -o $@ - - -$(KDIR)strmm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strmm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)strmm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strmm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT 
$< -o $@ - -$(KDIR)strmm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)strmm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strmm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)strmm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strmm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strmm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strmm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) 
-DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(DGEMM_UNROLL_N).c - $(CC) -c 
$(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrmm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrmm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_iltncopy$(TSUFFIX).$(PSUFFIX) : 
generic/trmm_ltcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_uncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_lncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_utcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrmm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrmm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/trmm_ltcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - 
-$(KDIR)ctrmm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) 
-UDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrmm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrmm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_M).c 
- $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrmm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrmm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - 
-$(KDIR)xtrmm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_uncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_lncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) 
$(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrmm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrmm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrmm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ssymm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_ucopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)ssymm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_lcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@ - -$(KDIR)ssymm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_ucopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@ - -$(KDIR)ssymm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_lcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@ - -$(KDIR)dsymm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)dsymm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@ - -$(KDIR)dsymm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@ - -$(KDIR)dsymm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) 
$(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@ - -$(KDIR)qsymm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_ucopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)qsymm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_lcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@ - -$(KDIR)qsymm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_ucopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@ - -$(KDIR)qsymm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/symm_lcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@ - -$(KDIR)csymm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_ucopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)csymm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_lcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER $< -o $@ - -$(KDIR)csymm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_ucopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER $< -o $@ - -$(KDIR)csymm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_lcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER $< -o $@ - -$(KDIR)zsymm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_ucopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)zsymm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_lcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER $< -o $@ - -$(KDIR)zsymm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_ucopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER 
-ULOWER $< -o $@ - -$(KDIR)zsymm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_lcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER $< -o $@ - -$(KDIR)xsymm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_ucopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER $< -o $@ - -$(KDIR)xsymm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_lcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER $< -o $@ - -$(KDIR)xsymm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_ucopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER $< -o $@ - -$(KDIR)xsymm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/zsymm_lcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER $< -o $@ - -$(KDIR)chemm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER $< -ULOWER -o $@ - -$(KDIR)chemm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER $< -DLOWER -o $@ - -$(KDIR)chemm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER $< -ULOWER -o $@ - -$(KDIR)chemm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@ - -$(KDIR)zhemm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER $< -ULOWER -o $@ - -$(KDIR)zhemm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER $< -DLOWER -o $@ - 
-$(KDIR)zhemm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER $< -ULOWER -o $@ - -$(KDIR)zhemm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@ - -$(KDIR)xhemm_outcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER $< -ULOWER -o $@ - -$(KDIR)xhemm_oltcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER $< -DLOWER -o $@ - -$(KDIR)xhemm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER $< -ULOWER -o $@ - -$(KDIR)xhemm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@ - -$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -UDOUBLE 
-DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DDOUBLE 
-DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) -c 
-DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@ - -$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA 
-DIMAGE_ONLY $< -o $@ - -$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) 
$(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : 
generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE 
-DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : 
generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY 
$< -o $@ - -$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@ - -$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@ - -$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) 
$(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@ - -$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c - $(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@ - -$(KDIR)strsm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strsm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)strsm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strsm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_iltncopy$(TSUFFIX).$(PSUFFIX) 
: generic/trsm_ltcopy_$(SGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)strsm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strsm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)strsm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)strsm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)strsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)strsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(SGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - 
-$(KDIR)dtrsm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(DGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE 
-UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)dtrsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)dtrsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(DGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(QGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(QGEMM_UNROLL_M).c - $(CC) -c 
$(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_lncopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_utcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)qtrsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)qtrsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_ltcopy_$(QGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_ilnucopy$(TSUFFIX).$(PSUFFIX) : 
generic/ztrsm_lncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(CGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - 
-$(KDIR)ctrsm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ctrsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ctrsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(CGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_ilnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(ZGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) 
-DDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)ztrsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)ztrsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(ZGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_iunncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_ilnucopy$(TSUFFIX).$(PSUFFIX) : 
generic/ztrsm_lncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_ilnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_iutucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_iutncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_iltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_iltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_M).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_ounucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_ounncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_uncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_olnucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_olnncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_lncopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_outucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -DUNIT 
$< -o $@ - -$(KDIR)xtrsm_outncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_utcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -ULOWER -UUNIT $< -o $@ - -$(KDIR)xtrsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -DUNIT $< -o $@ - -$(KDIR)xtrsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_N).c - $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ - - -##### BLAS extensions ###### - -ifndef DOMATCOPY_CN -DOMATCOPY_CN = ../arm/omatcopy_cn.c -endif - -$(KDIR)domatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CN) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef DOMATCOPY_RN -DOMATCOPY_RN = ../arm/omatcopy_rn.c -endif - -$(KDIR)domatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RN) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ - -ifndef DOMATCOPY_CT -DOMATCOPY_CT = ../arm/omatcopy_ct.c -endif - -$(KDIR)domatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef DOMATCOPY_RT -DOMATCOPY_RT = ../arm/omatcopy_rt.c -endif - -$(KDIR)domatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ - -ifndef DIMATCOPY_CN -DIMATCOPY_CN = ../generic/imatcopy_cn.c -endif - -$(KDIR)dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DIMATCOPY_CN) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef DIMATCOPY_RN -DIMATCOPY_RN = ../generic/imatcopy_rn.c -endif - -$(KDIR)dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DIMATCOPY_RN) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ - -ifndef DIMATCOPY_CT -DIMATCOPY_CT = ../generic/imatcopy_ct.c -endif - -$(KDIR)dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DIMATCOPY_CT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM 
$< -o $@ - -ifndef DIMATCOPY_RT -DIMATCOPY_RT = ../generic/imatcopy_rt.c -endif - -$(KDIR)dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DIMATCOPY_RT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ - -ifndef SOMATCOPY_CN -SOMATCOPY_CN = ../arm/omatcopy_cn.c -endif - -$(KDIR)somatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_CN) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef SOMATCOPY_RN -SOMATCOPY_RN = ../arm/omatcopy_rn.c -endif - -$(KDIR)somatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RN) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ - -ifndef SOMATCOPY_CT -SOMATCOPY_CT = ../arm/omatcopy_ct.c -endif - -$(KDIR)somatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_CT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef SOMATCOPY_RT -SOMATCOPY_RT = ../arm/omatcopy_rt.c -endif - -$(KDIR)somatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ - -ifndef SIMATCOPY_CN -SIMATCOPY_CN = ../generic/imatcopy_cn.c -endif - -$(KDIR)simatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SIMATCOPY_CN) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef SIMATCOPY_RN -SIMATCOPY_RN = ../generic/imatcopy_rn.c -endif - -$(KDIR)simatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SIMATCOPY_RN) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ - -ifndef SIMATCOPY_CT -SIMATCOPY_CT = ../generic/imatcopy_ct.c -endif - -$(KDIR)simatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SIMATCOPY_CT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef SIMATCOPY_RT -SIMATCOPY_RT = ../generic/imatcopy_rt.c -endif - -$(KDIR)simatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SIMATCOPY_RT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ - - -ifndef COMATCOPY_CN -COMATCOPY_CN = ../arm/zomatcopy_cn.c -endif - -$(KDIR)comatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CN) - $(CC) $(CFLAGS) -c -UDOUBLE 
-DCOMPLEX -UROWM -UCONJ $< -o $@ - -ifndef COMATCOPY_RN -COMATCOPY_RN = ../arm/zomatcopy_rn.c -endif - -$(KDIR)comatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ - -ifndef COMATCOPY_CT -COMATCOPY_CT = ../arm/zomatcopy_ct.c -endif - -$(KDIR)comatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ - -ifndef COMATCOPY_RT -COMATCOPY_RT = ../arm/zomatcopy_rt.c -endif - -$(KDIR)comatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ - -ifndef COMATCOPY_CNC -COMATCOPY_CNC = ../arm/zomatcopy_cnc.c -endif - -$(KDIR)comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CNC) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ - -ifndef COMATCOPY_RNC -COMATCOPY_RNC = ../arm/zomatcopy_rnc.c -endif - -$(KDIR)comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RNC) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ - -ifndef COMATCOPY_CTC -COMATCOPY_CTC = ../arm/zomatcopy_ctc.c -endif - -$(KDIR)comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CTC) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ - -ifndef COMATCOPY_RTC -COMATCOPY_RTC = ../arm/zomatcopy_rtc.c -endif - -$(KDIR)comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RTC) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ - -ifndef CIMATCOPY_CN -CIMATCOPY_CN = ../generic/zimatcopy_cn.c -endif - -$(KDIR)cimatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_CN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ - -ifndef CIMATCOPY_RN -CIMATCOPY_RN = ../generic/zimatcopy_rn.c -endif - -$(KDIR)cimatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_RN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ - -ifndef CIMATCOPY_CT -CIMATCOPY_CT = ../generic/zimatcopy_ct.c -endif - 
-$(KDIR)cimatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_CT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ - -ifndef CIMATCOPY_RT -CIMATCOPY_RT = ../generic/zimatcopy_rt.c -endif - -$(KDIR)cimatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_RT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ - -ifndef CIMATCOPY_CNC -CIMATCOPY_CNC = ../generic/zimatcopy_cnc.c -endif - -$(KDIR)cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_CNC) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ - -ifndef CIMATCOPY_RNC -CIMATCOPY_RNC = ../generic/zimatcopy_rnc.c -endif - -$(KDIR)cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_RNC) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ - -ifndef CIMATCOPY_CTC -CIMATCOPY_CTC = ../generic/zimatcopy_ctc.c -endif - -$(KDIR)cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_CTC) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ - -ifndef CIMATCOPY_RTC -CIMATCOPY_RTC = ../generic/zimatcopy_rtc.c -endif - -$(KDIR)cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_RTC) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ - - - -ifndef ZOMATCOPY_CN -ZOMATCOPY_CN = ../arm/zomatcopy_cn.c -endif - -$(KDIR)zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ - -ifndef ZOMATCOPY_RN -ZOMATCOPY_RN = ../arm/zomatcopy_rn.c -endif - -$(KDIR)zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ - -ifndef ZOMATCOPY_CT -ZOMATCOPY_CT = ../arm/zomatcopy_ct.c -endif - -$(KDIR)zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ - -ifndef ZOMATCOPY_RT -ZOMATCOPY_RT = ../arm/zomatcopy_rt.c -endif - -$(KDIR)zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RT) - $(CC) $(CFLAGS) 
-c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ - -ifndef ZOMATCOPY_CNC -ZOMATCOPY_CNC = ../arm/zomatcopy_cnc.c -endif - -$(KDIR)zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CNC) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ - -ifndef ZOMATCOPY_RNC -ZOMATCOPY_RNC = ../arm/zomatcopy_rnc.c -endif - -$(KDIR)zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RNC) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ - -ifndef ZOMATCOPY_CTC -ZOMATCOPY_CTC = ../arm/zomatcopy_ctc.c -endif - -$(KDIR)zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CTC) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ - -ifndef ZOMATCOPY_RTC -ZOMATCOPY_RTC = ../arm/zomatcopy_rtc.c -endif - -$(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ - -ifndef ZIMATCOPY_CN -ZIMATCOPY_CN = ../generic/zimatcopy_cn.c -endif - -$(KDIR)zimatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_CN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ - -ifndef ZIMATCOPY_RN -ZIMATCOPY_RN = ../generic/zimatcopy_rn.c -endif - -$(KDIR)zimatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_RN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ - -ifndef ZIMATCOPY_CT -ZIMATCOPY_CT = ../generic/zimatcopy_ct.c -endif - -$(KDIR)zimatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_CT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ - -ifndef ZIMATCOPY_RT -ZIMATCOPY_RT = ../generic/zimatcopy_rt.c -endif - -$(KDIR)zimatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_RT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ - -ifndef ZIMATCOPY_CNC -ZIMATCOPY_CNC = ../generic/zimatcopy_cnc.c -endif - -$(KDIR)zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_CNC) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ - -ifndef ZIMATCOPY_RNC -ZIMATCOPY_RNC = 
../generic/zimatcopy_rnc.c -endif - -$(KDIR)zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_RNC) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ - -ifndef ZIMATCOPY_CTC -ZIMATCOPY_CTC = ../generic/zimatcopy_ctc.c -endif - -$(KDIR)zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_CTC) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ - -ifndef ZIMATCOPY_RTC -ZIMATCOPY_RTC = ../generic/zimatcopy_rtc.c -endif - -$(KDIR)zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_RTC) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ - - -ifndef SGEADD_K -SGEADD_K = ../generic/geadd.c -endif - -$(KDIR)sgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef DGEADD_K -DGEADD_K = ../generic/geadd.c -endif - -$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ - -ifndef CGEADD_K -CGEADD_K = ../generic/zgeadd.c -endif - -$(KDIR)cgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEADD_K) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM $< -o $@ - -ifndef ZGEADD_K -ZGEADD_K = ../generic/zgeadd.c -endif - -$(KDIR)zgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEADD_K) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM $< -o $@ - - - -###### BLAS small matrix optimization ##### - -ifndef DGEMM_SMALL_M_PERMIT -DGEMM_SMALL_M_PERMIT = ../generic/gemm_small_matrix_permit.c -endif - -ifndef DGEMM_SMALL_K_NN -DGEMM_SMALL_K_NN = ../generic/gemm_small_matrix_kernel_nn.c -endif - -ifndef DGEMM_SMALL_K_NT -DGEMM_SMALL_K_NT = ../generic/gemm_small_matrix_kernel_nt.c -endif - -ifndef DGEMM_SMALL_K_TN -DGEMM_SMALL_K_TN = ../generic/gemm_small_matrix_kernel_tn.c -endif - -ifndef DGEMM_SMALL_K_TT -DGEMM_SMALL_K_TT = ../generic/gemm_small_matrix_kernel_tt.c -endif - -$(KDIR)dgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_M_PERMIT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o 
$@ - -$(KDIR)dgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)dgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)dgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)dgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ - -ifndef DGEMM_SMALL_K_B0_NN -DGEMM_SMALL_K_B0_NN = ../generic/gemm_small_matrix_kernel_nn.c -endif - -ifndef DGEMM_SMALL_K_B0_NT -DGEMM_SMALL_K_B0_NT = ../generic/gemm_small_matrix_kernel_nt.c -endif - -ifndef DGEMM_SMALL_K_B0_TN -DGEMM_SMALL_K_B0_TN = ../generic/gemm_small_matrix_kernel_tn.c -endif - -ifndef DGEMM_SMALL_K_B0_TT -DGEMM_SMALL_K_B0_TT = ../generic/gemm_small_matrix_kernel_tt.c -endif - -$(KDIR)dgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)dgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)dgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)dgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DB0 $< -o $@ - -ifndef SGEMM_SMALL_M_PERMIT -SGEMM_SMALL_M_PERMIT = ../generic/gemm_small_matrix_permit.c -endif - -ifndef SGEMM_SMALL_K_NN -SGEMM_SMALL_K_NN = ../generic/gemm_small_matrix_kernel_nn.c -endif - -ifndef SGEMM_SMALL_K_NT -SGEMM_SMALL_K_NT = ../generic/gemm_small_matrix_kernel_nt.c -endif - -ifndef SGEMM_SMALL_K_TN -SGEMM_SMALL_K_TN = ../generic/gemm_small_matrix_kernel_tn.c -endif - -ifndef SGEMM_SMALL_K_TT -SGEMM_SMALL_K_TT = 
../generic/gemm_small_matrix_kernel_tt.c -endif - -$(KDIR)sgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_M_PERMIT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)sgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)sgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)sgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)sgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ - -ifndef SGEMM_SMALL_K_B0_NN -SGEMM_SMALL_K_B0_NN = ../generic/gemm_small_matrix_kernel_nn.c -endif - -ifndef SGEMM_SMALL_K_B0_NT -SGEMM_SMALL_K_B0_NT = ../generic/gemm_small_matrix_kernel_nt.c -endif - -ifndef SGEMM_SMALL_K_B0_TN -SGEMM_SMALL_K_B0_TN = ../generic/gemm_small_matrix_kernel_tn.c -endif - -ifndef SGEMM_SMALL_K_B0_TT -SGEMM_SMALL_K_B0_TT = ../generic/gemm_small_matrix_kernel_tt.c -endif - -$(KDIR)sgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)sgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)sgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)sgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DB0 $< -o $@ - - -ifeq ($(BUILD_BFLOAT16), 1) -ifndef SBGEMM_SMALL_M_PERMIT -SBGEMM_SMALL_M_PERMIT = ../generic/gemm_small_matrix_permit.c -endif - -ifndef SBGEMM_SMALL_K_NN -SBGEMM_SMALL_K_NN = ../generic/gemm_small_matrix_kernel_nn.c -endif - -ifndef SBGEMM_SMALL_K_NT 
-SBGEMM_SMALL_K_NT = ../generic/gemm_small_matrix_kernel_nt.c -endif - -ifndef SBGEMM_SMALL_K_TN -SBGEMM_SMALL_K_TN = ../generic/gemm_small_matrix_kernel_tn.c -endif - -ifndef SBGEMM_SMALL_K_TT -SBGEMM_SMALL_K_TT = ../generic/gemm_small_matrix_kernel_tt.c -endif - -$(KDIR)sbgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMALL_M_PERMIT) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)sbgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)sbgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)sbgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -$(KDIR)sbgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ - -ifndef SBGEMM_SMALL_K_B0_NN -SBGEMM_SMALL_K_B0_NN = ../generic/gemm_small_matrix_kernel_nn.c -endif - -ifndef SBGEMM_SMALL_K_B0_NT -SBGEMM_SMALL_K_B0_NT = ../generic/gemm_small_matrix_kernel_nt.c -endif - -ifndef SBGEMM_SMALL_K_B0_TN -SBGEMM_SMALL_K_B0_TN = ../generic/gemm_small_matrix_kernel_tn.c -endif - -ifndef SBGEMM_SMALL_K_B0_TT -SBGEMM_SMALL_K_B0_TT = ../generic/gemm_small_matrix_kernel_tt.c -endif - -$(KDIR)sbgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)sbgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)sbgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ - -$(KDIR)sbgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(SBGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ -endif - -ifndef CGEMM_SMALL_M_PERMIT -CGEMM_SMALL_M_PERMIT = ../generic/zgemm_small_matrix_permit.c -endif - -ifndef CGEMM_SMALL_K_NN -CGEMM_SMALL_K_NN = ../generic/zgemm_small_matrix_kernel_nn.c -endif - -ifndef CGEMM_SMALL_K_NT -CGEMM_SMALL_K_NT = ../generic/zgemm_small_matrix_kernel_nt.c -endif - -ifndef CGEMM_SMALL_K_TN -CGEMM_SMALL_K_TN = ../generic/zgemm_small_matrix_kernel_tn.c -endif - -ifndef CGEMM_SMALL_K_TT -CGEMM_SMALL_K_TT = ../generic/zgemm_small_matrix_kernel_tt.c -endif - -$(KDIR)cgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_M_PERMIT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX $< -o $@ - -$(KDIR)cgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)cgemm_small_kernel_nr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNR $< -o $@ - -$(KDIR)cgemm_small_kernel_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRN $< -o $@ - -$(KDIR)cgemm_small_kernel_rr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRR $< -o $@ - -$(KDIR)cgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNT $< -o $@ - -$(KDIR)cgemm_small_kernel_nc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $@ - -$(KDIR)cgemm_small_kernel_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRT $< -o $@ - -$(KDIR)cgemm_small_kernel_rc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRC=RC $< -o $@ - -$(KDIR)cgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX 
-DTN $< -o $@ - -$(KDIR)cgemm_small_kernel_tr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTR $< -o $@ - -$(KDIR)cgemm_small_kernel_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN $< -o $@ - -$(KDIR)cgemm_small_kernel_cr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $@ - -$(KDIR)cgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTT $< -o $@ - -$(KDIR)cgemm_small_kernel_tc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTC $< -o $@ - -$(KDIR)cgemm_small_kernel_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $@ - -$(KDIR)cgemm_small_kernel_cc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $@ - -ifndef CGEMM_SMALL_K_B0_NN -CGEMM_SMALL_K_B0_NN = ../generic/zgemm_small_matrix_kernel_nn.c -endif - -ifndef CGEMM_SMALL_K_B0_NT -CGEMM_SMALL_K_B0_NT = ../generic/zgemm_small_matrix_kernel_nt.c -endif - -ifndef CGEMM_SMALL_K_B0_TN -CGEMM_SMALL_K_B0_TN = ../generic/zgemm_small_matrix_kernel_tn.c -endif - -ifndef CGEMM_SMALL_K_B0_TT -CGEMM_SMALL_K_B0_TT = ../generic/zgemm_small_matrix_kernel_tt.c -endif - -$(KDIR)cgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_nr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNR -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRN -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_rr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -UDOUBLE 
-DCOMPLEX -DRR -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNT -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_nc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRT -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_rc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DRC=RC -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTN -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_tr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTR -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_cr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCR=CR -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTT -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_tc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DTC -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCT -DB0 $< -o $@ - -$(KDIR)cgemm_small_kernel_b0_cc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC -DB0 $< -o $@ - -ifndef ZGEMM_SMALL_M_PERMIT -ZGEMM_SMALL_M_PERMIT = ../generic/zgemm_small_matrix_permit.c -endif - -ifndef ZGEMM_SMALL_K_NN 
-ZGEMM_SMALL_K_NN = ../generic/zgemm_small_matrix_kernel_nn.c -endif - -ifndef ZGEMM_SMALL_K_NT -ZGEMM_SMALL_K_NT = ../generic/zgemm_small_matrix_kernel_nt.c -endif - -ifndef ZGEMM_SMALL_K_TN -ZGEMM_SMALL_K_TN = ../generic/zgemm_small_matrix_kernel_tn.c -endif - -ifndef ZGEMM_SMALL_K_TT -ZGEMM_SMALL_K_TT = ../generic/zgemm_small_matrix_kernel_tt.c -endif - -$(KDIR)zgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_M_PERMIT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX $< -o $@ - - -$(KDIR)zgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@ - -$(KDIR)zgemm_small_kernel_nr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNR $< -o $@ - -$(KDIR)zgemm_small_kernel_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRN $< -o $@ - -$(KDIR)zgemm_small_kernel_rr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRR $< -o $@ - -$(KDIR)zgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNT $< -o $@ - -$(KDIR)zgemm_small_kernel_nc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@ - -$(KDIR)zgemm_small_kernel_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRT $< -o $@ - -$(KDIR)zgemm_small_kernel_rc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRC=RC $< -o $@ - -$(KDIR)zgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTN $< -o $@ - -$(KDIR)zgemm_small_kernel_tr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTR $< -o $@ - -$(KDIR)zgemm_small_kernel_cn$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(ZGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@ - -$(KDIR)zgemm_small_kernel_cr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $@ - -$(KDIR)zgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTT $< -o $@ - -$(KDIR)zgemm_small_kernel_tc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTC $< -o $@ - -$(KDIR)zgemm_small_kernel_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $@ - -$(KDIR)zgemm_small_kernel_cc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@ - -ifndef ZGEMM_SMALL_K_B0_NN -ZGEMM_SMALL_K_B0_NN = ../generic/zgemm_small_matrix_kernel_nn.c -endif - -ifndef ZGEMM_SMALL_K_B0_NT -ZGEMM_SMALL_K_B0_NT = ../generic/zgemm_small_matrix_kernel_nt.c -endif - -ifndef ZGEMM_SMALL_K_B0_TN -ZGEMM_SMALL_K_B0_TN = ../generic/zgemm_small_matrix_kernel_tn.c -endif - -ifndef ZGEMM_SMALL_K_B0_TT -ZGEMM_SMALL_K_B0_TT = ../generic/zgemm_small_matrix_kernel_tt.c -endif - -$(KDIR)zgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_nr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNR -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRN -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_rr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRR -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNT -DB0 $< -o $@ - 
-$(KDIR)zgemm_small_kernel_b0_nc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRT -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_rc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_NT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DRC=RC -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTN -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_tr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTR -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_cr$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TN) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCR=CR -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTT -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_tc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DTC -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCT -DB0 $< -o $@ - -$(KDIR)zgemm_small_kernel_b0_cc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_SMALL_K_B0_TT) - $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC -DB0 $< -o $@ From cd8eb83bae95f0ccc5308d1612b60d505847296e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 5 Nov 2023 18:13:37 +0100 Subject: [PATCH 098/125] Fix allocations and compiler warnings in ZROTG (#4289) * Clean up ZROTG --- interface/zrotg.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff 
--git a/interface/zrotg.c b/interface/zrotg.c index 4d2a9d510..ea73352dd 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -30,14 +30,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { FLOAT db_r = *(DB+0); FLOAT db_i = *(DB+1); //long double r; - FLOAT *r, *S1=(FLOAT *)malloc(2*sizeof(FLOAT)); - FLOAT *R=(FLOAT *)malloc(2*sizeof(FLOAT)); + FLOAT S1[2]; + FLOAT R[2]; long double d; FLOAT ada = da_r * da_r + da_i * da_i; FLOAT adb = db_r * db_r + db_i * db_i; - FLOAT adart = sqrt( da_r * da_r + da_i * da_i); - FLOAT adbrt = sqrt( db_r * db_r + db_i * db_i); PRINT_DEBUG_NAME; @@ -115,10 +113,13 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { } } else { *C = ada / adahsq; - if (*C >= safmin) + if (*C >= safmin) { *R = *DA / *C; - else + *(R+1) = *(DA+1) / *(C+1); + } else { *R = *DA * (h / adahsq); + *(R+1) = *(DA+1) * (h / adahsq); + } *S = *S1 * ada / adahsq; *(S+1) = *(S1+1) * ada / adahsq; } @@ -178,4 +179,4 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { } } } - \ No newline at end of file + From ac7efc61fd41dd4f93bcf471bc8687e96fa882ac Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 6 Nov 2023 00:03:33 +0100 Subject: [PATCH 099/125] Put more build information into Makefile.conf_last --- Makefile | 20 ++++++++++++++++---- Makefile.install | 8 ++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 8621a8b3f..5f6643402 100644 --- a/Makefile +++ b/Makefile @@ -35,11 +35,7 @@ export NO_LAPACK export C_LAPACK endif -ifeq ($(F_COMPILER),CRAY) -LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS)) -else LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS)) -endif SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test @@ -210,9 +206,25 @@ ifeq ($(DYNAMIC_OLDER), 1) @echo DYNAMIC_OLDER=1 >> Makefile.conf_last endif endif + @echo TARGET=$(CORE) >> Makefile.conf_last ifdef USE_THREAD @echo 
USE_THREAD=$(USE_THREAD) >> Makefile.conf_last endif +ifdef SMP +ifdef NUM_THREADS + @echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last +else + @echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last +endif +endif +ifeq ($(USE_OPENMP),1) + @echo USE_OPENMP=1 >> Makefile.conf_last +endif +ifeq ($(INTERFACE64),1) + @echo INTERFACE64=1 >> Makefile.conf_last +endif + @echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last + @echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last @-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) @touch lib.grd diff --git a/Makefile.install b/Makefile.install index 01899b970..81f959177 100644 --- a/Makefile.install +++ b/Makefile.install @@ -3,6 +3,14 @@ export GOTOBLAS_MAKEFILE = 1 -include $(TOPDIR)/Makefile.conf_last include ./Makefile.system +ifdef THELIBNAME +LIBNAME=$(THELIBNAME) +LIBSONAME=$(THELIBSONAME) +endif +ifeq ($(INTERFACE64),1) +USE_64BITINT=1 +endif + PREFIX ?= /opt/OpenBLAS OPENBLAS_INCLUDE_DIR := $(PREFIX)/include From cf8295da5ca2774a04fed14a89d7dc0de25f9146 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 6 Nov 2023 21:22:26 +0100 Subject: [PATCH 100/125] Fix potential overflow in the calculation of MAXIT --- lapack-netlib/SRC/cbdsqr.f | 34 +++++++++++++++++++++++++--------- lapack-netlib/SRC/zbdsqr.f | 34 +++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/lapack-netlib/SRC/cbdsqr.f b/lapack-netlib/SRC/cbdsqr.f index 40706644e..cf1459ad2 100644 --- a/lapack-netlib/SRC/cbdsqr.f +++ b/lapack-netlib/SRC/cbdsqr.f @@ -204,6 +204,17 @@ *> algorithm through its inner loop. The algorithms stops *> (and so fails to converge) if the number of passes *> through the inner loop exceeds MAXITR*N**2. +*> +*> \endverbatim +* +*> \par Note: +* =========== +*> +*> \verbatim +*> Bug report from Cezary Dendek. +*> On November 3rd 2023, the INTEGER variable MAXIT = MAXITR*N**2 is +*> removed since it can overflow pretty easily (for N larger or equal +*> than 18,919). 
We instead use MAXITDIVN = MAXITR*N. *> \endverbatim * * Authors: @@ -214,7 +225,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup bdsqr * * ===================================================================== SUBROUTINE CBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, @@ -255,8 +266,8 @@ * .. * .. Local Scalars .. LOGICAL LOWER, ROTATE - INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, - $ NM12, NM13, OLDLL, OLDM + INTEGER I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M, + $ MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM REAL ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, $ SINR, SLL, SMAX, SMIN, SMINOA, @@ -389,20 +400,21 @@ 40 CONTINUE 50 CONTINUE SMINOA = SMINOA / SQRT( REAL( N ) ) - THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) ) ELSE * * Absolute accuracy desired * - THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) ) END IF * * Prepare for main iteration loop for the singular values * (MAXIT is the maximum number of passes through the inner * loop permitted before nonconvergence signalled.) * - MAXIT = MAXITR*N*N - ITER = 0 + MAXITDIVN = MAXITR*N + ITERDIVN = 0 + ITER = -1 OLDLL = -1 OLDM = -1 * @@ -418,8 +430,12 @@ * IF( M.LE.1 ) $ GO TO 160 - IF( ITER.GT.MAXIT ) - $ GO TO 200 + IF( ITER.GE.N ) THEN + ITER = ITER - N + ITERDIVN = ITERDIVN + 1 + IF( ITERDIVN.GE.MAXITDIVN ) + $ GO TO 200 + END IF * * Find diagonal block of matrix to work on * diff --git a/lapack-netlib/SRC/zbdsqr.f b/lapack-netlib/SRC/zbdsqr.f index faedafc3c..865bb9dd5 100644 --- a/lapack-netlib/SRC/zbdsqr.f +++ b/lapack-netlib/SRC/zbdsqr.f @@ -204,6 +204,17 @@ *> algorithm through its inner loop. The algorithms stops *> (and so fails to converge) if the number of passes *> through the inner loop exceeds MAXITR*N**2. 
+*> +*> \endverbatim +* +*> \par Note: +* =========== +*> +*> \verbatim +*> Bug report from Cezary Dendek. +*> On November 3rd 2023, the INTEGER variable MAXIT = MAXITR*N**2 is +*> removed since it can overflow pretty easily (for N larger or equal +*> than 18,919). We instead use MAXITDIVN = MAXITR*N. *> \endverbatim * * Authors: @@ -214,7 +225,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup bdsqr * * ===================================================================== SUBROUTINE ZBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, @@ -255,8 +266,8 @@ * .. * .. Local Scalars .. LOGICAL LOWER, ROTATE - INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, - $ NM12, NM13, OLDLL, OLDM + INTEGER I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M, + $ MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, $ SINR, SLL, SMAX, SMIN, SMINOA, @@ -389,20 +400,21 @@ 40 CONTINUE 50 CONTINUE SMINOA = SMINOA / SQRT( DBLE( N ) ) - THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) ) ELSE * * Absolute accuracy desired * - THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) ) END IF * * Prepare for main iteration loop for the singular values * (MAXIT is the maximum number of passes through the inner * loop permitted before nonconvergence signalled.) 
* - MAXIT = MAXITR*N*N - ITER = 0 + MAXITDIVN = MAXITR*N + ITERDIVN = 0 + ITER = -1 OLDLL = -1 OLDM = -1 * @@ -418,8 +430,12 @@ * IF( M.LE.1 ) $ GO TO 160 - IF( ITER.GT.MAXIT ) - $ GO TO 200 + IF( ITER.GE.N ) THEN + ITER = ITER - N + ITERDIVN = ITERDIVN + 1 + IF( ITERDIVN.GE.MAXITDIVN ) + $ GO TO 200 + END IF * * Find diagonal block of matrix to work on * From f6ec777701d5d5e8271c9c271e260873c9b05911 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 6 Nov 2023 21:40:50 +0100 Subject: [PATCH 101/125] Fix accumulation (Reference-LAPACK PR 909) --- lapack-netlib/SRC/classq.f90 | 81 ++++++++++++++++++------------------ lapack-netlib/SRC/dlassq.f90 | 81 ++++++++++++++++++------------------ lapack-netlib/SRC/slassq.f90 | 81 ++++++++++++++++++------------------ lapack-netlib/SRC/zlassq.f90 | 81 ++++++++++++++++++------------------ 4 files changed, 160 insertions(+), 164 deletions(-) diff --git a/lapack-netlib/SRC/classq.f90 b/lapack-netlib/SRC/classq.f90 index cb4e7971f..c5f793cc0 100644 --- a/lapack-netlib/SRC/classq.f90 +++ b/lapack-netlib/SRC/classq.f90 @@ -34,28 +34,15 @@ !> !> \verbatim !> -!> CLASSQ returns the values scl and smsq such that +!> CLASSQ returns the values scale_out and sumsq_out such that !> -!> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +!> (scale_out**2)*sumsq_out = x( 1 )**2 +...+ x( n )**2 + (scale**2)*sumsq, !> -!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is !> assumed to be non-negative. !> !> scale and sumsq must be supplied in SCALE and SUMSQ and -!> scl and smsq are overwritten on SCALE and SUMSQ respectively. 
-!> -!> If scale * sqrt( sumsq ) > tbig then -!> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, -!> and if 0 < scale * sqrt( sumsq ) < tsml then -!> we require: scale <= sqrt( HUGE ) / ssml on entry, -!> where -!> tbig -- upper threshold for values whose square is representable; -!> sbig -- scaling constant for big numbers; \see la_constants.f90 -!> tsml -- lower threshold for values whose square is representable; -!> ssml -- scaling constant for small numbers; \see la_constants.f90 -!> and -!> TINY*EPS -- tiniest representable number; -!> HUGE -- biggest representable number. +!> scale_out and sumsq_out are overwritten on SCALE and SUMSQ respectively. !> !> \endverbatim ! @@ -72,7 +59,7 @@ !> \verbatim !> X is COMPLEX array, dimension (1+(N-1)*abs(INCX)) !> The vector for which a scaled sum of squares is computed. -!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. !> \endverbatim !> !> \param[in] INCX @@ -82,24 +69,24 @@ !> If INCX > 0, X(1+(i-1)*INCX) = x(i) for 1 <= i <= n !> If INCX < 0, X(1-(n-i)*INCX) = x(i) for 1 <= i <= n !> If INCX = 0, x isn't a vector so there is no need to call -!> this subroutine. If you call it anyway, it will count x(1) +!> this subroutine. If you call it anyway, it will count x(1) !> in the vector norm N times. !> \endverbatim !> !> \param[in,out] SCALE !> \verbatim !> SCALE is REAL -!> On entry, the value scale in the equation above. -!> On exit, SCALE is overwritten with scl , the scaling factor +!> On entry, the value scale in the equation above. +!> On exit, SCALE is overwritten by scale_out, the scaling factor !> for the sum of squares. !> \endverbatim !> !> \param[in,out] SUMSQ !> \verbatim !> SUMSQ is REAL -!> On entry, the value sumsq in the equation above. -!> On exit, SUMSQ is overwritten with smsq , the basic sum of -!> squares from which scl has been factored out. +!> On entry, the value sumsq in the equation above. 
+!> On exit, SUMSQ is overwritten by sumsq_out, the basic sum of +!> squares from which scale_out has been factored out. !> \endverbatim ! ! Authors: @@ -130,10 +117,10 @@ !> !> \endverbatim ! -!> \ingroup OTHERauxiliary +!> \ingroup lassq ! ! ===================================================================== -subroutine CLASSQ( n, x, incx, scl, sumsq ) +subroutine CLASSQ( n, x, incx, scale, sumsq ) use LA_CONSTANTS, & only: wp=>sp, zero=>szero, one=>sone, & sbig=>ssbig, ssml=>sssml, tbig=>stbig, tsml=>stsml @@ -145,7 +132,7 @@ subroutine CLASSQ( n, x, incx, scl, sumsq ) ! ! .. Scalar Arguments .. integer :: incx, n - real(wp) :: scl, sumsq + real(wp) :: scale, sumsq ! .. ! .. Array Arguments .. complex(wp) :: x(*) @@ -158,10 +145,10 @@ subroutine CLASSQ( n, x, incx, scl, sumsq ) ! ! Quick return if possible ! - if( LA_ISNAN(scl) .or. LA_ISNAN(sumsq) ) return - if( sumsq == zero ) scl = one - if( scl == zero ) then - scl = one + if( LA_ISNAN(scale) .or. LA_ISNAN(sumsq) ) return + if( sumsq == zero ) scale = one + if( scale == zero ) then + scale = one sumsq = zero end if if (n <= 0) then @@ -207,15 +194,27 @@ subroutine CLASSQ( n, x, incx, scl, sumsq ) ! Put the existing sum of squares into one of the accumulators ! if( sumsq > zero ) then - ax = scl*sqrt( sumsq ) + ax = scale*sqrt( sumsq ) if (ax > tbig) then -! We assume scl >= sqrt( TINY*EPS ) / sbig - abig = abig + (scl*sbig)**2 * sumsq + if (scale > one) then + scale = scale * sbig + abig = abig + scale * (scale * sumsq) + else + ! sumsq > tbig^2 => (sbig * (sbig * sumsq)) is representable + abig = abig + scale * (scale * (sbig * (sbig * sumsq))) + end if else if (ax < tsml) then -! We assume scl <= sqrt( HUGE ) / ssml - if (notbig) asml = asml + (scl*ssml)**2 * sumsq + if (notbig) then + if (scale < one) then + scale = scale * ssml + asml = asml + scale * (scale * sumsq) + else + ! 
sumsq < tsml^2 => (ssml * (ssml * sumsq)) is representable + asml = asml + scale * (scale * (ssml * (ssml * sumsq))) + end if + end if else - amed = amed + scl**2 * sumsq + amed = amed + scale * (scale * sumsq) end if end if ! @@ -229,7 +228,7 @@ subroutine CLASSQ( n, x, incx, scl, sumsq ) if (amed > zero .or. LA_ISNAN(amed)) then abig = abig + (amed*sbig)*sbig end if - scl = one / sbig + scale = one / sbig sumsq = abig else if (asml > zero) then ! @@ -245,17 +244,17 @@ subroutine CLASSQ( n, x, incx, scl, sumsq ) ymin = asml ymax = amed end if - scl = one + scale = one sumsq = ymax**2*( one + (ymin/ymax)**2 ) else - scl = one / ssml + scale = one / ssml sumsq = asml end if else ! ! Otherwise all values are mid-range or zero ! - scl = one + scale = one sumsq = amed end if return diff --git a/lapack-netlib/SRC/dlassq.f90 b/lapack-netlib/SRC/dlassq.f90 index fddd1bf38..37626844b 100644 --- a/lapack-netlib/SRC/dlassq.f90 +++ b/lapack-netlib/SRC/dlassq.f90 @@ -34,28 +34,15 @@ !> !> \verbatim !> -!> DLASSQ returns the values scl and smsq such that +!> DLASSQ returns the values scale_out and sumsq_out such that !> -!> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +!> (scale_out**2)*sumsq_out = x( 1 )**2 +...+ x( n )**2 + (scale**2)*sumsq, !> -!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is !> assumed to be non-negative. !> !> scale and sumsq must be supplied in SCALE and SUMSQ and -!> scl and smsq are overwritten on SCALE and SUMSQ respectively. 
-!> -!> If scale * sqrt( sumsq ) > tbig then -!> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, -!> and if 0 < scale * sqrt( sumsq ) < tsml then -!> we require: scale <= sqrt( HUGE ) / ssml on entry, -!> where -!> tbig -- upper threshold for values whose square is representable; -!> sbig -- scaling constant for big numbers; \see la_constants.f90 -!> tsml -- lower threshold for values whose square is representable; -!> ssml -- scaling constant for small numbers; \see la_constants.f90 -!> and -!> TINY*EPS -- tiniest representable number; -!> HUGE -- biggest representable number. +!> scale_out and sumsq_out are overwritten on SCALE and SUMSQ respectively. !> !> \endverbatim ! @@ -72,7 +59,7 @@ !> \verbatim !> X is DOUBLE PRECISION array, dimension (1+(N-1)*abs(INCX)) !> The vector for which a scaled sum of squares is computed. -!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. !> \endverbatim !> !> \param[in] INCX @@ -82,24 +69,24 @@ !> If INCX > 0, X(1+(i-1)*INCX) = x(i) for 1 <= i <= n !> If INCX < 0, X(1-(n-i)*INCX) = x(i) for 1 <= i <= n !> If INCX = 0, x isn't a vector so there is no need to call -!> this subroutine. If you call it anyway, it will count x(1) +!> this subroutine. If you call it anyway, it will count x(1) !> in the vector norm N times. !> \endverbatim !> !> \param[in,out] SCALE !> \verbatim !> SCALE is DOUBLE PRECISION -!> On entry, the value scale in the equation above. -!> On exit, SCALE is overwritten with scl , the scaling factor +!> On entry, the value scale in the equation above. +!> On exit, SCALE is overwritten by scale_out, the scaling factor !> for the sum of squares. !> \endverbatim !> !> \param[in,out] SUMSQ !> \verbatim !> SUMSQ is DOUBLE PRECISION -!> On entry, the value sumsq in the equation above. -!> On exit, SUMSQ is overwritten with smsq , the basic sum of -!> squares from which scl has been factored out. +!> On entry, the value sumsq in the equation above. 
+!> On exit, SUMSQ is overwritten by sumsq_out, the basic sum of +!> squares from which scale_out has been factored out. !> \endverbatim ! ! Authors: @@ -130,10 +117,10 @@ !> !> \endverbatim ! -!> \ingroup OTHERauxiliary +!> \ingroup lassq ! ! ===================================================================== -subroutine DLASSQ( n, x, incx, scl, sumsq ) +subroutine DLASSQ( n, x, incx, scale, sumsq ) use LA_CONSTANTS, & only: wp=>dp, zero=>dzero, one=>done, & sbig=>dsbig, ssml=>dssml, tbig=>dtbig, tsml=>dtsml @@ -145,7 +132,7 @@ subroutine DLASSQ( n, x, incx, scl, sumsq ) ! ! .. Scalar Arguments .. integer :: incx, n - real(wp) :: scl, sumsq + real(wp) :: scale, sumsq ! .. ! .. Array Arguments .. real(wp) :: x(*) @@ -158,10 +145,10 @@ subroutine DLASSQ( n, x, incx, scl, sumsq ) ! ! Quick return if possible ! - if( LA_ISNAN(scl) .or. LA_ISNAN(sumsq) ) return - if( sumsq == zero ) scl = one - if( scl == zero ) then - scl = one + if( LA_ISNAN(scale) .or. LA_ISNAN(sumsq) ) return + if( sumsq == zero ) scale = one + if( scale == zero ) then + scale = one sumsq = zero end if if (n <= 0) then @@ -198,15 +185,27 @@ subroutine DLASSQ( n, x, incx, scl, sumsq ) ! Put the existing sum of squares into one of the accumulators ! if( sumsq > zero ) then - ax = scl*sqrt( sumsq ) + ax = scale*sqrt( sumsq ) if (ax > tbig) then -! We assume scl >= sqrt( TINY*EPS ) / sbig - abig = abig + (scl*sbig)**2 * sumsq + if (scale > one) then + scale = scale * sbig + abig = abig + scale * (scale * sumsq) + else + ! sumsq > tbig^2 => (sbig * (sbig * sumsq)) is representable + abig = abig + scale * (scale * (sbig * (sbig * sumsq))) + end if else if (ax < tsml) then -! We assume scl <= sqrt( HUGE ) / ssml - if (notbig) asml = asml + (scl*ssml)**2 * sumsq + if (notbig) then + if (scale < one) then + scale = scale * ssml + asml = asml + scale * (scale * sumsq) + else + ! 
sumsq < tsml^2 => (ssml * (ssml * sumsq)) is representable + asml = asml + scale * (scale * (ssml * (ssml * sumsq))) + end if + end if else - amed = amed + scl**2 * sumsq + amed = amed + scale * (scale * sumsq) end if end if ! @@ -220,7 +219,7 @@ subroutine DLASSQ( n, x, incx, scl, sumsq ) if (amed > zero .or. LA_ISNAN(amed)) then abig = abig + (amed*sbig)*sbig end if - scl = one / sbig + scale = one / sbig sumsq = abig else if (asml > zero) then ! @@ -236,17 +235,17 @@ subroutine DLASSQ( n, x, incx, scl, sumsq ) ymin = asml ymax = amed end if - scl = one + scale = one sumsq = ymax**2*( one + (ymin/ymax)**2 ) else - scl = one / ssml + scale = one / ssml sumsq = asml end if else ! ! Otherwise all values are mid-range or zero ! - scl = one + scale = one sumsq = amed end if return diff --git a/lapack-netlib/SRC/slassq.f90 b/lapack-netlib/SRC/slassq.f90 index 19f49402b..c8959f4a7 100644 --- a/lapack-netlib/SRC/slassq.f90 +++ b/lapack-netlib/SRC/slassq.f90 @@ -34,28 +34,15 @@ !> !> \verbatim !> -!> SLASSQ returns the values scl and smsq such that +!> SLASSQ returns the values scale_out and sumsq_out such that !> -!> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +!> (scale_out**2)*sumsq_out = x( 1 )**2 +...+ x( n )**2 + (scale**2)*sumsq, !> -!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is !> assumed to be non-negative. !> !> scale and sumsq must be supplied in SCALE and SUMSQ and -!> scl and smsq are overwritten on SCALE and SUMSQ respectively. 
-!> -!> If scale * sqrt( sumsq ) > tbig then -!> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, -!> and if 0 < scale * sqrt( sumsq ) < tsml then -!> we require: scale <= sqrt( HUGE ) / ssml on entry, -!> where -!> tbig -- upper threshold for values whose square is representable; -!> sbig -- scaling constant for big numbers; \see la_constants.f90 -!> tsml -- lower threshold for values whose square is representable; -!> ssml -- scaling constant for small numbers; \see la_constants.f90 -!> and -!> TINY*EPS -- tiniest representable number; -!> HUGE -- biggest representable number. +!> scale_out and sumsq_out are overwritten on SCALE and SUMSQ respectively. !> !> \endverbatim ! @@ -72,7 +59,7 @@ !> \verbatim !> X is REAL array, dimension (1+(N-1)*abs(INCX)) !> The vector for which a scaled sum of squares is computed. -!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. !> \endverbatim !> !> \param[in] INCX @@ -82,24 +69,24 @@ !> If INCX > 0, X(1+(i-1)*INCX) = x(i) for 1 <= i <= n !> If INCX < 0, X(1-(n-i)*INCX) = x(i) for 1 <= i <= n !> If INCX = 0, x isn't a vector so there is no need to call -!> this subroutine. If you call it anyway, it will count x(1) +!> this subroutine. If you call it anyway, it will count x(1) !> in the vector norm N times. !> \endverbatim !> !> \param[in,out] SCALE !> \verbatim !> SCALE is REAL -!> On entry, the value scale in the equation above. -!> On exit, SCALE is overwritten with scl , the scaling factor +!> On entry, the value scale in the equation above. +!> On exit, SCALE is overwritten by scale_out, the scaling factor !> for the sum of squares. !> \endverbatim !> !> \param[in,out] SUMSQ !> \verbatim !> SUMSQ is REAL -!> On entry, the value sumsq in the equation above. -!> On exit, SUMSQ is overwritten with smsq , the basic sum of -!> squares from which scl has been factored out. +!> On entry, the value sumsq in the equation above. 
+!> On exit, SUMSQ is overwritten by sumsq_out, the basic sum of +!> squares from which scale_out has been factored out. !> \endverbatim ! ! Authors: @@ -130,10 +117,10 @@ !> !> \endverbatim ! -!> \ingroup OTHERauxiliary +!> \ingroup lassq ! ! ===================================================================== -subroutine SLASSQ( n, x, incx, scl, sumsq ) +subroutine SLASSQ( n, x, incx, scale, sumsq ) use LA_CONSTANTS, & only: wp=>sp, zero=>szero, one=>sone, & sbig=>ssbig, ssml=>sssml, tbig=>stbig, tsml=>stsml @@ -145,7 +132,7 @@ subroutine SLASSQ( n, x, incx, scl, sumsq ) ! ! .. Scalar Arguments .. integer :: incx, n - real(wp) :: scl, sumsq + real(wp) :: scale, sumsq ! .. ! .. Array Arguments .. real(wp) :: x(*) @@ -158,10 +145,10 @@ subroutine SLASSQ( n, x, incx, scl, sumsq ) ! ! Quick return if possible ! - if( LA_ISNAN(scl) .or. LA_ISNAN(sumsq) ) return - if( sumsq == zero ) scl = one - if( scl == zero ) then - scl = one + if( LA_ISNAN(scale) .or. LA_ISNAN(sumsq) ) return + if( sumsq == zero ) scale = one + if( scale == zero ) then + scale = one sumsq = zero end if if (n <= 0) then @@ -198,15 +185,27 @@ subroutine SLASSQ( n, x, incx, scl, sumsq ) ! Put the existing sum of squares into one of the accumulators ! if( sumsq > zero ) then - ax = scl*sqrt( sumsq ) + ax = scale*sqrt( sumsq ) if (ax > tbig) then -! We assume scl >= sqrt( TINY*EPS ) / sbig - abig = abig + (scl*sbig)**2 * sumsq + if (scale > one) then + scale = scale * sbig + abig = abig + scale * (scale * sumsq) + else + ! sumsq > tbig^2 => (sbig * (sbig * sumsq)) is representable + abig = abig + scale * (scale * (sbig * (sbig * sumsq))) + end if else if (ax < tsml) then -! We assume scl <= sqrt( HUGE ) / ssml - if (notbig) asml = asml + (scl*ssml)**2 * sumsq + if (notbig) then + if (scale < one) then + scale = scale * ssml + asml = asml + scale * (scale * sumsq) + else + ! 
sumsq < tsml^2 => (ssml * (ssml * sumsq)) is representable + asml = asml + scale * (scale * (ssml * (ssml * sumsq))) + end if + end if else - amed = amed + scl**2 * sumsq + amed = amed + scale * (scale * sumsq) end if end if ! @@ -220,7 +219,7 @@ subroutine SLASSQ( n, x, incx, scl, sumsq ) if (amed > zero .or. LA_ISNAN(amed)) then abig = abig + (amed*sbig)*sbig end if - scl = one / sbig + scale = one / sbig sumsq = abig else if (asml > zero) then ! @@ -236,17 +235,17 @@ subroutine SLASSQ( n, x, incx, scl, sumsq ) ymin = asml ymax = amed end if - scl = one + scale = one sumsq = ymax**2*( one + (ymin/ymax)**2 ) else - scl = one / ssml + scale = one / ssml sumsq = asml end if else ! ! Otherwise all values are mid-range or zero ! - scl = one + scale = one sumsq = amed end if return diff --git a/lapack-netlib/SRC/zlassq.f90 b/lapack-netlib/SRC/zlassq.f90 index 9346dacac..c35214766 100644 --- a/lapack-netlib/SRC/zlassq.f90 +++ b/lapack-netlib/SRC/zlassq.f90 @@ -34,28 +34,15 @@ !> !> \verbatim !> -!> ZLASSQ returns the values scl and smsq such that +!> ZLASSQ returns the values scale_out and sumsq_out such that !> -!> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +!> (scale_out**2)*sumsq_out = x( 1 )**2 +...+ x( n )**2 + (scale**2)*sumsq, !> -!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is !> assumed to be non-negative. !> !> scale and sumsq must be supplied in SCALE and SUMSQ and -!> scl and smsq are overwritten on SCALE and SUMSQ respectively. 
-!> -!> If scale * sqrt( sumsq ) > tbig then -!> we require: scale >= sqrt( TINY*EPS ) / sbig on entry, -!> and if 0 < scale * sqrt( sumsq ) < tsml then -!> we require: scale <= sqrt( HUGE ) / ssml on entry, -!> where -!> tbig -- upper threshold for values whose square is representable; -!> sbig -- scaling constant for big numbers; \see la_constants.f90 -!> tsml -- lower threshold for values whose square is representable; -!> ssml -- scaling constant for small numbers; \see la_constants.f90 -!> and -!> TINY*EPS -- tiniest representable number; -!> HUGE -- biggest representable number. +!> scale_out and sumsq_out are overwritten on SCALE and SUMSQ respectively. !> !> \endverbatim ! @@ -72,7 +59,7 @@ !> \verbatim !> X is DOUBLE COMPLEX array, dimension (1+(N-1)*abs(INCX)) !> The vector for which a scaled sum of squares is computed. -!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. !> \endverbatim !> !> \param[in] INCX @@ -82,24 +69,24 @@ !> If INCX > 0, X(1+(i-1)*INCX) = x(i) for 1 <= i <= n !> If INCX < 0, X(1-(n-i)*INCX) = x(i) for 1 <= i <= n !> If INCX = 0, x isn't a vector so there is no need to call -!> this subroutine. If you call it anyway, it will count x(1) +!> this subroutine. If you call it anyway, it will count x(1) !> in the vector norm N times. !> \endverbatim !> !> \param[in,out] SCALE !> \verbatim !> SCALE is DOUBLE PRECISION -!> On entry, the value scale in the equation above. -!> On exit, SCALE is overwritten with scl , the scaling factor +!> On entry, the value scale in the equation above. +!> On exit, SCALE is overwritten by scale_out, the scaling factor !> for the sum of squares. !> \endverbatim !> !> \param[in,out] SUMSQ !> \verbatim !> SUMSQ is DOUBLE PRECISION -!> On entry, the value sumsq in the equation above. -!> On exit, SUMSQ is overwritten with smsq , the basic sum of -!> squares from which scl has been factored out. +!> On entry, the value sumsq in the equation above. 
+!> On exit, SUMSQ is overwritten by sumsq_out, the basic sum of +!> squares from which scale_out has been factored out. !> \endverbatim ! ! Authors: @@ -130,10 +117,10 @@ !> !> \endverbatim ! -!> \ingroup OTHERauxiliary +!> \ingroup lassq ! ! ===================================================================== -subroutine ZLASSQ( n, x, incx, scl, sumsq ) +subroutine ZLASSQ( n, x, incx, scale, sumsq ) use LA_CONSTANTS, & only: wp=>dp, zero=>dzero, one=>done, & sbig=>dsbig, ssml=>dssml, tbig=>dtbig, tsml=>dtsml @@ -145,7 +132,7 @@ subroutine ZLASSQ( n, x, incx, scl, sumsq ) ! ! .. Scalar Arguments .. integer :: incx, n - real(wp) :: scl, sumsq + real(wp) :: scale, sumsq ! .. ! .. Array Arguments .. complex(wp) :: x(*) @@ -158,10 +145,10 @@ subroutine ZLASSQ( n, x, incx, scl, sumsq ) ! ! Quick return if possible ! - if( LA_ISNAN(scl) .or. LA_ISNAN(sumsq) ) return - if( sumsq == zero ) scl = one - if( scl == zero ) then - scl = one + if( LA_ISNAN(scale) .or. LA_ISNAN(sumsq) ) return + if( sumsq == zero ) scale = one + if( scale == zero ) then + scale = one sumsq = zero end if if (n <= 0) then @@ -207,15 +194,27 @@ subroutine ZLASSQ( n, x, incx, scl, sumsq ) ! Put the existing sum of squares into one of the accumulators ! if( sumsq > zero ) then - ax = scl*sqrt( sumsq ) + ax = scale*sqrt( sumsq ) if (ax > tbig) then -! We assume scl >= sqrt( TINY*EPS ) / sbig - abig = abig + (scl*sbig)**2 * sumsq + if (scale > one) then + scale = scale * sbig + abig = abig + scale * (scale * sumsq) + else + ! sumsq > tbig^2 => (sbig * (sbig * sumsq)) is representable + abig = abig + scale * (scale * (sbig * (sbig * sumsq))) + end if else if (ax < tsml) then -! We assume scl <= sqrt( HUGE ) / ssml - if (notbig) asml = asml + (scl*ssml)**2 * sumsq + if (notbig) then + if (scale < one) then + scale = scale * ssml + asml = asml + scale * (scale * sumsq) + else + ! 
sumsq < tsml^2 => (ssml * (ssml * sumsq)) is representable + asml = asml + scale * (scale * (ssml * (ssml * sumsq))) + end if + end if else - amed = amed + scl**2 * sumsq + amed = amed + scale * (scale * sumsq) end if end if ! @@ -229,7 +228,7 @@ subroutine ZLASSQ( n, x, incx, scl, sumsq ) if (amed > zero .or. LA_ISNAN(amed)) then abig = abig + (amed*sbig)*sbig end if - scl = one / sbig + scale = one / sbig sumsq = abig else if (asml > zero) then ! @@ -245,17 +244,17 @@ subroutine ZLASSQ( n, x, incx, scl, sumsq ) ymin = asml ymax = amed end if - scl = one + scale = one sumsq = ymax**2*( one + (ymin/ymax)**2 ) else - scl = one / ssml + scale = one / ssml sumsq = asml end if else ! ! Otherwise all values are mid-range or zero ! - scl = one + scale = one sumsq = amed end if return From 176cc6348ed3c0415391c42478301430e9bbe031 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 6 Nov 2023 21:55:19 +0100 Subject: [PATCH 102/125] Correct constant ALPHA to match changed algorithm (Reference-LAPACK PR 928) --- lapack-netlib/SRC/cunbdb6.f | 4 ++-- lapack-netlib/SRC/dorbdb6.f | 4 ++-- lapack-netlib/SRC/sorbdb6.f | 4 ++-- lapack-netlib/SRC/zunbdb6.f | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lapack-netlib/SRC/cunbdb6.f b/lapack-netlib/SRC/cunbdb6.f index b93a389d6..cd14d9295 100644 --- a/lapack-netlib/SRC/cunbdb6.f +++ b/lapack-netlib/SRC/cunbdb6.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unbdb6 * * ===================================================================== SUBROUTINE CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, @@ -174,7 +174,7 @@ * * .. Parameters .. 
REAL ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.01E0, REALONE = 1.0E0, + PARAMETER ( ALPHA = 0.1E0, REALONE = 1.0E0, $ REALZERO = 0.0E0 ) COMPLEX NEGONE, ONE, ZERO PARAMETER ( NEGONE = (-1.0E0,0.0E0), ONE = (1.0E0,0.0E0), diff --git a/lapack-netlib/SRC/dorbdb6.f b/lapack-netlib/SRC/dorbdb6.f index 45c8ba8a2..142887684 100644 --- a/lapack-netlib/SRC/dorbdb6.f +++ b/lapack-netlib/SRC/dorbdb6.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unbdb6 * * ===================================================================== SUBROUTINE DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, @@ -174,7 +174,7 @@ * * .. Parameters .. DOUBLE PRECISION ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.01D0, REALONE = 1.0D0, + PARAMETER ( ALPHA = 0.1D0, REALONE = 1.0D0, $ REALZERO = 0.0D0 ) DOUBLE PRECISION NEGONE, ONE, ZERO PARAMETER ( NEGONE = -1.0D0, ONE = 1.0D0, ZERO = 0.0D0 ) diff --git a/lapack-netlib/SRC/sorbdb6.f b/lapack-netlib/SRC/sorbdb6.f index b2449e3be..d320c9e46 100644 --- a/lapack-netlib/SRC/sorbdb6.f +++ b/lapack-netlib/SRC/sorbdb6.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unbdb6 * * ===================================================================== SUBROUTINE SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, @@ -174,7 +174,7 @@ * * .. Parameters .. REAL ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.01E0, REALONE = 1.0E0, + PARAMETER ( ALPHA = 0.1E0, REALONE = 1.0E0, $ REALZERO = 0.0E0 ) REAL NEGONE, ONE, ZERO PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0, ZERO = 0.0E0 ) diff --git a/lapack-netlib/SRC/zunbdb6.f b/lapack-netlib/SRC/zunbdb6.f index ed666e449..ac7fa4be3 100644 --- a/lapack-netlib/SRC/zunbdb6.f +++ b/lapack-netlib/SRC/zunbdb6.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complex16OTHERcomputational +*> \ingroup unbdb6 * * ===================================================================== SUBROUTINE ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, @@ -174,7 +174,7 @@ * * .. Parameters .. DOUBLE PRECISION ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.01D0, REALONE = 1.0D0, + PARAMETER ( ALPHA = 0.1D0, REALONE = 1.0D0, $ REALZERO = 0.0D0 ) COMPLEX*16 NEGONE, ONE, ZERO PARAMETER ( NEGONE = (-1.0D0,0.0D0), ONE = (1.0D0,0.0D0), From 206e76187ed1e7e78d3f93258eb90fa753a9d1d9 Mon Sep 17 00:00:00 2001 From: Chip Kerchner Date: Tue, 7 Nov 2023 18:08:57 -0600 Subject: [PATCH 103/125] Fix FCOMMON_OPT for power. Error out for certain C and Fortran compiler combos in AIX. --- Makefile.power | 23 +++++++++++++++-------- Makefile.system | 4 ++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Makefile.power b/Makefile.power index ada51b2e1..95bada34f 100644 --- a/Makefile.power +++ b/Makefile.power @@ -13,9 +13,9 @@ ifeq ($(CORE), POWER10) ifneq ($(C_COMPILER), PGI) CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math ifeq ($(F_COMPILER), IBM) -FCOMMON_OPT += -O2 -qrecur -qnosave +FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize else -FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math +FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math endif endif endif @@ -38,9 +38,9 @@ CCOMMON_OPT += -fast -Mvect=simd -Mcache_align endif ifneq ($(F_COMPILER), PGI) ifeq ($(F_COMPILER), IBM) -FCOMMON_OPT += -O2 -qrecur -qnosave +FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize else -FCOMMON_OPT += -O2 -frecursive -fno-fast-math +FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9 endif ifeq ($(F_COMPILER), GFORTRAN) @@ -65,15 +65,15 @@ endif ifneq ($(F_COMPILER), PGI) ifeq ($(OSNAME), AIX) ifeq ($(F_COMPILER), IBM) -FCOMMON_OPT += -O2 -qrecur -qnosave +FCOMMON_OPT 
+= -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize else -FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math +FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif else ifeq ($(F_COMPILER), IBM) -FCOMMON_OPT += -O2 -qrecur -qnosave +FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize else -FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math +FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif endif else @@ -135,6 +135,13 @@ endif ifdef BINARY64 +ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX) +$(error Using GCC and XLF on AIX is not a supported combination.) +endif +ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX) +$(error Using Clang and gFortran on AIX is not a supported combination.) +endif + ifeq ($(OSNAME), AIX) ifeq ($(C_COMPILER), GCC) CCOMMON_OPT += -mpowerpc64 -maix64 diff --git a/Makefile.system b/Makefile.system index 30b0ddec2..cb19dea73 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1374,6 +1374,8 @@ ifeq ($(F_COMPILER), SUN) FCOMMON_OPT += -pic else ifeq ($(F_COMPILER), NAG) FCOMMON_OPT += -PIC +else ifeq ($(F_COMPILER), IBM) +FCOMMON_OPT += -qpic=large else FCOMMON_OPT += -fPIC endif @@ -1626,9 +1628,11 @@ override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF) ifeq ($(NEED_PIC), 1) ifeq (,$(findstring PIC,$(FFLAGS))) +ifneq ($(F_COMPILER),IBM) override FFLAGS += -fPIC endif endif +endif #For LAPACK Fortran codes. #Disable -fopenmp for LAPACK Fortran codes on Windows. From 5e31c5708393b4e086052c0e7b8cbc57d7ebede3 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Tue, 7 Nov 2023 20:58:34 -0600 Subject: [PATCH 104/125] Only define __builtin_cpu_is and __builtin_cpu_supports if not present. 
--- driver/others/dynamic_power.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 10a5d64b3..570a87568 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -202,6 +202,7 @@ static int cpuid(void) #ifndef __BUILTIN_CPU_SUPPORTS__ #include +#if defined(__has_builtin) && !__has_builtin(__builtin_cpu_is) static int __builtin_cpu_is(const char *arg) { static int ipinfo = -1; @@ -224,12 +225,15 @@ static int __builtin_cpu_is(const char *arg) } return 0; } +#endif +#if defined(__has_builtin) && !__has_builtin(__builtin_cpu_supports) static int __builtin_cpu_supports(const char *arg) { return 0; } #endif +#endif static gotoblas_t *get_coretype(void) { From 4eecccd49b251be2cb303b67093c4602afb39aec Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Wed, 8 Nov 2023 07:12:21 -0600 Subject: [PATCH 105/125] Fix __builtin_cpu_is for AIX. --- driver/others/dynamic_power.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 570a87568..f0faf2baf 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -202,7 +202,7 @@ static int cpuid(void) #ifndef __BUILTIN_CPU_SUPPORTS__ #include -#if defined(__has_builtin) && !__has_builtin(__builtin_cpu_is) +#if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_is)) static int __builtin_cpu_is(const char *arg) { static int ipinfo = -1; @@ -227,7 +227,7 @@ static int __builtin_cpu_is(const char *arg) } #endif -#if defined(__has_builtin) && !__has_builtin(__builtin_cpu_supports) +#if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_supports)) static int __builtin_cpu_supports(const char *arg) { return 0; From 778e3b746a7217bbafa099133f956753e8355c4b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 10 Nov 2023 11:55:29 +0100 Subject: [PATCH 106/125] Enable autodetection of current AMD 
cpus as their AVX512 Intel counterparts --- cpuid_x86.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cpuid_x86.c b/cpuid_x86.c index fdcead8bd..6cf4d6503 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1660,7 +1660,13 @@ int get_cpuname(void){ else return CPUTYPE_BARCELONA; } - case 10: // Zen3 + case 10: // Zen3/4 +#ifndef NO_AVX512 + if(support_avx512_bf16()) + return CPUTYPE_COOPERLAKE; + if(support_avx512()) + return CPUTYPE_SKYLAKEX; +#endif if(support_avx()) #ifndef NO_AVX2 return CPUTYPE_ZEN; @@ -2438,6 +2444,12 @@ int get_coretype(void){ // Ryzen 2 default: // Matisse,Renoir Ryzen2 models +#ifndef NO_AVX512 + if(support_avx512_bf16()) + return CORE_COOPERLAKE; + if(support_avx512()) + return CORE_SKYLAKEX; +#endif if(support_avx()) #ifndef NO_AVX2 return CORE_ZEN; From 3ad27007fc71d5acb42c24569792146331b0cba3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 10 Nov 2023 15:19:11 +0100 Subject: [PATCH 107/125] rebase --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 5f6643402..b344abcd2 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,11 @@ export NO_LAPACK export C_LAPACK endif +ifeq ($(F_COMPILER),CRAY) +LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS)) +else LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS)) +endif SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test From 1a308a006664b20ae9dbce5e3e69d52e09b44829 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 10 Nov 2023 15:27:46 +0100 Subject: [PATCH 108/125] Move OpenMP dependency handling for clang/gfortran combo --- Makefile.system | 3 +++ f_check | 7 ------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Makefile.system b/Makefile.system index 30b0ddec2..c6c1dc02f 100644 --- a/Makefile.system +++ b/Makefile.system @@ -608,6 +608,9 @@ endif ifeq ($(C_COMPILER), CLANG) CCOMMON_OPT += -fopenmp 
+ifeq ($(F_COMPILER), GFORTRAN) +FEXTRALIB := $(subst -lgomp,-lomp,$(FEXTRALIB)) +endif endif ifeq ($(C_COMPILER), INTEL) diff --git a/f_check b/f_check index 31f4376d0..dac34edee 100755 --- a/f_check +++ b/f_check @@ -373,13 +373,6 @@ if [ -n "$link" ]; then ;; esac - case "$flag" in *-lgomp*) - case "$CC" in *clang*) - flag="-lomp" - ;; - esac - esac - case "$flag" in -l*) case "$flag" in *ibrary*|*gfortranbegin*|*flangmain*|*frtbegin*|*pathfstart*|\ From 1d4ed20c2f47994cba88432de2c5a5ea3c1db58e Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Fri, 10 Nov 2023 15:58:30 +0100 Subject: [PATCH 109/125] Add conditions to all CI jobs to only run on main repo by default This is a follow-up to gh-4271. At the moment, when a contributor pushes the latest `develop` to their own branch to bring their own fork in sync with `main`, or if they push another branch, this triggers 30 CI jobs to run. Most will complete silently and only burn CPU time unnecessarily. If there's a failure, this may result in unexpected failure notifications. And the AWS Graviton3 run won't complete at all and time out, since the Cirun hook will only work when triggered from the main repo. 
--- .github/workflows/arm64_graviton.yml | 1 + .github/workflows/c910v.yml | 1 + .github/workflows/dynamic_arch.yml | 3 +++ .github/workflows/loongarch64.yml | 1 + .github/workflows/mips64.yml | 1 + 5 files changed, 7 insertions(+) diff --git a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml index bcb05047c..9dd0ae589 100644 --- a/.github/workflows/arm64_graviton.yml +++ b/.github/workflows/arm64_graviton.yml @@ -7,6 +7,7 @@ permissions: jobs: build: + if: "github.repository == 'OpenMathLib/OpenBLAS'" runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}" strategy: diff --git a/.github/workflows/c910v.yml b/.github/workflows/c910v.yml index 199304fb1..e6ed08f2c 100644 --- a/.github/workflows/c910v.yml +++ b/.github/workflows/c910v.yml @@ -7,6 +7,7 @@ permissions: jobs: TEST: + if: "github.repository == 'OpenMathLib/OpenBLAS'" runs-on: ubuntu-latest env: xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282 diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml index 0c39bfddf..02429e317 100644 --- a/.github/workflows/dynamic_arch.yml +++ b/.github/workflows/dynamic_arch.yml @@ -7,6 +7,7 @@ permissions: jobs: build: + if: "github.repository == 'OpenMathLib/OpenBLAS'" runs-on: ${{ matrix.os }} strategy: @@ -146,6 +147,7 @@ jobs: msys2: + if: "github.repository == 'OpenMathLib/OpenBLAS'" runs-on: windows-latest strategy: @@ -312,6 +314,7 @@ jobs: cross_build: + if: "github.repository == 'OpenMathLib/OpenBLAS'" runs-on: ubuntu-22.04 strategy: diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml index e0236ca86..fa62d0b41 100644 --- a/.github/workflows/loongarch64.yml +++ b/.github/workflows/loongarch64.yml @@ -4,6 +4,7 @@ on: [push, pull_request] jobs: TEST: + if: "github.repository == 'OpenMathLib/OpenBLAS'" runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/.github/workflows/mips64.yml b/.github/workflows/mips64.yml index 
de7c0c0f3..7f09d4fca 100644 --- a/.github/workflows/mips64.yml +++ b/.github/workflows/mips64.yml @@ -7,6 +7,7 @@ permissions: jobs: TEST: + if: "github.repository == 'OpenMathLib/OpenBLAS'" runs-on: ubuntu-latest strategy: fail-fast: false From 2418a20f1fb89783dc1e198fd0e2bd78ecf43e77 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Fri, 10 Nov 2023 16:05:52 +0100 Subject: [PATCH 110/125] Cancel running CI jobs when new changes are pushed to a PR The `group` expression ensures that the cancel-in-progress behavior is to only cancel if a new commit is pushed to the PR for which the job is running, not other PRs. This is a fairly standard snippet, used also in CI jobs for NumPy and other projects. --- .github/workflows/arm64_graviton.yml | 4 ++++ .github/workflows/c910v.yml | 4 ++++ .github/workflows/dynamic_arch.yml | 4 ++++ .github/workflows/loongarch64.yml | 4 ++++ .github/workflows/mips64.yml | 4 ++++ .github/workflows/nightly-Homebrew-build.yml | 4 ++++ 6 files changed, 24 insertions(+) diff --git a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml index 9dd0ae589..4382510df 100644 --- a/.github/workflows/arm64_graviton.yml +++ b/.github/workflows/arm64_graviton.yml @@ -2,6 +2,10 @@ name: arm64 graviton cirun on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + permissions: contents: read # to fetch code (actions/checkout) diff --git a/.github/workflows/c910v.yml b/.github/workflows/c910v.yml index e6ed08f2c..30cf32b34 100644 --- a/.github/workflows/c910v.yml +++ b/.github/workflows/c910v.yml @@ -2,6 +2,10 @@ name: c910v qemu test on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + permissions: contents: read # to fetch code (actions/checkout) diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml index 02429e317..49721958a 100644 --- 
a/.github/workflows/dynamic_arch.yml +++ b/.github/workflows/dynamic_arch.yml @@ -2,6 +2,10 @@ name: continuous build on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + permissions: contents: read # to fetch code (actions/checkout) diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml index fa62d0b41..4a9bf98b6 100644 --- a/.github/workflows/loongarch64.yml +++ b/.github/workflows/loongarch64.yml @@ -2,6 +2,10 @@ name: loongarch64 qemu test on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + jobs: TEST: if: "github.repository == 'OpenMathLib/OpenBLAS'" diff --git a/.github/workflows/mips64.yml b/.github/workflows/mips64.yml index 7f09d4fca..4686ba713 100644 --- a/.github/workflows/mips64.yml +++ b/.github/workflows/mips64.yml @@ -2,6 +2,10 @@ name: mips64 qemu test on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + permissions: contents: read # to fetch code (actions/checkout) diff --git a/.github/workflows/nightly-Homebrew-build.yml b/.github/workflows/nightly-Homebrew-build.yml index eb315f1d4..ca57fba70 100644 --- a/.github/workflows/nightly-Homebrew-build.yml +++ b/.github/workflows/nightly-Homebrew-build.yml @@ -18,6 +18,10 @@ on: name: Nightly-Homebrew-Build +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + permissions: contents: read # to fetch code (actions/checkout) From 8613632dc53eebc505b56c38af5d46123e2704b3 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Fri, 10 Nov 2023 16:23:44 +0100 Subject: [PATCH 111/125] Trigger AWS Graviton 3 CI job only for develop and release branches --- .github/workflows/arm64_graviton.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml index 4382510df..6928312b5 100644 --- a/.github/workflows/arm64_graviton.yml +++ b/.github/workflows/arm64_graviton.yml @@ -1,6 +1,14 @@ name: arm64 graviton cirun -on: [push, pull_request] +on: + push: + branches: + - develop + - release-** + pull_request: + branches: + - develop + - release-** concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} From 5ffbe646e12fe76682410674f5de4146c87aa973 Mon Sep 17 00:00:00 2001 From: Angelika Schwarz <17718454+angsch@users.noreply.github.com> Date: Sat, 11 Nov 2023 09:19:48 +0100 Subject: [PATCH 112/125] Improve matcopy interface * rows = 0 or cols = 0 is now a legal input and takes quick return path * Follow BLAS/LAPACK convention that the leading dimensions must be at least 1. --- interface/imatcopy.c | 22 ++++++++++++---------- interface/omatcopy.c | 22 ++++++++++++---------- interface/zimatcopy.c | 30 ++++++++++++++++-------------- interface/zomatcopy.c | 30 ++++++++++++++++-------------- 4 files changed, 56 insertions(+), 48 deletions(-) diff --git a/interface/imatcopy.c b/interface/imatcopy.c index 4cf0966cc..6a1ad282c 100644 --- a/interface/imatcopy.c +++ b/interface/imatcopy.c @@ -100,27 +100,29 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, if ( order == BlasColMajor) { - if ( trans == BlasNoTrans && *ldb < *rows ) info = 8; - if ( trans == BlasTrans && *ldb < *cols ) info = 8; + if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 8; + if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 8; } if ( order == BlasRowMajor) { - if ( trans == BlasNoTrans && *ldb < *cols ) info = 8; - if ( trans == BlasTrans && *ldb < *rows ) info = 8; + if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 8; + if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 8; } - if ( order == BlasColMajor && *lda < *rows ) info = 7; - if ( order == BlasRowMajor && *lda < *cols ) info = 7; 
- if ( *cols <= 0 ) info = 4; - if ( *rows <= 0 ) info = 3; - if ( trans < 0 ) info = 2; - if ( order < 0 ) info = 1; + if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7; + if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7; + if ( *cols < 0 ) info = 4; + if ( *rows < 0 ) info = 3; + if ( trans < 0 ) info = 2; + if ( order < 0 ) info = 1; if (info >= 0) { BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } + if ((*rows == 0) || (*cols == 0)) return; + #ifdef NEW_IMATCOPY if ( *lda == *ldb ) { if ( order == BlasColMajor ) diff --git a/interface/omatcopy.c b/interface/omatcopy.c index 59650cfa0..c26446f5c 100644 --- a/interface/omatcopy.c +++ b/interface/omatcopy.c @@ -90,27 +90,29 @@ void CNAME(enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, #endif if ( order == BlasColMajor) { - if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; - if ( trans == BlasTrans && *ldb < *cols ) info = 9; + if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9; + if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9; } if ( order == BlasRowMajor) { - if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; - if ( trans == BlasTrans && *ldb < *rows ) info = 9; + if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9; + if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9; } - if ( order == BlasColMajor && *lda < *rows ) info = 7; - if ( order == BlasRowMajor && *lda < *cols ) info = 7; - if ( *cols <= 0 ) info = 4; - if ( *rows <= 0 ) info = 3; - if ( trans < 0 ) info = 2; - if ( order < 0 ) info = 1; + if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7; + if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7; + if ( *cols < 0 ) info = 4; + if ( *rows < 0 ) info = 3; + if ( trans < 0 ) info = 2; + if ( order < 0 ) info = 1; if (info >= 0) { BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } + if ((*rows == 0) || (*cols == 0)) return; + if ( order == BlasColMajor ) { if ( trans == 
BlasNoTrans ) diff --git a/interface/zimatcopy.c b/interface/zimatcopy.c index b0b32dc87..b66489eb7 100644 --- a/interface/zimatcopy.c +++ b/interface/zimatcopy.c @@ -101,31 +101,33 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, if ( order == BlasColMajor) { - if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; - if ( trans == BlasConj && *ldb < *rows ) info = 9; - if ( trans == BlasTrans && *ldb < *cols ) info = 9; - if ( trans == BlasTransConj && *ldb < *cols ) info = 9; + if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9; + if ( trans == BlasConj && *ldb < MAX(1,*rows) ) info = 9; + if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9; + if ( trans == BlasTransConj && *ldb < MAX(1,*cols) ) info = 9; } if ( order == BlasRowMajor) { - if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; - if ( trans == BlasConj && *ldb < *cols ) info = 9; - if ( trans == BlasTrans && *ldb < *rows ) info = 9; - if ( trans == BlasTransConj && *ldb < *rows ) info = 9; + if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9; + if ( trans == BlasConj && *ldb < MAX(1,*cols) ) info = 9; + if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9; + if ( trans == BlasTransConj && *ldb < MAX(1,*rows) ) info = 9; } - if ( order == BlasColMajor && *lda < *rows ) info = 7; - if ( order == BlasRowMajor && *lda < *cols ) info = 7; - if ( *cols <= 0 ) info = 4; - if ( *rows <= 0 ) info = 3; - if ( trans < 0 ) info = 2; - if ( order < 0 ) info = 1; + if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7; + if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7; + if ( *cols < 0 ) info = 4; + if ( *rows < 0 ) info = 3; + if ( trans < 0 ) info = 2; + if ( order < 0 ) info = 1; if (info >= 0) { BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } + if ((*rows == 0) || (*cols == 0)) return; + #ifdef NEW_IMATCOPY if (*lda == *ldb ) { if ( order == BlasColMajor ) diff --git a/interface/zomatcopy.c 
b/interface/zomatcopy.c index 7345633a2..7121711d8 100644 --- a/interface/zomatcopy.c +++ b/interface/zomatcopy.c @@ -92,31 +92,33 @@ void CNAME(enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, #endif if ( order == BlasColMajor) { - if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; - if ( trans == BlasConj && *ldb < *rows ) info = 9; - if ( trans == BlasTrans && *ldb < *cols ) info = 9; - if ( trans == BlasTransConj && *ldb < *cols ) info = 9; + if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9; + if ( trans == BlasConj && *ldb < MAX(1,*rows) ) info = 9; + if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9; + if ( trans == BlasTransConj && *ldb < MAX(1,*cols) ) info = 9; } if ( order == BlasRowMajor) { - if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; - if ( trans == BlasConj && *ldb < *cols ) info = 9; - if ( trans == BlasTrans && *ldb < *rows ) info = 9; - if ( trans == BlasTransConj && *ldb < *rows ) info = 9; + if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9; + if ( trans == BlasConj && *ldb < MAX(1,*cols) ) info = 9; + if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9; + if ( trans == BlasTransConj && *ldb < MAX(1,*rows) ) info = 9; } - if ( order == BlasColMajor && *lda < *rows ) info = 7; - if ( order == BlasRowMajor && *lda < *cols ) info = 7; - if ( *cols <= 0 ) info = 4; - if ( *rows <= 0 ) info = 3; - if ( trans < 0 ) info = 2; - if ( order < 0 ) info = 1; + if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7; + if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7; + if ( *cols < 0 ) info = 4; + if ( *rows < 0 ) info = 3; + if ( trans < 0 ) info = 2; + if ( order < 0 ) info = 1; if (info >= 0) { BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } + if ((*rows == 0) || (*cols == 0)) return; + if ( order == BlasColMajor ) { From ff6437f2d7954530ca8cd74fb4ea98631ff83398 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 11 Nov 2023 21:30:32 +0100 
Subject: [PATCH 113/125] Add workaround for omp_get_max_threads hanging on FreeBSD with libomp from LLVM14 --- driver/others/blas_server_omp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 3e2179373..fcc0d68ad 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -124,9 +124,18 @@ void openblas_set_num_threads(int num_threads) { } int blas_thread_init(void){ - if(blas_omp_number_max <= 0) - blas_omp_number_max = omp_get_max_threads(); - + +#if defined(__FreeBSD__) && defined(__clang__) +extern int openblas_omp_num_threads_env(); + + if(blas_omp_number_max <= 0) + blas_omp_number_max= openblas_omp_num_threads_env(); + if (blas_omp_number_max <= 0) + blas_omp_number_max=MAX_CPU_NUMBER; +#else + blas_omp_number_max = /omp_get_max_threads(); +#endif + blas_get_cpu_number(); adjust_thread_buffers(); From 9324520d0ebd8f89507f59f1fe7b5d8b0f758915 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 11 Nov 2023 23:14:58 +0100 Subject: [PATCH 114/125] typo fix --- driver/others/blas_server_omp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index fcc0d68ad..f7008fb08 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -133,7 +133,7 @@ extern int openblas_omp_num_threads_env(); if (blas_omp_number_max <= 0) blas_omp_number_max=MAX_CPU_NUMBER; #else - blas_omp_number_max = /omp_get_max_threads(); + blas_omp_number_max = omp_get_max_threads(); #endif blas_get_cpu_number(); From b6144f70ff0d6f7967c8119b068d460d6c5aaf95 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 11 Nov 2023 23:41:18 +0100 Subject: [PATCH 115/125] Change ?GECON to return INFO=1 if RCOND is NaN (Reference-LAPACK PR 926) --- lapack-netlib/SRC/cgecon.f | 39 +++++++++++++++++++++++++++++++------- lapack-netlib/SRC/dgecon.f | 39 
+++++++++++++++++++++++++++++++------- lapack-netlib/SRC/sgecon.f | 39 +++++++++++++++++++++++++++++++------- lapack-netlib/SRC/zgecon.f | 39 +++++++++++++++++++++++++++++++------- 4 files changed, 128 insertions(+), 28 deletions(-) diff --git a/lapack-netlib/SRC/cgecon.f b/lapack-netlib/SRC/cgecon.f index 6f426c2ab..e018b18bb 100644 --- a/lapack-netlib/SRC/cgecon.f +++ b/lapack-netlib/SRC/cgecon.f @@ -105,8 +105,15 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> =-5: if ANORM is NAN or negative. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> NaNs are illegal values for ANORM, and they propagate to +*> the output parameter RCOND. +*> Infinity is illegal for ANORM, and it propagates to the output +*> parameter RCOND as 0. +*> = 1: if RCOND = NaN, or +*> RCOND = Inf, or +*> the computed norm of the inverse of A is 0. +*> In the latter, RCOND = 0 is returned. *> \endverbatim * * Authors: @@ -117,7 +124,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup gecon * * ===================================================================== SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, @@ -147,7 +154,7 @@ LOGICAL ONENRM CHARACTER NORMIN INTEGER IX, KASE, KASE1 - REAL AINVNM, SCALE, SL, SMLNUM, SU + REAL AINVNM, SCALE, SL, SMLNUM, SU, HUGEVAL COMPLEX ZDUM * .. * .. Local Arrays .. @@ -172,6 +179,8 @@ CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. +* + HUGEVAL = SLAMCH( 'Overflow' ) * * Test the input parameters. * @@ -183,7 +192,7 @@ INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO .OR. 
SISNAN( ANORM ) ) THEN + ELSE IF( ANORM.LT.ZERO ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN @@ -199,6 +208,13 @@ RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN + ELSE IF( SISNAN( ANORM ) ) THEN + RCOND = ANORM + INFO = -5 + RETURN + ELSE IF( ANORM.GT.HUGEVAL ) THEN + INFO = -5 + RETURN END IF * SMLNUM = SLAMCH( 'Safe minimum' ) @@ -256,8 +272,17 @@ * * Compute the estimate of the reciprocal condition number. * - IF( AINVNM.NE.ZERO ) - $ RCOND = ( ONE / AINVNM ) / ANORM + IF( AINVNM.NE.ZERO ) THEN + RCOND = ( ONE / AINVNM ) / ANORM + ELSE + INFO = 1 + RETURN + END IF +* +* Check for NaNs and Infs +* + IF( SISNAN( RCOND ) .OR. RCOND.GT.HUGEVAL ) + $ INFO = 1 * 20 CONTINUE RETURN diff --git a/lapack-netlib/SRC/dgecon.f b/lapack-netlib/SRC/dgecon.f index 1ad302ae3..a543eb03a 100644 --- a/lapack-netlib/SRC/dgecon.f +++ b/lapack-netlib/SRC/dgecon.f @@ -105,8 +105,15 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> =-5: if ANORM is NAN or negative. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> NaNs are illegal values for ANORM, and they propagate to +*> the output parameter RCOND. +*> Infinity is illegal for ANORM, and it propagates to the output +*> parameter RCOND as 0. +*> = 1: if RCOND = NaN, or +*> RCOND = Inf, or +*> the computed norm of the inverse of A is 0. +*> In the latter, RCOND = 0 is returned. *> \endverbatim * * Authors: @@ -117,7 +124,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEcomputational +*> \ingroup gecon * * ===================================================================== SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, @@ -147,7 +154,7 @@ LOGICAL ONENRM CHARACTER NORMIN INTEGER IX, KASE, KASE1 - DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU, HUGEVAL * .. * .. Local Arrays .. INTEGER ISAVE( 3 ) @@ -165,6 +172,8 @@ INTRINSIC ABS, MAX * .. * .. 
Executable Statements .. +* + HUGEVAL = DLAMCH( 'Overflow' ) * * Test the input parameters. * @@ -176,7 +185,7 @@ INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO .OR. DISNAN( ANORM ) ) THEN + ELSE IF( ANORM.LT.ZERO ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN @@ -192,6 +201,13 @@ RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN + ELSE IF( DISNAN( ANORM ) ) THEN + RCOND = ANORM + INFO = -5 + RETURN + ELSE IF( ANORM.GT.HUGEVAL ) THEN + INFO = -5 + RETURN END IF * SMLNUM = DLAMCH( 'Safe minimum' ) @@ -248,8 +264,17 @@ * * Compute the estimate of the reciprocal condition number. * - IF( AINVNM.NE.ZERO ) - $ RCOND = ( ONE / AINVNM ) / ANORM + IF( AINVNM.NE.ZERO ) THEN + RCOND = ( ONE / AINVNM ) / ANORM + ELSE + INFO = 1 + RETURN + END IF +* +* Check for NaNs and Infs +* + IF( DISNAN( RCOND ) .OR. RCOND.GT.HUGEVAL ) + $ INFO = 1 * 20 CONTINUE RETURN diff --git a/lapack-netlib/SRC/sgecon.f b/lapack-netlib/SRC/sgecon.f index 86aeea73b..82f463ebb 100644 --- a/lapack-netlib/SRC/sgecon.f +++ b/lapack-netlib/SRC/sgecon.f @@ -105,8 +105,15 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> =-5: if ANORM is NAN or negative. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> NaNs are illegal values for ANORM, and they propagate to +*> the output parameter RCOND. +*> Infinity is illegal for ANORM, and it propagates to the output +*> parameter RCOND as 0. +*> = 1: if RCOND = NaN, or +*> RCOND = Inf, or +*> the computed norm of the inverse of A is 0. +*> In the latter, RCOND = 0 is returned. *> \endverbatim * * Authors: @@ -117,7 +124,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realGEcomputational +*> \ingroup gecon * * ===================================================================== SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, @@ -147,7 +154,7 @@ LOGICAL ONENRM CHARACTER NORMIN INTEGER IX, KASE, KASE1 - REAL AINVNM, SCALE, SL, SMLNUM, SU + REAL AINVNM, SCALE, SL, SMLNUM, SU, HUGEVAL * .. * .. Local Arrays .. INTEGER ISAVE( 3 ) @@ -165,6 +172,8 @@ INTRINSIC ABS, MAX * .. * .. Executable Statements .. +* + HUGEVAL = SLAMCH( 'Overflow' ) * * Test the input parameters. * @@ -176,7 +185,7 @@ INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO .OR. SISNAN( ANORM ) ) THEN + ELSE IF( ANORM.LT.ZERO ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN @@ -192,6 +201,13 @@ RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN + ELSE IF( SISNAN( ANORM ) ) THEN + RCOND = ANORM + INFO = -5 + RETURN + ELSE IF( ANORM.GT.HUGEVAL ) THEN + INFO = -5 + RETURN END IF * SMLNUM = SLAMCH( 'Safe minimum' ) @@ -248,8 +264,17 @@ * * Compute the estimate of the reciprocal condition number. * - IF( AINVNM.NE.ZERO ) - $ RCOND = ( ONE / AINVNM ) / ANORM + IF( AINVNM.NE.ZERO ) THEN + RCOND = ( ONE / AINVNM ) / ANORM + ELSE + INFO = 1 + RETURN + END IF +* +* Check for NaNs and Infs +* + IF( SISNAN( RCOND ) .OR. RCOND.GT.HUGEVAL ) + $ INFO = 1 * 20 CONTINUE RETURN diff --git a/lapack-netlib/SRC/zgecon.f b/lapack-netlib/SRC/zgecon.f index 9cbfe35bc..ef567d7c2 100644 --- a/lapack-netlib/SRC/zgecon.f +++ b/lapack-netlib/SRC/zgecon.f @@ -105,8 +105,15 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> =-5: if ANORM is NAN or negative. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> NaNs are illegal values for ANORM, and they propagate to +*> the output parameter RCOND. +*> Infinity is illegal for ANORM, and it propagates to the output +*> parameter RCOND as 0. 
+*> = 1: if RCOND = NaN, or +*> RCOND = Inf, or +*> the computed norm of the inverse of A is 0. +*> In the latter, RCOND = 0 is returned. *> \endverbatim * * Authors: @@ -117,7 +124,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup gecon * * ===================================================================== SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, @@ -147,7 +154,7 @@ LOGICAL ONENRM CHARACTER NORMIN INTEGER IX, KASE, KASE1 - DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU, HUGEVAL COMPLEX*16 ZDUM * .. * .. Local Arrays .. @@ -172,6 +179,8 @@ CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. +* + HUGEVAL = DLAMCH( 'Overflow' ) * * Test the input parameters. * @@ -183,7 +192,7 @@ INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO .OR. DISNAN( ANORM ) ) THEN + ELSE IF( ANORM.LT.ZERO ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN @@ -199,6 +208,13 @@ RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN + ELSE IF( DISNAN( ANORM ) ) THEN + RCOND = ANORM + INFO = -5 + RETURN + ELSE IF( ANORM.GT.HUGEVAL ) THEN + INFO = -5 + RETURN END IF * SMLNUM = DLAMCH( 'Safe minimum' ) @@ -256,8 +272,17 @@ * * Compute the estimate of the reciprocal condition number. * - IF( AINVNM.NE.ZERO ) - $ RCOND = ( ONE / AINVNM ) / ANORM + IF( AINVNM.NE.ZERO ) THEN + RCOND = ( ONE / AINVNM ) / ANORM + ELSE + INFO = 1 + RETURN + END IF +* +* Check for NaNs and Infs +* + IF( DISNAN( RCOND ) .OR. 
RCOND.GT.HUGEVAL ) + $ INFO = 1 * 20 CONTINUE RETURN From 58427ff74d25952c82d1d1d8edea1afaa94f9fcd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 10:54:39 +0100 Subject: [PATCH 116/125] Deprecate ?GELQS and ?GEQRS from TESTING/LIN (Reference-LAPACK PR 900) (#4307) * Move ?GELQS and ?GEQRS from TESTING/LIN to DEPRECATED (Reference-LAPACK PR 900) * Add f2c-converted versions of ?GELQS and ?GEQRS --- cmake/lapack.cmake | 8 + lapack-netlib/SRC/DEPRECATED/cgelqs.c | 479 +++++++++++++++++ .../{TESTING/LIN => SRC/DEPRECATED}/cgelqs.f | 0 lapack-netlib/SRC/DEPRECATED/cgeqrs.c | 471 +++++++++++++++++ .../{TESTING/LIN => SRC/DEPRECATED}/cgeqrs.f | 0 lapack-netlib/SRC/DEPRECATED/dgelqs.c | 480 +++++++++++++++++ .../{TESTING/LIN => SRC/DEPRECATED}/dgelqs.f | 0 lapack-netlib/SRC/DEPRECATED/dgeqrs.c | 471 +++++++++++++++++ .../{TESTING/LIN => SRC/DEPRECATED}/dgeqrs.f | 0 lapack-netlib/SRC/DEPRECATED/sgelqs.c | 472 +++++++++++++++++ .../{TESTING/LIN => SRC/DEPRECATED}/sgelqs.f | 0 lapack-netlib/SRC/DEPRECATED/sgeqrs.c | 470 +++++++++++++++++ .../{TESTING/LIN => SRC/DEPRECATED}/sgeqrs.f | 0 lapack-netlib/SRC/DEPRECATED/zgelqs.c | 481 ++++++++++++++++++ .../{TESTING/LIN => SRC/DEPRECATED}/zgelqs.f | 0 lapack-netlib/SRC/DEPRECATED/zgeqrs.c | 472 +++++++++++++++++ .../{TESTING/LIN => SRC/DEPRECATED}/zgeqrs.f | 0 lapack-netlib/SRC/Makefile | 12 +- lapack-netlib/TESTING/LIN/CMakeLists.txt | 8 +- lapack-netlib/TESTING/LIN/Makefile | 8 +- lapack-netlib/TESTING/LIN/cchklq.f | 20 +- lapack-netlib/TESTING/LIN/cchkqr.f | 22 +- lapack-netlib/TESTING/LIN/cerrlq.f | 27 +- lapack-netlib/TESTING/LIN/cerrqr.f | 27 +- lapack-netlib/TESTING/LIN/dchklq.f | 20 +- lapack-netlib/TESTING/LIN/dchkqr.f | 22 +- lapack-netlib/TESTING/LIN/derrlq.f | 27 +- lapack-netlib/TESTING/LIN/derrqr.f | 27 +- lapack-netlib/TESTING/LIN/schklq.f | 20 +- lapack-netlib/TESTING/LIN/schkqr.f | 20 +- lapack-netlib/TESTING/LIN/serrlq.f | 27 +- lapack-netlib/TESTING/LIN/serrqr.f | 27 +- 
lapack-netlib/TESTING/LIN/zchklq.f | 20 +- lapack-netlib/TESTING/LIN/zchkqr.f | 20 +- lapack-netlib/TESTING/LIN/zerrlq.f | 27 +- lapack-netlib/TESTING/LIN/zerrqr.f | 27 +- 36 files changed, 3934 insertions(+), 278 deletions(-) create mode 100644 lapack-netlib/SRC/DEPRECATED/cgelqs.c rename lapack-netlib/{TESTING/LIN => SRC/DEPRECATED}/cgelqs.f (100%) create mode 100644 lapack-netlib/SRC/DEPRECATED/cgeqrs.c rename lapack-netlib/{TESTING/LIN => SRC/DEPRECATED}/cgeqrs.f (100%) create mode 100644 lapack-netlib/SRC/DEPRECATED/dgelqs.c rename lapack-netlib/{TESTING/LIN => SRC/DEPRECATED}/dgelqs.f (100%) create mode 100644 lapack-netlib/SRC/DEPRECATED/dgeqrs.c rename lapack-netlib/{TESTING/LIN => SRC/DEPRECATED}/dgeqrs.f (100%) create mode 100644 lapack-netlib/SRC/DEPRECATED/sgelqs.c rename lapack-netlib/{TESTING/LIN => SRC/DEPRECATED}/sgelqs.f (100%) create mode 100644 lapack-netlib/SRC/DEPRECATED/sgeqrs.c rename lapack-netlib/{TESTING/LIN => SRC/DEPRECATED}/sgeqrs.f (100%) create mode 100644 lapack-netlib/SRC/DEPRECATED/zgelqs.c rename lapack-netlib/{TESTING/LIN => SRC/DEPRECATED}/zgelqs.f (100%) create mode 100644 lapack-netlib/SRC/DEPRECATED/zgeqrs.c rename lapack-netlib/{TESTING/LIN => SRC/DEPRECATED}/zgeqrs.f (100%) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 5c6290484..22476f561 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -438,15 +438,19 @@ endif() if(BUILD_LAPACK_DEPRECATED) list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f + DEPRECATED/sgelqs.f DEPRECATED/sgeqrs.f DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f) list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f + DEPRECATED/dgelqs.f DEPRECATED/dgeqrs.f DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f) list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f + DEPRECATED/cgelqs.f 
DEPRECATED/cgeqrs.f DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f) list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f + DEPRECATED/zgelqs.f DEPRECATED/zgeqrs.f DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f) message(STATUS "Building deprecated routines") @@ -935,15 +939,19 @@ endif() if(BUILD_LAPACK_DEPRECATED) list(APPEND SLASRC DEPRECATED/sgegs.c DEPRECATED/sgegv.c + DEPRECATED/sgelqs.c DEPRECATED/sgeqrs.c DEPRECATED/sgeqpf.c DEPRECATED/sgelsx.c DEPRECATED/sggsvd.c DEPRECATED/sggsvp.c DEPRECATED/slahrd.c DEPRECATED/slatzm.c DEPRECATED/stzrqf.c) list(APPEND DLASRC DEPRECATED/dgegs.c DEPRECATED/dgegv.c + DEPRECATED/dgelqs.c DEPRECATED/dgeqrs.c DEPRECATED/dgeqpf.c DEPRECATED/dgelsx.c DEPRECATED/dggsvd.c DEPRECATED/dggsvp.c DEPRECATED/dlahrd.c DEPRECATED/dlatzm.c DEPRECATED/dtzrqf.c) list(APPEND CLASRC DEPRECATED/cgegs.c DEPRECATED/cgegv.c + DEPRECATED/cgelqs.c DEPRECATED/cgeqrs.c DEPRECATED/cgeqpf.c DEPRECATED/cgelsx.c DEPRECATED/cggsvd.c DEPRECATED/cggsvp.c DEPRECATED/clahrd.c DEPRECATED/clatzm.c DEPRECATED/ctzrqf.c) list(APPEND ZLASRC DEPRECATED/zgegs.c DEPRECATED/zgegv.c + DEPRECATED/zgelqs.c DEPRECATED/zgeqrs.c DEPRECATED/zgeqpf.c DEPRECATED/zgelsx.c DEPRECATED/zggsvd.c DEPRECATED/zggsvp.c DEPRECATED/zlahrd.c DEPRECATED/zlatzm.c DEPRECATED/ztzrqf.c) message(STATUS "Building deprecated routines") diff --git a/lapack-netlib/SRC/DEPRECATED/cgelqs.c b/lapack-netlib/SRC/DEPRECATED/cgelqs.c new file mode 100644 index 000000000..ee6d56119 --- /dev/null +++ b/lapack-netlib/SRC/DEPRECATED/cgelqs.c @@ -0,0 +1,479 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long 
BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct 
+{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? 
(a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* -- translated by f2c (version 20000121). 
+ You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static complex c_b1 = {0.f,0.f}; +static complex c_b2 = {1.f,0.f}; + +/* > \brief \b CGELQS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Compute a minimum-norm solution */ +/* > min || A*X - B || */ +/* > using the LQ factorization */ +/* > A = L*Q */ +/* > computed by CGELQF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > Details of the LQ factorization of the original matrix A as */ +/* > returned by CGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (M) */ +/* > Details of the orthogonal matrix Q.
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the m-by-nrhs right hand side matrix B. */ +/* > On exit, the n-by-nrhs solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK must be at least NRHS, */ +/* > and should be at least NRHS*NB, where NB is the block size */ +/* > for this environment. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. 
*/ + +/* > \ingroup complex_lin */ + +/* ===================================================================== */ +/* Subroutine */ int cgelqs_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *tau, complex *b, integer *ldb, complex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *), cunmlq_(char *, char + *, integer *, integer *, integer *, complex *, integer *, complex + *, complex *, integer *, complex *, integer *, integer *); + + +/* -- LAPACK test routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *m > *n) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGELQS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0 || *m == 0) { + return 0; + } + +/* Solve L*X = B(1:m,:) */ + + ctrsm_("Left", "Lower", "No transpose", "Non-unit", m, nrhs, &c_b2, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Set B(m+1:n,:) to zero */ + + if (*m < *n) { + i__1 = *n - *m; + claset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[*m + 1 + b_dim1], ldb); + } + +/* B := Q' * B */ + + cunmlq_("Left", "Conjugate transpose", n, nrhs, m, &a[a_offset], lda, & + tau[1], &b[b_offset], ldb, &work[1], lwork, info); + + return 0; + +/* End of CGELQS */ + +} /* cgelqs_ */ + diff --git a/lapack-netlib/TESTING/LIN/cgelqs.f b/lapack-netlib/SRC/DEPRECATED/cgelqs.f similarity index 100% rename from lapack-netlib/TESTING/LIN/cgelqs.f rename to lapack-netlib/SRC/DEPRECATED/cgelqs.f diff --git a/lapack-netlib/SRC/DEPRECATED/cgeqrs.c b/lapack-netlib/SRC/DEPRECATED/cgeqrs.c new file mode 100644 index 000000000..c71b8af67 --- /dev/null +++ b/lapack-netlib/SRC/DEPRECATED/cgeqrs.c @@ -0,0 +1,471 @@ +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <complex.h> +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x)
+#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; 
+ ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? 
(a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* -- translated by f2c (version 20000121). 
+ You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static complex c_b1 = {1.f,0.f}; + +/* > \brief \b CGEQRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGEQRS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Solve the least squares problem */ +/* > min || A*X - B || */ +/* > using the QR factorization */ +/* > A = Q*R */ +/* > computed by CGEQRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > Details of the QR factorization of the original matrix A as */ +/* > returned by CGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (N) */ +/* > Details of the orthogonal matrix Q.
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the m-by-nrhs right hand side matrix B. */ +/* > On exit, the n-by-nrhs solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK must be at least NRHS, */ +/* > and should be at least NRHS*NB, where NB is the block size */ +/* > for this environment. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. 
*/ + +/* > \ingroup complex_lin */ + +/* ===================================================================== */ +/* Subroutine */ int cgeqrs_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *tau, complex *b, integer *ldb, complex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), xerbla_(char *, + integer *), cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + + +/* -- LAPACK test routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*m)) { + *info = -8; + } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEQRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0 || *m == 0) { + return 0; + } + +/* B := Q' * B */ + + cunmqr_("Left", "Conjugate transpose", m, nrhs, n, &a[a_offset], lda, & + tau[1], &b[b_offset], ldb, &work[1], lwork, info); + +/* Solve R*X = B(1:n,:) */ + + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b1, &a[ + a_offset], lda, &b[b_offset], ldb); + + return 0; + +/* End of CGEQRS */ + +} /* cgeqrs_ */ + diff --git a/lapack-netlib/TESTING/LIN/cgeqrs.f b/lapack-netlib/SRC/DEPRECATED/cgeqrs.f similarity index 100% rename from lapack-netlib/TESTING/LIN/cgeqrs.f rename to lapack-netlib/SRC/DEPRECATED/cgeqrs.f diff --git a/lapack-netlib/SRC/DEPRECATED/dgelqs.c b/lapack-netlib/SRC/DEPRECATED/dgelqs.c new file mode 100644 index 000000000..e3cf1e029 --- /dev/null +++ b/lapack-netlib/SRC/DEPRECATED/dgelqs.c @@ -0,0 +1,480 @@ +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <complex.h> +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef
unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; 
/*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? 
(a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* -- translated by f2c (version 20000121). 
+ You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static doublereal c_b7 = 1.; +static doublereal c_b9 = 0.; + +/* > \brief \b DGELQS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAU( * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Compute a minimum-norm solution */ +/* > min || A*X - B || */ +/* > using the LQ factorization */ +/* > A = L*Q */ +/* > computed by DGELQF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > Details of the LQ factorization of the original matrix A as */ +/* > returned by DGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (M) */ +/* > Details of the orthogonal matrix Q. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the m-by-nrhs right hand side matrix B. */ +/* > On exit, the n-by-nrhs solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK must be at least NRHS, */ +/* > and should be at least NRHS*NB, where NB is the block size */ +/* > for this environment. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. 
*/ + +/* > \ingroup double_lin */ + +/* ===================================================================== */ +/* Subroutine */ int dgelqs_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *tau, doublereal *b, integer * + ldb, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *), dormlq_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + +/* Every path returns 0; the outcome is reported to the caller through *info. */ +/* -- LAPACK test routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input parameters. 
*/ + + /* Parameter adjustments: rebase a, b, tau and work so the body can use 1-based Fortran indices */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *m > *n) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else if (*lwork < 1 || (*lwork < *nrhs && *m > 0 && *n > 0)) { /* lwork >= nrhs is only demanded when m > 0 and n > 0 */ + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0 || *m == 0) { + return 0; + } + +/* Solve L*X = B(1:m,:) */ + + dtrsm_("Left", "Lower", "No transpose", "Non-unit", m, nrhs, &c_b7, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Set B(m+1:n,:) to zero */ + + if (*m < *n) { + i__1 = *n - *m; + dlaset_("Full", &i__1, nrhs, &c_b9, &c_b9, &b[*m + 1 + b_dim1], ldb); + } + +/* B := Q' * B; dormlq_ is handed info and may overwrite it */ + + dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &tau[1], &b[ + b_offset], ldb, &work[1], lwork, info); + + return 0; + +/* End of DGELQS */ + +} /* dgelqs_ */ + diff --git a/lapack-netlib/TESTING/LIN/dgelqs.f b/lapack-netlib/SRC/DEPRECATED/dgelqs.f similarity index 100% rename from lapack-netlib/TESTING/LIN/dgelqs.f rename to lapack-netlib/SRC/DEPRECATED/dgelqs.f diff --git a/lapack-netlib/SRC/DEPRECATED/dgeqrs.c b/lapack-netlib/SRC/DEPRECATED/dgeqrs.c new file mode 100644 index 000000000..70236738a --- /dev/null +++ b/lapack-netlib/SRC/DEPRECATED/dgeqrs.c @@ -0,0 +1,471 @@ +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <complex.h> +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else 
+#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif +/* Fortran INTEGER maps to blasint: int by default, BLASLONG when LAPACK_ILP64 is defined */ +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER /* Cf/Cd load an f2c complex struct into a native complex value; _pCf/_pCd reinterpret the pointer */ +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + 
ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? 
(a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccosf(Cf(Z)); } /* float variant, matching c_exp/c_log/c_sin */ +#ifdef _MSC_VER /* NOTE(review): these MSVC c_div/z_div divide component-wise and assign into the temporary returned by Cf()/Cd() - confirm before use */ +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* assumes B is a doublecomplex* like A - confirm at call sites */ +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) (ceil(w)) /* parenthesized expression; a brace group is only valid as an initializer */ +#define myhuge_(w) (HUGE_VAL) /* parenthesized expression; a brace group is only valid as an initializer */ +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* -- translated by f2c (version 20000121). 
+ You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static doublereal c_b9 = 1.; + +/* > \brief \b DGEQRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQRS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAU( * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Solve the least squares problem */ +/* > min || A*X - B || */ +/* > using the QR factorization */ +/* > A = Q*R */ +/* > computed by DGEQRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > Details of the QR factorization of the original matrix A as */ +/* > returned by DGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the orthogonal matrix Q. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the m-by-nrhs right hand side matrix B. */ +/* > On exit, the n-by-nrhs solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK must be at least NRHS, */ +/* > and should be at least NRHS*NB, where NB is the block size */ +/* > for this environment. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. 
*/ + +/* > \ingroup double_lin */ + +/* ===================================================================== */ +/* Subroutine */ int dgeqrs_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *tau, doublereal *b, integer * + ldb, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_( + char *, integer *), dormqr_(char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + +/* Every path returns 0; the outcome is reported to the caller through *info. */ +/* -- LAPACK test routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. 
*/ + + /* Parameter adjustments: rebase a, b, tau and work so the body can use 1-based Fortran indices */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*m)) { + *info = -8; + } else if (*lwork < 1 || (*lwork < *nrhs && *m > 0 && *n > 0)) { /* lwork >= nrhs is only demanded when m > 0 and n > 0 */ + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0 || *m == 0) { + return 0; + } + +/* B := Q' * B; dormqr_ is handed info and may overwrite it */ + + dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &tau[1], &b[ + b_offset], ldb, &work[1], lwork, info); + +/* Solve R*X = B(1:n,:) */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b9, &a[ + a_offset], lda, &b[b_offset], ldb); + + return 0; + +/* End of DGEQRS */ + +} /* dgeqrs_ */ + diff --git a/lapack-netlib/TESTING/LIN/dgeqrs.f b/lapack-netlib/SRC/DEPRECATED/dgeqrs.f similarity index 100% rename from lapack-netlib/TESTING/LIN/dgeqrs.f rename to lapack-netlib/SRC/DEPRECATED/dgeqrs.f diff --git a/lapack-netlib/SRC/DEPRECATED/sgelqs.c b/lapack-netlib/SRC/DEPRECATED/sgelqs.c new file mode 100644 index 000000000..03034b0dc --- /dev/null +++ b/lapack-netlib/SRC/DEPRECATED/sgelqs.c @@ -0,0 +1,472 @@ +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <complex.h> +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int 
uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER /* Cf/Cd load an f2c complex struct into a native complex value; _pCf/_pCd reinterpret the pointer */ +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in 
standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? 
(a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccosf(Cf(Z)); } /* float variant, matching c_exp/c_log/c_sin */ +#ifdef _MSC_VER /* NOTE(review): these MSVC c_div/z_div divide component-wise and assign into the temporary returned by Cf()/Cd() - confirm before use */ +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* assumes B is a doublecomplex* like A - confirm at call sites */ +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) (ceil(w)) /* parenthesized expression; a brace group is only valid as an initializer */ +#define myhuge_(w) (HUGE_VAL) /* parenthesized expression; a brace group is only valid as an initializer */ +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* Table of constant values */ + +static real c_b7 = 1.f; +static real c_b9 = 0.f; + +/* > \brief \b SGELQS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation 
available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* REAL A( LDA, * ), B( LDB, * ), TAU( * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Compute a minimum-norm solution */ +/* > min || A*X - B || */ +/* > using the LQ factorization */ +/* > A = L*Q */ +/* > computed by SGELQF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > Details of the LQ factorization of the original matrix A as */ +/* > returned by SGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (M) */ +/* > Details of the orthogonal matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the m-by-nrhs right hand side matrix B. */ +/* > On exit, the n-by-nrhs solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= N. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK must be at least NRHS, */ +/* > and should be at least NRHS*NB, where NB is the block size */ +/* > for this environment. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup single_lin */ + +/* ===================================================================== */ +/* Subroutine */ int sgelqs_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *tau, real *b, integer *ldb, real *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), sormlq_(char *, char *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, integer *); + +/* Every path returns 0; the outcome is reported to the caller through *info. */ +/* -- LAPACK test routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input parameters. 
*/ + + /* Parameter adjustments: rebase a, b, tau and work so the body can use 1-based Fortran indices */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *m > *n) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else if (*lwork < 1 || (*lwork < *nrhs && *m > 0 && *n > 0)) { /* lwork >= nrhs is only demanded when m > 0 and n > 0 */ + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0 || *m == 0) { + return 0; + } + +/* Solve L*X = B(1:m,:) */ + + strsm_("Left", "Lower", "No transpose", "Non-unit", m, nrhs, &c_b7, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Set B(m+1:n,:) to zero */ + + if (*m < *n) { + i__1 = *n - *m; + slaset_("Full", &i__1, nrhs, &c_b9, &c_b9, &b[*m + 1 + b_dim1], ldb); + } + +/* B := Q' * B; sormlq_ is handed info and may overwrite it */ + + sormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &tau[1], &b[ + b_offset], ldb, &work[1], lwork, info); + + return 0; + +/* End of SGELQS */ + +} /* sgelqs_ */ + diff --git a/lapack-netlib/TESTING/LIN/sgelqs.f b/lapack-netlib/SRC/DEPRECATED/sgelqs.f similarity index 100% rename from lapack-netlib/TESTING/LIN/sgelqs.f rename to lapack-netlib/SRC/DEPRECATED/sgelqs.f diff --git a/lapack-netlib/SRC/DEPRECATED/sgeqrs.c b/lapack-netlib/SRC/DEPRECATED/sgeqrs.c new file mode 100644 index 000000000..b593d0dc9 --- /dev/null +++ b/lapack-netlib/SRC/DEPRECATED/sgeqrs.c @@ -0,0 +1,470 @@ +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <complex.h> +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else 
+#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + 
ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? 
(a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* -- translated by f2c (version 20000121). 
+ You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static real c_b9 = 1.f; + +/* > \brief \b SGEQRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQRS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* REAL A( LDA, * ), B( LDB, * ), TAU( * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Solve the least squares problem */ +/* > min || A*X - B || */ +/* > using the QR factorization */ +/* > A = Q*R */ +/* > computed by SGEQRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > Details of the QR factorization of the original matrix A as */ +/* > returned by SGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (N) */ +/* > Details of the orthogonal matrix Q. 
*/
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* > B is REAL array, dimension (LDB,NRHS) */
+/* > On entry, the m-by-nrhs right hand side matrix B. */
+/* > On exit, the n-by-nrhs solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* > LDB is INTEGER */
+/* > The leading dimension of the array B. LDB >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* > WORK is REAL array, dimension (LWORK) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* > LWORK is INTEGER */
+/* > The length of the array WORK. LWORK must be at least 1, and at */
+/* > least NRHS when M > 0 and N > 0; it should be at least NRHS*NB, */
+/* > where NB is the block size for this environment. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* > INFO is INTEGER */
+/* > = 0: successful exit */
+/* > < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/* Authors: */
+/* ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup single_lin */
+
+/* ===================================================================== */
+/* Subroutine */ int sgeqrs_(integer *m, integer *n, integer *nrhs, real *a,
+        integer *lda, real *tau, real *b, integer *ldb, real *work, integer *
+        lwork, integer *info)
+{
+    /* System generated locals: leading dimensions, index offsets, scratch */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables: external BLAS/LAPACK routines called below */
+    extern /* Subroutine */ int strsm_(char *, char *, char *, char *,
+            integer *, integer *, real *, real *, integer *, real *, integer *
+            ), xerbla_(char *, integer *), sormqr_(char *, char *, integer *, integer *, integer *,
+            real *, integer *, real *, real *, integer *, real *, integer *,
+            integer *);
+
+
+/* -- LAPACK test routine -- */
+/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
+/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+
+
+/* ===================================================================== */
+
+
+/* Test the input arguments. */
+
+    /* Parameter adjustments: rebase the array pointers so 1-based Fortran subscripts apply */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --work;
+
+    /* Function Body: INFO = -i flags the i-th argument; the first failing test wins */
+    *info = 0;
+    if (*m < 0) {
+        *info = -1;
+    } else if (*n < 0 || *n > *m) {
+        *info = -2;
+    } else if (*nrhs < 0) {
+        *info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+        *info = -5;
+    } else if (*ldb < f2cmax(1,*m)) {
+        *info = -8;
+    } else if (*lwork < 1 || (*lwork < *nrhs && *m > 0 && *n > 0)) {
+        *info = -10; /* LWORK >= 1 always; LWORK >= NRHS only for a non-empty problem */
+    }
+    if (*info != 0) {
+        i__1 = -(*info); /* positive position of the offending argument */
+        xerbla_("SGEQRS", &i__1);
+        return 0;
+    }
+
+/* Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0 || *m == 0) {
+        return 0;
+    }
+
+/* B := Q' * B (SORMQR reports its own status through INFO) */
+
+    sormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &tau[1], &b[
+            b_offset], ldb, &work[1], lwork, info);
+
+/* Solve R*X = B(1:n,:); &a[a_offset] and &b[b_offset] address A(1,1) and B(1,1) */
+
+    strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b9, &a[
+            a_offset], lda, &b[b_offset], ldb);
+
+    return 0;
+
+/* End of SGEQRS */
+
+} /* sgeqrs_ */
+
diff --git a/lapack-netlib/TESTING/LIN/sgeqrs.f b/lapack-netlib/SRC/DEPRECATED/sgeqrs.f
similarity index 100%
rename from lapack-netlib/TESTING/LIN/sgeqrs.f
rename to lapack-netlib/SRC/DEPRECATED/sgeqrs.f
diff --git a/lapack-netlib/SRC/DEPRECATED/zgelqs.c b/lapack-netlib/SRC/DEPRECATED/zgelqs.c
new file mode 100644
index 000000000..b77ba906a
--- /dev/null
+++ b/lapack-netlib/SRC/DEPRECATED/zgelqs.c
@@ -0,0 +1,481 @@
+#include
+#include
+#include
+#include
+#include
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG
blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + 
+/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? 
(a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* -- translated by f2c (version 20000121). 
+ You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static doublecomplex c_b1 = {0.,0.}; +static doublecomplex c_b2 = {1.,0.}; + +/* > \brief \b ZGELQS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), TAU( * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Compute a minimum-norm solution */ +/* > min || A*X - B || */ +/* > using the LQ factorization */ +/* > A = L*Q */ +/* > computed by ZGELQF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > Details of the LQ factorization of the original matrix A as */ +/* > returned by ZGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (M) */ +/* > Details of the unitary matrix Q. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the m-by-nrhs right hand side matrix B. */ +/* > On exit, the n-by-nrhs solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK must be at least NRHS, */ +/* > and should be at least NRHS*NB, where NB is the block size */ +/* > for this environment. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. 
*/ + +/* > \ingroup complex16_lin */ + +/* ===================================================================== */ +/* Subroutine */ int zgelqs_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *), zlaset_(char *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *), zunmlq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* -- LAPACK test routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input parameters. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *m > *n) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELQS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0 || *m == 0) { + return 0; + } + +/* Solve L*X = B(1:m,:) */ + + ztrsm_("Left", "Lower", "No transpose", "Non-unit", m, nrhs, &c_b2, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Set B(m+1:n,:) to zero */ + + if (*m < *n) { + i__1 = *n - *m; + zlaset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[*m + 1 + b_dim1], ldb); + } + +/* B := Q' * B */ + + zunmlq_("Left", "Conjugate transpose", n, nrhs, m, &a[a_offset], lda, & + tau[1], &b[b_offset], ldb, &work[1], lwork, info); + + return 0; + +/* End of ZGELQS */ + +} /* zgelqs_ */ + diff --git a/lapack-netlib/TESTING/LIN/zgelqs.f b/lapack-netlib/SRC/DEPRECATED/zgelqs.f similarity index 100% rename from lapack-netlib/TESTING/LIN/zgelqs.f rename to lapack-netlib/SRC/DEPRECATED/zgelqs.f diff --git a/lapack-netlib/SRC/DEPRECATED/zgeqrs.c b/lapack-netlib/SRC/DEPRECATED/zgeqrs.c new file mode 100644 index 000000000..3e8f3cce7 --- /dev/null +++ b/lapack-netlib/SRC/DEPRECATED/zgeqrs.c @@ -0,0 +1,472 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) 
+#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; 
+ ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? 
(a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* -- translated by f2c (version 20000121). 
+ You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static doublecomplex c_b1 = {1.,0.}; + +/* > \brief \b ZGEQRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGEQRS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), TAU( * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Solve the least squares problem */ +/* > min || A*X - B || */ +/* > using the QR factorization */ +/* > A = Q*R */ +/* > computed by ZGEQRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > Details of the QR factorization of the original matrix A as */ +/* > returned by ZGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N) */ +/* > Details of the orthogonal matrix Q. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the m-by-nrhs right hand side matrix B. */ +/* > On exit, the n-by-nrhs solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK must be at least NRHS, */ +/* > and should be at least NRHS*NB, where NB is the block size */ +/* > for this environment. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. 
*/ + +/* > \ingroup complex16_lin */ + +/* ===================================================================== */ +/* Subroutine */ int zgeqrs_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info) +{ /* Validates the arguments, applies the conjugate transpose of Q to b through zunmqr_, then solves with the upper triangle stored in a through ztrsm_. Every path returns 0; status is reported only through *info. */ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *), zunmqr_(char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + + +/* -- LAPACK test routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. 
*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; /* rebase the pointer so that a[a_offset] is the first element supplied by the caller (1-based row and column indexing) */ + --tau; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; /* after the decrements, tau[1] and work[1] are the first elements supplied by the caller */ + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*m)) { + *info = -8; + } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) { /* && binds tighter than ||: lwork >= nrhs is enforced only when both m and n are positive */ + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); /* xerbla_ is given the positive position of the rejected argument */ + xerbla_("ZGEQRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0 || *m == 0) { + return 0; + } + +/* B := Q' * B */ + + zunmqr_("Left", "Conjugate transpose", m, nrhs, n, &a[a_offset], lda, & + tau[1], &b[b_offset], ldb, &work[1], lwork, info); /* info is handed on to zunmqr_; its value is not re-checked before the triangular solve below */ + +/* Solve R*X = B(1:n,:) */ + + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b1, &a[ + a_offset], lda, &b[b_offset], ldb); /* c_b1 is the file-level constant {1.,0.}; presumably the alpha scale factor of ztrsm_ - confirm against the BLAS reference */ + + return 0; + +/* End of ZGEQRS */ + +} /* zgeqrs_ */ + diff --git a/lapack-netlib/TESTING/LIN/zgeqrs.f b/lapack-netlib/SRC/DEPRECATED/zgeqrs.f similarity index 100% rename from lapack-netlib/TESTING/LIN/zgeqrs.f rename to lapack-netlib/SRC/DEPRECATED/zgeqrs.f diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 26314c4df..448fbd8df 100644 --- a/lapack-netlib/SRC/Makefile +++ 
b/lapack-netlib/SRC/Makefile @@ -544,26 +544,30 @@ endif ifeq ($(BUILD_COMPLEX),1) CDEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ DEPRECATED/cgeqpf.o DEPRECATED/cggsvd.o DEPRECATED/cggsvp.o \ - DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o + DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o \ + DEPRECATED/cgelqs.o DEPRECATED/cgeqrs.o endif ifeq ($(BUILD_DOUBLE),1) DDEPRECSRC = \ DEPRECATED/dgegs.o DEPRECATED/dgegv.o DEPRECATED/dgelsx.o \ DEPRECATED/dgeqpf.o DEPRECATED/dggsvd.o DEPRECATED/dggsvp.o \ - DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o 
DEPRECATED/dtzrqf.o + DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o \ + DEPRECATED/dgelqs.o DEPRECATED/dgeqrs.o endif ifeq ($(BUILD_SINGLE),1) SDEPRECSRC = \ DEPRECATED/sgegs.o DEPRECATED/sgegv.o DEPRECATED/sgelsx.o \ DEPRECATED/sgeqpf.o DEPRECATED/sggsvd.o DEPRECATED/sggsvp.o \ - DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o + DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o \ + DEPRECATED/sgelqs.o DEPRECATED/sgeqrs.o endif ifeq ($(BUILD_COMPLEX16),1) ZDEPRECSRC = \ DEPRECATED/zgegs.o DEPRECATED/zgegv.o DEPRECATED/zgelsx.o \ DEPRECATED/zgeqpf.o DEPRECATED/zggsvd.o DEPRECATED/zggsvp.o \ - DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o + DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o \ + DEPRECATED/zgelqs.o DEPRECATED/zgeqrs.o endif # filter out optimized codes from OpenBLAS diff --git a/lapack-netlib/TESTING/LIN/CMakeLists.txt b/lapack-netlib/TESTING/LIN/CMakeLists.txt index fc55b8a96..676857a80 100644 --- a/lapack-netlib/TESTING/LIN/CMakeLists.txt +++ b/lapack-netlib/TESTING/LIN/CMakeLists.txt @@ -20,7 +20,7 @@ set(SLINTST schkaa.F serrgt.f serrlq.f serrls.f serrps.f serrql.f serrqp.f serrqr.f serrrq.f serrtr.f serrtz.f - sgbt01.f sgbt02.f sgbt05.f sgelqs.f sgeqls.f sgeqrs.f + sgbt01.f sgbt02.f sgbt05.f sgeqls.f sgerqs.f sget01.f sget02.f sget03.f sget04.f sget06.f sget07.f sgtt01.f sgtt02.f sgtt05.f slaptm.f slarhs.f slatb4.f slatb5.f slattb.f slattp.f @@ -70,7 +70,7 @@ set(CLINTST cchkaa.F cerrgt.f cerrlq.f cerrls.f cerrps.f cerrql.f cerrqp.f cerrqr.f cerrrq.f cerrtr.f cerrtz.f - cgbt01.f cgbt02.f cgbt05.f cgelqs.f cgeqls.f cgeqrs.f + cgbt01.f cgbt02.f cgbt05.f cgeqls.f cgerqs.f cget01.f cget02.f cget03.f cget04.f cget07.f cgtt01.f cgtt02.f cgtt05.f chet01.f chet01_rook.f chet01_3.f @@ -121,7 +121,7 @@ set(DLINTST dchkaa.F derrgt.f derrlq.f derrls.f derrps.f derrql.f derrqp.f derrqr.f derrrq.f derrtr.f derrtz.f - dgbt01.f dgbt02.f dgbt05.f dgelqs.f dgeqls.f dgeqrs.f + dgbt01.f dgbt02.f dgbt05.f 
dgeqls.f dgerqs.f dget01.f dget02.f dget03.f dget04.f dget06.f dget07.f dgtt01.f dgtt02.f dgtt05.f dlaptm.f dlarhs.f dlatb4.f dlatb5.f dlattb.f dlattp.f @@ -172,7 +172,7 @@ set(ZLINTST zchkaa.F zerrgt.f zerrlq.f zerrls.f zerrps.f zerrql.f zerrqp.f zerrqr.f zerrrq.f zerrtr.f zerrtz.f - zgbt01.f zgbt02.f zgbt05.f zgelqs.f zgeqls.f zgeqrs.f + zgbt01.f zgbt02.f zgbt05.f zgeqls.f zgerqs.f zget01.f zget02.f zget03.f zget04.f zget07.f zgtt01.f zgtt02.f zgtt05.f zhet01.f zhet01_rook.f zhet01_3.f diff --git a/lapack-netlib/TESTING/LIN/Makefile b/lapack-netlib/TESTING/LIN/Makefile index 54b26455e..64abc4dba 100644 --- a/lapack-netlib/TESTING/LIN/Makefile +++ b/lapack-netlib/TESTING/LIN/Makefile @@ -55,7 +55,7 @@ SLINTST = schkaa.o \ serrgt.o serrlq.o serrls.o \ serrps.o serrql.o serrqp.o serrqr.o \ serrrq.o serrtr.o serrtz.o \ - sgbt01.o sgbt02.o sgbt05.o sgelqs.o sgeqls.o sgeqrs.o \ + sgbt01.o sgbt02.o sgbt05.o sgeqls.o \ sgerqs.o sget01.o sget02.o \ sget03.o sget04.o sget06.o sget07.o sgtt01.o sgtt02.o \ sgtt05.o slaptm.o slarhs.o slatb4.o slatb5.o slattb.o slattp.o \ @@ -100,7 +100,7 @@ CLINTST = cchkaa.o \ cerrgt.o cerrlq.o \ cerrls.o cerrps.o cerrql.o cerrqp.o \ cerrqr.o cerrrq.o cerrtr.o cerrtz.o \ - cgbt01.o cgbt02.o cgbt05.o cgelqs.o cgeqls.o cgeqrs.o \ + cgbt01.o cgbt02.o cgbt05.o cgeqls.o \ cgerqs.o cget01.o cget02.o \ cget03.o cget04.o cget07.o cgtt01.o cgtt02.o \ cgtt05.o chet01.o chet01_rook.o chet01_3.o chet01_aa.o \ @@ -147,7 +147,7 @@ DLINTST = dchkaa.o \ derrgt.o derrlq.o derrls.o \ derrps.o derrql.o derrqp.o derrqr.o \ derrrq.o derrtr.o derrtz.o \ - dgbt01.o dgbt02.o dgbt05.o dgelqs.o dgeqls.o dgeqrs.o \ + dgbt01.o dgbt02.o dgbt05.o dgeqls.o \ dgerqs.o dget01.o dget02.o \ dget03.o dget04.o dget06.o dget07.o dgtt01.o dgtt02.o \ dgtt05.o dlaptm.o dlarhs.o dlatb4.o dlatb5.o dlattb.o dlattp.o \ @@ -192,7 +192,7 @@ ZLINTST = zchkaa.o \ zerrgt.o zerrlq.o \ zerrls.o zerrps.o zerrql.o zerrqp.o \ zerrqr.o zerrrq.o zerrtr.o zerrtz.o \ - zgbt01.o zgbt02.o zgbt05.o 
zgelqs.o zgeqls.o zgeqrs.o \ + zgbt01.o zgbt02.o zgbt05.o zgeqls.o \ zgerqs.o zget01.o zget02.o \ zget03.o zget04.o zget07.o zgtt01.o zgtt02.o \ zgtt05.o zhet01.o zhet01_rook.o zhet01_3.o zhet01_aa.o \ diff --git a/lapack-netlib/TESTING/LIN/cchklq.f b/lapack-netlib/TESTING/LIN/cchklq.f index 54107d047..4499de36f 100644 --- a/lapack-netlib/TESTING/LIN/cchklq.f +++ b/lapack-netlib/TESTING/LIN/cchklq.f @@ -235,7 +235,7 @@ REAL RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, CERRLQ, CGELQS, CGET02, + EXTERNAL ALAERH, ALAHD, ALASUM, CERRLQ, CGELS, CGET02, $ CLACPY, CLARHS, CLATB4, CLATMS, CLQT01, CLQT02, $ CLQT03, XLAENV * .. @@ -370,7 +370,7 @@ $ WORK, LWORK, RWORK, RESULT( 3 ) ) NT = NT + 4 * -* If M>=N and K=N, call CGELQS to solve a system +* If M<=N and K=M, call CGELS to solve a system * with NRHS right hand sides and compute the * residual. * @@ -387,14 +387,20 @@ * CALL CLACPY( 'Full', M, NRHS, B, LDA, X, $ LDA ) - SRNAMT = 'CGELQS' - CALL CGELQS( M, N, NRHS, AF, LDA, TAU, X, - $ LDA, WORK, LWORK, INFO ) * -* Check error code from CGELQS. +* Reset AF to the original matrix. CGELS +* factors the matrix before solving the system. +* + CALL CLACPY( 'Full', M, N, A, LDA, AF, LDA ) +* + SRNAMT = 'CGELS' + CALL CGELS( 'No transpose', M, N, NRHS, AF, + $ LDA, X, LDA, WORK, LWORK, INFO ) +* +* Check error code from CGELS. * IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'CGELQS', INFO, 0, ' ', + $ CALL ALAERH( PATH, 'CGELS', INFO, 0, 'N', $ M, N, NRHS, -1, NB, IMAT, $ NFAIL, NERRS, NOUT ) * diff --git a/lapack-netlib/TESTING/LIN/cchkqr.f b/lapack-netlib/TESTING/LIN/cchkqr.f index 7ea178eaf..4fa7413f9 100644 --- a/lapack-netlib/TESTING/LIN/cchkqr.f +++ b/lapack-netlib/TESTING/LIN/cchkqr.f @@ -244,7 +244,7 @@ EXTERNAL CGENND * .. * .. External Subroutines .. 
- EXTERNAL ALAERH, ALAHD, ALASUM, CERRQR, CGEQRS, CGET02, + EXTERNAL ALAERH, ALAHD, ALASUM, CERRQR, CGELS, CGET02, $ CLACPY, CLARHS, CLATB4, CLATMS, CQRT01, $ CQRT01P, CQRT02, CQRT03, XLAENV * .. @@ -371,7 +371,7 @@ IF( .NOT. CGENND( M, N, AF, LDA ) ) $ RESULT( 9 ) = 2*THRESH NT = NT + 1 - ELSE IF( M.GE.N ) THEN + ELSE IF( M.GE.N ) THEN * * Test CUNGQR, using factorization * returned by CQRT01 @@ -388,7 +388,7 @@ $ WORK, LWORK, RWORK, RESULT( 3 ) ) NT = NT + 4 * -* If M>=N and K=N, call CGEQRS to solve a system +* If M>=N and K=N, call CGELS to solve a system * with NRHS right hand sides and compute the * residual. * @@ -405,14 +405,20 @@ * CALL CLACPY( 'Full', M, NRHS, B, LDA, X, $ LDA ) - SRNAMT = 'CGEQRS' - CALL CGEQRS( M, N, NRHS, AF, LDA, TAU, X, - $ LDA, WORK, LWORK, INFO ) * -* Check error code from CGEQRS. +* Reset AF to the original matrix. CGELS +* factors the matrix before solving the system. +* + CALL CLACPY( 'Full', M, N, A, LDA, AF, LDA ) +* + SRNAMT = 'CGELS' + CALL CGELS( 'No transpose', M, N, NRHS, AF, + $ LDA, X, LDA, WORK, LWORK, INFO ) +* +* Check error code from CGELS. * IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'CGEQRS', INFO, 0, ' ', + $ CALL ALAERH( PATH, 'CGELS', INFO, 0, 'N', $ M, N, NRHS, -1, NB, IMAT, $ NFAIL, NERRS, NOUT ) * diff --git a/lapack-netlib/TESTING/LIN/cerrlq.f b/lapack-netlib/TESTING/LIN/cerrlq.f index 1036835b4..495adac0d 100644 --- a/lapack-netlib/TESTING/LIN/cerrlq.f +++ b/lapack-netlib/TESTING/LIN/cerrlq.f @@ -76,7 +76,7 @@ $ W( NMAX ), X( NMAX ) * .. * .. External Subroutines .. - EXTERNAL ALAESM, CGELQ2, CGELQF, CGELQS, CHKXER, CUNGL2, + EXTERNAL ALAESM, CGELQ2, CGELQF, CHKXER, CUNGL2, $ CUNGLQ, CUNML2, CUNMLQ * .. * .. Scalars in Common .. 
@@ -140,31 +140,6 @@ CALL CGELQ2( 2, 1, A, 1, B, W, INFO ) CALL CHKXER( 'CGELQ2', INFOT, NOUT, LERR, OK ) * -* CGELQS -* - SRNAMT = 'CGELQS' - INFOT = 1 - CALL CGELQS( -1, 0, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL CGELQS( 0, -1, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL CGELQS( 2, 1, 0, A, 2, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 3 - CALL CGELQS( 0, 0, -1, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 5 - CALL CGELQS( 2, 2, 0, A, 1, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'CGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 8 - CALL CGELQS( 1, 2, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 10 - CALL CGELQS( 1, 1, 2, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGELQS', INFOT, NOUT, LERR, OK ) -* * CUNGLQ * SRNAMT = 'CUNGLQ' diff --git a/lapack-netlib/TESTING/LIN/cerrqr.f b/lapack-netlib/TESTING/LIN/cerrqr.f index 21cf22936..30ce001eb 100644 --- a/lapack-netlib/TESTING/LIN/cerrqr.f +++ b/lapack-netlib/TESTING/LIN/cerrqr.f @@ -77,7 +77,7 @@ * .. * .. External Subroutines .. EXTERNAL ALAESM, CGEQR2, CGEQR2P, CGEQRF, CGEQRFP, - $ CGEQRS, CHKXER, CUNG2R, CUNGQR, CUNM2R, + $ CHKXER, CUNG2R, CUNGQR, CUNM2R, $ CUNMQR * .. * .. Scalars in Common .. 
@@ -170,31 +170,6 @@ CALL CGEQR2P( 2, 1, A, 1, B, W, INFO ) CALL CHKXER( 'CGEQR2P', INFOT, NOUT, LERR, OK ) * -* CGEQRS -* - SRNAMT = 'CGEQRS' - INFOT = 1 - CALL CGEQRS( -1, 0, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL CGEQRS( 0, -1, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL CGEQRS( 1, 2, 0, A, 2, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'CGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 3 - CALL CGEQRS( 0, 0, -1, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 5 - CALL CGEQRS( 2, 1, 0, A, 1, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'CGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 8 - CALL CGEQRS( 2, 1, 0, A, 2, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 10 - CALL CGEQRS( 1, 1, 2, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'CGEQRS', INFOT, NOUT, LERR, OK ) -* * CUNGQR * SRNAMT = 'CUNGQR' diff --git a/lapack-netlib/TESTING/LIN/dchklq.f b/lapack-netlib/TESTING/LIN/dchklq.f index 70af41fe0..a207e0056 100644 --- a/lapack-netlib/TESTING/LIN/dchklq.f +++ b/lapack-netlib/TESTING/LIN/dchklq.f @@ -235,7 +235,7 @@ DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, DERRLQ, DGELQS, DGET02, + EXTERNAL ALAERH, ALAHD, ALASUM, DERRLQ, DGELS, DGET02, $ DLACPY, DLARHS, DLATB4, DLATMS, DLQT01, DLQT02, $ DLQT03, XLAENV * .. @@ -373,7 +373,7 @@ $ WORK, LWORK, RWORK, RESULT( 3 ) ) NT = NT + 4 * -* If M>=N and K=N, call DGELQS to solve a system +* If M<=N and K=M, call DGELS to solve a system * with NRHS right hand sides and compute the * residual. * @@ -390,14 +390,20 @@ * CALL DLACPY( 'Full', M, NRHS, B, LDA, X, $ LDA ) - SRNAMT = 'DGELQS' - CALL DGELQS( M, N, NRHS, AF, LDA, TAU, X, - $ LDA, WORK, LWORK, INFO ) * -* Check error code from DGELQS. +* Reset AF to the original matrix. DGELS +* factors the matrix before solving the system. 
+* + CALL DLACPY( 'Full', M, N, A, LDA, AF, LDA ) +* + SRNAMT = 'DGELS' + CALL DGELS( 'No transpose', M, N, NRHS, AF, + $ LDA, X, LDA, WORK, LWORK, INFO ) +* +* Check error code from DGELS. * IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'DGELQS', INFO, 0, ' ', + $ CALL ALAERH( PATH, 'DGELS', INFO, 0, 'N', $ M, N, NRHS, -1, NB, IMAT, $ NFAIL, NERRS, NOUT ) * diff --git a/lapack-netlib/TESTING/LIN/dchkqr.f b/lapack-netlib/TESTING/LIN/dchkqr.f index c729e61a9..8188d7a00 100644 --- a/lapack-netlib/TESTING/LIN/dchkqr.f +++ b/lapack-netlib/TESTING/LIN/dchkqr.f @@ -244,7 +244,7 @@ EXTERNAL DGENND * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, DERRQR, DGEQRS, DGET02, + EXTERNAL ALAERH, ALAHD, ALASUM, DERRQR, DGELS, DGET02, $ DLACPY, DLARHS, DLATB4, DLATMS, DQRT01, $ DQRT01P, DQRT02, DQRT03, XLAENV * .. @@ -372,7 +372,7 @@ IF( .NOT. DGENND( M, N, AF, LDA ) ) $ RESULT( 9 ) = 2*THRESH NT = NT + 1 - ELSE IF( M.GE.N ) THEN + ELSE IF( M.GE.N ) THEN * * Test DORGQR, using factorization * returned by DQRT01 @@ -389,7 +389,7 @@ $ WORK, LWORK, RWORK, RESULT( 3 ) ) NT = NT + 4 * -* If M>=N and K=N, call DGEQRS to solve a system +* If M>=N and K=N, call DGELS to solve a system * with NRHS right hand sides and compute the * residual. * @@ -406,14 +406,20 @@ * CALL DLACPY( 'Full', M, NRHS, B, LDA, X, $ LDA ) - SRNAMT = 'DGEQRS' - CALL DGEQRS( M, N, NRHS, AF, LDA, TAU, X, - $ LDA, WORK, LWORK, INFO ) * -* Check error code from DGEQRS. +* Reset AF. DGELS overwrites the matrix with +* its factorization. +* + CALL DLACPY( 'Full', M, N, A, LDA, AF, LDA ) +* + SRNAMT = 'DGELS' + CALL DGELS( 'No transpose', M, N, NRHS, AF, + $ LDA, X, LDA, WORK, LWORK, INFO ) +* +* Check error code from DGELS. 
* IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'DGEQRS', INFO, 0, ' ', + $ CALL ALAERH( PATH, 'DGELS', INFO, 0, 'N', $ M, N, NRHS, -1, NB, IMAT, $ NFAIL, NERRS, NOUT ) * diff --git a/lapack-netlib/TESTING/LIN/derrlq.f b/lapack-netlib/TESTING/LIN/derrlq.f index d3cfcddd0..76ff4709e 100644 --- a/lapack-netlib/TESTING/LIN/derrlq.f +++ b/lapack-netlib/TESTING/LIN/derrlq.f @@ -76,7 +76,7 @@ $ W( NMAX ), X( NMAX ) * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, DGELQ2, DGELQF, DGELQS, DORGL2, + EXTERNAL ALAESM, CHKXER, DGELQ2, DGELQF, DORGL2, $ DORGLQ, DORML2, DORMLQ * .. * .. Scalars in Common .. @@ -140,31 +140,6 @@ CALL DGELQ2( 2, 1, A, 1, B, W, INFO ) CALL CHKXER( 'DGELQ2', INFOT, NOUT, LERR, OK ) * -* DGELQS -* - SRNAMT = 'DGELQS' - INFOT = 1 - CALL DGELQS( -1, 0, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL DGELQS( 0, -1, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL DGELQS( 2, 1, 0, A, 2, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 3 - CALL DGELQS( 0, 0, -1, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 5 - CALL DGELQS( 2, 2, 0, A, 1, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'DGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 8 - CALL DGELQS( 1, 2, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 10 - CALL DGELQS( 1, 1, 2, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGELQS', INFOT, NOUT, LERR, OK ) -* * DORGLQ * SRNAMT = 'DORGLQ' diff --git a/lapack-netlib/TESTING/LIN/derrqr.f b/lapack-netlib/TESTING/LIN/derrqr.f index 03155b133..f7e850b80 100644 --- a/lapack-netlib/TESTING/LIN/derrqr.f +++ b/lapack-netlib/TESTING/LIN/derrqr.f @@ -77,7 +77,7 @@ * .. * .. External Subroutines .. EXTERNAL ALAESM, CHKXER, DGEQR2, DGEQR2P, DGEQRF, - $ DGEQRFP, DGEQRS, DORG2R, DORGQR, DORM2R, + $ DGEQRFP, DORG2R, DORGQR, DORM2R, $ DORMQR * .. * .. Scalars in Common .. 
@@ -170,31 +170,6 @@ CALL DGEQR2P( 2, 1, A, 1, B, W, INFO ) CALL CHKXER( 'DGEQR2P', INFOT, NOUT, LERR, OK ) * -* DGEQRS -* - SRNAMT = 'DGEQRS' - INFOT = 1 - CALL DGEQRS( -1, 0, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL DGEQRS( 0, -1, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL DGEQRS( 1, 2, 0, A, 2, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'DGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 3 - CALL DGEQRS( 0, 0, -1, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 5 - CALL DGEQRS( 2, 1, 0, A, 1, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'DGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 8 - CALL DGEQRS( 2, 1, 0, A, 2, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 10 - CALL DGEQRS( 1, 1, 2, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'DGEQRS', INFOT, NOUT, LERR, OK ) -* * DORGQR * SRNAMT = 'DORGQR' diff --git a/lapack-netlib/TESTING/LIN/schklq.f b/lapack-netlib/TESTING/LIN/schklq.f index cd66e8d10..9335503f9 100644 --- a/lapack-netlib/TESTING/LIN/schklq.f +++ b/lapack-netlib/TESTING/LIN/schklq.f @@ -235,7 +235,7 @@ REAL RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, SERRLQ, SGELQS, SGET02, + EXTERNAL ALAERH, ALAHD, ALASUM, SERRLQ, SGELS, SGET02, $ SLACPY, SLARHS, SLATB4, SLATMS, SLQT01, SLQT02, $ SLQT03, XLAENV * .. @@ -370,7 +370,7 @@ $ WORK, LWORK, RWORK, RESULT( 3 ) ) NT = NT + 4 * -* If M>=N and K=N, call SGELQS to solve a system +* If M<=N and K=M, call SGELS to solve a system * with NRHS right hand sides and compute the * residual. * @@ -387,14 +387,20 @@ * CALL SLACPY( 'Full', M, NRHS, B, LDA, X, $ LDA ) - SRNAMT = 'SGELQS' - CALL SGELQS( M, N, NRHS, AF, LDA, TAU, X, - $ LDA, WORK, LWORK, INFO ) * -* Check error code from SGELQS. +* Reset AF to the original matrix. SGELS +* factors the matrix before solving the system. 
+* + CALL SLACPY( 'Full', M, N, A, LDA, AF, LDA ) +* + SRNAMT = 'SGELS' + CALL SGELS( 'No transpose', M, N, NRHS, AF, + $ LDA, X, LDA, WORK, LWORK, INFO ) +* +* Check error code from SGELS. * IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'SGELQS', INFO, 0, ' ', + $ CALL ALAERH( PATH, 'SGELS', INFO, 0, 'N', $ M, N, NRHS, -1, NB, IMAT, $ NFAIL, NERRS, NOUT ) * diff --git a/lapack-netlib/TESTING/LIN/schkqr.f b/lapack-netlib/TESTING/LIN/schkqr.f index 5c45ede9b..f72c8f1eb 100644 --- a/lapack-netlib/TESTING/LIN/schkqr.f +++ b/lapack-netlib/TESTING/LIN/schkqr.f @@ -244,7 +244,7 @@ EXTERNAL SGENND * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, SERRQR, SGEQRS, SGET02, + EXTERNAL ALAERH, ALAHD, ALASUM, SERRQR, SGELS, SGET02, $ SLACPY, SLARHS, SLATB4, SLATMS, SQRT01, $ SQRT01P, SQRT02, SQRT03, XLAENV * .. @@ -388,7 +388,7 @@ $ WORK, LWORK, RWORK, RESULT( 3 ) ) NT = NT + 4 * -* If M>=N and K=N, call SGEQRS to solve a system +* If M>=N and K=N, call SGELS to solve a system * with NRHS right hand sides and compute the * residual. * @@ -405,14 +405,20 @@ * CALL SLACPY( 'Full', M, NRHS, B, LDA, X, $ LDA ) - SRNAMT = 'SGEQRS' - CALL SGEQRS( M, N, NRHS, AF, LDA, TAU, X, - $ LDA, WORK, LWORK, INFO ) * -* Check error code from SGEQRS. +* Reset AF to the original matrix. SGELS +* factors the matrix before solving the system. +* + CALL SLACPY( 'Full', M, N, A, LDA, AF, LDA ) +* + SRNAMT = 'SGELS' + CALL SGELS( 'No transpose', M, N, NRHS, AF, + $ LDA, X, LDA, WORK, LWORK, INFO ) +* +* Check error code from SGELS. * IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'SGEQRS', INFO, 0, ' ', + $ CALL ALAERH( PATH, 'SGELS', INFO, 0, 'N', $ M, N, NRHS, -1, NB, IMAT, $ NFAIL, NERRS, NOUT ) * diff --git a/lapack-netlib/TESTING/LIN/serrlq.f b/lapack-netlib/TESTING/LIN/serrlq.f index 5bb0fe201..e5df8ce52 100644 --- a/lapack-netlib/TESTING/LIN/serrlq.f +++ b/lapack-netlib/TESTING/LIN/serrlq.f @@ -76,7 +76,7 @@ $ W( NMAX ), X( NMAX ) * .. * .. External Subroutines .. 
- EXTERNAL ALAESM, CHKXER, SGELQ2, SGELQF, SGELQS, SORGL2, + EXTERNAL ALAESM, CHKXER, SGELQ2, SGELQF, SORGL2, $ SORGLQ, SORML2, SORMLQ * .. * .. Scalars in Common .. @@ -140,31 +140,6 @@ CALL SGELQ2( 2, 1, A, 1, B, W, INFO ) CALL CHKXER( 'SGELQ2', INFOT, NOUT, LERR, OK ) * -* SGELQS -* - SRNAMT = 'SGELQS' - INFOT = 1 - CALL SGELQS( -1, 0, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL SGELQS( 0, -1, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL SGELQS( 2, 1, 0, A, 2, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 3 - CALL SGELQS( 0, 0, -1, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 5 - CALL SGELQS( 2, 2, 0, A, 1, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'SGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 8 - CALL SGELQS( 1, 2, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 10 - CALL SGELQS( 1, 1, 2, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGELQS', INFOT, NOUT, LERR, OK ) -* * SORGLQ * SRNAMT = 'SORGLQ' diff --git a/lapack-netlib/TESTING/LIN/serrqr.f b/lapack-netlib/TESTING/LIN/serrqr.f index 1ad40b7aa..e228813f7 100644 --- a/lapack-netlib/TESTING/LIN/serrqr.f +++ b/lapack-netlib/TESTING/LIN/serrqr.f @@ -77,7 +77,7 @@ * .. * .. External Subroutines .. EXTERNAL ALAESM, CHKXER, SGEQR2, SGEQR2P, SGEQRF, - $ SGEQRFP, SGEQRS, SORG2R, SORGQR, SORM2R, + $ SGEQRFP, SORG2R, SORGQR, SORM2R, $ SORMQR * .. * .. Scalars in Common .. 
@@ -170,31 +170,6 @@ CALL SGEQR2P( 2, 1, A, 1, B, W, INFO ) CALL CHKXER( 'SGEQR2P', INFOT, NOUT, LERR, OK ) * -* SGEQRS -* - SRNAMT = 'SGEQRS' - INFOT = 1 - CALL SGEQRS( -1, 0, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL SGEQRS( 0, -1, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL SGEQRS( 1, 2, 0, A, 2, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'SGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 3 - CALL SGEQRS( 0, 0, -1, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 5 - CALL SGEQRS( 2, 1, 0, A, 1, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'SGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 8 - CALL SGEQRS( 2, 1, 0, A, 2, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 10 - CALL SGEQRS( 1, 1, 2, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'SGEQRS', INFOT, NOUT, LERR, OK ) -* * SORGQR * SRNAMT = 'SORGQR' diff --git a/lapack-netlib/TESTING/LIN/zchklq.f b/lapack-netlib/TESTING/LIN/zchklq.f index 371bb946b..ccef7b803 100644 --- a/lapack-netlib/TESTING/LIN/zchklq.f +++ b/lapack-netlib/TESTING/LIN/zchklq.f @@ -235,7 +235,7 @@ DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, ZERRLQ, ZGELQS, + EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, ZERRLQ, ZGELS, $ ZGET02, ZLACPY, ZLARHS, ZLATB4, ZLATMS, ZLQT01, $ ZLQT02, ZLQT03 * .. @@ -370,7 +370,7 @@ $ WORK, LWORK, RWORK, RESULT( 3 ) ) NT = NT + 4 * -* If M>=N and K=N, call ZGELQS to solve a system +* If M<=N and K=M, call ZGELS to solve a system * with NRHS right hand sides and compute the * residual. * @@ -387,14 +387,20 @@ * CALL ZLACPY( 'Full', M, NRHS, B, LDA, X, $ LDA ) - SRNAMT = 'ZGELQS' - CALL ZGELQS( M, N, NRHS, AF, LDA, TAU, X, - $ LDA, WORK, LWORK, INFO ) * -* Check error code from ZGELQS. +* Reset AF to the original matrix. ZGELS +* factors the matrix before solving the system. 
+* + CALL ZLACPY( 'Full', M, N, A, LDA, AF, LDA ) +* + SRNAMT = 'ZGELS' + CALL ZGELS( 'No transpose', M, N, NRHS, AF, + $ LDA, X, LDA, WORK, LWORK, INFO ) +* +* Check error code from ZGELS. * IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'ZGELQS', INFO, 0, ' ', + $ CALL ALAERH( PATH, 'ZGELS', INFO, 0, 'N', $ M, N, NRHS, -1, NB, IMAT, $ NFAIL, NERRS, NOUT ) * diff --git a/lapack-netlib/TESTING/LIN/zchkqr.f b/lapack-netlib/TESTING/LIN/zchkqr.f index a240d2da5..c088bacc9 100644 --- a/lapack-netlib/TESTING/LIN/zchkqr.f +++ b/lapack-netlib/TESTING/LIN/zchkqr.f @@ -244,7 +244,7 @@ EXTERNAL ZGENND * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, ZERRQR, ZGEQRS, + EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, ZERRQR, ZGELS, $ ZGET02, ZLACPY, ZLARHS, ZLATB4, ZLATMS, ZQRT01, $ ZQRT01P, ZQRT02, ZQRT03 * .. @@ -388,7 +388,7 @@ $ WORK, LWORK, RWORK, RESULT( 3 ) ) NT = NT + 4 * -* If M>=N and K=N, call ZGEQRS to solve a system +* If M>=N and K=N, call ZGELS to solve a system * with NRHS right hand sides and compute the * residual. * @@ -405,14 +405,20 @@ * CALL ZLACPY( 'Full', M, NRHS, B, LDA, X, $ LDA ) - SRNAMT = 'ZGEQRS' - CALL ZGEQRS( M, N, NRHS, AF, LDA, TAU, X, - $ LDA, WORK, LWORK, INFO ) * -* Check error code from ZGEQRS. +* Reset AF to the original matrix. ZGELS +* factors the matrix before solving the system. +* + CALL ZLACPY( 'Full', M, N, A, LDA, AF, LDA ) +* + SRNAMT = 'ZGELS' + CALL ZGELS( 'No transpose', M, N, NRHS, AF, + $ LDA, X, LDA, WORK, LWORK, INFO ) +* +* Check error code from ZGELS. * IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'ZGEQRS', INFO, 0, ' ', + $ CALL ALAERH( PATH, 'ZGELS', INFO, 0, 'N', $ M, N, NRHS, -1, NB, IMAT, $ NFAIL, NERRS, NOUT ) * diff --git a/lapack-netlib/TESTING/LIN/zerrlq.f b/lapack-netlib/TESTING/LIN/zerrlq.f index d8e5a8fe8..d91b4e4b3 100644 --- a/lapack-netlib/TESTING/LIN/zerrlq.f +++ b/lapack-netlib/TESTING/LIN/zerrlq.f @@ -76,7 +76,7 @@ $ W( NMAX ), X( NMAX ) * .. * .. External Subroutines .. 
- EXTERNAL ALAESM, CHKXER, ZGELQ2, ZGELQF, ZGELQS, ZUNGL2, + EXTERNAL ALAESM, CHKXER, ZGELQ2, ZGELQF, ZUNGL2, $ ZUNGLQ, ZUNML2, ZUNMLQ * .. * .. Scalars in Common .. @@ -142,31 +142,6 @@ CALL ZGELQ2( 2, 1, A, 1, B, W, INFO ) CALL CHKXER( 'ZGELQ2', INFOT, NOUT, LERR, OK ) * -* ZGELQS -* - SRNAMT = 'ZGELQS' - INFOT = 1 - CALL ZGELQS( -1, 0, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL ZGELQS( 0, -1, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL ZGELQS( 2, 1, 0, A, 2, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 3 - CALL ZGELQS( 0, 0, -1, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 5 - CALL ZGELQS( 2, 2, 0, A, 1, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'ZGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 8 - CALL ZGELQS( 1, 2, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGELQS', INFOT, NOUT, LERR, OK ) - INFOT = 10 - CALL ZGELQS( 1, 1, 2, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGELQS', INFOT, NOUT, LERR, OK ) -* * ZUNGLQ * SRNAMT = 'ZUNGLQ' diff --git a/lapack-netlib/TESTING/LIN/zerrqr.f b/lapack-netlib/TESTING/LIN/zerrqr.f index 114453d4c..3542c7a04 100644 --- a/lapack-netlib/TESTING/LIN/zerrqr.f +++ b/lapack-netlib/TESTING/LIN/zerrqr.f @@ -77,7 +77,7 @@ * .. * .. External Subroutines .. EXTERNAL ALAESM, CHKXER, ZGEQR2, ZGEQR2P, ZGEQRF, - $ ZGEQRFP, ZGEQRS, ZUNG2R, ZUNGQR, ZUNM2R, + $ ZGEQRFP, ZUNG2R, ZUNGQR, ZUNM2R, $ ZUNMQR * .. * .. Scalars in Common .. 
@@ -172,31 +172,6 @@ CALL ZGEQR2P( 2, 1, A, 1, B, W, INFO ) CALL CHKXER( 'ZGEQR2P', INFOT, NOUT, LERR, OK ) * -* ZGEQRS -* - SRNAMT = 'ZGEQRS' - INFOT = 1 - CALL ZGEQRS( -1, 0, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL ZGEQRS( 0, -1, 0, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 2 - CALL ZGEQRS( 1, 2, 0, A, 2, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'ZGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 3 - CALL ZGEQRS( 0, 0, -1, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 5 - CALL ZGEQRS( 2, 1, 0, A, 1, X, B, 2, W, 1, INFO ) - CALL CHKXER( 'ZGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 8 - CALL ZGEQRS( 2, 1, 0, A, 2, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGEQRS', INFOT, NOUT, LERR, OK ) - INFOT = 10 - CALL ZGEQRS( 1, 1, 2, A, 1, X, B, 1, W, 1, INFO ) - CALL CHKXER( 'ZGEQRS', INFOT, NOUT, LERR, OK ) -* * ZUNGQR * SRNAMT = 'ZUNGQR' From 225036fd92fb0093280dbfdafd295a4f40678917 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 13:43:22 +0100 Subject: [PATCH 117/125] Apply ROUNDUP_LWORK (Reference-LAPACK PR 904) --- lapack-netlib/SRC/VARIANTS/qr/LL/cgeqrf.f | 9 +++++---- lapack-netlib/SRC/VARIANTS/qr/LL/dgeqrf.f | 9 +++++---- lapack-netlib/SRC/VARIANTS/qr/LL/sgeqrf.f | 9 +++++---- lapack-netlib/SRC/VARIANTS/qr/LL/zgeqrf.f | 9 +++++---- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/lapack-netlib/SRC/VARIANTS/qr/LL/cgeqrf.f b/lapack-netlib/SRC/VARIANTS/qr/LL/cgeqrf.f index 743731a00..d3f78b8be 100644 --- a/lapack-netlib/SRC/VARIANTS/qr/LL/cgeqrf.f +++ b/lapack-netlib/SRC/VARIANTS/qr/LL/cgeqrf.f @@ -176,7 +176,8 @@ C> * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. 
@@ -225,13 +226,13 @@ C> * Optimal workspace for dlarfb = MAX(1,N)*NT * LWKOPT = (LBWORK+LLWORK)*NB - WORK( 1 ) = (LWKOPT+NT*NT) + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT+NT*NT) ELSE LBWORK = CEILING(REAL(K)/REAL(NB))*NB LWKOPT = (LBWORK+LLWORK-NB)*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF @@ -413,7 +414,7 @@ C> END IF - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CGEQRF diff --git a/lapack-netlib/SRC/VARIANTS/qr/LL/dgeqrf.f b/lapack-netlib/SRC/VARIANTS/qr/LL/dgeqrf.f index bbdd46113..dd8baab4d 100644 --- a/lapack-netlib/SRC/VARIANTS/qr/LL/dgeqrf.f +++ b/lapack-netlib/SRC/VARIANTS/qr/LL/dgeqrf.f @@ -176,7 +176,8 @@ C> * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + DOUBLE PRECISION DROUNDUP_LWORK + EXTERNAL ILAENV, DROUNDUP_LWORK * .. * .. Executable Statements .. @@ -225,13 +226,13 @@ C> * Optimal workspace for dlarfb = MAX(1,N)*NT * LWKOPT = (LBWORK+LLWORK)*NB - WORK( 1 ) = (LWKOPT+NT*NT) + WORK( 1 ) = DROUNDUP_LWORK(LWKOPT+NT*NT) ELSE LBWORK = CEILING(REAL(K)/REAL(NB))*NB LWKOPT = (LBWORK+LLWORK-NB)*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = DROUNDUP_LWORK(LWKOPT) END IF @@ -413,7 +414,7 @@ C> END IF - WORK( 1 ) = IWS + WORK( 1 ) = DROUNDUP_LWORK(IWS) RETURN * * End of DGEQRF diff --git a/lapack-netlib/SRC/VARIANTS/qr/LL/sgeqrf.f b/lapack-netlib/SRC/VARIANTS/qr/LL/sgeqrf.f index bf68d635b..93dc48fa5 100644 --- a/lapack-netlib/SRC/VARIANTS/qr/LL/sgeqrf.f +++ b/lapack-netlib/SRC/VARIANTS/qr/LL/sgeqrf.f @@ -176,7 +176,8 @@ C> * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. 
@@ -225,13 +226,13 @@ C> * Optimal workspace for dlarfb = MAX(1,N)*NT * LWKOPT = (LBWORK+LLWORK)*NB - WORK( 1 ) = (LWKOPT+NT*NT) + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT+NT*NT) ELSE LBWORK = CEILING(REAL(K)/REAL(NB))*NB LWKOPT = (LBWORK+LLWORK-NB)*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF @@ -413,7 +414,7 @@ C> END IF - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SGEQRF diff --git a/lapack-netlib/SRC/VARIANTS/qr/LL/zgeqrf.f b/lapack-netlib/SRC/VARIANTS/qr/LL/zgeqrf.f index 06918568e..3ef07bfc7 100644 --- a/lapack-netlib/SRC/VARIANTS/qr/LL/zgeqrf.f +++ b/lapack-netlib/SRC/VARIANTS/qr/LL/zgeqrf.f @@ -176,7 +176,8 @@ C> * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + DOUBLE PRECISION DROUNDUP_LWORK + EXTERNAL ILAENV, DROUNDUP_LWORK * .. * .. Executable Statements .. @@ -225,13 +226,13 @@ C> * Optimal workspace for dlarfb = MAX(1,N)*NT * LWKOPT = (LBWORK+LLWORK)*NB - WORK( 1 ) = (LWKOPT+NT*NT) + WORK( 1 ) = DROUNDUP_LWORK(LWKOPT+NT*NT) ELSE LBWORK = CEILING(REAL(K)/REAL(NB))*NB LWKOPT = (LBWORK+LLWORK-NB)*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = DROUNDUP_LWORK(LWKOPT) END IF @@ -413,7 +414,7 @@ C> END IF - WORK( 1 ) = IWS + WORK( 1 ) = DROUNDUP_LWORK(IWS) RETURN * * End of ZGEQRF From c9378badd929615aadca9120a664ab1e4bf83d11 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 13:56:06 +0100 Subject: [PATCH 118/125] Apply ROUNDUP_LWORK (Reference-LAPACK PR 904) --- lapack-netlib/SRC/cgees.f | 13 ++-- lapack-netlib/SRC/cgeesx.f | 13 ++-- lapack-netlib/SRC/cgeev.f | 14 ++-- lapack-netlib/SRC/cgeevx.f | 14 ++-- lapack-netlib/SRC/cgehrd.f | 9 +-- lapack-netlib/SRC/cgelq.f | 11 +-- lapack-netlib/SRC/cgelqf.f | 9 +-- lapack-netlib/SRC/cgelsd.f | 13 ++-- lapack-netlib/SRC/cgelss.f | 8 +-- lapack-netlib/SRC/cgelst.f | 19 +++-- lapack-netlib/SRC/cgeqlf.f | 9 +-- lapack-netlib/SRC/cgeqrf.f | 9 +-- lapack-netlib/SRC/cgeqrfp.f | 9 +-- lapack-netlib/SRC/cgerqf.f | 9 +-- lapack-netlib/SRC/cgesvd.f | 10 +-- 
lapack-netlib/SRC/cgetri.f | 9 +-- lapack-netlib/SRC/cgetsls.f | 15 ++-- lapack-netlib/SRC/cgges.f | 14 ++-- lapack-netlib/SRC/cggesx.f | 14 ++-- lapack-netlib/SRC/cggev.f | 14 ++-- lapack-netlib/SRC/cggevx.f | 13 ++-- lapack-netlib/SRC/cggglm.f | 7 +- lapack-netlib/SRC/cgglse.f | 7 +- lapack-netlib/SRC/cggqrf.f | 7 +- lapack-netlib/SRC/cggrqf.f | 7 +- lapack-netlib/SRC/chbev_2stage.f | 23 +++--- lapack-netlib/SRC/chbevd.f | 10 +-- lapack-netlib/SRC/chbevx_2stage.f | 29 ++++---- lapack-netlib/SRC/chbgvd.f | 9 +-- lapack-netlib/SRC/cheev.f | 10 +-- lapack-netlib/SRC/cheev_2stage.f | 25 +++---- lapack-netlib/SRC/cheevd.f | 10 +-- lapack-netlib/SRC/cheevr.f | 10 +-- lapack-netlib/SRC/cheevx.f | 10 +-- lapack-netlib/SRC/cheevx_2stage.f | 29 ++++---- lapack-netlib/SRC/chegv.f | 9 +-- lapack-netlib/SRC/chegv_2stage.f | 23 +++--- lapack-netlib/SRC/chegvd.f | 9 +-- lapack-netlib/SRC/chegvx.f | 9 +-- lapack-netlib/SRC/chesv.f | 9 +-- lapack-netlib/SRC/chesv_aa.f | 9 +-- lapack-netlib/SRC/chesv_aa_2stage.f | 7 +- lapack-netlib/SRC/chesv_rk.f | 9 +-- lapack-netlib/SRC/chesv_rook.f | 9 +-- lapack-netlib/SRC/chesvx.f | 10 +-- lapack-netlib/SRC/chetrd_hb2st.F | 107 ++++++++++++++-------------- 46 files changed, 334 insertions(+), 317 deletions(-) diff --git a/lapack-netlib/SRC/cgees.f b/lapack-netlib/SRC/cgees.f index 71acfdba3..2085dc49b 100644 --- a/lapack-netlib/SRC/cgees.f +++ b/lapack-netlib/SRC/cgees.f @@ -189,7 +189,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEeigen +*> \ingroup gees * * ===================================================================== SUBROUTINE CGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS, @@ -230,13 +230,13 @@ * .. * .. External Subroutines .. EXTERNAL CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, - $ CLASCL, CTRSEN, CUNGHR, SLABAD, XERBLA + $ CLASCL, CTRSEN, CUNGHR, XERBLA * .. * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, SQRT @@ -292,7 +292,7 @@ MAXWRK = MAX( MAXWRK, HSWORK ) END IF END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN INFO = -12 @@ -318,7 +318,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -413,7 +412,7 @@ CALL CCOPY( N, A, LDA+1, W, 1 ) END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of CGEES diff --git a/lapack-netlib/SRC/cgeesx.f b/lapack-netlib/SRC/cgeesx.f index 782e36747..036ae90c2 100644 --- a/lapack-netlib/SRC/cgeesx.f +++ b/lapack-netlib/SRC/cgeesx.f @@ -230,7 +230,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEeigen +*> \ingroup geesx * * ===================================================================== SUBROUTINE CGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W, @@ -274,13 +274,13 @@ * .. * .. External Subroutines .. EXTERNAL CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, - $ CLASCL, CTRSEN, CUNGHR, SLABAD, SLASCL, XERBLA + $ CLASCL, CTRSEN, CUNGHR, SLASCL, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, SQRT @@ -350,7 +350,7 @@ IF( .NOT.WANTSN ) $ LWRK = MAX( LWRK, ( N*N )/2 ) END IF - WORK( 1 ) = LWRK + WORK( 1 ) = SROUNDUP_LWORK(LWRK) * IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN INFO = -15 @@ -376,7 +376,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -488,7 +487,7 @@ END IF END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of CGEESX diff --git a/lapack-netlib/SRC/cgeev.f b/lapack-netlib/SRC/cgeev.f index a77525ef8..bb41599d1 100644 --- a/lapack-netlib/SRC/cgeev.f +++ b/lapack-netlib/SRC/cgeev.f @@ -172,7 +172,7 @@ * * @generated from zgeev.f, fortran z -> c, Tue Apr 19 01:47:44 2016 * -*> \ingroup complexGEeigen +*> \ingroup geev * * ===================================================================== SUBROUTINE CGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, @@ -212,14 +212,15 @@ REAL DUM( 1 ) * .. * .. External Subroutines .. - EXTERNAL SLABAD, XERBLA, CSSCAL, CGEBAK, CGEBAL, CGEHRD, + EXTERNAL XERBLA, CSSCAL, CGEBAK, CGEBAL, CGEHRD, $ CHSEQR, CLACPY, CLASCL, CSCAL, CTREVC3, CUNGHR * .. * .. External Functions .. LOGICAL LSAME INTEGER ISAMAX, ILAENV - REAL SLAMCH, SCNRM2, CLANGE - EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SCNRM2, CLANGE + REAL SLAMCH, SCNRM2, CLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SCNRM2, CLANGE, + $ SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC REAL, CMPLX, CONJG, AIMAG, MAX, SQRT @@ -291,7 +292,7 @@ HSWORK = INT( WORK(1) ) MAXWRK = MAX( MAXWRK, HSWORK, MINWRK ) END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN INFO = -12 @@ -315,7 +316,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -493,7 +493,7 @@ END IF END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of CGEEV diff --git a/lapack-netlib/SRC/cgeevx.f b/lapack-netlib/SRC/cgeevx.f index 2388f5acc..5dbc394e9 100644 --- a/lapack-netlib/SRC/cgeevx.f +++ b/lapack-netlib/SRC/cgeevx.f @@ -279,7 +279,7 @@ * * @generated from zgeevx.f, fortran z -> c, Tue Apr 19 01:47:44 2016 * -*> \ingroup complexGEeigen +*> \ingroup geevx * * ===================================================================== SUBROUTINE CGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, @@ -323,15 +323,16 @@ REAL DUM( 1 ) * .. * .. External Subroutines .. - EXTERNAL SLABAD, SLASCL, XERBLA, CSSCAL, CGEBAK, CGEBAL, + EXTERNAL SLASCL, XERBLA, CSSCAL, CGEBAK, CGEBAL, $ CGEHRD, CHSEQR, CLACPY, CLASCL, CSCAL, CTREVC3, $ CTRSNA, CUNGHR * .. * .. External Functions .. LOGICAL LSAME INTEGER ISAMAX, ILAENV - REAL SLAMCH, SCNRM2, CLANGE - EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SCNRM2, CLANGE + REAL SLAMCH, SCNRM2, CLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SCNRM2, CLANGE, + $ SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC REAL, CMPLX, CONJG, AIMAG, MAX, SQRT @@ -434,7 +435,7 @@ END IF MAXWRK = MAX( MAXWRK, MINWRK ) END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN INFO = -20 @@ -458,7 +459,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -657,7 +657,7 @@ END IF END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of CGEEVX diff --git a/lapack-netlib/SRC/cgehrd.f b/lapack-netlib/SRC/cgehrd.f index d9c050267..f407f931a 100644 --- a/lapack-netlib/SRC/cgehrd.f +++ b/lapack-netlib/SRC/cgehrd.f @@ -120,7 +120,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup gehrd * *> \par Further Details: * ===================== @@ -201,7 +201,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -227,7 +228,7 @@ * NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) LWKOPT = N*NB + TSIZE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -344,7 +345,7 @@ * Use unblocked code to reduce the rest of the matrix * CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/cgelq.f b/lapack-netlib/SRC/cgelq.f index d66033166..ff482bc42 100644 --- a/lapack-netlib/SRC/cgelq.f +++ b/lapack-netlib/SRC/cgelq.f @@ -166,6 +166,8 @@ *> the LQ factorization. *> \endverbatim *> +*> \ingroup gelq +*> * ===================================================================== SUBROUTINE CGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, $ INFO ) @@ -190,7 +192,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CGELQT, CLASWLQ, XERBLA @@ -292,9 +295,9 @@ T( 2 ) = MB T( 3 ) = NB IF( MINW ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ELSE - WORK( 1 ) = LWREQ + WORK( 1 ) = SROUNDUP_LWORK(LWREQ) END IF END IF IF( INFO.NE.0 ) THEN @@ -319,7 +322,7 @@ $ LWORK, INFO ) END IF * - WORK( 1 ) = LWREQ + WORK( 1 ) = SROUNDUP_LWORK(LWREQ) * RETURN * diff --git a/lapack-netlib/SRC/cgelqf.f b/lapack-netlib/SRC/cgelqf.f index 37ef13a27..75f5bc960 100644 --- a/lapack-netlib/SRC/cgelqf.f +++ b/lapack-netlib/SRC/cgelqf.f @@ -118,7 +118,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup gelqf * *> \par Further Details: * ===================== @@ -167,7 +167,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -176,7 +177,7 @@ INFO = 0 NB = ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) LWKOPT = M*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -266,7 +267,7 @@ $ CALL CGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, $ IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CGELQF diff --git a/lapack-netlib/SRC/cgelsd.f b/lapack-netlib/SRC/cgelsd.f index c3c77bf63..5d7eec68d 100644 --- a/lapack-netlib/SRC/cgelsd.f +++ b/lapack-netlib/SRC/cgelsd.f @@ -204,7 +204,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEsolve +*> \ingroup gelsd * *> \par Contributors: * ================== @@ -249,13 +249,13 @@ * .. External Subroutines .. EXTERNAL CGEBRD, CGELQF, CGEQRF, CLACPY, $ CLALSD, CLASCL, CLASET, CUNMBR, - $ CUNMLQ, CUNMQR, SLABAD, SLASCL, + $ CUNMLQ, CUNMQR, SLASCL, $ SLASET, XERBLA * .. * .. External Functions .. INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL CLANGE, SLAMCH, ILAENV + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL CLANGE, SLAMCH, ILAENV, SROUNDUP_LWORK * .. 
* .. Intrinsic Functions .. INTRINSIC INT, LOG, MAX, MIN, REAL @@ -367,7 +367,7 @@ END IF END IF MINWRK = MIN( MINWRK, MAXWRK ) - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) IWORK( 1 ) = LIWORK RWORK( 1 ) = LRWORK * @@ -396,7 +396,6 @@ SFMIN = SLAMCH( 'S' ) SMLNUM = SFMIN / EPS BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A if max entry outside range [SMLNUM,BIGNUM]. * @@ -647,7 +646,7 @@ END IF * 10 CONTINUE - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) IWORK( 1 ) = LIWORK RWORK( 1 ) = LRWORK RETURN diff --git a/lapack-netlib/SRC/cgelss.f b/lapack-netlib/SRC/cgelss.f index d1e38c504..00d7f596a 100644 --- a/lapack-netlib/SRC/cgelss.f +++ b/lapack-netlib/SRC/cgelss.f @@ -218,8 +218,8 @@ * .. * .. External Functions .. INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL ILAENV, CLANGE, SLAMCH + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL ILAENV, CLANGE, SLAMCH, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -361,7 +361,7 @@ END IF MAXWRK = MAX( MINWRK, MAXWRK ) END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) $ INFO = -12 @@ -758,7 +758,7 @@ CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) END IF 70 CONTINUE - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of CGELSS diff --git a/lapack-netlib/SRC/cgelst.f b/lapack-netlib/SRC/cgelst.f index 7d8e44ddf..b69626934 100644 --- a/lapack-netlib/SRC/cgelst.f +++ b/lapack-netlib/SRC/cgelst.f @@ -176,7 +176,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEsolve +*> \ingroup gelst * *> \par Contributors: * ================== @@ -224,15 +224,15 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, CLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, CLANGE + REAL SLAMCH, CLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, CLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. 
- EXTERNAL CGELQT, CGEQRT, CGEMLQT, CGEMQRT, SLABAD, + EXTERNAL CGELQT, CGEQRT, CGEMLQT, CGEMQRT, $ CLASCL, CLASET, CTRTRS, XERBLA * .. * .. Intrinsic Functions .. - INTRINSIC REAL, MAX, MIN + INTRINSIC MAX, MIN * .. * .. Executable Statements .. * @@ -270,7 +270,7 @@ * MNNRHS = MAX( MN, NRHS ) LWOPT = MAX( 1, (MN+MNNRHS)*NB ) - WORK( 1 ) = REAL( LWOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWOPT ) * END IF * @@ -285,7 +285,7 @@ * IF( MIN( M, N, NRHS ).EQ.0 ) THEN CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) - WORK( 1 ) = REAL( LWOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWOPT ) RETURN END IF * @@ -311,7 +311,6 @@ * SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A, B if max element outside range [SMLNUM,BIGNUM] * @@ -334,7 +333,7 @@ * Matrix all zero. Return zero solution. * CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) - WORK( 1 ) = REAL( LWOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWOPT ) RETURN END IF * @@ -524,7 +523,7 @@ $ INFO ) END IF * - WORK( 1 ) = REAL( LWOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWOPT ) * RETURN * diff --git a/lapack-netlib/SRC/cgeqlf.f b/lapack-netlib/SRC/cgeqlf.f index d2c11c269..918bbddad 100644 --- a/lapack-netlib/SRC/cgeqlf.f +++ b/lapack-netlib/SRC/cgeqlf.f @@ -113,7 +113,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup geqlf * *> \par Further Details: * ===================== @@ -162,7 +162,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -186,7 +187,7 @@ NB = ILAENV( 1, 'CGEQLF', ' ', M, N, -1, -1 ) LWKOPT = N*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -7 @@ -276,7 +277,7 @@ IF( MU.GT.0 .AND. 
NU.GT.0 ) $ CALL CGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CGEQLF diff --git a/lapack-netlib/SRC/cgeqrf.f b/lapack-netlib/SRC/cgeqrf.f index d71bd5b33..bf22a2cd3 100644 --- a/lapack-netlib/SRC/cgeqrf.f +++ b/lapack-netlib/SRC/cgeqrf.f @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup geqrf * *> \par Further Details: * ===================== @@ -170,7 +170,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -199,7 +200,7 @@ ELSE LWKOPT = N*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * @@ -274,7 +275,7 @@ $ CALL CGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, $ IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CGEQRF diff --git a/lapack-netlib/SRC/cgeqrfp.f b/lapack-netlib/SRC/cgeqrfp.f index 995404f43..eaf98ddf3 100644 --- a/lapack-netlib/SRC/cgeqrfp.f +++ b/lapack-netlib/SRC/cgeqrfp.f @@ -122,7 +122,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup geqrfp * *> \par Further Details: * ===================== @@ -173,7 +173,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. 
* @@ -182,7 +183,7 @@ INFO = 0 NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) LWKOPT = N*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -272,7 +273,7 @@ $ CALL CGEQR2P( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, $ IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CGEQRFP diff --git a/lapack-netlib/SRC/cgerqf.f b/lapack-netlib/SRC/cgerqf.f index d2247844c..6f914c892 100644 --- a/lapack-netlib/SRC/cgerqf.f +++ b/lapack-netlib/SRC/cgerqf.f @@ -114,7 +114,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup gerqf * *> \par Further Details: * ===================== @@ -163,7 +163,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -187,7 +188,7 @@ NB = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) LWKOPT = M*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF ( .NOT.LQUERY ) THEN IF( LWORK.LE.0 .OR. ( N.GT.0 .AND. LWORK.LT.MAX( 1, M ) ) ) @@ -278,7 +279,7 @@ IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL CGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CGERQF diff --git a/lapack-netlib/SRC/cgesvd.f b/lapack-netlib/SRC/cgesvd.f index 239b13431..6165a6acf 100644 --- a/lapack-netlib/SRC/cgesvd.f +++ b/lapack-netlib/SRC/cgesvd.f @@ -206,7 +206,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEsing +*> \ingroup gesvd * * ===================================================================== SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, @@ -259,8 +259,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH, SROUNDUP_LWORK * .. * .. 
Intrinsic Functions .. INTRINSIC MAX, MIN, SQRT @@ -615,7 +615,7 @@ END IF END IF MAXWRK = MAX( MINWRK, MAXWRK ) - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN INFO = -13 @@ -3694,7 +3694,7 @@ * * Return optimal workspace in WORK(1) * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * RETURN * diff --git a/lapack-netlib/SRC/cgetri.f b/lapack-netlib/SRC/cgetri.f index bd7fc286c..2060d1444 100644 --- a/lapack-netlib/SRC/cgetri.f +++ b/lapack-netlib/SRC/cgetri.f @@ -107,7 +107,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup getri * * ===================================================================== SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) @@ -138,7 +138,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA @@ -153,7 +154,7 @@ INFO = 0 NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 ) LWKOPT = N*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -251,7 +252,7 @@ $ CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) 60 CONTINUE * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CGETRI diff --git a/lapack-netlib/SRC/cgetsls.f b/lapack-netlib/SRC/cgetsls.f index 8a4d02224..b4bb7562f 100644 --- a/lapack-netlib/SRC/cgetsls.f +++ b/lapack-netlib/SRC/cgetsls.f @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEsolve +*> \ingroup getsls * * ===================================================================== SUBROUTINE CGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, @@ -191,15 +191,15 @@ * .. * .. External Functions .. 
LOGICAL LSAME - REAL SLAMCH, CLANGE - EXTERNAL LSAME, SLABAD, SLAMCH, CLANGE + REAL SLAMCH, CLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, CLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CGEQR, CGEMQR, CLASCL, CLASET, $ CTRTRS, XERBLA, CGELQ, CGEMLQ * .. * .. Intrinsic Functions .. - INTRINSIC REAL, MAX, MIN, INT + INTRINSIC MAX, MIN, INT * .. * .. Executable Statements .. * @@ -265,7 +265,7 @@ INFO = -10 END IF * - WORK( 1 ) = REAL( WSIZEO ) + WORK( 1 ) = SROUNDUP_LWORK( WSIZEO ) * END IF * @@ -274,7 +274,7 @@ RETURN END IF IF( LQUERY ) THEN - IF( LWORK.EQ.-2 ) WORK( 1 ) = REAL( WSIZEM ) + IF( LWORK.EQ.-2 ) WORK( 1 ) = SROUNDUP_LWORK( WSIZEM ) RETURN END IF IF( LWORK.LT.WSIZEO ) THEN @@ -297,7 +297,6 @@ * SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A, B if max element outside range [SMLNUM,BIGNUM] * @@ -485,7 +484,7 @@ END IF * 50 CONTINUE - WORK( 1 ) = REAL( TSZO + LWO ) + WORK( 1 ) = SROUNDUP_LWORK( TSZO + LWO ) RETURN * * End of CGETSLS diff --git a/lapack-netlib/SRC/cgges.f b/lapack-netlib/SRC/cgges.f index c54174da4..0ff848735 100644 --- a/lapack-netlib/SRC/cgges.f +++ b/lapack-netlib/SRC/cgges.f @@ -261,7 +261,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEeigen +*> \ingroup gges * * ===================================================================== SUBROUTINE CGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, @@ -312,14 +312,13 @@ * .. * .. External Subroutines .. EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, - $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD, - $ XERBLA + $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. 
INTRINSIC MAX, SQRT @@ -390,7 +389,7 @@ LWKOPT = MAX( LWKOPT, N + $ N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N, -1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) $ INFO = -18 @@ -415,7 +414,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -587,7 +585,7 @@ * 30 CONTINUE * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/cggesx.f b/lapack-netlib/SRC/cggesx.f index 6385a74c1..3bf460fac 100644 --- a/lapack-netlib/SRC/cggesx.f +++ b/lapack-netlib/SRC/cggesx.f @@ -320,7 +320,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEeigen +*> \ingroup ggesx * * ===================================================================== SUBROUTINE CGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, @@ -373,14 +373,13 @@ * .. * .. External Subroutines .. EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, - $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD, - $ XERBLA + $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, SQRT @@ -476,7 +475,7 @@ MAXWRK = 1 LWRK = 1 END IF - WORK( 1 ) = LWRK + WORK( 1 ) = SROUNDUP_LWORK(LWRK) IF( WANTSN .OR. 
N.EQ.0 ) THEN LIWMIN = 1 ELSE @@ -510,7 +509,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -705,7 +703,7 @@ * 40 CONTINUE * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/cggev.f b/lapack-netlib/SRC/cggev.f index c1c28a180..cf16e3079 100644 --- a/lapack-netlib/SRC/cggev.f +++ b/lapack-netlib/SRC/cggev.f @@ -209,7 +209,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEeigen +*> \ingroup ggev * * ===================================================================== SUBROUTINE CGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, @@ -254,14 +254,13 @@ * .. * .. External Subroutines .. EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, - $ CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, SLABAD, - $ XERBLA + $ CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, REAL, SQRT @@ -336,7 +335,7 @@ LWKOPT = MAX( LWKOPT, N + $ N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N, -1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. 
.NOT.LQUERY ) $ INFO = -15 @@ -359,7 +358,6 @@ EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -547,7 +545,7 @@ IF( ILBSCL ) $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CGGEV diff --git a/lapack-netlib/SRC/cggevx.f b/lapack-netlib/SRC/cggevx.f index 405c9c3b5..fa4e92682 100644 --- a/lapack-netlib/SRC/cggevx.f +++ b/lapack-netlib/SRC/cggevx.f @@ -335,7 +335,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEeigen +*> \ingroup ggevx * *> \par Further Details: * ===================== @@ -416,13 +416,13 @@ * .. External Subroutines .. EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, $ CLASCL, CLASET, CTGEVC, CTGSNA, CUNGQR, CUNMQR, - $ SLABAD, SLASCL, XERBLA + $ SLASCL, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANGE, SLAMCH - EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH + REAL CLANGE, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, REAL, SQRT @@ -521,7 +521,7 @@ $ N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N, 0 ) ) END IF END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN INFO = -25 @@ -545,7 +545,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -793,7 +792,7 @@ IF( ILBSCL ) $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of CGGEVX diff --git a/lapack-netlib/SRC/cggglm.f b/lapack-netlib/SRC/cggglm.f index fb384b651..0d36deca6 100644 --- a/lapack-netlib/SRC/cggglm.f +++ b/lapack-netlib/SRC/cggglm.f @@ -177,7 +177,7 @@ *> \author Univ. 
of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHEReigen +*> \ingroup ggglm * * ===================================================================== SUBROUTINE CGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, @@ -213,7 +213,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, MIN @@ -252,7 +253,7 @@ LWKMIN = M + N + P LWKOPT = M + NP + MAX( N, P )*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN INFO = -12 diff --git a/lapack-netlib/SRC/cgglse.f b/lapack-netlib/SRC/cgglse.f index cca20dfed..b1c562385 100644 --- a/lapack-netlib/SRC/cgglse.f +++ b/lapack-netlib/SRC/cgglse.f @@ -172,7 +172,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERsolve +*> \ingroup gglse * * ===================================================================== SUBROUTINE CGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, @@ -207,7 +207,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, MIN @@ -246,7 +247,7 @@ LWKMIN = M + N + P LWKOPT = P + MN + MAX( M, N )*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN INFO = -12 diff --git a/lapack-netlib/SRC/cggqrf.f b/lapack-netlib/SRC/cggqrf.f index 0185f4e0d..29b0bf4af 100644 --- a/lapack-netlib/SRC/cggqrf.f +++ b/lapack-netlib/SRC/cggqrf.f @@ -173,7 +173,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ggqrf * *> \par Further Details: * ===================== @@ -236,7 +236,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. 
INTRINSIC INT, MAX, MIN @@ -251,7 +252,7 @@ NB3 = ILAENV( 1, 'CUNMQR', ' ', N, M, P, -1 ) NB = MAX( NB1, NB2, NB3 ) LWKOPT = MAX( N, M, P)*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 diff --git a/lapack-netlib/SRC/cggrqf.f b/lapack-netlib/SRC/cggrqf.f index 5227100da..273ab3ef7 100644 --- a/lapack-netlib/SRC/cggrqf.f +++ b/lapack-netlib/SRC/cggrqf.f @@ -172,7 +172,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ggrqf * *> \par Further Details: * ===================== @@ -235,7 +235,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, MIN @@ -250,7 +251,7 @@ NB3 = ILAENV( 1, 'CUNMRQ', ' ', M, N, P, -1 ) NB = MAX( NB1, NB2, NB3 ) LWKOPT = MAX( N, M, P)*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 diff --git a/lapack-netlib/SRC/chbev_2stage.f b/lapack-netlib/SRC/chbev_2stage.f index 123d84729..f84d8d3d4 100644 --- a/lapack-netlib/SRC/chbev_2stage.f +++ b/lapack-netlib/SRC/chbev_2stage.f @@ -132,7 +132,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = (2KD+1)*N + KD*NTHREADS @@ -171,7 +171,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHEReigen +*> \ingroup hbev_2stage * *> \par Further Details: * ===================== @@ -189,7 +189,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. 
-*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -197,11 +197,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -240,8 +240,9 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, CLANHB - EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE + REAL SLAMCH, CLANHB, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SSCAL, SSTERF, XERBLA, CLASCL, CSTEQR, @@ -276,7 +277,7 @@ IF( INFO.EQ.0 ) THEN IF( N.LE.1 ) THEN LWMIN = 1 - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ELSE IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', JOBZ, $ N, KD, -1, -1 ) @@ -285,7 +286,7 @@ LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, $ N, KD, IB, -1 ) LWMIN = LHTRD + LWTRD - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ENDIF * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) @@ -351,7 +352,7 @@ LLWORK = LWORK - INDWRK + 1 * CALL CHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, - $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, $ WORK( INDWRK ), LLWORK, IINFO ) * * For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. 
@@ -377,7 +378,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/chbevd.f b/lapack-netlib/SRC/chbevd.f index de33c9039..a5afe6b76 100644 --- a/lapack-netlib/SRC/chbevd.f +++ b/lapack-netlib/SRC/chbevd.f @@ -201,7 +201,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHEReigen +*> \ingroup hbevd * * ===================================================================== SUBROUTINE CHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, @@ -239,8 +239,8 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL CLANHB, SLAMCH - EXTERNAL LSAME, CLANHB, SLAMCH + REAL CLANHB, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, CLANHB, SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CGEMM, CHBTRD, CLACPY, CLASCL, CSTEDC, SSCAL, @@ -288,7 +288,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN * @@ -382,7 +382,7 @@ CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN RETURN diff --git a/lapack-netlib/SRC/chbevx_2stage.f b/lapack-netlib/SRC/chbevx_2stage.f index 22bced45f..1d609dfbd 100644 --- a/lapack-netlib/SRC/chbevx_2stage.f +++ b/lapack-netlib/SRC/chbevx_2stage.f @@ -22,7 +22,7 @@ * * SUBROUTINE CHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, * Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, -* Z, LDZ, WORK, LWORK, RWORK, IWORK, +* Z, LDZ, WORK, LWORK, RWORK, IWORK, * IFAIL, INFO ) * * IMPLICIT NONE @@ -233,7 +233,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = (2KD+1)*N + KD*NTHREADS @@ -285,7 +285,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complexOTHEReigen +*> \ingroup hbevx_2stage * *> \par Further Details: * ===================== @@ -303,7 +303,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -311,18 +311,18 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * * ===================================================================== SUBROUTINE CHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, $ Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, - $ Z, LDZ, WORK, LWORK, RWORK, IWORK, + $ Z, LDZ, WORK, LWORK, RWORK, IWORK, $ IFAIL, INFO ) * IMPLICIT NONE @@ -367,8 +367,9 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, CLANHB - EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE + REAL SLAMCH, CLANHB, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CCOPY, @@ -424,16 +425,16 @@ IF( INFO.EQ.0 ) THEN IF( N.LE.1 ) THEN LWMIN = 1 - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ELSE IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', JOBZ, $ N, KD, -1, -1 ) - LHTRD = ILAENV2STAGE( 3, 'CHETRD_HB2ST', JOBZ, + LHTRD = ILAENV2STAGE( 3, 'CHETRD_HB2ST', JOBZ, $ N, KD, IB, -1 ) - LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, + LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, $ N, KD, IB, -1 ) LWMIN = LHTRD + LWTRD - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ENDIF * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) @@ -637,7 +638,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/chbgvd.f b/lapack-netlib/SRC/chbgvd.f index 655006370..00fb2b5f5 100644 --- a/lapack-netlib/SRC/chbgvd.f +++ b/lapack-netlib/SRC/chbgvd.f @@ -232,7 +232,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHEReigen +*> \ingroup hbgvd * *> \par Contributors: * ================== @@ -275,7 +275,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SSTERF, XERBLA, CGEMM, CHBGST, CHBTRD, CLACPY, @@ -322,7 +323,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN * @@ -388,7 +389,7 @@ CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN RETURN diff --git a/lapack-netlib/SRC/cheev.f b/lapack-netlib/SRC/cheev.f index fb8e451df..60df7d8b8 100644 --- a/lapack-netlib/SRC/cheev.f +++ b/lapack-netlib/SRC/cheev.f @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complexHEeigen +*> \ingroup heev * * ===================================================================== SUBROUTINE CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, @@ -169,8 +169,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANHE, SLAMCH - EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH + REAL CLANHE, SLAMCH, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CHETRD, CLASCL, CSTEQR, CUNGTR, SSCAL, SSTERF, @@ -201,7 +201,7 @@ IF( INFO.EQ.0 ) THEN NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, ( NB+1 )*N ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) $ INFO = -8 @@ -286,7 +286,7 @@ * * Set WORK(1) to optimal complex workspace size. * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/cheev_2stage.f b/lapack-netlib/SRC/cheev_2stage.f index fb7989d9f..4e1cecc64 100644 --- a/lapack-netlib/SRC/cheev_2stage.f +++ b/lapack-netlib/SRC/cheev_2stage.f @@ -106,12 +106,12 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = max(stage1,stage2) + (KD+1)*N + N -*> = N*KD + N*max(KD+1,FACTOPTNB) -*> + max(2*KD*KD, KD*NTHREADS) +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) *> + (KD+1)*N + N *> where KD is the blocking size of the reduction, *> FACTOPTNB is the blocking used by the QR or LQ @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup heev_2stage * *> \par Further Details: * ===================== @@ -167,7 +167,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. 
-*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -175,11 +175,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -220,8 +220,9 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, CLANHE - EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE + REAL SLAMCH, CLANHE, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SSCAL, SSTERF, XERBLA, CLASCL, CSTEQR, @@ -255,7 +256,7 @@ LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -8 @@ -314,7 +315,7 @@ LLWORK = LWORK - INDWRK + 1 * CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), - $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, $ WORK( INDWRK ), LLWORK, IINFO ) * * For eigenvalues only, call SSTERF. For eigenvectors, first call @@ -343,7 +344,7 @@ * * Set WORK(1) to optimal complex workspace size. 
* - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/cheevd.f b/lapack-netlib/SRC/cheevd.f index dce0b2083..b5ca804eb 100644 --- a/lapack-netlib/SRC/cheevd.f +++ b/lapack-netlib/SRC/cheevd.f @@ -180,7 +180,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup heevd * *> \par Further Details: * ===================== @@ -230,8 +230,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANHE, SLAMCH - EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH + REAL CLANHE, SLAMCH, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CHETRD, CLACPY, CLASCL, CSTEDC, CUNMTR, SSCAL, @@ -282,7 +282,7 @@ LROPT = LRWMIN LIOPT = LIWMIN END IF - WORK( 1 ) = LOPT + WORK( 1 ) = SROUNDUP_LWORK(LOPT) RWORK( 1 ) = LROPT IWORK( 1 ) = LIOPT * @@ -378,7 +378,7 @@ CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) END IF * - WORK( 1 ) = LOPT + WORK( 1 ) = SROUNDUP_LWORK(LOPT) RWORK( 1 ) = LROPT IWORK( 1 ) = LIOPT * diff --git a/lapack-netlib/SRC/cheevr.f b/lapack-netlib/SRC/cheevr.f index b8854b182..05c5e66be 100644 --- a/lapack-netlib/SRC/cheevr.f +++ b/lapack-netlib/SRC/cheevr.f @@ -338,7 +338,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup heevr * *> \par Contributors: * ================== @@ -392,8 +392,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANSY, SLAMCH - EXTERNAL LSAME, ILAENV, CLANSY, SLAMCH + REAL CLANSY, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, CLANSY, SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CHETRD, CSSCAL, CSTEMR, CSTEIN, CSWAP, CUNMTR, @@ -454,7 +454,7 @@ NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) ) LWKOPT = MAX( ( NB+1 )*N, LWMIN ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN * @@ -710,7 +710,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN * diff --git a/lapack-netlib/SRC/cheevx.f b/lapack-netlib/SRC/cheevx.f index 1cec902aa..e91599a44 100644 --- a/lapack-netlib/SRC/cheevx.f +++ b/lapack-netlib/SRC/cheevx.f @@ -250,7 +250,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup heevx * * ===================================================================== SUBROUTINE CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, @@ -294,8 +294,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, CLANHE - EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE + REAL SLAMCH, CLANHE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, @@ -354,7 +354,7 @@ NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) ) LWKOPT = MAX( 1, ( NB + 1 )*N ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) @@ -552,7 +552,7 @@ * * Set WORK(1) to optimal complex workspace size. * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/cheevx_2stage.f b/lapack-netlib/SRC/cheevx_2stage.f index 04f6e30e8..70a681ec4 100644 --- a/lapack-netlib/SRC/cheevx_2stage.f +++ b/lapack-netlib/SRC/cheevx_2stage.f @@ -209,12 +209,12 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. 
LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, 8*N, dimension) where *> dimension = max(stage1,stage2) + (KD+1)*N + N -*> = N*KD + N*max(KD+1,FACTOPTNB) -*> + max(2*KD*KD, KD*NTHREADS) +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) *> + (KD+1)*N + N *> where KD is the blocking size of the reduction, *> FACTOPTNB is the blocking used by the QR or LQ @@ -265,7 +265,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup heevx_2stage * *> \par Further Details: * ===================== @@ -283,7 +283,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -291,11 +291,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -335,7 +335,7 @@ CHARACTER ORDER INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, - $ ITMP1, J, JJ, LLWORK, + $ ITMP1, J, JJ, LLWORK, $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, $ SIGMA, SMLNUM, TMP1, VLL, VUU @@ -343,8 +343,9 @@ * .. 
External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, CLANHE - EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE + REAL SLAMCH, CLANHE, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, @@ -397,7 +398,7 @@ IF( INFO.EQ.0 ) THEN IF( N.LE.1 ) THEN LWMIN = 1 - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ELSE KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, $ N, -1, -1, -1 ) @@ -408,7 +409,7 @@ LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, $ N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) END IF * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) @@ -499,7 +500,7 @@ LLWORK = LWORK - INDWRK + 1 * CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDD ), - $ RWORK( INDE ), WORK( INDTAU ), + $ RWORK( INDE ), WORK( INDTAU ), $ WORK( INDHOUS ), LHTRD, WORK( INDWRK ), $ LLWORK, IINFO ) * @@ -610,7 +611,7 @@ * * Set WORK(1) to optimal complex workspace size. * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/chegv.f b/lapack-netlib/SRC/chegv.f index 198e5d102..53f9d5196 100644 --- a/lapack-netlib/SRC/chegv.f +++ b/lapack-netlib/SRC/chegv.f @@ -173,7 +173,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup hegv * * ===================================================================== SUBROUTINE CHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, @@ -206,7 +206,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CHEEV, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA @@ -240,7 +241,7 @@ IF( INFO.EQ.0 ) THEN NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, ( NB + 1 )*N ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN INFO = -11 @@ -309,7 +310,7 @@ END IF END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chegv_2stage.f b/lapack-netlib/SRC/chegv_2stage.f index d2b8fc795..8de1f7f06 100644 --- a/lapack-netlib/SRC/chegv_2stage.f +++ b/lapack-netlib/SRC/chegv_2stage.f @@ -144,12 +144,12 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = max(stage1,stage2) + (KD+1)*N + N -*> = N*KD + N*max(KD+1,FACTOPTNB) -*> + max(2*KD*KD, KD*NTHREADS) +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) *> + (KD+1)*N + N *> where KD is the blocking size of the reduction, *> FACTOPTNB is the blocking used by the QR or LQ @@ -192,7 +192,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup hegv_2stage * *> \par Further Details: * ===================== @@ -210,7 +210,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -218,11 +218,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. 
-*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -259,7 +259,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - EXTERNAL LSAME, ILAENV2STAGE + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV2STAGE, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, CHEGST, CPOTRF, CTRMM, CTRSM, @@ -297,7 +298,7 @@ LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 @@ -327,7 +328,7 @@ * Transform problem to standard eigenvalue problem and solve. * CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) - CALL CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, + CALL CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, $ WORK, LWORK, RWORK, INFO ) * IF( WANTZ ) THEN @@ -367,7 +368,7 @@ END IF END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/chegvd.f b/lapack-netlib/SRC/chegvd.f index 4edc36f2a..d2dc941e6 100644 --- a/lapack-netlib/SRC/chegvd.f +++ b/lapack-netlib/SRC/chegvd.f @@ -219,7 +219,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup hegvd * *> \par Further Details: * ===================== @@ -268,7 +268,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CHEEVD, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA @@ -316,7 +317,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LOPT + WORK( 1 ) = SROUNDUP_LWORK(LOPT) RWORK( 1 ) = LROPT IWORK( 1 ) = LIOPT * @@ -392,7 +393,7 @@ END IF END IF * - WORK( 1 ) = LOPT + WORK( 1 ) = SROUNDUP_LWORK(LOPT) RWORK( 1 ) = LROPT IWORK( 1 ) = LIOPT * diff --git a/lapack-netlib/SRC/chegvx.f b/lapack-netlib/SRC/chegvx.f index 8e565222d..172d0571e 100644 --- a/lapack-netlib/SRC/chegvx.f +++ b/lapack-netlib/SRC/chegvx.f @@ -293,7 +293,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEeigen +*> \ingroup hegvx * *> \par Contributors: * ================== @@ -335,7 +335,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CHEEVX, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA @@ -390,7 +391,7 @@ IF( INFO.EQ.0 ) THEN NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, ( NB + 1 )*N ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN INFO = -20 @@ -464,7 +465,7 @@ * * Set WORK(1) to optimal complex workspace size. * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chesv.f b/lapack-netlib/SRC/chesv.f index 238fb0a94..cea1235b7 100644 --- a/lapack-netlib/SRC/chesv.f +++ b/lapack-netlib/SRC/chesv.f @@ -163,7 +163,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEsolve +*> \ingroup hesv * * ===================================================================== SUBROUTINE CHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, @@ -191,7 +191,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL XERBLA, CHETRF, CHETRS, CHETRS2 @@ -226,7 +227,7 @@ NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) LWKOPT = N*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -259,7 +260,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chesv_aa.f b/lapack-netlib/SRC/chesv_aa.f index c9b97e09c..53ecc0a16 100644 --- a/lapack-netlib/SRC/chesv_aa.f +++ b/lapack-netlib/SRC/chesv_aa.f @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEsolve +*> \ingroup hesv_aa * * ===================================================================== SUBROUTINE CHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, @@ -182,7 +182,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, CHETRF_AA, CHETRS_AA @@ -217,7 +218,7 @@ $ -1, INFO ) LWKOPT_HETRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_HETRF, LWKOPT_HETRS ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -239,7 +240,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chesv_aa_2stage.f b/lapack-netlib/SRC/chesv_aa_2stage.f index 36970a329..12950c4af 100644 --- a/lapack-netlib/SRC/chesv_aa_2stage.f +++ b/lapack-netlib/SRC/chesv_aa_2stage.f @@ -177,7 +177,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hesv_aa_2stage * * ===================================================================== SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, @@ -207,7 +207,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CHETRF_AA_2STAGE, CHETRS_AA_2STAGE, @@ -267,7 +268,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chesv_rk.f b/lapack-netlib/SRC/chesv_rk.f index e123fa299..268a55e23 100644 --- a/lapack-netlib/SRC/chesv_rk.f +++ b/lapack-netlib/SRC/chesv_rk.f @@ -205,7 +205,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEsolve +*> \ingroup hesv_rk * *> \par Contributors: * ================== @@ -247,7 +247,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, CHETRF_RK, CHETRS_3 @@ -282,7 +283,7 @@ CALL CHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) LWKOPT = INT( WORK( 1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -304,7 +305,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chesv_rook.f b/lapack-netlib/SRC/chesv_rook.f index 8e0b1a88f..2a0d3fdaf 100644 --- a/lapack-netlib/SRC/chesv_rook.f +++ b/lapack-netlib/SRC/chesv_rook.f @@ -184,7 +184,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEsolve +*> \ingroup hesv_rook *> *> \verbatim *> @@ -225,7 +225,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL XERBLA, CHETRF_ROOK, CHETRS_ROOK @@ -260,7 +261,7 @@ NB = ILAENV( 1, 'CHETRF_ROOK', UPLO, N, -1, -1, -1 ) LWKOPT = N*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -283,7 +284,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chesvx.f b/lapack-netlib/SRC/chesvx.f index 6da49bdcf..c23a35ce7 100644 --- a/lapack-netlib/SRC/chesvx.f +++ b/lapack-netlib/SRC/chesvx.f @@ -276,7 +276,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEsolve +*> \ingroup hesvx * * ===================================================================== SUBROUTINE CHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, @@ -313,8 +313,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANHE, SLAMCH - EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH + REAL CLANHE, SLAMCH, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CHECON, CHERFS, CHETRF, CHETRS, CLACPY, XERBLA @@ -356,7 +356,7 @@ NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( LWKOPT, N*NB ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -405,7 +405,7 @@ IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) $ INFO = N + 1 * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chetrd_hb2st.F b/lapack-netlib/SRC/chetrd_hb2st.F index 30b01ed83..3688e40a3 100644 --- a/lapack-netlib/SRC/chetrd_hb2st.F +++ b/lapack-netlib/SRC/chetrd_hb2st.F @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, * D, E, HOUS, LHOUS, WORK, LWORK, INFO ) * * #if defined(_OPENMP) @@ -53,12 +53,12 @@ *> \param[in] STAGE1 *> \verbatim *> STAGE1 is CHARACTER*1 -*> = 'N': "No": to mention that the stage 1 of the reduction +*> = 'N': "No": to mention that the stage 1 of the reduction *> from dense to band using the chetrd_he2hb routine -*> was not called before this routine to reproduce AB. -*> In other term this routine is called as standalone. -*> = 'Y': "Yes": to mention that the stage 1 of the -*> reduction from dense to band using the chetrd_he2hb +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the chetrd_he2hb *> routine has been called to produce AB (e.g., AB is *> the output of chetrd_he2hb. 
*> \endverbatim @@ -66,10 +66,10 @@ *> \param[in] VECT *> \verbatim *> VECT is CHARACTER*1 -*> = 'N': No need for the Housholder representation, +*> = 'N': No need for the Housholder representation, *> and thus LHOUS is of size max(1, 4*N); -*> = 'V': the Householder representation is needed to -*> either generate or to apply Q later on, +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, *> then LHOUS is to be queried and computed. *> (NOT AVAILABLE IN THIS RELEASE). *> \endverbatim @@ -147,7 +147,7 @@ *> message related to LHOUS is issued by XERBLA. *> LHOUS = MAX(1, dimension) where *> dimension = 4*N if VECT='N' -*> not available now if VECT='H' +*> not available now if VECT='H' *> \endverbatim *> *> \param[out] WORK @@ -188,7 +188,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup hetrd_hb2st * *> \par Further Details: * ===================== @@ -208,7 +208,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -216,16 +216,16 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. 
-*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim *> * ===================================================================== - SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) * * @@ -259,11 +259,11 @@ * .. * .. Local Scalars .. LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 - INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, - $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, $ SICEV, SIZETAU, LDV, LHMIN, LWMIN REAL ABSTMP @@ -277,8 +277,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV2STAGE - EXTERNAL LSAME, ILAENV2STAGE + INTEGER ILAENV2STAGE + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV2STAGE, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -318,7 +319,7 @@ * IF( INFO.EQ.0 ) THEN HOUS( 1 ) = LHMIN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) END IF * IF( INFO.NE.0 ) THEN @@ -358,7 +359,7 @@ ABDPOS = KD + 1 ABOFDPOS = KD ELSE - APOS = INDA + APOS = INDA AWPOS = INDA + KD + 1 DPOS = APOS OFDPOS = DPOS + 1 @@ -366,11 +367,11 @@ ABOFDPOS = 2 ENDIF -* -* Case KD=0: -* The matrix is diagonal. We just copy it (convert to "real" for -* complex because D is double and the imaginary part should be 0) -* and store it in D. A sequential code here is better or +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* complex because D is double and the imaginary part should be 0) +* and store it in D. 
A sequential code here is better or * in a parallel environment it might need two cores for D and E * IF( KD.EQ.0 ) THEN @@ -385,17 +386,17 @@ WORK( 1 ) = 1 RETURN END IF -* -* Case KD=1: -* The matrix is already Tridiagonal. We have to make diagonal +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal * and offdiagonal elements real, and store them in D and E. -* For that, for real precision just copy the diag and offdiag -* to D and E while for the COMPLEX case the bulge chasing is -* performed to convert the hermetian tridiagonal to symmetric -* tridiagonal. A simpler conversion formula might be used, but then +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler conversion formula might be used, but then * updating the Q matrix will be required and based if Q is generated -* or not this might complicate the story. -* +* or not this might complicate the story. +* IF( KD.EQ.1 ) THEN DO 50 I = 1, N D( I ) = REAL( AB( ABDPOS, I ) ) @@ -444,7 +445,7 @@ C END IF RETURN END IF * -* Main code start here. +* Main code start here. * Reduce the hermitian band of A to a tridiagonal matrix. 
* THGRSIZ = N @@ -453,7 +454,7 @@ C END IF NBTILES = CEILING( REAL(N)/REAL(KD) ) STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) -* +* CALL CLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) CALL CLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) * @@ -462,7 +463,7 @@ C END IF * #if defined(_OPENMP) !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) -!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) !$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) !$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) !$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) @@ -471,7 +472,7 @@ C END IF #endif * * main bulge chasing loop -* +* DO 100 THGRID = 1, THGRNB STT = (THGRID-1)*THGRSIZ+1 THED = MIN( (STT + THGRSIZ -1), (N-1)) @@ -482,7 +483,7 @@ C END IF ST = STT DO 130 SWEEPID = ST, ED DO 140 K = 1, GRSIZ - MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) $ + (M-1)*GRSIZ + K IF ( MYID.EQ.1 ) THEN TTYPE = 1 @@ -508,16 +509,16 @@ C END IF ENDIF * * Call the kernel -* +* #if defined(_OPENMP) && _OPENMP >= 201307 - IF( TTYPE.NE.1 ) THEN + IF( TTYPE.NE.1 ) THEN !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) !$OMP$ DEPEND(in:WORK(MYID-1)) !$OMP$ DEPEND(out:WORK(MYID)) TID = OMP_GET_THREAD_NUM() - CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, $ STIND, EDIND, SWEEPID, N, KD, IB, - $ WORK ( INDA ), LDA, + $ WORK ( INDA ), LDA, $ HOUS( INDV ), HOUS( INDTAU ), LDV, $ WORK( INDW + TID*KD ) ) !$OMP END TASK @@ -525,20 +526,20 @@ C END IF !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) !$OMP$ DEPEND(out:WORK(MYID)) TID = OMP_GET_THREAD_NUM() - CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, $ STIND, EDIND, SWEEPID, N, KD, IB, - $ WORK ( INDA ), LDA, + $ WORK ( INDA ), LDA, $ HOUS( INDV ), HOUS( INDTAU ), LDV, $ WORK( INDW + TID*KD ) ) !$OMP END TASK ENDIF #else - CALL CHB2ST_KERNELS( UPLO, WANTQ, 
TTYPE, + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, $ STIND, EDIND, SWEEPID, N, KD, IB, - $ WORK ( INDA ), LDA, + $ WORK ( INDA ), LDA, $ HOUS( INDV ), HOUS( INDTAU ), LDV, $ WORK( INDW ) ) -#endif +#endif IF ( BLKLASTIND.GE.(N-1) ) THEN STT = STT + 1 EXIT @@ -553,14 +554,14 @@ C END IF !$OMP END MASTER !$OMP END PARALLEL #endif -* +* * Copy the diagonal from A to D. Note that D is REAL thus only * the Real part is needed, the imaginary part should be zero. * DO 150 I = 1, N D( I ) = REAL( WORK( DPOS+(I-1)*LDA ) ) 150 CONTINUE -* +* * Copy the off diagonal from A to E. Note that E is REAL thus only * the Real part is needed, the imaginary part should be zero. * @@ -575,10 +576,10 @@ C END IF ENDIF * HOUS( 1 ) = LHMIN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN * * End of CHETRD_HB2ST * END - + From 71fbdd908d59087e8d809dd123b83f68850ec122 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 14:10:16 +0100 Subject: [PATCH 119/125] Apply ROUNDUP_LWORK (Reference-LAPACK PR 904) --- lapack-netlib/SRC/chetrd_he2hb.f | 9 +++++---- lapack-netlib/SRC/chetrf.f | 9 +++++---- lapack-netlib/SRC/chetrf_aa.f | 9 +++++---- lapack-netlib/SRC/chetrf_rk.f | 9 +++++---- lapack-netlib/SRC/chetrf_rook.f | 9 +++++---- lapack-netlib/SRC/chetri_3.f | 9 +++++---- lapack-netlib/SRC/chetrs_aa.f | 7 ++++--- lapack-netlib/SRC/chpevd.f | 10 +++++----- lapack-netlib/SRC/chpgvd.f | 9 +++++---- lapack-netlib/SRC/chseqr.f | 5 +++-- lapack-netlib/SRC/clamswlq.f | 11 +++++++---- lapack-netlib/SRC/clamtsqr.f | 9 ++++++--- lapack-netlib/SRC/claswlq.f | 14 +++++++------- lapack-netlib/SRC/clatsqr.f | 9 ++++++--- lapack-netlib/SRC/cstedc.f | 10 +++++----- lapack-netlib/SRC/cstemr.f | 8 ++++---- lapack-netlib/SRC/csysv.f | 9 +++++---- lapack-netlib/SRC/csysv_aa.f | 9 +++++---- lapack-netlib/SRC/csysv_aa_2stage.f | 7 ++++--- lapack-netlib/SRC/csysv_rk.f | 9 +++++---- lapack-netlib/SRC/csysv_rook.f | 9 +++++---- lapack-netlib/SRC/csysvx.f | 10 +++++----- 
lapack-netlib/SRC/csytrf.f | 9 +++++---- lapack-netlib/SRC/csytrf_aa.f | 9 +++++---- lapack-netlib/SRC/csytrf_aa_2stage.f | 7 ++++--- lapack-netlib/SRC/csytrf_rk.f | 9 +++++---- lapack-netlib/SRC/csytrf_rook.f | 9 +++++---- lapack-netlib/SRC/csytri_3.f | 9 +++++---- lapack-netlib/SRC/csytrs_aa.f | 7 ++++--- lapack-netlib/SRC/ctgsen.f | 10 +++++++--- lapack-netlib/SRC/ctgsna.f | 14 +++++++------- lapack-netlib/SRC/ctgsyl.f | 9 +++++---- lapack-netlib/SRC/ctrevc3.f | 12 ++++++------ lapack-netlib/SRC/ctrsen.f | 10 +++++----- lapack-netlib/SRC/ctzrzf.f | 9 +++++---- lapack-netlib/SRC/cunbdb.f | 8 ++++---- lapack-netlib/SRC/cunbdb1.f | 8 ++++---- lapack-netlib/SRC/cunbdb2.f | 8 ++++---- lapack-netlib/SRC/cunbdb3.f | 8 ++++---- lapack-netlib/SRC/cunbdb4.f | 8 ++++---- lapack-netlib/SRC/cuncsd.f | 8 +++++--- lapack-netlib/SRC/cuncsd2by1.f | 7 ++++--- lapack-netlib/SRC/cungbr.f | 9 +++++---- lapack-netlib/SRC/cunghr.f | 9 +++++---- lapack-netlib/SRC/cunglq.f | 9 +++++---- lapack-netlib/SRC/cungql.f | 7 ++++--- lapack-netlib/SRC/cungqr.f | 9 +++++---- lapack-netlib/SRC/cungrq.f | 9 +++++---- lapack-netlib/SRC/cungtr.f | 9 +++++---- lapack-netlib/SRC/cunmbr.f | 9 +++++---- lapack-netlib/SRC/cunmhr.f | 9 +++++---- lapack-netlib/SRC/cunmlq.f | 9 +++++---- lapack-netlib/SRC/cunmql.f | 9 +++++---- lapack-netlib/SRC/cunmqr.f | 9 +++++---- lapack-netlib/SRC/cunmrq.f | 9 +++++---- lapack-netlib/SRC/cunmrz.f | 9 +++++---- lapack-netlib/SRC/cunmtr.f | 9 +++++---- 57 files changed, 282 insertions(+), 228 deletions(-) diff --git a/lapack-netlib/SRC/chetrd_he2hb.f b/lapack-netlib/SRC/chetrd_he2hb.f index 904555c10..090f02100 100644 --- a/lapack-netlib/SRC/chetrd_he2hb.f +++ b/lapack-netlib/SRC/chetrd_he2hb.f @@ -158,7 +158,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEcomputational +*> \ingroup hetrd_he2hb * *> \par Further Details: * ===================== @@ -283,7 +283,8 @@ * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV2STAGE - EXTERNAL LSAME, ILAENV2STAGE + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV2STAGE, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -313,7 +314,7 @@ CALL XERBLA( 'CHETRD_HE2HB', -INFO ) RETURN ELSE IF( LQUERY ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN END IF * @@ -506,7 +507,7 @@ END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN * * End of CHETRD_HE2HB diff --git a/lapack-netlib/SRC/chetrf.f b/lapack-netlib/SRC/chetrf.f index 484e76256..0c596ffe7 100644 --- a/lapack-netlib/SRC/chetrf.f +++ b/lapack-netlib/SRC/chetrf.f @@ -130,7 +130,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEcomputational +*> \ingroup hetrf * *> \par Further Details: * ===================== @@ -197,7 +197,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CHETF2, CLAHEF, XERBLA @@ -228,7 +229,7 @@ * NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) LWKOPT = N*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -346,7 +347,7 @@ END IF * 40 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CHETRF diff --git a/lapack-netlib/SRC/chetrf_aa.f b/lapack-netlib/SRC/chetrf_aa.f index d9e4fbd19..0547a4eab 100644 --- a/lapack-netlib/SRC/chetrf_aa.f +++ b/lapack-netlib/SRC/chetrf_aa.f @@ -125,7 +125,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEcomputational +*> \ingroup hetrf_aa * * ===================================================================== SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) @@ -159,7 +159,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CLAHEF_AA, CGEMM, CCOPY, CSWAP, CSCAL, XERBLA @@ -190,7 +191,7 @@ * IF( INFO.EQ.0 ) THEN LWKOPT = (NB+1)*N - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -459,7 +460,7 @@ END IF * 20 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CHETRF_AA diff --git a/lapack-netlib/SRC/chetrf_rk.f b/lapack-netlib/SRC/chetrf_rk.f index 3f60f4370..ef442c937 100644 --- a/lapack-netlib/SRC/chetrf_rk.f +++ b/lapack-netlib/SRC/chetrf_rk.f @@ -229,7 +229,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEcomputational +*> \ingroup hetrf_rk * *> \par Further Details: * ===================== @@ -280,7 +280,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLAHEF_RK, CHETF2_RK, CSWAP, XERBLA @@ -311,7 +312,7 @@ * NB = ILAENV( 1, 'CHETRF_RK', UPLO, N, -1, -1, -1 ) LWKOPT = N*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -487,7 +488,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CHETRF_RK diff --git a/lapack-netlib/SRC/chetrf_rook.f b/lapack-netlib/SRC/chetrf_rook.f index 805e0f4cb..1593c2edc 100644 --- a/lapack-netlib/SRC/chetrf_rook.f +++ b/lapack-netlib/SRC/chetrf_rook.f @@ -150,7 +150,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEcomputational +*> \ingroup hetrf_rook * *> \par Further Details: * ===================== @@ -232,7 +232,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CLAHEF_ROOK, CHETF2_ROOK, XERBLA @@ -263,7 +264,7 @@ * NB = ILAENV( 1, 'CHETRF_ROOK', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, N*NB ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -386,7 +387,7 @@ END IF * 40 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CHETRF_ROOK diff --git a/lapack-netlib/SRC/chetri_3.f b/lapack-netlib/SRC/chetri_3.f index cc2318b22..deda63598 100644 --- a/lapack-netlib/SRC/chetri_3.f +++ b/lapack-netlib/SRC/chetri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEcomputational +*> \ingroup hetri_3 * *> \par Contributors: * ================== @@ -190,7 +190,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CHETRI_3X, XERBLA @@ -225,7 +226,7 @@ CALL XERBLA( 'CHETRI_3', -INFO ) RETURN ELSE IF( LQUERY ) THEN - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * @@ -236,7 +237,7 @@ * CALL CHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/chetrs_aa.f b/lapack-netlib/SRC/chetrs_aa.f index 2546adb2d..879549106 100644 --- a/lapack-netlib/SRC/chetrs_aa.f +++ b/lapack-netlib/SRC/chetrs_aa.f @@ -123,7 +123,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexHEcomputational +*> \ingroup hetrs_aa * * ===================================================================== SUBROUTINE CHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, @@ -155,7 +155,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME,SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CLACPY, CLACGV, CGTSV, CSWAP, CTRSM, XERBLA @@ -186,7 +187,7 @@ RETURN ELSE IF( LQUERY ) THEN LWKOPT = (3*N-2) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * diff --git a/lapack-netlib/SRC/chpevd.f b/lapack-netlib/SRC/chpevd.f index 06d01064d..2449783a2 100644 --- a/lapack-netlib/SRC/chpevd.f +++ b/lapack-netlib/SRC/chpevd.f @@ -186,7 +186,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHEReigen +*> \ingroup hpevd * * ===================================================================== SUBROUTINE CHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, @@ -223,8 +223,8 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL CLANHP, SLAMCH - EXTERNAL LSAME, CLANHP, SLAMCH + REAL CLANHP, SLAMCH, SROUNDUP_LWORK + EXTERNAL LSAME, CLANHP, SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CHPTRD, CSSCAL, CSTEDC, CUPMTR, SSCAL, SSTERF, @@ -268,7 +268,7 @@ LIWMIN = 1 END IF END IF - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN * @@ -359,7 +359,7 @@ CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN RETURN diff --git a/lapack-netlib/SRC/chpgvd.f b/lapack-netlib/SRC/chpgvd.f index c24ca1360..57ac4fc72 100644 --- a/lapack-netlib/SRC/chpgvd.f +++ b/lapack-netlib/SRC/chpgvd.f @@ -212,7 +212,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHEReigen +*> \ingroup hpgvd * *> \par Contributors: * ================== @@ -246,7 +246,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CHPEVD, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA @@ -292,7 +293,7 @@ END IF END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN IF( LWORK.LT.LWMIN .AND. 
.NOT.LQUERY ) THEN @@ -374,7 +375,7 @@ END IF END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN RETURN diff --git a/lapack-netlib/SRC/chseqr.f b/lapack-netlib/SRC/chseqr.f index 007f72f59..56ff01fc6 100644 --- a/lapack-netlib/SRC/chseqr.f +++ b/lapack-netlib/SRC/chseqr.f @@ -216,7 +216,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup hseqr * *> \par Contributors: * ================== @@ -343,7 +343,8 @@ * .. External Functions .. INTEGER ILAENV LOGICAL LSAME - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CCOPY, CLACPY, CLAHQR, CLAQR0, CLASET, XERBLA diff --git a/lapack-netlib/SRC/clamswlq.f b/lapack-netlib/SRC/clamswlq.f index 1606cc611..5daf60bf6 100644 --- a/lapack-netlib/SRC/clamswlq.f +++ b/lapack-netlib/SRC/clamswlq.f @@ -189,6 +189,8 @@ *> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 *> \endverbatim *> +*> \ingroup lamswlq +*> * ===================================================================== SUBROUTINE CLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) @@ -215,7 +217,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. External Subroutines .. EXTERNAL CTPMLQT, CGEMLQT, XERBLA * .. @@ -259,10 +262,10 @@ * IF( INFO.NE.0 ) THEN CALL XERBLA( 'CLAMSWLQ', -INFO ) - WORK(1) = LW + WORK(1) = SROUNDUP_LWORK(LW) RETURN ELSE IF (LQUERY) THEN - WORK(1) = LW + WORK(1) = SROUNDUP_LWORK(LW) RETURN END IF * @@ -401,7 +404,7 @@ * END IF * - WORK(1) = LW + WORK(1) = SROUNDUP_LWORK(LW) RETURN * * End of CLAMSWLQ diff --git a/lapack-netlib/SRC/clamtsqr.f b/lapack-netlib/SRC/clamtsqr.f index 5677420ac..05021e642 100644 --- a/lapack-netlib/SRC/clamtsqr.f +++ b/lapack-netlib/SRC/clamtsqr.f @@ -191,6 +191,8 @@ *> SIAM J. Sci. Comput, vol. 34, no. 
1, 2012 *> \endverbatim *> +*> \ingroup lamtsqr +*> * ===================================================================== SUBROUTINE CLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) @@ -217,7 +219,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. External Subroutines .. EXTERNAL CGEMQRT, CTPMQRT, XERBLA * .. @@ -264,7 +267,7 @@ * Determine the block size if it is tall skinny or short and wide * IF( INFO.EQ.0) THEN - WORK(1) = LW + WORK(1) = SROUNDUP_LWORK(LW) END IF * IF( INFO.NE.0 ) THEN @@ -409,7 +412,7 @@ * END IF * - WORK(1) = LW + WORK(1) = SROUNDUP_LWORK(LW) RETURN * * End of CLAMTSQR diff --git a/lapack-netlib/SRC/claswlq.f b/lapack-netlib/SRC/claswlq.f index 1a09b8305..12e8373df 100644 --- a/lapack-netlib/SRC/claswlq.f +++ b/lapack-netlib/SRC/claswlq.f @@ -159,6 +159,8 @@ *> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 *> \endverbatim *> +*> \ingroup laswlq +*> * ===================================================================== SUBROUTINE CLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, $ INFO) @@ -183,16 +185,14 @@ * .. * .. EXTERNAL FUNCTIONS .. LOGICAL LSAME - EXTERNAL LSAME + INTEGER ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. EXTERNAL SUBROUTINES .. EXTERNAL CGELQT, CTPLQT, XERBLA * .. INTRINSIC FUNCTIONS .. INTRINSIC MAX, MIN, MOD * .. -* .. EXTERNAL FUNCTIONS .. - INTEGER ILAENV - EXTERNAL ILAENV -* .. * .. EXECUTABLE STATEMENTS .. 
* * TEST THE INPUT ARGUMENTS @@ -217,7 +217,7 @@ INFO = -10 END IF IF( INFO.EQ.0) THEN - WORK(1) = MB*M + WORK(1) = SROUNDUP_LWORK(MB*M) END IF * IF( INFO.NE.0 ) THEN @@ -266,7 +266,7 @@ $ WORK, INFO ) END IF * - WORK( 1 ) = M * MB + WORK( 1 ) = SROUNDUP_LWORK(M * MB) RETURN * * End of CLASWLQ diff --git a/lapack-netlib/SRC/clatsqr.f b/lapack-netlib/SRC/clatsqr.f index 377190081..cd2cb4aa7 100644 --- a/lapack-netlib/SRC/clatsqr.f +++ b/lapack-netlib/SRC/clatsqr.f @@ -161,6 +161,8 @@ *> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 *> \endverbatim *> +*> \ingroup latsqr +*> * ===================================================================== SUBROUTINE CLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, $ LWORK, INFO) @@ -185,7 +187,8 @@ * .. * .. EXTERNAL FUNCTIONS .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. EXTERNAL SUBROUTINES .. EXTERNAL CGEQRT, CTPQRT, XERBLA * .. INTRINSIC FUNCTIONS .. @@ -215,7 +218,7 @@ INFO = -10 END IF IF( INFO.EQ.0) THEN - WORK(1) = NB*N + WORK(1) = SROUNDUP_LWORK(NB*N) END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CLATSQR', -INFO ) @@ -262,7 +265,7 @@ $ WORK, INFO ) END IF * - work( 1 ) = N*NB + WORK( 1 ) = SROUNDUP_LWORK(N*NB) RETURN * * End of CLATSQR diff --git a/lapack-netlib/SRC/cstedc.f b/lapack-netlib/SRC/cstedc.f index 77a4ec3be..d7db591b3 100644 --- a/lapack-netlib/SRC/cstedc.f +++ b/lapack-netlib/SRC/cstedc.f @@ -192,7 +192,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup stedc * *> \par Contributors: * ================== @@ -233,8 +233,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANST - EXTERNAL ILAENV, LSAME, SLAMCH, SLANST + REAL SLAMCH, SLANST, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SLAMCH, SLANST, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL XERBLA, CLACPY, CLACRM, CLAED0, CSTEQR, CSWAP, @@ -295,7 +295,7 @@ LRWMIN = 1 + 4*N + 2*N**2 LIWMIN = 3 + 5*N END IF - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN * @@ -466,7 +466,7 @@ END IF * 70 CONTINUE - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RWORK( 1 ) = LRWMIN IWORK( 1 ) = LIWMIN * diff --git a/lapack-netlib/SRC/cstemr.f b/lapack-netlib/SRC/cstemr.f index 9d47450e3..46b20d880 100644 --- a/lapack-netlib/SRC/cstemr.f +++ b/lapack-netlib/SRC/cstemr.f @@ -376,8 +376,8 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL SLAMCH, SLANST - EXTERNAL LSAME, SLAMCH, SLANST + REAL SLAMCH, SLANST, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANST, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLARRV, CSWAP, SCOPY, SLAE2, SLAEV2, SLARRC, @@ -462,7 +462,7 @@ RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( WANTZ .AND. ALLEIG ) THEN @@ -801,7 +801,7 @@ ENDIF * * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/csysv.f b/lapack-netlib/SRC/csysv.f index 4ddabf62f..a2d1e7cbe 100644 --- a/lapack-netlib/SRC/csysv.f +++ b/lapack-netlib/SRC/csysv.f @@ -163,7 +163,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYsolve +*> \ingroup hesv * * ===================================================================== SUBROUTINE CSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, @@ -190,7 +190,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL XERBLA, CSYTRF, CSYTRS, CSYTRS2 @@ -225,7 +226,7 @@ CALL CSYTRF( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) LWKOPT = INT( WORK( 1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -258,7 +259,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/csysv_aa.f b/lapack-netlib/SRC/csysv_aa.f index 8548c2789..571a91123 100644 --- a/lapack-netlib/SRC/csysv_aa.f +++ b/lapack-netlib/SRC/csysv_aa.f @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYsolve +*> \ingroup hesv_aa * * ===================================================================== SUBROUTINE CSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, @@ -182,7 +182,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, CSYTRF_AA, CSYTRS_AA @@ -217,7 +218,7 @@ $ -1, INFO ) LWKOPT_SYTRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -239,7 +240,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/csysv_aa_2stage.f b/lapack-netlib/SRC/csysv_aa_2stage.f index 22227505c..10119d8ba 100644 --- a/lapack-netlib/SRC/csysv_aa_2stage.f +++ b/lapack-netlib/SRC/csysv_aa_2stage.f @@ -177,7 +177,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hesv_aa_2stage * * ===================================================================== SUBROUTINE CSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, @@ -207,7 +207,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CSYTRF_AA_2STAGE, @@ -267,7 +268,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/csysv_rk.f b/lapack-netlib/SRC/csysv_rk.f index ef5334dcd..cb98ab1dc 100644 --- a/lapack-netlib/SRC/csysv_rk.f +++ b/lapack-netlib/SRC/csysv_rk.f @@ -205,7 +205,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYsolve +*> \ingroup hesv_rk * *> \par Contributors: * ================== @@ -247,7 +247,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, CSYTRF_RK, CSYTRS_3 @@ -282,7 +283,7 @@ CALL CSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) LWKOPT = INT( WORK( 1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -304,7 +305,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/csysv_rook.f b/lapack-netlib/SRC/csysv_rook.f index aad594e21..8798ddfb2 100644 --- a/lapack-netlib/SRC/csysv_rook.f +++ b/lapack-netlib/SRC/csysv_rook.f @@ -181,7 +181,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYsolve +*> \ingroup hesv_rook * *> \par Contributors: * ================== @@ -223,7 +223,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL XERBLA, CSYTRF_ROOK, CSYTRS_ROOK @@ -258,7 +259,7 @@ CALL CSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) LWKOPT = INT( WORK( 1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -281,7 +282,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/csysvx.f b/lapack-netlib/SRC/csysvx.f index 2afa082a9..3c7a37889 100644 --- a/lapack-netlib/SRC/csysvx.f +++ b/lapack-netlib/SRC/csysvx.f @@ -276,7 +276,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYsolve +*> \ingroup hesvx * * ===================================================================== SUBROUTINE CSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, @@ -313,8 +313,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL CLANSY, SLAMCH - EXTERNAL ILAENV, LSAME, CLANSY, SLAMCH + REAL CLANSY, SLAMCH, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, CLANSY, SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLACPY, CSYCON, CSYRFS, CSYTRF, CSYTRS, XERBLA @@ -356,7 +356,7 @@ NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( LWKOPT, N*NB ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -405,7 +405,7 @@ IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) $ INFO = N + 1 * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/csytrf.f b/lapack-netlib/SRC/csytrf.f index 951196b83..519e78490 100644 --- a/lapack-netlib/SRC/csytrf.f +++ b/lapack-netlib/SRC/csytrf.f @@ -135,7 +135,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hetrf * *> \par Further Details: * ===================== @@ -202,7 +202,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CLASYF, CSYTF2, XERBLA @@ -233,7 +234,7 @@ * NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, N*NB ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -351,7 +352,7 @@ END IF * 40 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CSYTRF diff --git a/lapack-netlib/SRC/csytrf_aa.f b/lapack-netlib/SRC/csytrf_aa.f index c5467bf01..cf994913d 100644 --- a/lapack-netlib/SRC/csytrf_aa.f +++ b/lapack-netlib/SRC/csytrf_aa.f @@ -125,7 +125,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hetrf_aa * * ===================================================================== SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) @@ -159,7 +159,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLASYF_AA, CGEMM, CGEMV, CSCAL, CSWAP, CCOPY, @@ -191,7 +192,7 @@ * IF( INFO.EQ.0 ) THEN LWKOPT = (NB+1)*N - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -457,7 +458,7 @@ END IF * 20 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CSYTRF_AA diff --git a/lapack-netlib/SRC/csytrf_aa_2stage.f b/lapack-netlib/SRC/csytrf_aa_2stage.f index b21df8cd3..e56aedaf6 100644 --- a/lapack-netlib/SRC/csytrf_aa_2stage.f +++ b/lapack-netlib/SRC/csytrf_aa_2stage.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hetrf_aa_2stage * * ===================================================================== SUBROUTINE CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, @@ -188,7 +188,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. 
External Subroutines .. EXTERNAL CCOPY, CGBTRF, CGEMM, CGETRF, CLACPY, @@ -230,7 +231,7 @@ TB( 1 ) = (3*NB+1)*N END IF IF( WQUERY ) THEN - WORK( 1 ) = N*NB + WORK( 1 ) = SROUNDUP_LWORK(N*NB) END IF END IF IF( TQUERY .OR. WQUERY ) THEN diff --git a/lapack-netlib/SRC/csytrf_rk.f b/lapack-netlib/SRC/csytrf_rk.f index 996801e7d..de39bda41 100644 --- a/lapack-netlib/SRC/csytrf_rk.f +++ b/lapack-netlib/SRC/csytrf_rk.f @@ -229,7 +229,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hetrf_rk * *> \par Further Details: * ===================== @@ -280,7 +280,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLASYF_RK, CSYTF2_RK, CSWAP, XERBLA @@ -311,7 +312,7 @@ * NB = ILAENV( 1, 'CSYTRF_RK', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, N*NB ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -487,7 +488,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CSYTRF_RK diff --git a/lapack-netlib/SRC/csytrf_rook.f b/lapack-netlib/SRC/csytrf_rook.f index ce7c1e586..72fe0629f 100644 --- a/lapack-netlib/SRC/csytrf_rook.f +++ b/lapack-netlib/SRC/csytrf_rook.f @@ -146,7 +146,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hetrf_rook * *> \par Further Details: * ===================== @@ -228,7 +228,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CLASYF_ROOK, CSYTF2_ROOK, XERBLA @@ -259,7 +260,7 @@ * NB = ILAENV( 1, 'CSYTRF_ROOK', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, N*NB ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -382,7 +383,7 @@ END IF * 40 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CSYTRF_ROOK diff --git a/lapack-netlib/SRC/csytri_3.f b/lapack-netlib/SRC/csytri_3.f index 279f62853..604d84b21 100644 --- a/lapack-netlib/SRC/csytri_3.f +++ b/lapack-netlib/SRC/csytri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hetri_3 * *> \par Contributors: * ================== @@ -190,7 +190,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CSYTRI_3X, XERBLA @@ -225,7 +226,7 @@ CALL XERBLA( 'CSYTRI_3', -INFO ) RETURN ELSE IF( LQUERY ) THEN - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * @@ -236,7 +237,7 @@ * CALL CSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/csytrs_aa.f b/lapack-netlib/SRC/csytrs_aa.f index 1f6ea40af..7f63539a6 100644 --- a/lapack-netlib/SRC/csytrs_aa.f +++ b/lapack-netlib/SRC/csytrs_aa.f @@ -123,7 +123,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup hetrs_aa * * ===================================================================== SUBROUTINE CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, @@ -155,7 +155,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CLACPY, CGTSV, CSWAP, CTRSM, XERBLA @@ -186,7 +187,7 @@ RETURN ELSE IF( LQUERY ) THEN LWKOPT = (3*N-2) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * diff --git a/lapack-netlib/SRC/ctgsen.f b/lapack-netlib/SRC/ctgsen.f index ffd638099..180e96b32 100644 --- a/lapack-netlib/SRC/ctgsen.f +++ b/lapack-netlib/SRC/ctgsen.f @@ -290,7 +290,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup tgsen * *> \par Further Details: * ===================== @@ -467,6 +467,10 @@ * .. Local Arrays .. INTEGER ISAVE( 3 ) * .. +* .. External Functions .. + REAL SROUNDUP_LWORK + EXTERNAL SROUNDUP_LWORK +* .. * .. External Subroutines .. REAL SLAMCH EXTERNAL CLACN2, CLACPY, CLASSQ, CSCAL, CTGEXC, CTGSYL, @@ -537,7 +541,7 @@ LIWMIN = 1 END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -771,7 +775,7 @@ * 70 CONTINUE * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/ctgsna.f b/lapack-netlib/SRC/ctgsna.f index 2295dc5cc..50498c413 100644 --- a/lapack-netlib/SRC/ctgsna.f +++ b/lapack-netlib/SRC/ctgsna.f @@ -213,7 +213,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup tgsna * *> \par Further Details: * ===================== @@ -343,12 +343,13 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL SCNRM2, SLAMCH, SLAPY2 + REAL SCNRM2, SLAMCH, SLAPY2, SROUNDUP_LWORK COMPLEX CDOTC - EXTERNAL LSAME, SCNRM2, SLAMCH, SLAPY2, CDOTC + EXTERNAL LSAME, SCNRM2, SLAMCH, SLAPY2, SROUNDUP_LWORK, + $ CDOTC * .. * .. External Subroutines .. - EXTERNAL CGEMV, CLACPY, CTGEXC, CTGSYL, SLABAD, XERBLA + EXTERNAL CGEMV, CLACPY, CTGEXC, CTGSYL, XERBLA * .. * .. Intrinsic Functions .. 
INTRINSIC ABS, CMPLX, MAX @@ -402,7 +403,7 @@ ELSE LWMIN = N END IF - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * IF( MM.LT.M ) THEN INFO = -15 @@ -428,7 +429,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) / EPS BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) KS = 0 DO 20 K = 1, N * @@ -508,7 +508,7 @@ END IF * 20 CONTINUE - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN * * End of CTGSNA diff --git a/lapack-netlib/SRC/ctgsyl.f b/lapack-netlib/SRC/ctgsyl.f index ae1437125..620556399 100644 --- a/lapack-netlib/SRC/ctgsyl.f +++ b/lapack-netlib/SRC/ctgsyl.f @@ -260,7 +260,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexSYcomputational +*> \ingroup tgsyl * *> \par Contributors: * ================== @@ -329,7 +329,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CGEMM, CLACPY, CLASET, CSCAL, CTGSY2, XERBLA @@ -382,7 +383,7 @@ ELSE LWMIN = 1 END IF - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -20 @@ -683,7 +684,7 @@ 210 CONTINUE END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/ctrevc3.f b/lapack-netlib/SRC/ctrevc3.f index 11b32104d..13cbf553f 100644 --- a/lapack-netlib/SRC/ctrevc3.f +++ b/lapack-netlib/SRC/ctrevc3.f @@ -222,7 +222,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup trevc3 * *> \par Further Details: * ===================== @@ -278,12 +278,13 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV, ICAMAX - REAL SLAMCH, SCASUM - EXTERNAL LSAME, ILAENV, ICAMAX, SLAMCH, SCASUM + REAL SLAMCH, SCASUM, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, ICAMAX, SLAMCH, SCASUM, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL XERBLA, CCOPY, CLASET, CSSCAL, CGEMM, CGEMV, - $ CLATRS, CLACPY, SLABAD + $ CLATRS, CLACPY * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CMPLX, CONJG, AIMAG, MAX @@ -322,7 +323,7 @@ INFO = 0 NB = ILAENV( 1, 'CTREVC', SIDE // HOWMNY, N, -1, -1, -1 ) MAXWRK = MAX( 1, N + 2*N*NB ) - WORK(1) = MAXWRK + WORK(1) = SROUNDUP_LWORK(MAXWRK) RWORK(1) = MAX( 1, N ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN @@ -371,7 +372,6 @@ * UNFL = SLAMCH( 'Safe minimum' ) OVFL = ONE / UNFL - CALL SLABAD( UNFL, OVFL ) ULP = SLAMCH( 'Precision' ) SMLNUM = UNFL*( N / ULP ) * diff --git a/lapack-netlib/SRC/ctrsen.f b/lapack-netlib/SRC/ctrsen.f index d93b97be6..9d59f6bf2 100644 --- a/lapack-netlib/SRC/ctrsen.f +++ b/lapack-netlib/SRC/ctrsen.f @@ -182,7 +182,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup trsen * *> \par Further Details: * ===================== @@ -293,8 +293,8 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL CLANGE - EXTERNAL LSAME, CLANGE + REAL CLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, CLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLACN2, CLACPY, CTREXC, CTRSYL, XERBLA @@ -350,7 +350,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) END IF * IF( INFO.NE.0 ) THEN @@ -444,7 +444,7 @@ W( K ) = T( K, K ) 50 CONTINUE * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/ctzrzf.f b/lapack-netlib/SRC/ctzrzf.f index b21f092ce..ac3f59400 100644 --- a/lapack-netlib/SRC/ctzrzf.f +++ b/lapack-netlib/SRC/ctzrzf.f @@ -116,7 +116,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup tzrzf * *> \par Contributors: * ================== @@ -179,7 +179,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. 
Executable Statements .. * @@ -207,7 +208,7 @@ LWKOPT = M*NB LWKMIN = MAX( 1, M ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN INFO = -7 @@ -301,7 +302,7 @@ IF( MU.GT.0 ) $ CALL CLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/cunbdb.f b/lapack-netlib/SRC/cunbdb.f index a41895dc8..b45dcfde6 100644 --- a/lapack-netlib/SRC/cunbdb.f +++ b/lapack-netlib/SRC/cunbdb.f @@ -255,7 +255,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unbdb * *> \par Further Details: * ===================== @@ -320,9 +320,9 @@ * * .. * .. External Functions .. - REAL SCNRM2 + REAL SCNRM2, SROUNDUP_LWORK LOGICAL LSAME - EXTERNAL SCNRM2, LSAME + EXTERNAL SCNRM2, SROUNDUP_LWORK, LSAME * .. * .. Intrinsic Functions INTRINSIC ATAN2, COS, MAX, MIN, SIN @@ -377,7 +377,7 @@ IF( INFO .EQ. 0 ) THEN LWORKOPT = M - Q LWORKMIN = M - Q - WORK(1) = LWORKOPT + WORK(1) = SROUNDUP_LWORK(LWORKOPT) IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN INFO = -21 END IF diff --git a/lapack-netlib/SRC/cunbdb1.f b/lapack-netlib/SRC/cunbdb1.f index 80faa8808..a4875ab5b 100644 --- a/lapack-netlib/SRC/cunbdb1.f +++ b/lapack-netlib/SRC/cunbdb1.f @@ -173,7 +173,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unbdb1 * *> \par Further Details: * ===================== @@ -230,8 +230,8 @@ EXTERNAL CLACGV * .. * .. External Functions .. - REAL SCNRM2 - EXTERNAL SCNRM2 + REAL SCNRM2, SROUNDUP_LWORK + EXTERNAL SCNRM2, SROUNDUP_LWORK * .. * .. Intrinsic Function .. INTRINSIC ATAN2, COS, MAX, SIN, SQRT @@ -264,7 +264,7 @@ LORBDB5 = Q-2 LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) LWORKMIN = LWORKOPT - WORK(1) = LWORKOPT + WORK(1) = SROUNDUP_LWORK(LWORKOPT) IF( LWORK .LT. LWORKMIN .AND. 
.NOT.LQUERY ) THEN INFO = -14 END IF diff --git a/lapack-netlib/SRC/cunbdb2.f b/lapack-netlib/SRC/cunbdb2.f index 94b9fdbf9..6399964f8 100644 --- a/lapack-netlib/SRC/cunbdb2.f +++ b/lapack-netlib/SRC/cunbdb2.f @@ -173,7 +173,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unbdb2 * *> \par Further Details: * ===================== @@ -231,8 +231,8 @@ $ XERBLA * .. * .. External Functions .. - REAL SCNRM2 - EXTERNAL SCNRM2 + REAL SCNRM2, SROUNDUP_LWORK + EXTERNAL SCNRM2, SROUNDUP_LWORK * .. * .. Intrinsic Function .. INTRINSIC ATAN2, COS, MAX, SIN, SQRT @@ -265,7 +265,7 @@ LORBDB5 = Q-1 LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) LWORKMIN = LWORKOPT - WORK(1) = LWORKOPT + WORK(1) = SROUNDUP_LWORK(LWORKOPT) IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF diff --git a/lapack-netlib/SRC/cunbdb3.f b/lapack-netlib/SRC/cunbdb3.f index f942bc698..d02460597 100644 --- a/lapack-netlib/SRC/cunbdb3.f +++ b/lapack-netlib/SRC/cunbdb3.f @@ -173,7 +173,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unbdb3 * *> \par Further Details: * ===================== @@ -229,8 +229,8 @@ EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CLACGV, XERBLA * .. * .. External Functions .. - REAL SCNRM2 - EXTERNAL SCNRM2 + REAL SCNRM2, SROUNDUP_LWORK + EXTERNAL SCNRM2, SROUNDUP_LWORK * .. * .. Intrinsic Function .. INTRINSIC ATAN2, COS, MAX, SIN, SQRT @@ -263,7 +263,7 @@ LORBDB5 = Q-1 LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) LWORKMIN = LWORKOPT - WORK(1) = LWORKOPT + WORK(1) = SROUNDUP_LWORK(LWORKOPT) IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF diff --git a/lapack-netlib/SRC/cunbdb4.f b/lapack-netlib/SRC/cunbdb4.f index a551c184e..33acc1ee5 100644 --- a/lapack-netlib/SRC/cunbdb4.f +++ b/lapack-netlib/SRC/cunbdb4.f @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complexOTHERcomputational +*> \ingroup unbdb4 * *> \par Further Details: * ===================== @@ -242,8 +242,8 @@ $ XERBLA * .. * .. External Functions .. - REAL SCNRM2 - EXTERNAL SCNRM2 + REAL SCNRM2, SROUNDUP_LWORK + EXTERNAL SCNRM2, SROUNDUP_LWORK * .. * .. Intrinsic Function .. INTRINSIC ATAN2, COS, MAX, SIN, SQRT @@ -277,7 +277,7 @@ LWORKOPT = ILARF + LLARF - 1 LWORKOPT = MAX( LWORKOPT, IORBDB5 + LORBDB5 - 1 ) LWORKMIN = LWORKOPT - WORK(1) = LWORKOPT + WORK(1) = SROUNDUP_LWORK(LWORKOPT) IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF diff --git a/lapack-netlib/SRC/cuncsd.f b/lapack-netlib/SRC/cuncsd.f index 3653a396a..003daaab4 100644 --- a/lapack-netlib/SRC/cuncsd.f +++ b/lapack-netlib/SRC/cuncsd.f @@ -308,7 +308,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup uncsd * * ===================================================================== RECURSIVE SUBROUTINE CUNCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, @@ -365,7 +365,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. Intrinsic Functions INTRINSIC INT, MAX, MIN @@ -504,7 +505,8 @@ $ IORBDB + LORBDBWORKOPT ) - 1 LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN, $ IORBDB + LORBDBWORKMIN ) - 1 - WORK(1) = MAX(LWORKOPT,LWORKMIN) + LWORKOPT = MAX(LWORKOPT,LWORKMIN) + WORK(1) = SROUNDUP_LWORK(LWORKOPT) * IF( LWORK .LT. LWORKMIN $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN diff --git a/lapack-netlib/SRC/cuncsd2by1.f b/lapack-netlib/SRC/cuncsd2by1.f index f0c44f670..128e82cec 100644 --- a/lapack-netlib/SRC/cuncsd2by1.f +++ b/lapack-netlib/SRC/cuncsd2by1.f @@ -247,7 +247,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complexOTHERcomputational +*> \ingroup uncsd2by1 * * ===================================================================== SUBROUTINE CUNCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, @@ -299,7 +299,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. Intrinsic Function .. INTRINSIC INT, MAX, MIN @@ -508,7 +509,7 @@ LWORKOPT = MAX( IORBDB+LORBDB-1, $ IORGQR+LORGQROPT-1, $ IORGLQ+LORGLQOPT-1 ) - WORK(1) = LWORKOPT + WORK(1) = SROUNDUP_LWORK(LWORKOPT) IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN INFO = -19 END IF diff --git a/lapack-netlib/SRC/cungbr.f b/lapack-netlib/SRC/cungbr.f index a31a53d79..2f0208fdb 100644 --- a/lapack-netlib/SRC/cungbr.f +++ b/lapack-netlib/SRC/cungbr.f @@ -150,7 +150,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGBcomputational +*> \ingroup ungbr * * ===================================================================== SUBROUTINE CUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -180,7 +180,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CUNGLQ, CUNGQR, XERBLA @@ -241,7 +242,7 @@ CALL XERBLA( 'CUNGBR', -INFO ) RETURN ELSE IF( LQUERY ) THEN - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * @@ -327,7 +328,7 @@ END IF END IF END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNGBR diff --git a/lapack-netlib/SRC/cunghr.f b/lapack-netlib/SRC/cunghr.f index 4f8a0a263..3aa3fb1ae 100644 --- a/lapack-netlib/SRC/cunghr.f +++ b/lapack-netlib/SRC/cunghr.f @@ -119,7 +119,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complexOTHERcomputational +*> \ingroup unghr * * ===================================================================== SUBROUTINE CUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) @@ -151,7 +151,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -178,7 +179,7 @@ IF( INFO.EQ.0 ) THEN NB = ILAENV( 1, 'CUNGQR', ' ', NH, NH, NH, -1 ) LWKOPT = MAX( 1, NH )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -230,7 +231,7 @@ CALL CUNGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), $ WORK, LWORK, IINFO ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNGHR diff --git a/lapack-netlib/SRC/cunglq.f b/lapack-netlib/SRC/cunglq.f index e250e036c..353715054 100644 --- a/lapack-netlib/SRC/cunglq.f +++ b/lapack-netlib/SRC/cunglq.f @@ -120,7 +120,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unglq * * ===================================================================== SUBROUTINE CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -155,7 +155,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -164,7 +165,7 @@ INFO = 0 NB = ILAENV( 1, 'CUNGLQ', ' ', M, N, K, -1 ) LWKOPT = MAX( 1, M )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -278,7 +279,7 @@ 50 CONTINUE END IF * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CUNGLQ diff --git a/lapack-netlib/SRC/cungql.f b/lapack-netlib/SRC/cungql.f index d3b812a62..ed2f6803c 100644 --- a/lapack-netlib/SRC/cungql.f +++ b/lapack-netlib/SRC/cungql.f @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complexOTHERcomputational +*> \ingroup ungql * * ===================================================================== SUBROUTINE CUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -156,7 +156,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -181,7 +182,7 @@ NB = ILAENV( 1, 'CUNGQL', ' ', M, N, K, -1 ) LWKOPT = N*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -8 diff --git a/lapack-netlib/SRC/cungqr.f b/lapack-netlib/SRC/cungqr.f index 5010ae0df..b6e8cc59a 100644 --- a/lapack-netlib/SRC/cungqr.f +++ b/lapack-netlib/SRC/cungqr.f @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ungqr * * ===================================================================== SUBROUTINE CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -156,7 +156,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -165,7 +166,7 @@ INFO = 0 NB = ILAENV( 1, 'CUNGQR', ' ', M, N, K, -1 ) LWKOPT = MAX( 1, N )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -279,7 +280,7 @@ 50 CONTINUE END IF * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CUNGQR diff --git a/lapack-netlib/SRC/cungrq.f b/lapack-netlib/SRC/cungrq.f index 1593ff938..aceaac0b8 100644 --- a/lapack-netlib/SRC/cungrq.f +++ b/lapack-netlib/SRC/cungrq.f @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ungrq * * ===================================================================== SUBROUTINE CUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -156,7 +156,8 @@ * .. * .. 
External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -181,7 +182,7 @@ NB = ILAENV( 1, 'CUNGRQ', ' ', M, N, K, -1 ) LWKOPT = M*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN INFO = -8 @@ -286,7 +287,7 @@ 50 CONTINUE END IF * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of CUNGRQ diff --git a/lapack-netlib/SRC/cungtr.f b/lapack-netlib/SRC/cungtr.f index 26ff0428e..27f197340 100644 --- a/lapack-netlib/SRC/cungtr.f +++ b/lapack-netlib/SRC/cungtr.f @@ -116,7 +116,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ungtr * * ===================================================================== SUBROUTINE CUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) @@ -147,7 +147,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CUNGQL, CUNGQR, XERBLA @@ -179,7 +180,7 @@ NB = ILAENV( 1, 'CUNGQR', ' ', N-1, N-1, N-1, -1 ) END IF LWKOPT = MAX( 1, N-1 )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -245,7 +246,7 @@ $ LWORK, IINFO ) END IF END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNGTR diff --git a/lapack-netlib/SRC/cunmbr.f b/lapack-netlib/SRC/cunmbr.f index cef6025b0..a21c486e9 100644 --- a/lapack-netlib/SRC/cunmbr.f +++ b/lapack-netlib/SRC/cunmbr.f @@ -189,7 +189,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unmbr * * ===================================================================== SUBROUTINE CUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, @@ -218,7 +218,8 @@ * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CUNMLQ, CUNMQR, XERBLA @@ -290,7 +291,7 @@ ELSE LWKOPT = 1 END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -367,7 +368,7 @@ $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) END IF END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNMBR diff --git a/lapack-netlib/SRC/cunmhr.f b/lapack-netlib/SRC/cunmhr.f index af3140d5f..29bb631f1 100644 --- a/lapack-netlib/SRC/cunmhr.f +++ b/lapack-netlib/SRC/cunmhr.f @@ -171,7 +171,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unmhr * * ===================================================================== SUBROUTINE CUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, @@ -199,7 +199,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CUNMQR, XERBLA @@ -253,7 +254,7 @@ NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, NH, NH, -1 ) END IF LWKOPT = NW*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -285,7 +286,7 @@ CALL CUNMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNMHR diff --git a/lapack-netlib/SRC/cunmlq.f b/lapack-netlib/SRC/cunmlq.f index 25a410770..4da1af1d5 100644 --- a/lapack-netlib/SRC/cunmlq.f +++ b/lapack-netlib/SRC/cunmlq.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complexOTHERcomputational +*> \ingroup unmlq * * ===================================================================== SUBROUTINE CUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, @@ -195,7 +195,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLARFB, CLARFT, CUNML2, XERBLA @@ -250,7 +251,7 @@ $ K, -1 ) ) LWKOPT = NW*NB + TSIZE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -343,7 +344,7 @@ $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNMLQ diff --git a/lapack-netlib/SRC/cunmql.f b/lapack-netlib/SRC/cunmql.f index 3c7166066..84fc29d32 100644 --- a/lapack-netlib/SRC/cunmql.f +++ b/lapack-netlib/SRC/cunmql.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unmql * * ===================================================================== SUBROUTINE CUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, @@ -194,7 +194,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLARFB, CLARFT, CUNM2L, XERBLA @@ -249,7 +250,7 @@ $ K, -1 ) ) LWKOPT = NW*NB + TSIZE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -332,7 +333,7 @@ $ WORK, LDWORK ) 10 CONTINUE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNMQL diff --git a/lapack-netlib/SRC/cunmqr.f b/lapack-netlib/SRC/cunmqr.f index 7e59d7129..7d85a861f 100644 --- a/lapack-netlib/SRC/cunmqr.f +++ b/lapack-netlib/SRC/cunmqr.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup complexOTHERcomputational +*> \ingroup unmqr * * ===================================================================== SUBROUTINE CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, @@ -194,7 +194,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLARFB, CLARFT, CUNM2R, XERBLA @@ -245,7 +246,7 @@ NB = MIN( NBMAX, ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N, K, $ -1 ) ) LWKOPT = NW*NB + TSIZE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -331,7 +332,7 @@ $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNMQR diff --git a/lapack-netlib/SRC/cunmrq.f b/lapack-netlib/SRC/cunmrq.f index 5a233f604..f02cfd9a9 100644 --- a/lapack-netlib/SRC/cunmrq.f +++ b/lapack-netlib/SRC/cunmrq.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unmrq * * ===================================================================== SUBROUTINE CUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, @@ -195,7 +195,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLARFB, CLARFT, CUNMR2, XERBLA @@ -250,7 +251,7 @@ $ K, -1 ) ) LWKOPT = NW*NB + TSIZE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -337,7 +338,7 @@ $ WORK, LDWORK ) 10 CONTINUE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNMRQ diff --git a/lapack-netlib/SRC/cunmrz.f b/lapack-netlib/SRC/cunmrz.f index 8e06f2329..9ccf1878b 100644 --- a/lapack-netlib/SRC/cunmrz.f +++ b/lapack-netlib/SRC/cunmrz.f @@ -168,7 +168,7 @@ *> \author Univ. 
of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unmrz * *> \par Contributors: * ================== @@ -213,7 +213,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL CLARZB, CLARZT, CUNMR3, XERBLA @@ -271,7 +272,7 @@ $ K, -1 ) ) LWKOPT = NW*NB + TSIZE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -371,7 +372,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/cunmtr.f b/lapack-netlib/SRC/cunmtr.f index 097dba91e..6eafc15c4 100644 --- a/lapack-netlib/SRC/cunmtr.f +++ b/lapack-netlib/SRC/cunmtr.f @@ -164,7 +164,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unmtr * * ===================================================================== SUBROUTINE CUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, @@ -192,7 +192,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL CUNMQL, CUNMQR, XERBLA @@ -256,7 +257,7 @@ END IF END IF LWKOPT = NW*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -301,7 +302,7 @@ CALL CUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of CUNMTR From f5664740cd492d9f7c614c4876a9204c40fdf777 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 14:29:04 +0100 Subject: [PATCH 120/125] Apply ROUNDUP_LWORK (Reference-LAPACK PR 904) --- lapack-netlib/SRC/sgebrd.f | 11 ++++++----- lapack-netlib/SRC/sgees.f | 15 +++++++-------- lapack-netlib/SRC/sgeesx.f | 13 ++++++------- lapack-netlib/SRC/sgeev.f | 16 +++++++--------- lapack-netlib/SRC/sgeevx.f | 13 ++++++------- lapack-netlib/SRC/sgehrd.f | 9 +++++---- lapack-netlib/SRC/sgelq.f | 11 +++++++---- lapack-netlib/SRC/sgelqf.f | 9 +++++---- lapack-netlib/SRC/sgels.f | 15 +++++++-------- lapack-netlib/SRC/sgelsd.f | 15 +++++++-------- lapack-netlib/SRC/sgelss.f | 8 ++++---- lapack-netlib/SRC/sgelst.f | 19 +++++++++---------- lapack-netlib/SRC/sgelsy.f | 13 ++++++------- lapack-netlib/SRC/sgemlq.f | 16 +++++++++++----- lapack-netlib/SRC/sgemqr.f | 15 +++++++++------ lapack-netlib/SRC/sgeqlf.f | 9 +++++---- lapack-netlib/SRC/sgeqp3.f | 10 +++++----- lapack-netlib/SRC/sgeqrf.f | 9 +++++---- lapack-netlib/SRC/sgeqrfp.f | 9 +++++---- lapack-netlib/SRC/sgerqf.f | 9 +++++---- lapack-netlib/SRC/sgesvd.f | 10 +++++----- lapack-netlib/SRC/sgesvdx.f | 10 +++++----- lapack-netlib/SRC/sgetri.f | 9 +++++---- lapack-netlib/SRC/sgetsls.f | 15 +++++++-------- lapack-netlib/SRC/sgetsqrhrt.f | 12 ++++++++---- lapack-netlib/SRC/sgges.f | 16 +++++++--------- lapack-netlib/SRC/sgges3.f | 16 +++++++--------- lapack-netlib/SRC/sggesx.f | 16 +++++++--------- lapack-netlib/SRC/sggev.f | 16 +++++++--------- lapack-netlib/SRC/sggev3.f | 16 +++++++--------- lapack-netlib/SRC/sggevx.f | 17 ++++++++--------- 
lapack-netlib/SRC/sggglm.f | 7 ++++--- lapack-netlib/SRC/sgghd3.f | 11 ++++++----- lapack-netlib/SRC/sgglse.f | 7 ++++--- lapack-netlib/SRC/sggqrf.f | 10 ++++++---- lapack-netlib/SRC/sggrqf.f | 10 ++++++---- lapack-netlib/SRC/sggsvd3.f | 10 +++++----- lapack-netlib/SRC/sggsvp3.f | 9 +++++---- lapack-netlib/SRC/shgeqz.f | 9 +++++---- lapack-netlib/SRC/shseqr.f | 7 ++++--- lapack-netlib/SRC/slaqr2.f | 13 ++++++------- lapack-netlib/SRC/slaqr3.f | 16 +++++++--------- lapack-netlib/SRC/slaqr4.f | 11 ++++++----- lapack-netlib/SRC/slaqz0.f | 6 +++--- lapack-netlib/SRC/slaqz3.f | 9 ++++----- lapack-netlib/SRC/slaqz4.f | 5 +++-- lapack-netlib/SRC/slaswlq.f | 7 +++++-- lapack-netlib/SRC/sorgbr.f | 9 +++++---- 48 files changed, 284 insertions(+), 269 deletions(-) diff --git a/lapack-netlib/SRC/sgebrd.f b/lapack-netlib/SRC/sgebrd.f index 08701164c..2d0c6d651 100644 --- a/lapack-netlib/SRC/sgebrd.f +++ b/lapack-netlib/SRC/sgebrd.f @@ -147,7 +147,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup gebrd * *> \par Further Details: * ===================== @@ -230,11 +230,12 @@ EXTERNAL SGEBD2, SGEMM, SLABRD, XERBLA * .. * .. Intrinsic Functions .. - INTRINSIC MAX, MIN, REAL + INTRINSIC MAX, MIN * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. 
* @@ -243,7 +244,7 @@ INFO = 0 NB = MAX( 1, ILAENV( 1, 'SGEBRD', ' ', M, N, -1, -1 ) ) LWKOPT = ( M+N )*NB - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -341,7 +342,7 @@ * CALL SGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), $ TAUQ( I ), TAUP( I ), WORK, IINFO ) - WORK( 1 ) = WS + WORK( 1 ) = SROUNDUP_LWORK(WS) RETURN * * End of SGEBRD diff --git a/lapack-netlib/SRC/sgees.f b/lapack-netlib/SRC/sgees.f index 6febd549c..4418ea064 100644 --- a/lapack-netlib/SRC/sgees.f +++ b/lapack-netlib/SRC/sgees.f @@ -208,7 +208,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEeigen +*> \ingroup gees * * ===================================================================== SUBROUTINE SGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, @@ -250,14 +250,14 @@ REAL DUM( 1 ) * .. * .. External Subroutines .. - EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, - $ SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA + EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLACPY, + $ SLASCL, SORGHR, SSWAP, STRSEN, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, SQRT @@ -312,7 +312,7 @@ MAXWRK = MAX( MAXWRK, N + HSWORK ) END IF END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN INFO = -13 @@ -338,7 +338,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -524,7 +523,7 @@ 30 CONTINUE END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of SGEES diff --git a/lapack-netlib/SRC/sgeesx.f b/lapack-netlib/SRC/sgeesx.f index 6810fe7c8..cabe9f1f7 100644 --- a/lapack-netlib/SRC/sgeesx.f +++ b/lapack-netlib/SRC/sgeesx.f @@ -272,7 +272,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEeigen +*> \ingroup geesx * * ===================================================================== SUBROUTINE SGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, @@ -317,14 +317,14 @@ REAL DUM( 1 ) * .. * .. External Subroutines .. - EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, + EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, $ SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, SQRT @@ -398,7 +398,7 @@ $ LIWRK = ( N*N )/4 END IF IWORK( 1 ) = LIWRK - WORK( 1 ) = LWRK + WORK( 1 ) = SROUNDUP_LWORK(LWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN INFO = -16 @@ -426,7 +426,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -634,7 +633,7 @@ 30 CONTINUE END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) IF( WANTSV .OR. 
WANTSB ) THEN IWORK( 1 ) = SDIM*(N-SDIM) ELSE diff --git a/lapack-netlib/SRC/sgeev.f b/lapack-netlib/SRC/sgeev.f index ed1724721..93f993265 100644 --- a/lapack-netlib/SRC/sgeev.f +++ b/lapack-netlib/SRC/sgeev.f @@ -184,7 +184,7 @@ * * @generated from dgeev.f, fortran d -> s, Tue Apr 19 01:47:44 2016 * -*> \ingroup realGEeigen +*> \ingroup geev * * ===================================================================== SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, @@ -223,16 +223,15 @@ REAL DUM( 1 ) * .. * .. External Subroutines .. - EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY, - $ SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC3, - $ XERBLA + EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLACPY, SLARTG, + $ SLASCL, SORGHR, SROT, SSCAL, STREVC3, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ISAMAX, ILAENV - REAL SLAMCH, SLANGE, SLAPY2, SNRM2 + REAL SLAMCH, SLANGE, SLAPY2, SNRM2, SROUNDUP_LWORK EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SLANGE, SLAPY2, - $ SNRM2 + $ SNRM2, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, SQRT @@ -312,7 +311,7 @@ END IF MAXWRK = MAX( MAXWRK, MINWRK ) END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN INFO = -13 @@ -336,7 +335,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -519,7 +517,7 @@ END IF END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of SGEEV diff --git a/lapack-netlib/SRC/sgeevx.f b/lapack-netlib/SRC/sgeevx.f index ed1ea1cb9..b0af78605 100644 --- a/lapack-netlib/SRC/sgeevx.f +++ b/lapack-netlib/SRC/sgeevx.f @@ -297,7 +297,7 @@ * * @generated from dgeevx.f, fortran d -> s, Tue Apr 19 01:47:44 2016 * -*> \ingroup realGEeigen +*> \ingroup geevx * * ===================================================================== SUBROUTINE SGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, @@ -341,16 +341,16 @@ REAL DUM( 1 ) * .. * .. External Subroutines .. - EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY, + EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLACPY, $ SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC3, $ STRSNA, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ISAMAX, ILAENV - REAL SLAMCH, SLANGE, SLAPY2, SNRM2 + REAL SLAMCH, SLANGE, SLAPY2, SNRM2, SROUNDUP_LWORK EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SLANGE, SLAPY2, - $ SNRM2 + $ SNRM2, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, SQRT @@ -453,7 +453,7 @@ END IF MAXWRK = MAX( MAXWRK, MINWRK ) END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN INFO = -21 @@ -477,7 +477,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -684,7 +683,7 @@ END IF END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of SGEEVX diff --git a/lapack-netlib/SRC/sgehrd.f b/lapack-netlib/SRC/sgehrd.f index 41b9aa78e..47733d947 100644 --- a/lapack-netlib/SRC/sgehrd.f +++ b/lapack-netlib/SRC/sgehrd.f @@ -120,7 +120,7 @@ *> \author Univ. 
of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup gehrd * *> \par Further Details: * ===================== @@ -201,7 +201,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -227,7 +228,7 @@ * NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) LWKOPT = N*NB + TSIZE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -344,7 +345,7 @@ * Use unblocked code to reduce the rest of the matrix * CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/sgelq.f b/lapack-netlib/SRC/sgelq.f index 9209f918e..74c7cc267 100644 --- a/lapack-netlib/SRC/sgelq.f +++ b/lapack-netlib/SRC/sgelq.f @@ -166,6 +166,8 @@ *> the LQ factorization. *> \endverbatim *> +*> \ingroup gelq +*> * ===================================================================== SUBROUTINE SGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, $ INFO ) @@ -190,7 +192,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGELQT, SLASWLQ, XERBLA @@ -292,9 +295,9 @@ T( 2 ) = MB T( 3 ) = NB IF( MINW ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ELSE - WORK( 1 ) = LWREQ + WORK( 1 ) = SROUNDUP_LWORK(LWREQ) END IF END IF IF( INFO.NE.0 ) THEN @@ -319,7 +322,7 @@ $ LWORK, INFO ) END IF * - WORK( 1 ) = LWREQ + WORK( 1 ) = SROUNDUP_LWORK(LWREQ) RETURN * * End of SGELQ diff --git a/lapack-netlib/SRC/sgelqf.f b/lapack-netlib/SRC/sgelqf.f index 24d8ab19c..1ceec4742 100644 --- a/lapack-netlib/SRC/sgelqf.f +++ b/lapack-netlib/SRC/sgelqf.f @@ -118,7 +118,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realGEcomputational +*> \ingroup gelqf * *> \par Further Details: * ===================== @@ -167,7 +167,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -176,7 +177,7 @@ INFO = 0 NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) LWKOPT = M*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -266,7 +267,7 @@ $ CALL SGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, $ IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SGELQF diff --git a/lapack-netlib/SRC/sgels.f b/lapack-netlib/SRC/sgels.f index ea02c3318..b58f70c9e 100644 --- a/lapack-netlib/SRC/sgels.f +++ b/lapack-netlib/SRC/sgels.f @@ -175,7 +175,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsolve +*> \ingroup gels * * ===================================================================== SUBROUTINE SGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, @@ -210,15 +210,15 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. - EXTERNAL SGELQF, SGEQRF, SLABAD, SLASCL, SLASET, SORMLQ, + EXTERNAL SGELQF, SGEQRF, SLASCL, SLASET, SORMLQ, $ SORMQR, STRTRS, XERBLA * .. * .. Intrinsic Functions .. - INTRINSIC MAX, MIN, REAL + INTRINSIC MAX, MIN * .. * .. Executable Statements .. 
* @@ -273,7 +273,7 @@ END IF * WSIZE = MAX( 1, MN + MAX( MN, NRHS )*NB ) - WORK( 1 ) = REAL( WSIZE ) + WORK( 1 ) = SROUNDUP_LWORK( WSIZE ) * END IF * @@ -295,7 +295,6 @@ * SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A, B if max element outside range [SMLNUM,BIGNUM] * @@ -492,7 +491,7 @@ END IF * 50 CONTINUE - WORK( 1 ) = REAL( WSIZE ) + WORK( 1 ) = SROUNDUP_LWORK( WSIZE ) * RETURN * diff --git a/lapack-netlib/SRC/sgelsd.f b/lapack-netlib/SRC/sgelsd.f index 9fda7b593..2818213f4 100644 --- a/lapack-netlib/SRC/sgelsd.f +++ b/lapack-netlib/SRC/sgelsd.f @@ -189,7 +189,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsolve +*> \ingroup gelsd * *> \par Contributors: * ================== @@ -229,13 +229,13 @@ REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM * .. * .. External Subroutines .. - EXTERNAL SGEBRD, SGELQF, SGEQRF, SLABAD, SLACPY, SLALSD, - $ SLASCL, SLASET, SORMBR, SORMLQ, SORMQR, XERBLA + EXTERNAL SGEBRD, SGELQF, SGEQRF, SLACPY, SLALSD, SLASCL, + $ SLASET, SORMBR, SORMLQ, SORMQR, XERBLA * .. * .. External Functions .. INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL SLAMCH, SLANGE, ILAENV + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL SLAMCH, SLANGE, ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC INT, LOG, MAX, MIN, REAL @@ -348,7 +348,7 @@ END IF END IF MINWRK = MIN( MINWRK, MAXWRK ) - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) IWORK( 1 ) = LIWORK * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN @@ -376,7 +376,6 @@ SFMIN = SLAMCH( 'S' ) SMLNUM = SFMIN / EPS BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A if max entry outside range [SMLNUM,BIGNUM]. 
* @@ -615,7 +614,7 @@ END IF * 10 CONTINUE - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) IWORK( 1 ) = LIWORK RETURN * diff --git a/lapack-netlib/SRC/sgelss.f b/lapack-netlib/SRC/sgelss.f index 89d3a6e4f..2e4b0cdd5 100644 --- a/lapack-netlib/SRC/sgelss.f +++ b/lapack-netlib/SRC/sgelss.f @@ -207,8 +207,8 @@ * .. * .. External Functions .. INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -355,7 +355,7 @@ END IF MAXWRK = MAX( MINWRK, MAXWRK ) END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) $ INFO = -12 @@ -731,7 +731,7 @@ END IF * 70 CONTINUE - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of SGELSS diff --git a/lapack-netlib/SRC/sgelst.f b/lapack-netlib/SRC/sgelst.f index 5377bc720..b89918656 100644 --- a/lapack-netlib/SRC/sgelst.f +++ b/lapack-netlib/SRC/sgelst.f @@ -176,7 +176,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsolve +*> \ingroup gelst * *> \par Contributors: * ================== @@ -222,15 +222,15 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. - EXTERNAL SGELQT, SGEQRT, SGEMLQT, SGEMQRT, SLABAD, + EXTERNAL SGELQT, SGEQRT, SGEMLQT, SGEMQRT, $ SLASCL, SLASET, STRTRS, XERBLA * .. * .. Intrinsic Functions .. - INTRINSIC REAL, MAX, MIN + INTRINSIC MAX, MIN * .. * .. Executable Statements .. 
* @@ -268,7 +268,7 @@ * MNNRHS = MAX( MN, NRHS ) LWOPT = MAX( 1, (MN+MNNRHS)*NB ) - WORK( 1 ) = REAL( LWOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWOPT ) * END IF * @@ -283,7 +283,7 @@ * IF( MIN( M, N, NRHS ).EQ.0 ) THEN CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) - WORK( 1 ) = REAL( LWOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWOPT ) RETURN END IF * @@ -309,7 +309,6 @@ * SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A, B if max element outside range [SMLNUM,BIGNUM] * @@ -332,7 +331,7 @@ * Matrix all zero. Return zero solution. * CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) - WORK( 1 ) = REAL( LWOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWOPT ) RETURN END IF * @@ -522,7 +521,7 @@ $ INFO ) END IF * - WORK( 1 ) = REAL( LWOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWOPT ) * RETURN * diff --git a/lapack-netlib/SRC/sgelsy.f b/lapack-netlib/SRC/sgelsy.f index 89dd39e80..c7f5069de 100644 --- a/lapack-netlib/SRC/sgelsy.f +++ b/lapack-netlib/SRC/sgelsy.f @@ -191,7 +191,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsolve +*> \ingroup gelsy * *> \par Contributors: * ================== @@ -234,11 +234,11 @@ * .. * .. External Functions .. INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. - EXTERNAL SCOPY, SGEQP3, SLABAD, SLAIC1, SLASCL, SLASET, + EXTERNAL SCOPY, SGEQP3, SLAIC1, SLASCL, SLASET, $ SORMQR, SORMRZ, STRSM, STZRZF, XERBLA * .. * .. Intrinsic Functions .. @@ -282,7 +282,7 @@ LWKOPT = MAX( LWKMIN, $ MN + 2*N + NB*( N + 1 ), 2*MN + NB*NRHS ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. 
.NOT.LQUERY ) THEN INFO = -12 @@ -307,7 +307,6 @@ * SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A, B if max entries outside range [SMLNUM,BIGNUM] * @@ -469,7 +468,7 @@ END IF * 70 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/sgemlq.f b/lapack-netlib/SRC/sgemlq.f index 536abf184..83536825c 100644 --- a/lapack-netlib/SRC/sgemlq.f +++ b/lapack-netlib/SRC/sgemlq.f @@ -119,7 +119,7 @@ *> The dimension of the array WORK. *> If LWORK = -1, then a workspace query is assumed. The routine *> only calculates the size of the WORK array, returns this -*> value as WORK(1), and no error message related to WORK +*> value as WORK(1), and no error message related to WORK *> is issued by XERBLA. *> \endverbatim *> @@ -143,7 +143,7 @@ *> *> \verbatim *> -*> These details are particular for this LAPACK implementation. Users should not +*> These details are particular for this LAPACK implementation. Users should not *> take them for granted. These details may change in the future, and are not likely *> true for another LAPACK implementation. These details are relevant if one wants *> to try to understand the code. They are not part of the interface. @@ -159,11 +159,13 @@ *> block sizes MB and NB returned by ILAENV, SGELQ will use either *> SLASWLQ (if the matrix is wide-and-short) or SGELQT to compute *> the LQ factorization. -*> This version of SGEMLQ will use either SLAMSWLQ or SGEMLQT to +*> This version of SGEMLQ will use either SLAMSWLQ or SGEMLQT to *> multiply matrix Q by another matrix. *> Further Details in SLAMSWLQ or SGEMLQT. *> \endverbatim *> +*> \ingroup gemlq +*> * ===================================================================== SUBROUTINE SGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, $ C, LDC, WORK, LWORK, INFO ) @@ -191,6 +193,10 @@ LOGICAL LSAME EXTERNAL LSAME * .. +* .. External Functions .. 
+ REAL SROUNDUP_LWORK + EXTERNAL SROUNDUP_LWORK +* .. * .. External Subroutines .. EXTERNAL SLAMSWLQ, SGEMLQT, XERBLA * .. @@ -249,7 +255,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = REAL( LW ) + WORK( 1 ) = SROUNDUP_LWORK( LW ) END IF * IF( INFO.NE.0 ) THEN @@ -274,7 +280,7 @@ $ MB, C, LDC, WORK, LWORK, INFO ) END IF * - WORK( 1 ) = REAL( LW ) + WORK( 1 ) = SROUNDUP_LWORK( LW ) * RETURN * diff --git a/lapack-netlib/SRC/sgemqr.f b/lapack-netlib/SRC/sgemqr.f index 2a9257459..3207f8bfd 100644 --- a/lapack-netlib/SRC/sgemqr.f +++ b/lapack-netlib/SRC/sgemqr.f @@ -120,7 +120,7 @@ *> The dimension of the array WORK. *> If LWORK = -1, then a workspace query is assumed. The routine *> only calculates the size of the WORK array, returns this -*> value as WORK(1), and no error message related to WORK +*> value as WORK(1), and no error message related to WORK *> is issued by XERBLA. *> \endverbatim *> @@ -144,7 +144,7 @@ *> *> \verbatim *> -*> These details are particular for this LAPACK implementation. Users should not +*> These details are particular for this LAPACK implementation. Users should not *> take them for granted. These details may change in the future, and are not likely *> true for another LAPACK implementation. These details are relevant if one wants *> to try to understand the code. They are not part of the interface. @@ -160,12 +160,14 @@ *> block sizes MB and NB returned by ILAENV, SGEQR will use either *> SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute *> the QR factorization. -*> This version of SGEMQR will use either SLAMTSQR or SGEMQRT to +*> This version of SGEMQR will use either SLAMTSQR or SGEMQRT to *> multiply matrix Q by another matrix. *> Further Details in SLAMTSQR or SGEMQRT. *> *> \endverbatim *> +*> \ingroup gemqr +*> * ===================================================================== SUBROUTINE SGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, $ C, LDC, WORK, LWORK, INFO ) @@ -191,7 +193,8 @@ * .. * .. 
External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEMQRT, SLAMTSQR, XERBLA @@ -251,7 +254,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LW + WORK( 1 ) = SROUNDUP_LWORK(LW) END IF * IF( INFO.NE.0 ) THEN @@ -276,7 +279,7 @@ $ NB, C, LDC, WORK, LWORK, INFO ) END IF * - WORK( 1 ) = LW + WORK( 1 ) = SROUNDUP_LWORK(LW) * RETURN * diff --git a/lapack-netlib/SRC/sgeqlf.f b/lapack-netlib/SRC/sgeqlf.f index efecfbb3c..b1266c89e 100644 --- a/lapack-netlib/SRC/sgeqlf.f +++ b/lapack-netlib/SRC/sgeqlf.f @@ -113,7 +113,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup geqlf * *> \par Further Details: * ===================== @@ -162,7 +162,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -186,7 +187,7 @@ NB = ILAENV( 1, 'SGEQLF', ' ', M, N, -1, -1 ) LWKOPT = N*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -7 @@ -276,7 +277,7 @@ IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL SGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SGEQLF diff --git a/lapack-netlib/SRC/sgeqp3.f b/lapack-netlib/SRC/sgeqp3.f index 493bdae6a..9f2f40b2e 100644 --- a/lapack-netlib/SRC/sgeqp3.f +++ b/lapack-netlib/SRC/sgeqp3.f @@ -120,7 +120,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup geqp3 * *> \par Further Details: * ===================== @@ -177,8 +177,8 @@ * .. * .. External Functions .. INTEGER ILAENV - REAL SNRM2 - EXTERNAL ILAENV, SNRM2 + REAL SNRM2, SROUNDUP_LWORK + EXTERNAL ILAENV, SNRM2, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. 
INTRINSIC INT, MAX, MIN @@ -205,7 +205,7 @@ NB = ILAENV( INB, 'SGEQRF', ' ', M, N, -1, -1 ) LWKOPT = 2*N + ( N + 1 )*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN INFO = -8 @@ -347,7 +347,7 @@ * END IF * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SGEQP3 diff --git a/lapack-netlib/SRC/sgeqrf.f b/lapack-netlib/SRC/sgeqrf.f index b24615f7a..689fe1aea 100644 --- a/lapack-netlib/SRC/sgeqrf.f +++ b/lapack-netlib/SRC/sgeqrf.f @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup geqrf * *> \par Further Details: * ===================== @@ -170,7 +170,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -199,7 +200,7 @@ ELSE LWKOPT = N*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * @@ -274,7 +275,7 @@ $ CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, $ IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SGEQRF diff --git a/lapack-netlib/SRC/sgeqrfp.f b/lapack-netlib/SRC/sgeqrfp.f index 03d33654b..d1ee2a828 100644 --- a/lapack-netlib/SRC/sgeqrfp.f +++ b/lapack-netlib/SRC/sgeqrfp.f @@ -122,7 +122,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup geqrfp * *> \par Further Details: * ===================== @@ -173,7 +173,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. 
* @@ -182,7 +183,7 @@ INFO = 0 NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) LWKOPT = N*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -272,7 +273,7 @@ $ CALL SGEQR2P( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, $ IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SGEQRFP diff --git a/lapack-netlib/SRC/sgerqf.f b/lapack-netlib/SRC/sgerqf.f index 037cd5345..1d3400a1f 100644 --- a/lapack-netlib/SRC/sgerqf.f +++ b/lapack-netlib/SRC/sgerqf.f @@ -114,7 +114,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup gerqf * *> \par Further Details: * ===================== @@ -163,7 +163,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -187,7 +188,7 @@ NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) LWKOPT = M*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF ( .NOT.LQUERY ) THEN IF( LWORK.LE.0 .OR. ( N.GT.0 .AND. LWORK.LT.MAX( 1, M ) ) ) @@ -278,7 +279,7 @@ IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL SGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SGERQF diff --git a/lapack-netlib/SRC/sgesvd.f b/lapack-netlib/SRC/sgesvd.f index 83321ffaa..d3fa94582 100644 --- a/lapack-netlib/SRC/sgesvd.f +++ b/lapack-netlib/SRC/sgesvd.f @@ -203,7 +203,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsing +*> \ingroup gesvd * * ===================================================================== SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, @@ -251,8 +251,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. 
Intrinsic Functions .. INTRINSIC MAX, MIN, SQRT @@ -628,7 +628,7 @@ END IF END IF MAXWRK = MAX( MAXWRK, MINWRK ) - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN INFO = -13 @@ -3493,7 +3493,7 @@ * * Return optimal workspace in WORK(1) * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * RETURN * diff --git a/lapack-netlib/SRC/sgesvdx.f b/lapack-netlib/SRC/sgesvdx.f index b6495dbd4..8b55b9b2e 100644 --- a/lapack-netlib/SRC/sgesvdx.f +++ b/lapack-netlib/SRC/sgesvdx.f @@ -254,7 +254,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsing +*> \ingroup gesvdx * * ===================================================================== SUBROUTINE SGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, @@ -301,8 +301,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, SQRT @@ -456,7 +456,7 @@ END IF END IF MAXWRK = MAX( MAXWRK, MINWRK ) - WORK( 1 ) = REAL( MAXWRK ) + WORK( 1 ) = SROUNDUP_LWORK( MAXWRK ) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN INFO = -19 @@ -822,7 +822,7 @@ * * Return optimal workspace in WORK(1) * - WORK( 1 ) = REAL( MAXWRK ) + WORK( 1 ) = SROUNDUP_LWORK( MAXWRK ) * RETURN * diff --git a/lapack-netlib/SRC/sgetri.f b/lapack-netlib/SRC/sgetri.f index 749ede9a7..fe71bc4a5 100644 --- a/lapack-netlib/SRC/sgetri.f +++ b/lapack-netlib/SRC/sgetri.f @@ -107,7 +107,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup getri * * ===================================================================== SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) @@ -137,7 +137,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. 
External Subroutines .. EXTERNAL SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA @@ -152,7 +153,7 @@ INFO = 0 NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 ) LWKOPT = N*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -250,7 +251,7 @@ $ CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) 60 CONTINUE * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SGETRI diff --git a/lapack-netlib/SRC/sgetsls.f b/lapack-netlib/SRC/sgetsls.f index e6ce705fa..d89c6a4e6 100644 --- a/lapack-netlib/SRC/sgetsls.f +++ b/lapack-netlib/SRC/sgetsls.f @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsolve +*> \ingroup getsls * * ===================================================================== SUBROUTINE SGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, @@ -188,15 +188,15 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL SLAMCH, SLANGE - EXTERNAL LSAME, SLABAD, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEQR, SGEMQR, SLASCL, SLASET, $ STRTRS, XERBLA, SGELQ, SGEMLQ * .. * .. Intrinsic Functions .. - INTRINSIC REAL, MAX, MIN, INT + INTRINSIC MAX, MIN, INT * .. * .. Executable Statements .. 
* @@ -262,7 +262,7 @@ INFO = -10 END IF * - WORK( 1 ) = REAL( WSIZEO ) + WORK( 1 ) = SROUNDUP_LWORK( WSIZEO ) * END IF * @@ -271,7 +271,7 @@ RETURN END IF IF( LQUERY ) THEN - IF( LWORK.EQ.-2 ) WORK( 1 ) = REAL( WSIZEM ) + IF( LWORK.EQ.-2 ) WORK( 1 ) = SROUNDUP_LWORK( WSIZEM ) RETURN END IF IF( LWORK.LT.WSIZEO ) THEN @@ -294,7 +294,6 @@ * SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A, B if max element outside range [SMLNUM,BIGNUM] * @@ -482,7 +481,7 @@ END IF * 50 CONTINUE - WORK( 1 ) = REAL( TSZO + LWO ) + WORK( 1 ) = SROUNDUP_LWORK( TSZO + LWO ) RETURN * * End of SGETSLS diff --git a/lapack-netlib/SRC/sgetsqrhrt.f b/lapack-netlib/SRC/sgetsqrhrt.f index f9580da7b..d80ff4da8 100644 --- a/lapack-netlib/SRC/sgetsqrhrt.f +++ b/lapack-netlib/SRC/sgetsqrhrt.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup singleOTHERcomputational +*> \ingroup getsqrhrt * *> \par Contributors: * ================== @@ -200,6 +200,10 @@ INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT, $ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS * .. +* .. External Functions .. + REAL SROUNDUP_LWORK + EXTERNAL SROUNDUP_LWORK +* .. * .. External Subroutines .. EXTERNAL SCOPY, SLATSQR, SORGTSQR_ROW, SORHR_COL, $ XERBLA @@ -277,14 +281,14 @@ CALL XERBLA( 'SGETSQRHRT', -INFO ) RETURN ELSE IF ( LQUERY ) THEN - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN END IF * * Quick return if possible * IF( MIN( M, N ).EQ.0 ) THEN - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN END IF * @@ -341,7 +345,7 @@ END IF END DO * - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN * * End of SGETSQRHRT diff --git a/lapack-netlib/SRC/sgges.f b/lapack-netlib/SRC/sgges.f index 3834aea00..8f42882dd 100644 --- a/lapack-netlib/SRC/sgges.f +++ b/lapack-netlib/SRC/sgges.f @@ -275,7 +275,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realGEeigen +*> \ingroup gges * * ===================================================================== SUBROUTINE SGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, @@ -321,15 +321,14 @@ REAL DIF( 2 ) * .. * .. External Subroutines .. - EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, - $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN, - $ XERBLA + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, STGSEN * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT @@ -406,7 +405,7 @@ MINWRK = 1 MAXWRK = 1 END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) $ INFO = -19 @@ -431,7 +430,6 @@ EPS = SLAMCH( 'P' ) SAFMIN = SLAMCH( 'S' ) SAFMAX = ONE / SAFMIN - CALL SLABAD( SAFMIN, SAFMAX ) SMLNUM = SQRT( SAFMIN ) / EPS BIGNUM = ONE / SMLNUM * @@ -668,7 +666,7 @@ * 40 CONTINUE * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * RETURN * diff --git a/lapack-netlib/SRC/sgges3.f b/lapack-netlib/SRC/sgges3.f index b27704ff5..e35d4955a 100644 --- a/lapack-netlib/SRC/sgges3.f +++ b/lapack-netlib/SRC/sgges3.f @@ -273,7 +273,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEeigen +*> \ingroup gges3 * * ===================================================================== SUBROUTINE SGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, @@ -318,14 +318,13 @@ REAL DIF( 2 ) * .. * .. External Subroutines .. - EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHD3, SLAQZ0, SLABAD, - $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN, - $ XERBLA + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHD3, SLAQZ0, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, STGSEN, XERBLA * .. * .. External Functions .. 
LOGICAL LSAME - REAL SLAMCH, SLANGE - EXTERNAL LSAME, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT @@ -408,7 +407,7 @@ $ IERR ) LWKOPT = MAX( LWKOPT, 2*N+INT( WORK( 1 ) ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -430,7 +429,6 @@ EPS = SLAMCH( 'P' ) SAFMIN = SLAMCH( 'S' ) SAFMAX = ONE / SAFMIN - CALL SLABAD( SAFMIN, SAFMAX ) SMLNUM = SQRT( SAFMIN ) / EPS BIGNUM = ONE / SMLNUM * @@ -659,7 +657,7 @@ * 40 CONTINUE * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/sggesx.f b/lapack-netlib/SRC/sggesx.f index a6c0443ba..e5a14fc19 100644 --- a/lapack-netlib/SRC/sggesx.f +++ b/lapack-netlib/SRC/sggesx.f @@ -337,7 +337,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEeigen +*> \ingroup ggesx * *> \par Further Details: * ===================== @@ -405,15 +405,14 @@ REAL DIF( 2 ) * .. * .. External Subroutines .. - EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, - $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN, - $ XERBLA + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, STGSEN, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT @@ -510,7 +509,7 @@ MAXWRK = 1 LWRK = 1 END IF - WORK( 1 ) = LWRK + WORK( 1 ) = SROUNDUP_LWORK(LWRK) IF( WANTSN .OR. 
N.EQ.0 ) THEN LIWMIN = 1 ELSE @@ -544,7 +543,6 @@ EPS = SLAMCH( 'P' ) SAFMIN = SLAMCH( 'S' ) SAFMAX = ONE / SAFMIN - CALL SLABAD( SAFMIN, SAFMAX ) SMLNUM = SQRT( SAFMIN ) / EPS BIGNUM = ONE / SMLNUM * @@ -807,7 +805,7 @@ * 50 CONTINUE * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/sggev.f b/lapack-netlib/SRC/sggev.f index 69744b72b..cacad7cac 100644 --- a/lapack-netlib/SRC/sggev.f +++ b/lapack-netlib/SRC/sggev.f @@ -218,7 +218,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEeigen +*> \ingroup ggev * * ===================================================================== SUBROUTINE SGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, @@ -257,15 +257,14 @@ LOGICAL LDUMMA( 1 ) * .. * .. External Subroutines .. - EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, - $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC, - $ XERBLA + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, STGEVC, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT @@ -335,7 +334,7 @@ MAXWRK = MAX( MAXWRK, N*( 7 + $ ILAENV( 1, 'SORGQR', ' ', N, 1, N, -1 ) ) ) END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) $ INFO = -16 @@ -358,7 +357,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -581,7 +579,7 @@ CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of SGGEV diff --git a/lapack-netlib/SRC/sggev3.f b/lapack-netlib/SRC/sggev3.f index 945c3a017..c82d2187f 100644 --- a/lapack-netlib/SRC/sggev3.f +++ b/lapack-netlib/SRC/sggev3.f @@ -217,7 +217,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEeigen +*> \ingroup ggev3 * * ===================================================================== SUBROUTINE SGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, @@ -256,14 +256,13 @@ LOGICAL LDUMMA( 1 ) * .. * .. External Subroutines .. - EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHD3, SLAQZ0, SLABAD, - $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC, - $ XERBLA + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHD3, SLAQZ0, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, STGEVC * .. * .. External Functions .. LOGICAL LSAME - REAL SLAMCH, SLANGE - EXTERNAL LSAME, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. 
INTRINSIC ABS, MAX, SQRT @@ -341,7 +340,7 @@ $ WORK, -1, 0, IERR ) LWKOPT = MAX( LWKOPT, 2*N+INT ( WORK( 1 ) ) ) END IF - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) * END IF * @@ -362,7 +361,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -578,7 +576,7 @@ CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) END IF * - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) RETURN * * End of SGGEV3 diff --git a/lapack-netlib/SRC/sggevx.f b/lapack-netlib/SRC/sggevx.f index bb05f499a..63164a021 100644 --- a/lapack-netlib/SRC/sggevx.f +++ b/lapack-netlib/SRC/sggevx.f @@ -352,7 +352,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEeigen +*> \ingroup ggevx * *> \par Further Details: * ===================== @@ -427,15 +427,15 @@ LOGICAL LDUMMA( 1 ) * .. * .. External Subroutines .. - EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, - $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC, - $ STGSNA, XERBLA + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, STGEVC, STGSNA, + $ XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANGE - EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT @@ -532,7 +532,7 @@ $ N*ILAENV( 1, 'SORGQR', ' ', N, 1, N, 0 ) ) END IF END IF - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) * IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN INFO = -26 @@ -557,7 +557,6 @@ EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) SMLNUM = SQRT( SMLNUM ) / EPS BIGNUM = ONE / SMLNUM * @@ -855,7 +854,7 @@ CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) END IF * - WORK( 1 ) = MAXWRK + WORK( 1 ) = SROUNDUP_LWORK(MAXWRK) RETURN * * End of SGGEVX diff --git a/lapack-netlib/SRC/sggglm.f b/lapack-netlib/SRC/sggglm.f index 56b4dba52..37094e4f2 100644 --- a/lapack-netlib/SRC/sggglm.f +++ b/lapack-netlib/SRC/sggglm.f @@ -177,7 +177,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup ggglm * * ===================================================================== SUBROUTINE SGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, @@ -212,7 +212,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, MIN @@ -251,7 +252,7 @@ LWKMIN = M + N + P LWKOPT = M + NP + MAX( N, P )*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN INFO = -12 diff --git a/lapack-netlib/SRC/sgghd3.f b/lapack-netlib/SRC/sgghd3.f index 23acf6ec5..9c5858b5a 100644 --- a/lapack-netlib/SRC/sgghd3.f +++ b/lapack-netlib/SRC/sgghd3.f @@ -211,7 +211,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup gghd3 * *> \par Further Details: * ===================== @@ -260,14 +260,15 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGGHRD, SLARTG, SLASET, SORM22, SROT, SGEMM, $ SGEMV, STRMV, SLACPY, XERBLA * .. * .. Intrinsic Functions .. - INTRINSIC REAL, MAX + INTRINSIC MAX * .. * .. Executable Statements .. 
* @@ -276,7 +277,7 @@ INFO = 0 NB = ILAENV( 1, 'SGGHD3', ' ', N, ILO, IHI, -1 ) LWKOPT = MAX( 6*N*NB, 1 ) - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) INITQ = LSAME( COMPQ, 'I' ) WANTQ = INITQ .OR. LSAME( COMPQ, 'V' ) INITZ = LSAME( COMPZ, 'I' ) @@ -885,7 +886,7 @@ IF ( JCOL.LT.IHI ) $ CALL SGGHRD( COMPQ2, COMPZ2, N, JCOL, IHI, A, LDA, B, LDB, Q, $ LDQ, Z, LDZ, IERR ) - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) * RETURN * diff --git a/lapack-netlib/SRC/sgglse.f b/lapack-netlib/SRC/sgglse.f index 59addc3f4..53e3f8e45 100644 --- a/lapack-netlib/SRC/sgglse.f +++ b/lapack-netlib/SRC/sgglse.f @@ -172,7 +172,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERsolve +*> \ingroup gglse * * ===================================================================== SUBROUTINE SGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, @@ -207,7 +207,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, MIN @@ -246,7 +247,7 @@ LWKMIN = M + N + P LWKOPT = P + MN + MAX( M, N )*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN INFO = -12 diff --git a/lapack-netlib/SRC/sggqrf.f b/lapack-netlib/SRC/sggqrf.f index 59b498da5..ebb42a899 100644 --- a/lapack-netlib/SRC/sggqrf.f +++ b/lapack-netlib/SRC/sggqrf.f @@ -173,7 +173,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ggqrf * *> \par Further Details: * ===================== @@ -236,7 +236,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. 
INTRINSIC INT, MAX, MIN @@ -251,7 +252,7 @@ NB3 = ILAENV( 1, 'SORMQR', ' ', N, M, P, -1 ) NB = MAX( NB1, NB2, NB3 ) LWKOPT = MAX( N, M, P )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -287,7 +288,8 @@ * RQ factorization of N-by-P matrix B: B = T*Z. * CALL SGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) - WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) + LWKOPT = MAX( LOPT, INT( WORK( 1 ) ) ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) * RETURN * diff --git a/lapack-netlib/SRC/sggrqf.f b/lapack-netlib/SRC/sggrqf.f index 8b7d4786a..2163f1ef8 100644 --- a/lapack-netlib/SRC/sggrqf.f +++ b/lapack-netlib/SRC/sggrqf.f @@ -172,7 +172,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ggrqf * *> \par Further Details: * ===================== @@ -235,7 +235,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, MIN @@ -250,7 +251,7 @@ NB3 = ILAENV( 1, 'SORMRQ', ' ', M, N, P, -1 ) NB = MAX( NB1, NB2, NB3 ) LWKOPT = MAX( N, M, P)*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -287,7 +288,8 @@ * QR factorization of P-by-N matrix B: B = Z*T * CALL SGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) - WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) + LWKOPT = MAX( LOPT, INT( WORK( 1 ) ) ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) * RETURN * diff --git a/lapack-netlib/SRC/sggsvd3.f b/lapack-netlib/SRC/sggsvd3.f index 9077f2ea8..053fff5de 100644 --- a/lapack-netlib/SRC/sggsvd3.f +++ b/lapack-netlib/SRC/sggsvd3.f @@ -328,7 +328,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsing +*> \ingroup ggsvd3 * *> \par Contributors: * ================== @@ -372,8 +372,8 @@ * .. * .. External Functions .. 
LOGICAL LSAME - REAL SLAMCH, SLANGE - EXTERNAL LSAME, SLAMCH, SLANGE + REAL SLAMCH, SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SCOPY, SGGSVP3, STGSJA, XERBLA @@ -429,7 +429,7 @@ LWKOPT = N + INT( WORK( 1 ) ) LWKOPT = MAX( 2*N, LWKOPT ) LWKOPT = MAX( 1, LWKOPT ) - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) END IF * IF( INFO.NE.0 ) THEN @@ -492,7 +492,7 @@ END IF 20 CONTINUE * - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) RETURN * * End of SGGSVD3 diff --git a/lapack-netlib/SRC/sggsvp3.f b/lapack-netlib/SRC/sggsvp3.f index 4f76b32bc..a463b9064 100644 --- a/lapack-netlib/SRC/sggsvp3.f +++ b/lapack-netlib/SRC/sggsvp3.f @@ -250,7 +250,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ggsvp3 * *> \par Further Details: * ===================== @@ -300,7 +300,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEQP3, SGEQR2, SGERQ2, SLACPY, SLAPMT, @@ -365,7 +366,7 @@ CALL SGEQP3( M, N, A, LDA, IWORK, TAU, WORK, -1, INFO ) LWKOPT = MAX( LWKOPT, INT( WORK ( 1 ) ) ) LWKOPT = MAX( 1, LWKOPT ) - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) END IF * IF( INFO.NE.0 ) THEN @@ -560,7 +561,7 @@ * END IF * - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) RETURN * * End of SGGSVP3 diff --git a/lapack-netlib/SRC/shgeqz.f b/lapack-netlib/SRC/shgeqz.f index 6543f8cb1..9ad64d2bf 100644 --- a/lapack-netlib/SRC/shgeqz.f +++ b/lapack-netlib/SRC/shgeqz.f @@ -282,7 +282,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup hgeqz * *> \par Further Details: * ===================== @@ -346,8 +346,9 @@ * .. * .. External Functions .. 
LOGICAL LSAME - REAL SLAMCH, SLANHS, SLAPY2, SLAPY3 - EXTERNAL LSAME, SLAMCH, SLANHS, SLAPY2, SLAPY3 + REAL SLAMCH, SLANHS, SLAPY2, SLAPY3, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANHS, SLAPY2, SLAPY3, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLAG2, SLARFG, SLARTG, SLASET, SLASV2, SROT, @@ -1364,7 +1365,7 @@ * Exit (other than argument error) -- return optimal workspace size * 420 CONTINUE - WORK( 1 ) = REAL( N ) + WORK( 1 ) = SROUNDUP_LWORK( N ) RETURN * * End of SHGEQZ diff --git a/lapack-netlib/SRC/shseqr.f b/lapack-netlib/SRC/shseqr.f index 3b8d4c4d8..68b9fe6bd 100644 --- a/lapack-netlib/SRC/shseqr.f +++ b/lapack-netlib/SRC/shseqr.f @@ -233,7 +233,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup hseqr * *> \par Contributors: * ================== @@ -358,7 +358,8 @@ * .. External Functions .. INTEGER ILAENV LOGICAL LSAME - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLACPY, SLAHQR, SLAQR0, SLASET, XERBLA @@ -373,7 +374,7 @@ WANTT = LSAME( JOB, 'S' ) INITZ = LSAME( COMPZ, 'I' ) WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) - WORK( 1 ) = REAL( MAX( 1, N ) ) + WORK( 1 ) = SROUNDUP_LWORK( MAX( 1, N ) ) LQUERY = LWORK.EQ.-1 * INFO = 0 diff --git a/lapack-netlib/SRC/slaqr2.f b/lapack-netlib/SRC/slaqr2.f index 62c4ef5eb..caf79fd1c 100644 --- a/lapack-netlib/SRC/slaqr2.f +++ b/lapack-netlib/SRC/slaqr2.f @@ -263,7 +263,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERauxiliary +*> \ingroup laqr2 * *> \par Contributors: * ================== @@ -305,11 +305,11 @@ LOGICAL BULGE, SORTED * .. * .. External Functions .. - REAL SLAMCH - EXTERNAL SLAMCH + REAL SLAMCH, SROUNDUP_LWORK + EXTERNAL SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. 
- EXTERNAL SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR, + EXTERNAL SCOPY, SGEHRD, SGEMM, SLACPY, SLAHQR, $ SLANV2, SLARF, SLARFG, SLASET, SORMHR, STREXC * .. * .. Intrinsic Functions .. @@ -343,7 +343,7 @@ * ==== Quick return in case of workspace query. ==== * IF( LWORK.EQ.-1 ) THEN - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) RETURN END IF * @@ -362,7 +362,6 @@ * SAFMIN = SLAMCH( 'SAFE MINIMUM' ) SAFMAX = ONE / SAFMIN - CALL SLABAD( SAFMIN, SAFMAX ) ULP = SLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( REAL( N ) / ULP ) * @@ -674,7 +673,7 @@ * * ==== Return optimal workspace. ==== * - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) * * ==== End of SLAQR2 ==== * diff --git a/lapack-netlib/SRC/slaqr3.f b/lapack-netlib/SRC/slaqr3.f index 519ccd6ed..d3ffb0f96 100644 --- a/lapack-netlib/SRC/slaqr3.f +++ b/lapack-netlib/SRC/slaqr3.f @@ -260,7 +260,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERauxiliary +*> \ingroup laqr3 * *> \par Contributors: * ================== @@ -302,14 +302,13 @@ LOGICAL BULGE, SORTED * .. * .. External Functions .. - REAL SLAMCH + REAL SLAMCH, SROUNDUP_LWORK INTEGER ILAENV - EXTERNAL SLAMCH, ILAENV + EXTERNAL SLAMCH, SROUNDUP_LWORK, ILAENV * .. * .. External Subroutines .. - EXTERNAL SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR, - $ SLANV2, SLAQR4, SLARF, SLARFG, SLASET, SORMHR, - $ STREXC + EXTERNAL SCOPY, SGEHRD, SGEMM, SLACPY, SLAHQR, SLANV2, + $ SLAQR4, SLARF, SLARFG, SLASET, SORMHR, STREXC * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT @@ -348,7 +347,7 @@ * ==== Quick return in case of workspace query. ==== * IF( LWORK.EQ.-1 ) THEN - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) RETURN END IF * @@ -367,7 +366,6 @@ * SAFMIN = SLAMCH( 'SAFE MINIMUM' ) SAFMAX = ONE / SAFMIN - CALL SLABAD( SAFMIN, SAFMAX ) ULP = SLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( REAL( N ) / ULP ) * @@ -685,7 +683,7 @@ * * ==== Return optimal workspace. 
==== * - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) * * ==== End of SLAQR3 ==== * diff --git a/lapack-netlib/SRC/slaqr4.f b/lapack-netlib/SRC/slaqr4.f index 1f0a51c85..d6721df97 100644 --- a/lapack-netlib/SRC/slaqr4.f +++ b/lapack-netlib/SRC/slaqr4.f @@ -239,7 +239,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERauxiliary +*> \ingroup laqr4 * *> \par Contributors: * ================== @@ -316,7 +316,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Local Arrays .. REAL ZDUM( 1, 1 ) @@ -325,7 +326,7 @@ EXTERNAL SLACPY, SLAHQR, SLANV2, SLAQR2, SLAQR5 * .. * .. Intrinsic Functions .. - INTRINSIC ABS, INT, MAX, MIN, MOD, REAL + INTRINSIC ABS, INT, MAX, MIN, MOD * .. * .. Executable Statements .. INFO = 0 @@ -401,7 +402,7 @@ * ==== Quick return in case of workspace query. ==== * IF( LWORK.EQ.-1 ) THEN - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) RETURN END IF * @@ -732,7 +733,7 @@ * * ==== Return the optimal value of LWORK. 
==== * - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) * * ==== End of SLAQR4 ==== * diff --git a/lapack-netlib/SRC/slaqz0.f b/lapack-netlib/SRC/slaqz0.f index 8b2d3286e..c128093e4 100644 --- a/lapack-netlib/SRC/slaqz0.f +++ b/lapack-netlib/SRC/slaqz0.f @@ -294,7 +294,7 @@ * *> \date May 2020 * -*> \ingroup doubleGEcomputational +*> \ingroup laqz0 *> * ===================================================================== RECURSIVE SUBROUTINE SLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, @@ -331,7 +331,7 @@ * External Functions EXTERNAL :: XERBLA, SHGEQZ, SLAQZ3, SLAQZ4, SLASET, $ SLARTG, SROT - REAL, EXTERNAL :: SLAMCH, SLANHS + REAL, EXTERNAL :: SLAMCH, SLANHS, SROUNDUP_LWORK LOGICAL, EXTERNAL :: LSAME INTEGER, EXTERNAL :: ILAENV @@ -461,7 +461,7 @@ LWORKREQ = MAX( ITEMP1+2*NW**2, ITEMP2+2*NBR**2 ) IF ( LWORK .EQ.-1 ) THEN - WORK( 1 ) = REAL( LWORKREQ ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKREQ ) RETURN ELSE IF ( LWORK .LT. LWORKREQ ) THEN INFO = -19 diff --git a/lapack-netlib/SRC/slaqz3.f b/lapack-netlib/SRC/slaqz3.f index edb8a6012..979381364 100644 --- a/lapack-netlib/SRC/slaqz3.f +++ b/lapack-netlib/SRC/slaqz3.f @@ -228,7 +228,7 @@ * *> \date May 2020 * -*> \ingroup doubleGEcomputational +*> \ingroup laqz3 *> * ===================================================================== RECURSIVE SUBROUTINE SLAQZ3( ILSCHUR, ILQ, ILZ, N, ILO, IHI, NW, @@ -258,9 +258,9 @@ REAL :: S, SMLNUM, ULP, SAFMIN, SAFMAX, C1, S1, TEMP * External Functions - EXTERNAL :: XERBLA, STGEXC, SLABAD, SLAQZ0, SLACPY, SLASET, + EXTERNAL :: XERBLA, STGEXC, SLAQZ0, SLACPY, SLASET, $ SLAQZ2, SROT, SLARTG, SLAG2, SGEMM - REAL, EXTERNAL :: SLAMCH + REAL, EXTERNAL :: SLAMCH, SROUNDUP_LWORK INFO = 0 @@ -286,7 +286,7 @@ LWORKREQ = MAX( LWORKREQ, N*NW, 2*NW**2+N ) IF ( LWORK .EQ.-1 ) THEN * workspace query, quick return - WORK( 1 ) = LWORKREQ + WORK( 1 ) = SROUNDUP_LWORK(LWORKREQ) RETURN ELSE IF ( LWORK .LT. 
LWORKREQ ) THEN INFO = -26 @@ -300,7 +300,6 @@ * Get machine constants SAFMIN = SLAMCH( 'SAFE MINIMUM' ) SAFMAX = ONE/SAFMIN - CALL SLABAD( SAFMIN, SAFMAX ) ULP = SLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( REAL( N )/ULP ) diff --git a/lapack-netlib/SRC/slaqz4.f b/lapack-netlib/SRC/slaqz4.f index 3c307dd47..95b2784c5 100644 --- a/lapack-netlib/SRC/slaqz4.f +++ b/lapack-netlib/SRC/slaqz4.f @@ -204,7 +204,7 @@ * *> \date May 2020 * -*> \ingroup doubleGEcomputational +*> \ingroup laqz4 *> * ===================================================================== SUBROUTINE SLAQZ4( ILSCHUR, ILQ, ILZ, N, ILO, IHI, NSHIFTS, @@ -236,6 +236,7 @@ * External functions EXTERNAL :: XERBLA, SGEMM, SLAQZ1, SLAQZ2, SLASET, SLARTG, SROT, $ SLACPY + REAL, EXTERNAL :: SROUNDUP_LWORK INFO = 0 IF ( NBLOCK_DESIRED .LT. NSHIFTS+1 ) THEN @@ -243,7 +244,7 @@ END IF IF ( LWORK .EQ.-1 ) THEN * workspace query, quick return - WORK( 1 ) = N*NBLOCK_DESIRED + WORK( 1 ) = SROUNDUP_LWORK(N*NBLOCK_DESIRED) RETURN ELSE IF ( LWORK .LT. N*NBLOCK_DESIRED ) THEN INFO = -25 diff --git a/lapack-netlib/SRC/slaswlq.f b/lapack-netlib/SRC/slaswlq.f index 95e0ddcce..685f823a0 100644 --- a/lapack-netlib/SRC/slaswlq.f +++ b/lapack-netlib/SRC/slaswlq.f @@ -159,6 +159,8 @@ *> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 *> \endverbatim *> +*> \ingroup laswlq +*> * ===================================================================== SUBROUTINE SLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, $ INFO) @@ -183,7 +185,8 @@ * .. * .. EXTERNAL FUNCTIONS .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. EXTERNAL SUBROUTINES .. EXTERNAL SGELQT, SGEQRT, STPLQT, STPQRT, XERBLA * .. INTRINSIC FUNCTIONS .. 
@@ -262,7 +265,7 @@ $ WORK, INFO ) END IF * - WORK( 1 ) = M * MB + WORK( 1 ) = SROUNDUP_LWORK(M * MB) RETURN * * End of SLASWLQ diff --git a/lapack-netlib/SRC/sorgbr.f b/lapack-netlib/SRC/sorgbr.f index b1a5c03a2..46f4ab130 100644 --- a/lapack-netlib/SRC/sorgbr.f +++ b/lapack-netlib/SRC/sorgbr.f @@ -150,7 +150,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGBcomputational +*> \ingroup ungbr * * ===================================================================== SUBROUTINE SORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -179,7 +179,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SORGLQ, SORGQR, XERBLA @@ -240,7 +241,7 @@ CALL XERBLA( 'SORGBR', -INFO ) RETURN ELSE IF( LQUERY ) THEN - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * @@ -326,7 +327,7 @@ END IF END IF END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORGBR From 2ce67e2ada4f4d5033f83e860c2857db71eb3965 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 14:42:52 +0100 Subject: [PATCH 121/125] Apply ROUNDUP_LWORK (Reference-LAPACK PR 904) --- lapack-netlib/SRC/sorghr.f | 9 ++- lapack-netlib/SRC/sorglq.f | 9 ++- lapack-netlib/SRC/sorgql.f | 9 ++- lapack-netlib/SRC/sorgqr.f | 9 ++- lapack-netlib/SRC/sorgrq.f | 9 ++- lapack-netlib/SRC/sorgtr.f | 9 ++- lapack-netlib/SRC/sorgtsqr.f | 14 ++-- lapack-netlib/SRC/sorgtsqr_row.f | 14 ++-- lapack-netlib/SRC/sorm22.f | 11 +-- lapack-netlib/SRC/sormbr.f | 9 ++- lapack-netlib/SRC/sormhr.f | 9 ++- lapack-netlib/SRC/sormlq.f | 9 ++- lapack-netlib/SRC/sormql.f | 9 ++- lapack-netlib/SRC/sormqr.f | 9 ++- lapack-netlib/SRC/sormrq.f | 9 ++- lapack-netlib/SRC/sormrz.f | 9 ++- lapack-netlib/SRC/sormtr.f | 9 ++- lapack-netlib/SRC/ssbev_2stage.f | 25 ++++--- lapack-netlib/SRC/ssbevd.f | 10 +-- lapack-netlib/SRC/ssbevd_2stage.f | 21 +++--- 
lapack-netlib/SRC/ssbevx_2stage.f | 23 +++--- lapack-netlib/SRC/ssbgvd.f | 9 ++- lapack-netlib/SRC/sspevd.f | 10 +-- lapack-netlib/SRC/sspgvd.f | 9 ++- lapack-netlib/SRC/sstedc.f | 10 +-- lapack-netlib/SRC/sstemr.f | 8 +- lapack-netlib/SRC/sstevd.f | 10 +-- lapack-netlib/SRC/sstevr.f | 10 +-- lapack-netlib/SRC/ssyev.f | 10 +-- lapack-netlib/SRC/ssyev_2stage.f | 27 +++---- lapack-netlib/SRC/ssyevd.f | 10 +-- lapack-netlib/SRC/ssyevr.f | 10 +-- lapack-netlib/SRC/ssyevr_2stage.f | 27 +++---- lapack-netlib/SRC/ssyevx.f | 12 +-- lapack-netlib/SRC/ssyevx_2stage.f | 25 ++++--- lapack-netlib/SRC/ssygv.f | 9 ++- lapack-netlib/SRC/ssygv_2stage.f | 19 ++--- lapack-netlib/SRC/ssygvd.f | 9 ++- lapack-netlib/SRC/ssygvx.f | 9 ++- lapack-netlib/SRC/ssysv.f | 9 ++- lapack-netlib/SRC/ssysv_aa.f | 9 ++- lapack-netlib/SRC/ssysv_aa_2stage.f | 7 +- lapack-netlib/SRC/ssysv_rk.f | 9 ++- lapack-netlib/SRC/ssysv_rook.f | 9 ++- lapack-netlib/SRC/ssysvx.f | 10 +-- lapack-netlib/SRC/ssytrd.f | 9 ++- lapack-netlib/SRC/ssytrd_sb2st.F | 107 ++++++++++++++------------- lapack-netlib/SRC/ssytrd_sy2sb.f | 9 ++- lapack-netlib/SRC/ssytrf.f | 9 ++- lapack-netlib/SRC/ssytrf_aa.f | 9 ++- lapack-netlib/SRC/ssytrf_aa_2stage.f | 7 +- lapack-netlib/SRC/ssytrf_rk.f | 9 ++- lapack-netlib/SRC/ssytrf_rook.f | 9 ++- lapack-netlib/SRC/ssytri_3.f | 9 ++- lapack-netlib/SRC/ssytrs_aa.f | 7 +- lapack-netlib/SRC/stgexc.f | 8 +- lapack-netlib/SRC/stgsen.f | 10 +-- lapack-netlib/SRC/stgsna.f | 11 +-- lapack-netlib/SRC/stgsyl.f | 9 ++- lapack-netlib/SRC/strsen.f | 10 +-- lapack-netlib/SRC/stzrzf.f | 9 ++- 61 files changed, 414 insertions(+), 357 deletions(-) diff --git a/lapack-netlib/SRC/sorghr.f b/lapack-netlib/SRC/sorghr.f index f65cd898c..624ede282 100644 --- a/lapack-netlib/SRC/sorghr.f +++ b/lapack-netlib/SRC/sorghr.f @@ -119,7 +119,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realOTHERcomputational +*> \ingroup unghr * * ===================================================================== SUBROUTINE SORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) @@ -150,7 +150,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -177,7 +178,7 @@ IF( INFO.EQ.0 ) THEN NB = ILAENV( 1, 'SORGQR', ' ', NH, NH, NH, -1 ) LWKOPT = MAX( 1, NH )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -229,7 +230,7 @@ CALL SORGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), $ WORK, LWORK, IINFO ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORGHR diff --git a/lapack-netlib/SRC/sorglq.f b/lapack-netlib/SRC/sorglq.f index b1d107964..30f6d5d48 100644 --- a/lapack-netlib/SRC/sorglq.f +++ b/lapack-netlib/SRC/sorglq.f @@ -120,7 +120,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unglq * * ===================================================================== SUBROUTINE SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -155,7 +155,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -164,7 +165,7 @@ INFO = 0 NB = ILAENV( 1, 'SORGLQ', ' ', M, N, K, -1 ) LWKOPT = MAX( 1, M )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -278,7 +279,7 @@ 50 CONTINUE END IF * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SORGLQ diff --git a/lapack-netlib/SRC/sorgql.f b/lapack-netlib/SRC/sorgql.f index 34ab5edef..f104e64b2 100644 --- a/lapack-netlib/SRC/sorgql.f +++ b/lapack-netlib/SRC/sorgql.f @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realOTHERcomputational +*> \ingroup ungql * * ===================================================================== SUBROUTINE SORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -156,7 +156,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -181,7 +182,7 @@ NB = ILAENV( 1, 'SORGQL', ' ', M, N, K, -1 ) LWKOPT = N*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -8 @@ -285,7 +286,7 @@ 50 CONTINUE END IF * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SORGQL diff --git a/lapack-netlib/SRC/sorgqr.f b/lapack-netlib/SRC/sorgqr.f index 056de54d7..a87ea6c65 100644 --- a/lapack-netlib/SRC/sorgqr.f +++ b/lapack-netlib/SRC/sorgqr.f @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ungqr * * ===================================================================== SUBROUTINE SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -156,7 +156,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -165,7 +166,7 @@ INFO = 0 NB = ILAENV( 1, 'SORGQR', ' ', M, N, K, -1 ) LWKOPT = MAX( 1, N )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -279,7 +280,7 @@ 50 CONTINUE END IF * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SORGQR diff --git a/lapack-netlib/SRC/sorgrq.f b/lapack-netlib/SRC/sorgrq.f index d9b6ccbe6..331f20904 100644 --- a/lapack-netlib/SRC/sorgrq.f +++ b/lapack-netlib/SRC/sorgrq.f @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realOTHERcomputational +*> \ingroup ungrq * * ===================================================================== SUBROUTINE SORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) @@ -156,7 +156,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -181,7 +182,7 @@ NB = ILAENV( 1, 'SORGRQ', ' ', M, N, K, -1 ) LWKOPT = M*NB END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN INFO = -8 @@ -285,7 +286,7 @@ 50 CONTINUE END IF * - WORK( 1 ) = IWS + WORK( 1 ) = SROUNDUP_LWORK(IWS) RETURN * * End of SORGRQ diff --git a/lapack-netlib/SRC/sorgtr.f b/lapack-netlib/SRC/sorgtr.f index 67bde00cb..6a1dc3034 100644 --- a/lapack-netlib/SRC/sorgtr.f +++ b/lapack-netlib/SRC/sorgtr.f @@ -116,7 +116,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ungtr * * ===================================================================== SUBROUTINE SORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) @@ -146,7 +146,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SORGQL, SORGQR, XERBLA @@ -178,7 +179,7 @@ NB = ILAENV( 1, 'SORGQR', ' ', N-1, N-1, N-1, -1 ) END IF LWKOPT = MAX( 1, N-1 )*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -244,7 +245,7 @@ $ LWORK, IINFO ) END IF END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORGTR diff --git a/lapack-netlib/SRC/sorgtsqr.f b/lapack-netlib/SRC/sorgtsqr.f index 692eba1d9..0be27af77 100644 --- a/lapack-netlib/SRC/sorgtsqr.f +++ b/lapack-netlib/SRC/sorgtsqr.f @@ -157,7 +157,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup singleOTHERcomputational +*> \ingroup ungtsqr * *> \par Contributors: * ================== @@ -196,11 +196,15 @@ LOGICAL LQUERY INTEGER IINFO, LDC, LWORKOPT, LC, LW, NBLOCAL, J * .. +* .. External Functions .. + REAL SROUNDUP_LWORK + EXTERNAL SROUNDUP_LWORK +* .. * .. External Subroutines .. EXTERNAL SCOPY, SLAMTSQR, SLASET, XERBLA * .. * .. Intrinsic Functions .. - INTRINSIC REAL, MAX, MIN + INTRINSIC MAX, MIN * .. * .. Executable Statements .. * @@ -257,14 +261,14 @@ CALL XERBLA( 'SORGTSQR', -INFO ) RETURN ELSE IF ( LQUERY ) THEN - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN END IF * * Quick return if possible * IF( MIN( M, N ).EQ.0 ) THEN - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN END IF * @@ -297,7 +301,7 @@ CALL SCOPY( M, WORK( (J-1)*LDC + 1 ), 1, A( 1, J ), 1 ) END DO * - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN * * End of SORGTSQR diff --git a/lapack-netlib/SRC/sorgtsqr_row.f b/lapack-netlib/SRC/sorgtsqr_row.f index d2a2150cd..5a1e1ff07 100644 --- a/lapack-netlib/SRC/sorgtsqr_row.f +++ b/lapack-netlib/SRC/sorgtsqr_row.f @@ -169,7 +169,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup sigleOTHERcomputational +*> \ingroup ungtsqr_row * *> \par Contributors: * ================== @@ -213,11 +213,15 @@ * .. Local Arrays .. REAL DUMMY( 1, 1 ) * .. +* .. External Functions .. + REAL SROUNDUP_LWORK + EXTERNAL SROUNDUP_LWORK +* .. * .. External Subroutines .. EXTERNAL SLARFB_GETT, SLASET, XERBLA * .. * .. Intrinsic Functions .. - INTRINSIC REAL, MAX, MIN + INTRINSIC MAX, MIN * .. * .. Executable Statements .. 
* @@ -255,14 +259,14 @@ CALL XERBLA( 'SORGTSQR_ROW', -INFO ) RETURN ELSE IF ( LQUERY ) THEN - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN END IF * * Quick return if possible * IF( MIN( M, N ).EQ.0 ) THEN - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN END IF * @@ -371,7 +375,7 @@ * END DO * - WORK( 1 ) = REAL( LWORKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWORKOPT ) RETURN * * End of SORGTSQR_ROW diff --git a/lapack-netlib/SRC/sorm22.f b/lapack-netlib/SRC/sorm22.f index 15096870a..886adb2cf 100644 --- a/lapack-netlib/SRC/sorm22.f +++ b/lapack-netlib/SRC/sorm22.f @@ -155,7 +155,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unm22 * * ===================================================================== SUBROUTINE SORM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, @@ -187,13 +187,14 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEMM, SLACPY, STRMM, XERBLA * .. * .. Intrinsic Functions .. - INTRINSIC REAL, MAX, MIN + INTRINSIC MAX, MIN * .. * .. Executable Statements .. * @@ -237,7 +238,7 @@ * IF( INFO.EQ.0 ) THEN LWKOPT = M*N - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) END IF * IF( INFO.NE.0 ) THEN @@ -430,7 +431,7 @@ END IF END IF * - WORK( 1 ) = REAL( LWKOPT ) + WORK( 1 ) = SROUNDUP_LWORK( LWKOPT ) RETURN * * End of SORM22 diff --git a/lapack-netlib/SRC/sormbr.f b/lapack-netlib/SRC/sormbr.f index efe5be41a..e2dccc363 100644 --- a/lapack-netlib/SRC/sormbr.f +++ b/lapack-netlib/SRC/sormbr.f @@ -188,7 +188,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unmbr * * ===================================================================== SUBROUTINE SORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, @@ -217,7 +217,8 @@ * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SORMLQ, SORMQR, XERBLA @@ -285,7 +286,7 @@ END IF END IF LWKOPT = NW*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -363,7 +364,7 @@ $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) END IF END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORMBR diff --git a/lapack-netlib/SRC/sormhr.f b/lapack-netlib/SRC/sormhr.f index 2d2053af4..e033feb38 100644 --- a/lapack-netlib/SRC/sormhr.f +++ b/lapack-netlib/SRC/sormhr.f @@ -171,7 +171,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unmhr * * ===================================================================== SUBROUTINE SORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, @@ -199,7 +199,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SORMQR, XERBLA @@ -253,7 +254,7 @@ NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, NH, NH, -1 ) END IF LWKOPT = NW*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -285,7 +286,7 @@ CALL SORMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORMHR diff --git a/lapack-netlib/SRC/sormlq.f b/lapack-netlib/SRC/sormlq.f index ee996e560..1a32568b6 100644 --- a/lapack-netlib/SRC/sormlq.f +++ b/lapack-netlib/SRC/sormlq.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realOTHERcomputational +*> \ingroup unmlq * * ===================================================================== SUBROUTINE SORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, @@ -195,7 +195,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLARFB, SLARFT, SORML2, XERBLA @@ -246,7 +247,7 @@ NB = MIN( NBMAX, ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) LWKOPT = NW*NB + TSIZE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -338,7 +339,7 @@ $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORMLQ diff --git a/lapack-netlib/SRC/sormql.f b/lapack-netlib/SRC/sormql.f index 72a8d22ee..9564d4141 100644 --- a/lapack-netlib/SRC/sormql.f +++ b/lapack-netlib/SRC/sormql.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unmql * * ===================================================================== SUBROUTINE SORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, @@ -194,7 +194,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLARFB, SLARFT, SORM2L, XERBLA @@ -249,7 +250,7 @@ $ K, -1 ) ) LWKOPT = NW*NB + TSIZE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -330,7 +331,7 @@ $ WORK, LDWORK ) 10 CONTINUE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORMQL diff --git a/lapack-netlib/SRC/sormqr.f b/lapack-netlib/SRC/sormqr.f index 5d4256f09..adb1203df 100644 --- a/lapack-netlib/SRC/sormqr.f +++ b/lapack-netlib/SRC/sormqr.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realOTHERcomputational +*> \ingroup unmqr * * ===================================================================== SUBROUTINE SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, @@ -194,7 +194,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLARFB, SLARFT, SORM2R, XERBLA @@ -245,7 +246,7 @@ NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K, $ -1 ) ) LWKOPT = NW*NB + TSIZE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -331,7 +332,7 @@ $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORMQR diff --git a/lapack-netlib/SRC/sormrq.f b/lapack-netlib/SRC/sormrq.f index 62fcdacdb..f091f0507 100644 --- a/lapack-netlib/SRC/sormrq.f +++ b/lapack-netlib/SRC/sormrq.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unmrq * * ===================================================================== SUBROUTINE SORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, @@ -195,7 +195,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLARFB, SLARFT, SORMR2, XERBLA @@ -250,7 +251,7 @@ $ K, -1 ) ) LWKOPT = NW*NB + TSIZE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -337,7 +338,7 @@ $ WORK, LDWORK ) 10 CONTINUE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORMRQ diff --git a/lapack-netlib/SRC/sormrz.f b/lapack-netlib/SRC/sormrz.f index cdadd62b5..b037a984b 100644 --- a/lapack-netlib/SRC/sormrz.f +++ b/lapack-netlib/SRC/sormrz.f @@ -168,7 +168,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realOTHERcomputational +*> \ingroup unmrz * *> \par Contributors: * ================== @@ -213,7 +213,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLARZB, SLARZT, SORMR3, XERBLA @@ -271,7 +272,7 @@ $ K, -1 ) ) LWKOPT = NW*NB + TSIZE END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -367,7 +368,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/sormtr.f b/lapack-netlib/SRC/sormtr.f index 3ba749fee..1bc87768f 100644 --- a/lapack-netlib/SRC/sormtr.f +++ b/lapack-netlib/SRC/sormtr.f @@ -164,7 +164,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unmtr * * ===================================================================== SUBROUTINE SORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, @@ -192,7 +192,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SORMQL, SORMQR, XERBLA @@ -256,7 +257,7 @@ END IF END IF LWKOPT = NW*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -301,7 +302,7 @@ CALL SORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SORMTR diff --git a/lapack-netlib/SRC/ssbev_2stage.f b/lapack-netlib/SRC/ssbev_2stage.f index 5752c1ecc..71ace4e27 100644 --- a/lapack-netlib/SRC/ssbev_2stage.f +++ b/lapack-netlib/SRC/ssbev_2stage.f @@ -131,7 +131,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. 
*> LWORK = MAX(1, dimension) where *> dimension = (2KD+1)*N + KD*NTHREADS + N @@ -164,7 +164,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup hbev_2stage * *> \par Further Details: * ===================== @@ -182,7 +182,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -190,11 +190,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -232,12 +232,13 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, SLANSB - EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE + REAL SLAMCH, SLANSB, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLASCL, SSCAL, SSTEQR, SSTERF, XERBLA, - $ SSYTRD_SB2ST + $ SSYTRD_SB2ST * .. * .. Intrinsic Functions .. 
INTRINSIC SQRT @@ -268,7 +269,7 @@ IF( INFO.EQ.0 ) THEN IF( N.LE.1 ) THEN LWMIN = 1 - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ELSE IB = ILAENV2STAGE( 2, 'SSYTRD_SB2ST', JOBZ, $ N, KD, -1, -1 ) @@ -277,7 +278,7 @@ LWTRD = ILAENV2STAGE( 4, 'SSYTRD_SB2ST', JOBZ, $ N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ENDIF * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) @@ -343,7 +344,7 @@ LLWORK = LWORK - INDWRK + 1 * CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, - $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, $ WORK( INDWRK ), LLWORK, IINFO ) * * For eigenvalues only, call SSTERF. For eigenvectors, call SSTEQR. @@ -368,7 +369,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/ssbevd.f b/lapack-netlib/SRC/ssbevd.f index e87f9a030..e4118dbed 100644 --- a/lapack-netlib/SRC/ssbevd.f +++ b/lapack-netlib/SRC/ssbevd.f @@ -179,7 +179,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup hbevd * * ===================================================================== SUBROUTINE SSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, @@ -213,8 +213,8 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL SLAMCH, SLANSB - EXTERNAL LSAME, SLAMCH, SLANSB + REAL SLAMCH, SLANSB, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANSB, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEMM, SLACPY, SLASCL, SSBTRD, SSCAL, SSTEDC, @@ -259,7 +259,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. 
.NOT.LQUERY ) THEN @@ -342,7 +342,7 @@ IF( ISCALE.EQ.1 ) $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/ssbevd_2stage.f b/lapack-netlib/SRC/ssbevd_2stage.f index 014bade48..de3f1c010 100644 --- a/lapack-netlib/SRC/ssbevd_2stage.f +++ b/lapack-netlib/SRC/ssbevd_2stage.f @@ -134,7 +134,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = (2KD+1)*N + KD*NTHREADS + N @@ -188,7 +188,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup hbevd_2stage * *> \par Further Details: * ===================== @@ -206,7 +206,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -214,11 +214,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -258,8 +258,9 @@ * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, SLANSB - EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE + REAL SLAMCH, SLANSB, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEMM, SLACPY, SLASCL, SSCAL, SSTEDC, @@ -307,7 +308,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -374,7 +375,7 @@ LLWRK2 = LWORK - INDWK2 + 1 * CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, - $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, $ WORK( INDWRK ), LLWORK, IINFO ) * * For eigenvalues only, call SSTERF. For eigenvectors, call SSTEDC. @@ -394,7 +395,7 @@ IF( ISCALE.EQ.1 ) $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/ssbevx_2stage.f b/lapack-netlib/SRC/ssbevx_2stage.f index 224b676b7..d25d3639a 100644 --- a/lapack-netlib/SRC/ssbevx_2stage.f +++ b/lapack-netlib/SRC/ssbevx_2stage.f @@ -235,7 +235,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, 7*N, dimension) where *> dimension = (2KD+1)*N + KD*NTHREADS + 2*N @@ -281,7 +281,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup hbevx_2stage * *> \par Further Details: * ===================== @@ -299,7 +299,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). 
*> Denver, Colorado, USA, 2013. @@ -307,11 +307,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -357,8 +357,9 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, SLANSB - EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE + REAL SLAMCH, SLANSB, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SSCAL, @@ -414,7 +415,7 @@ IF( INFO.EQ.0 ) THEN IF( N.LE.1 ) THEN LWMIN = 1 - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ELSE IB = ILAENV2STAGE( 2, 'SSYTRD_SB2ST', JOBZ, $ N, KD, -1, -1 ) @@ -423,7 +424,7 @@ LWTRD = ILAENV2STAGE( 4, 'SSYTRD_SB2ST', JOBZ, $ N, KD, IB, -1 ) LWMIN = 2*N + LHTRD + LWTRD - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ENDIF * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) @@ -513,7 +514,7 @@ LLWORK = LWORK - INDWRK + 1 * CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), - $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, $ WORK( INDWRK ), LLWORK, IINFO ) * * If all eigenvalues are desired and ABSTOL is less than or equal @@ -624,7 +625,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/ssbgvd.f b/lapack-netlib/SRC/ssbgvd.f index 7c21ee455..f872e5464 100644 --- a/lapack-netlib/SRC/ssbgvd.f +++ b/lapack-netlib/SRC/ssbgvd.f @@ -208,7 +208,7 @@ *> \author Univ. 
of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup hbgvd * *> \par Contributors: * ================== @@ -247,7 +247,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEMM, SLACPY, SPBSTF, SSBGST, SSBTRD, SSTEDC, @@ -292,7 +293,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -353,7 +354,7 @@ CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/sspevd.f b/lapack-netlib/SRC/sspevd.f index 0872e95ac..1aae48d1d 100644 --- a/lapack-netlib/SRC/sspevd.f +++ b/lapack-netlib/SRC/sspevd.f @@ -164,7 +164,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup hpevd * * ===================================================================== SUBROUTINE SSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, @@ -198,8 +198,8 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL SLAMCH, SLANSP - EXTERNAL LSAME, SLAMCH, SLANSP + REAL SLAMCH, SLANSP, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANSP, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SOPMTR, SSCAL, SSPTRD, SSTEDC, SSTERF, XERBLA @@ -240,7 +240,7 @@ END IF END IF IWORK( 1 ) = LIWMIN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -9 @@ -319,7 +319,7 @@ IF( ISCALE.EQ.1 ) $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/sspgvd.f b/lapack-netlib/SRC/sspgvd.f index 1a88365f2..c1e14594b 100644 --- a/lapack-netlib/SRC/sspgvd.f +++ b/lapack-netlib/SRC/sspgvd.f @@ -191,7 +191,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realOTHEReigen +*> \ingroup hpgvd * *> \par Contributors: * ================== @@ -225,7 +225,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SPPTRF, SSPEVD, SSPGST, STPMV, STPSV, XERBLA @@ -267,7 +268,7 @@ LWMIN = 2*N END IF END IF - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 @@ -345,7 +346,7 @@ END IF END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/sstedc.f b/lapack-netlib/SRC/sstedc.f index 61e3c2fda..0e1cb4258 100644 --- a/lapack-netlib/SRC/sstedc.f +++ b/lapack-netlib/SRC/sstedc.f @@ -167,7 +167,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup auxOTHERcomputational +*> \ingroup stedc * *> \par Contributors: * ================== @@ -208,8 +208,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANST - EXTERNAL ILAENV, LSAME, SLAMCH, SLANST + REAL SLAMCH, SLANST, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SLAMCH, SLANST, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEMM, SLACPY, SLAED0, SLASCL, SLASET, SLASRT, @@ -268,7 +268,7 @@ LIWMIN = 3 + 5*N END IF END IF - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT. LQUERY ) THEN @@ -463,7 +463,7 @@ END IF * 50 CONTINUE - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/sstemr.f b/lapack-netlib/SRC/sstemr.f index 2ed697b69..62cfa3d4d 100644 --- a/lapack-netlib/SRC/sstemr.f +++ b/lapack-netlib/SRC/sstemr.f @@ -359,8 +359,8 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL SLAMCH, SLANST - EXTERNAL LSAME, SLAMCH, SLANST + REAL SLAMCH, SLANST, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANST, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL SCOPY, SLAE2, SLAEV2, SLARRC, SLARRE, SLARRJ, @@ -443,7 +443,7 @@ RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( WANTZ .AND. ALLEIG ) THEN @@ -782,7 +782,7 @@ ENDIF * * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/sstevd.f b/lapack-netlib/SRC/sstevd.f index 218af8c76..4fc2a6311 100644 --- a/lapack-netlib/SRC/sstevd.f +++ b/lapack-netlib/SRC/sstevd.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup stevd * * ===================================================================== SUBROUTINE SSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, @@ -182,8 +182,8 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL SLAMCH, SLANST - EXTERNAL LSAME, SLAMCH, SLANST + REAL SLAMCH, SLANST, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANST, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SSCAL, SSTEDC, SSTERF, XERBLA @@ -215,7 +215,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -283,7 +283,7 @@ IF( ISCALE.EQ.1 ) $ CALL SSCAL( N, ONE / SIGMA, D, 1 ) * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/sstevr.f b/lapack-netlib/SRC/sstevr.f index 2ab63eb3e..42f49b11b 100644 --- a/lapack-netlib/SRC/sstevr.f +++ b/lapack-netlib/SRC/sstevr.f @@ -287,7 +287,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHEReigen +*> \ingroup stevr * *> \par Contributors: * ================== @@ -336,8 +336,8 @@ * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANST - EXTERNAL LSAME, ILAENV, SLAMCH, SLANST + REAL SLAMCH, SLANST, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANST, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTEMR, SSTEIN, SSTERF, @@ -389,7 +389,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -570,7 +570,7 @@ * IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 * * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/ssyev.f b/lapack-netlib/SRC/ssyev.f index 03ed326b8..638445f04 100644 --- a/lapack-netlib/SRC/ssyev.f +++ b/lapack-netlib/SRC/ssyev.f @@ -125,7 +125,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup heev * * ===================================================================== SUBROUTINE SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) @@ -158,8 +158,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANSY - EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY + REAL SLAMCH, SLANSY, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, SSYTRD, @@ -190,7 +190,7 @@ IF( INFO.EQ.0 ) THEN NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, ( NB+2 )*N ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) $ INFO = -8 @@ -274,7 +274,7 @@ * * Set WORK(1) to optimal workspace size. 
* - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssyev_2stage.f b/lapack-netlib/SRC/ssyev_2stage.f index a6fa30cc8..519ee334d 100644 --- a/lapack-netlib/SRC/ssyev_2stage.f +++ b/lapack-netlib/SRC/ssyev_2stage.f @@ -20,7 +20,7 @@ * Definition: * =========== * -* SUBROUTINE SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* SUBROUTINE SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, * INFO ) * * IMPLICIT NONE @@ -105,12 +105,12 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = max(stage1,stage2) + (KD+1)*N + 2*N -*> = N*KD + N*max(KD+1,FACTOPTNB) -*> + max(2*KD*KD, KD*NTHREADS) +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) *> + (KD+1)*N + 2*N *> where KD is the blocking size of the reduction, *> FACTOPTNB is the blocking used by the QR or LQ @@ -143,7 +143,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup heev_2stage * *> \par Further Details: * ===================== @@ -161,7 +161,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -169,16 +169,16 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. 
*> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * * ===================================================================== - SUBROUTINE SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + SUBROUTINE SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, $ INFO ) * IMPLICIT NONE @@ -211,8 +211,9 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, SLANSY - EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE + REAL SLAMCH, SLANSY, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, @@ -305,7 +306,7 @@ LLWORK = LWORK - INDWRK + 1 * CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), - $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, $ WORK( INDWRK ), LLWORK, IINFO ) * * For eigenvalues only, call SSTERF. For eigenvectors, first call @@ -336,7 +337,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/ssyevd.f b/lapack-netlib/SRC/ssyevd.f index ee0e33384..a5e4638d6 100644 --- a/lapack-netlib/SRC/ssyevd.f +++ b/lapack-netlib/SRC/ssyevd.f @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup heevd * *> \par Contributors: * ================== @@ -204,8 +204,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANSY - EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY + REAL SLAMCH, SLANSY, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL SLACPY, SLASCL, SORMTR, SSCAL, SSTEDC, SSTERF, @@ -251,7 +251,7 @@ $ N*ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) ) LIOPT = LIWMIN END IF - WORK( 1 ) = LOPT + WORK( 1 ) = SROUNDUP_LWORK(LOPT) IWORK( 1 ) = LIOPT * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -335,7 +335,7 @@ IF( ISCALE.EQ.1 ) $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) * - WORK( 1 ) = LOPT + WORK( 1 ) = SROUNDUP_LWORK(LOPT) IWORK( 1 ) = LIOPT * RETURN diff --git a/lapack-netlib/SRC/ssyevr.f b/lapack-netlib/SRC/ssyevr.f index d8e4ce3ea..47e4d7cbf 100644 --- a/lapack-netlib/SRC/ssyevr.f +++ b/lapack-netlib/SRC/ssyevr.f @@ -317,7 +317,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup heevr * *> \par Contributors: * ================== @@ -368,8 +368,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANSY - EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY + REAL SLAMCH, SLANSY, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SCOPY, SORMTR, SSCAL, SSTEBZ, SSTEMR, SSTEIN, @@ -428,7 +428,7 @@ NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) ) LWKOPT = MAX( ( NB+1 )*N, LWMIN ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -677,7 +677,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/ssyevr_2stage.f b/lapack-netlib/SRC/ssyevr_2stage.f index 8ab2844c6..a2d6a6231 100644 --- a/lapack-netlib/SRC/ssyevr_2stage.f +++ b/lapack-netlib/SRC/ssyevr_2stage.f @@ -263,7 +263,7 @@ *> indicating the nonzero elements in Z. The i-th eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through *> ISUPPZ( 2*i ). This is an output of SSTEMR (tridiagonal -*> matrix). The support of the eigenvectors of A is typically +*> matrix). 
The support of the eigenvectors of A is typically *> 1:N because of the orthogonal transformations applied by SORMTR. *> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 *> \endverbatim @@ -277,12 +277,12 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. +*> The dimension of the array WORK. *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, 26*N, dimension) where *> dimension = max(stage1,stage2) + (KD+1)*N + 5*N -*> = N*KD + N*max(KD+1,FACTOPTNB) -*> + max(2*KD*KD, KD*NTHREADS) +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) *> + (KD+1)*N + 5*N *> where KD is the blocking size of the reduction, *> FACTOPTNB is the blocking used by the QR or LQ @@ -330,7 +330,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup heevr_2stage * *> \par Contributors: * ================== @@ -358,7 +358,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -366,11 +366,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -416,8 +416,9 @@ * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV, ILAENV2STAGE - REAL SLAMCH, SLANSY - EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV, ILAENV2STAGE + REAL SLAMCH, SLANSY, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANSY, SROUNDUP_LWORK, ILAENV, + $ ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SCOPY, SORMTR, SSCAL, SSTEBZ, SSTEMR, SSTEIN, @@ -484,7 +485,7 @@ * NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) * NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) ) * LWKOPT = MAX( ( NB+1 )*N, LWMIN ) - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN END IF * @@ -608,7 +609,7 @@ * Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. * * - CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), $ LHTRD, WORK( INDWK ), LLWORK, IINFO ) * @@ -732,7 +733,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/ssyevx.f b/lapack-netlib/SRC/ssyevx.f index 11776e8c5..2204aa39b 100644 --- a/lapack-netlib/SRC/ssyevx.f +++ b/lapack-netlib/SRC/ssyevx.f @@ -244,7 +244,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup heevx * * ===================================================================== SUBROUTINE SSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, @@ -285,8 +285,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANSY - EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY + REAL SLAMCH, SLANSY, SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL SCOPY, SLACPY, SORGTR, SORMTR, SSCAL, SSTEBZ, @@ -338,13 +338,13 @@ IF( INFO.EQ.0 ) THEN IF( N.LE.1 ) THEN LWKMIN = 1 - WORK( 1 ) = LWKMIN + WORK( 1 ) = SROUNDUP_LWORK(LWKMIN) ELSE LWKMIN = 8*N NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) ) LWKOPT = MAX( LWKMIN, ( NB + 3 )*N ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) @@ -542,7 +542,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssyevx_2stage.f b/lapack-netlib/SRC/ssyevx_2stage.f index 1a2225c87..a8585e5f7 100644 --- a/lapack-netlib/SRC/ssyevx_2stage.f +++ b/lapack-netlib/SRC/ssyevx_2stage.f @@ -208,12 +208,12 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, 8*N, dimension) where *> dimension = max(stage1,stage2) + (KD+1)*N + 3*N -*> = N*KD + N*max(KD+1,FACTOPTNB) -*> + max(2*KD*KD, KD*NTHREADS) +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) *> + (KD+1)*N + 3*N *> where KD is the blocking size of the reduction, *> FACTOPTNB is the blocking used by the QR or LQ @@ -259,7 +259,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup heevx_2stage * *> \par Further Details: * ===================== @@ -277,7 +277,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. 
@@ -285,11 +285,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -334,8 +334,9 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - REAL SLAMCH, SLANSY - EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE + REAL SLAMCH, SLANSY, SROUNDUP_LWORK + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SCOPY, SLACPY, SORGTR, SORMTR, SSCAL, SSTEBZ, @@ -388,7 +389,7 @@ IF( INFO.EQ.0 ) THEN IF( N.LE.1 ) THEN LWMIN = 1 - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) ELSE KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, $ N, -1, -1, -1 ) @@ -487,7 +488,7 @@ INDWRK = INDHOUS + LHTRD LLWORK = LWORK - INDWRK + 1 * - CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) * @@ -600,7 +601,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/ssygv.f b/lapack-netlib/SRC/ssygv.f index f39947d92..3a79f5431 100644 --- a/lapack-netlib/SRC/ssygv.f +++ b/lapack-netlib/SRC/ssygv.f @@ -167,7 +167,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup hegv * * ===================================================================== SUBROUTINE SSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, @@ -199,7 +199,8 @@ * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SPOTRF, SSYEV, SSYGST, STRMM, STRSM, XERBLA @@ -234,7 +235,7 @@ LWKMIN = MAX( 1, 3*N - 1 ) NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( LWKMIN, ( NB + 2 )*N ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN INFO = -11 @@ -303,7 +304,7 @@ END IF END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SSYGV diff --git a/lapack-netlib/SRC/ssygv_2stage.f b/lapack-netlib/SRC/ssygv_2stage.f index 3d9a44b5e..8719d8c7a 100644 --- a/lapack-netlib/SRC/ssygv_2stage.f +++ b/lapack-netlib/SRC/ssygv_2stage.f @@ -143,12 +143,12 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = max(stage1,stage2) + (KD+1)*N + 2*N -*> = N*KD + N*max(KD+1,FACTOPTNB) -*> + max(2*KD*KD, KD*NTHREADS) +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) *> + (KD+1)*N + 2*N *> where KD is the blocking size of the reduction, *> FACTOPTNB is the blocking used by the QR or LQ @@ -186,7 +186,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup hegv_2stage * *> \par Further Details: * ===================== @@ -204,7 +204,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -212,11 +212,11 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. 
Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * @@ -252,7 +252,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - EXTERNAL LSAME, ILAENV2STAGE + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV2STAGE, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SPOTRF, SSYGST, STRMM, STRSM, XERBLA, @@ -359,7 +360,7 @@ END IF END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN * * End of SSYGV_2STAGE diff --git a/lapack-netlib/SRC/ssygvd.f b/lapack-netlib/SRC/ssygvd.f index 3c8bd2a0e..a90d1afb7 100644 --- a/lapack-netlib/SRC/ssygvd.f +++ b/lapack-netlib/SRC/ssygvd.f @@ -197,7 +197,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYeigen +*> \ingroup hegvd * *> \par Further Details: * ===================== @@ -245,7 +245,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SPOTRF, SSYEVD, SSYGST, STRMM, STRSM, XERBLA @@ -289,7 +290,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LOPT + WORK( 1 ) = SROUNDUP_LWORK(LOPT) IWORK( 1 ) = LIOPT * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -361,7 +362,7 @@ END IF END IF * - WORK( 1 ) = LOPT + WORK( 1 ) = SROUNDUP_LWORK(LOPT) IWORK( 1 ) = LIOPT * RETURN diff --git a/lapack-netlib/SRC/ssygvx.f b/lapack-netlib/SRC/ssygvx.f index 344075c9f..16adefa22 100644 --- a/lapack-netlib/SRC/ssygvx.f +++ b/lapack-netlib/SRC/ssygvx.f @@ -283,7 +283,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realSYeigen +*> \ingroup hegvx * *> \par Contributors: * ================== @@ -324,7 +324,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL ILAENV, LSAME + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SPOTRF, SSYEVX, SSYGST, STRMM, STRSM, XERBLA @@ -380,7 +381,7 @@ LWKMIN = MAX( 1, 8*N ) NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( LWKMIN, ( NB + 3 )*N ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN INFO = -20 @@ -453,7 +454,7 @@ * * Set WORK(1) to optimal workspace size. * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssysv.f b/lapack-netlib/SRC/ssysv.f index 06a42dfb7..523ea66c1 100644 --- a/lapack-netlib/SRC/ssysv.f +++ b/lapack-netlib/SRC/ssysv.f @@ -163,7 +163,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYsolve +*> \ingroup hesv * * ===================================================================== SUBROUTINE SSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, @@ -190,7 +190,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, SSYTRF, SSYTRS, SSYTRS2 @@ -225,7 +226,7 @@ CALL SSYTRF( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) LWKOPT = INT( WORK( 1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -258,7 +259,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssysv_aa.f b/lapack-netlib/SRC/ssysv_aa.f index 5661332c5..e43d4de7f 100644 --- a/lapack-netlib/SRC/ssysv_aa.f +++ b/lapack-netlib/SRC/ssysv_aa.f @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realSYsolve +*> \ingroup hesv_aa * * ===================================================================== SUBROUTINE SSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, @@ -181,7 +181,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, SSYTRS_AA, SSYTRF_AA @@ -216,7 +217,7 @@ $ -1, INFO ) LWKOPT_SYTRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -238,7 +239,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssysv_aa_2stage.f b/lapack-netlib/SRC/ssysv_aa_2stage.f index aa862f14b..3d88e068e 100644 --- a/lapack-netlib/SRC/ssysv_aa_2stage.f +++ b/lapack-netlib/SRC/ssysv_aa_2stage.f @@ -178,7 +178,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYsolve +*> \ingroup hesv_aa_2stage * * ===================================================================== SUBROUTINE SSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, @@ -208,7 +208,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SSYTRF_AA_2STAGE, SSYTRS_AA_2STAGE, @@ -268,7 +269,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssysv_rk.f b/lapack-netlib/SRC/ssysv_rk.f index 9a7dfa4bb..abf862d66 100644 --- a/lapack-netlib/SRC/ssysv_rk.f +++ b/lapack-netlib/SRC/ssysv_rk.f @@ -205,7 +205,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup singleSYsolve +*> \ingroup hesv_rk * *> \par Contributors: * ================== @@ -247,7 +247,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL XERBLA, SSYTRF_RK, SSYTRS_3 @@ -282,7 +283,7 @@ CALL SSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) LWKOPT = INT( WORK( 1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -305,7 +306,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssysv_rook.f b/lapack-netlib/SRC/ssysv_rook.f index fb7ba8c53..c5c77e562 100644 --- a/lapack-netlib/SRC/ssysv_rook.f +++ b/lapack-netlib/SRC/ssysv_rook.f @@ -181,7 +181,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYsolve +*> \ingroup hesv_rook * *> \par Contributors: * ================== @@ -223,7 +223,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, SSYTRF_ROOK, SSYTRS_ROOK @@ -258,7 +259,7 @@ CALL SSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) LWKOPT = INT( WORK( 1 ) ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -281,7 +282,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssysvx.f b/lapack-netlib/SRC/ssysvx.f index b19ce2641..0d72217eb 100644 --- a/lapack-netlib/SRC/ssysvx.f +++ b/lapack-netlib/SRC/ssysvx.f @@ -275,7 +275,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYsolve +*> \ingroup hesvx * * ===================================================================== SUBROUTINE SSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, @@ -311,8 +311,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - REAL SLAMCH, SLANSY - EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY + REAL SLAMCH, SLANSY, SROUNDUP_LWORK + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY, SROUNDUP_LWORK * .. * .. External Subroutines .. 
EXTERNAL SLACPY, SSYCON, SSYRFS, SSYTRF, SSYTRS, XERBLA @@ -354,7 +354,7 @@ NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( LWKOPT, N*NB ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -404,7 +404,7 @@ IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) $ INFO = N + 1 * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssytrd.f b/lapack-netlib/SRC/ssytrd.f index f09ad9ab4..f4fbecdc9 100644 --- a/lapack-netlib/SRC/ssytrd.f +++ b/lapack-netlib/SRC/ssytrd.f @@ -139,7 +139,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYcomputational +*> \ingroup hetrd * *> \par Further Details: * ===================== @@ -223,7 +223,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -248,7 +249,7 @@ * NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) LWKOPT = N*NB - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -365,7 +366,7 @@ $ TAU( I ), IINFO ) END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SSYTRD diff --git a/lapack-netlib/SRC/ssytrd_sb2st.F b/lapack-netlib/SRC/ssytrd_sb2st.F index b8386670a..32bae26dc 100644 --- a/lapack-netlib/SRC/ssytrd_sb2st.F +++ b/lapack-netlib/SRC/ssytrd_sb2st.F @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, * D, E, HOUS, LHOUS, WORK, LWORK, INFO ) * * #if defined(_OPENMP) @@ -53,12 +53,12 @@ *> \param[in] STAGE1 *> \verbatim *> STAGE1 is CHARACTER*1 -*> = 'N': "No": to mention that the stage 1 of the reduction +*> = 'N': "No": to mention that the stage 1 of the reduction *> from dense to band using the ssytrd_sy2sb routine -*> was not called before this routine to reproduce AB. 
-*> In other term this routine is called as standalone. -*> = 'Y': "Yes": to mention that the stage 1 of the -*> reduction from dense to band using the ssytrd_sy2sb +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the ssytrd_sy2sb *> routine has been called to produce AB (e.g., AB is *> the output of ssytrd_sy2sb. *> \endverbatim @@ -66,10 +66,10 @@ *> \param[in] VECT *> \verbatim *> VECT is CHARACTER*1 -*> = 'N': No need for the Housholder representation, +*> = 'N': No need for the Housholder representation, *> and thus LHOUS is of size max(1, 4*N); -*> = 'V': the Householder representation is needed to -*> either generate or to apply Q later on, +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, *> then LHOUS is to be queried and computed. *> (NOT AVAILABLE IN THIS RELEASE). *> \endverbatim @@ -147,7 +147,7 @@ *> message related to LHOUS is issued by XERBLA. *> LHOUS = MAX(1, dimension) where *> dimension = 4*N if VECT='N' -*> not available now if VECT='H' +*> not available now if VECT='H' *> \endverbatim *> *> \param[out] WORK @@ -188,7 +188,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup real16OTHERcomputational +*> \ingroup hetrd_hb2st * *> \par Further Details: * ===================== @@ -208,7 +208,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -216,16 +216,16 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. 
Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim *> * ===================================================================== - SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) * #if defined(_OPENMP) @@ -258,11 +258,11 @@ * .. * .. Local Scalars .. LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 - INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, - $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, $ SISEV, SIZETAU, LDV, LHMIN, LWMIN * .. @@ -274,8 +274,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV2STAGE - EXTERNAL LSAME, ILAENV2STAGE + INTEGER ILAENV2STAGE + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV2STAGE, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -315,7 +316,7 @@ * IF( INFO.EQ.0 ) THEN HOUS( 1 ) = LHMIN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) END IF * IF( INFO.NE.0 ) THEN @@ -355,7 +356,7 @@ ABDPOS = KD + 1 ABOFDPOS = KD ELSE - APOS = INDA + APOS = INDA AWPOS = INDA + KD + 1 DPOS = APOS OFDPOS = DPOS + 1 @@ -363,11 +364,11 @@ ABOFDPOS = 2 ENDIF -* -* Case KD=0: -* The matrix is diagonal. We just copy it (convert to "real" for -* real because D is double and the imaginary part should be 0) -* and store it in D. 
A sequential code here is better or +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* real because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or * in a parallel environment it might need two cores for D and E * IF( KD.EQ.0 ) THEN @@ -382,17 +383,17 @@ WORK( 1 ) = 1 RETURN END IF -* -* Case KD=1: -* The matrix is already Tridiagonal. We have to make diagonal +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal * and offdiagonal elements real, and store them in D and E. -* For that, for real precision just copy the diag and offdiag -* to D and E while for the COMPLEX case the bulge chasing is -* performed to convert the hermetian tridiagonal to symmetric -* tridiagonal. A simpler conversion formula might be used, but then +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler conversion formula might be used, but then * updating the Q matrix will be required and based if Q is generated -* or not this might complicate the story. -* +* or not this might complicate the story. +* IF( KD.EQ.1 ) THEN DO 50 I = 1, N D( I ) = ( AB( ABDPOS, I ) ) @@ -413,7 +414,7 @@ RETURN END IF * -* Main code start here. +* Main code start here. * Reduce the symmetric band of A to a tridiagonal matrix. 
* THGRSIZ = N @@ -422,7 +423,7 @@ NBTILES = CEILING( REAL(N)/REAL(KD) ) STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) -* +* CALL SLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) CALL SLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) * @@ -431,7 +432,7 @@ * #if defined(_OPENMP) !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) -!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) !$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) !$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) !$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) @@ -440,7 +441,7 @@ #endif * * main bulge chasing loop -* +* DO 100 THGRID = 1, THGRNB STT = (THGRID-1)*THGRSIZ+1 THED = MIN( (STT + THGRSIZ -1), (N-1)) @@ -451,7 +452,7 @@ ST = STT DO 130 SWEEPID = ST, ED DO 140 K = 1, GRSIZ - MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) $ + (M-1)*GRSIZ + K IF ( MYID.EQ.1 ) THEN TTYPE = 1 @@ -477,16 +478,16 @@ ENDIF * * Call the kernel -* +* #if defined(_OPENMP) && _OPENMP >= 201307 - IF( TTYPE.NE.1 ) THEN + IF( TTYPE.NE.1 ) THEN !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) !$OMP$ DEPEND(in:WORK(MYID-1)) !$OMP$ DEPEND(out:WORK(MYID)) TID = OMP_GET_THREAD_NUM() - CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, $ STIND, EDIND, SWEEPID, N, KD, IB, - $ WORK ( INDA ), LDA, + $ WORK ( INDA ), LDA, $ HOUS( INDV ), HOUS( INDTAU ), LDV, $ WORK( INDW + TID*KD ) ) !$OMP END TASK @@ -494,20 +495,20 @@ !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) !$OMP$ DEPEND(out:WORK(MYID)) TID = OMP_GET_THREAD_NUM() - CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, $ STIND, EDIND, SWEEPID, N, KD, IB, - $ WORK ( INDA ), LDA, + $ WORK ( INDA ), LDA, $ HOUS( INDV ), HOUS( INDTAU ), LDV, $ WORK( INDW + TID*KD ) ) !$OMP END TASK ENDIF #else - CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, $ STIND, 
EDIND, SWEEPID, N, KD, IB, - $ WORK ( INDA ), LDA, + $ WORK ( INDA ), LDA, $ HOUS( INDV ), HOUS( INDTAU ), LDV, $ WORK( INDW ) ) -#endif +#endif IF ( BLKLASTIND.GE.(N-1) ) THEN STT = STT + 1 EXIT @@ -522,14 +523,14 @@ !$OMP END MASTER !$OMP END PARALLEL #endif -* +* * Copy the diagonal from A to D. Note that D is REAL thus only * the Real part is needed, the imaginary part should be zero. * DO 150 I = 1, N D( I ) = ( WORK( DPOS+(I-1)*LDA ) ) 150 CONTINUE -* +* * Copy the off diagonal from A to E. Note that E is REAL thus only * the Real part is needed, the imaginary part should be zero. * @@ -544,10 +545,10 @@ ENDIF * HOUS( 1 ) = LHMIN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN * * End of SSYTRD_SB2ST * END - + diff --git a/lapack-netlib/SRC/ssytrd_sy2sb.f b/lapack-netlib/SRC/ssytrd_sy2sb.f index 2c92cd14a..4efc43630 100644 --- a/lapack-netlib/SRC/ssytrd_sy2sb.f +++ b/lapack-netlib/SRC/ssytrd_sy2sb.f @@ -158,7 +158,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYcomputational +*> \ingroup hetrd_he2hb * *> \par Further Details: * ===================== @@ -283,7 +283,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV2STAGE - EXTERNAL LSAME, ILAENV2STAGE + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV2STAGE, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -313,7 +314,7 @@ CALL XERBLA( 'SSYTRD_SY2SB', -INFO ) RETURN ELSE IF( LQUERY ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN END IF * @@ -506,7 +507,7 @@ END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN * * End of SSYTRD_SY2SB diff --git a/lapack-netlib/SRC/ssytrf.f b/lapack-netlib/SRC/ssytrf.f index 31e38e466..a788fbcf0 100644 --- a/lapack-netlib/SRC/ssytrf.f +++ b/lapack-netlib/SRC/ssytrf.f @@ -135,7 +135,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYcomputational +*> \ingroup hetrf * *> \par Further Details: * ===================== @@ -202,7 +202,8 @@ * .. 
External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLASYF, SSYTF2, XERBLA @@ -233,7 +234,7 @@ * NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, N*NB ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -352,7 +353,7 @@ END IF * 40 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SSYTRF diff --git a/lapack-netlib/SRC/ssytrf_aa.f b/lapack-netlib/SRC/ssytrf_aa.f index 4ba026fc8..d6408a978 100644 --- a/lapack-netlib/SRC/ssytrf_aa.f +++ b/lapack-netlib/SRC/ssytrf_aa.f @@ -125,7 +125,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYcomputational +*> \ingroup hetrf_aa * * ===================================================================== SUBROUTINE SSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) @@ -159,7 +159,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLASYF_AA, SGEMV, SSCAL, SCOPY, SSWAP, SGEMM, @@ -191,7 +192,7 @@ * IF( INFO.EQ.0 ) THEN LWKOPT = (NB+1)*N - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -457,7 +458,7 @@ END IF * 20 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SSYTRF_AA diff --git a/lapack-netlib/SRC/ssytrf_aa_2stage.f b/lapack-netlib/SRC/ssytrf_aa_2stage.f index 07357f2ab..abe6564c5 100644 --- a/lapack-netlib/SRC/ssytrf_aa_2stage.f +++ b/lapack-netlib/SRC/ssytrf_aa_2stage.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realSYcomputational +*> \ingroup hetrf_aa_2stage * * ===================================================================== SUBROUTINE SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, @@ -187,7 +187,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL XERBLA, SCOPY, SLACPY, @@ -230,7 +231,7 @@ TB( 1 ) = (3*NB+1)*N END IF IF( WQUERY ) THEN - WORK( 1 ) = N*NB + WORK( 1 ) = SROUNDUP_LWORK(N*NB) END IF END IF IF( TQUERY .OR. WQUERY ) THEN diff --git a/lapack-netlib/SRC/ssytrf_rk.f b/lapack-netlib/SRC/ssytrf_rk.f index 8e1ef460a..72830543c 100644 --- a/lapack-netlib/SRC/ssytrf_rk.f +++ b/lapack-netlib/SRC/ssytrf_rk.f @@ -229,7 +229,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup singleSYcomputational +*> \ingroup hetrf_rk * *> \par Further Details: * ===================== @@ -280,7 +280,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLASYF_RK, SSYTF2_RK, SSWAP, XERBLA @@ -311,7 +312,7 @@ * NB = ILAENV( 1, 'SSYTRF_RK', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, N*NB ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -487,7 +488,7 @@ * END IF * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SSYTRF_RK diff --git a/lapack-netlib/SRC/ssytrf_rook.f b/lapack-netlib/SRC/ssytrf_rook.f index 653289e2b..339a229e7 100644 --- a/lapack-netlib/SRC/ssytrf_rook.f +++ b/lapack-netlib/SRC/ssytrf_rook.f @@ -146,7 +146,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYcomputational +*> \ingroup hetrf_rook * *> \par Further Details: * ===================== @@ -228,7 +228,8 @@ * .. External Functions .. 
LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLASYF_ROOK, SSYTF2_ROOK, XERBLA @@ -259,7 +260,7 @@ * NB = ILAENV( 1, 'SSYTRF_ROOK', UPLO, N, -1, -1, -1 ) LWKOPT = MAX( 1, N*NB ) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) END IF * IF( INFO.NE.0 ) THEN @@ -382,7 +383,7 @@ END IF * 40 CONTINUE - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN * * End of SSYTRF_ROOK diff --git a/lapack-netlib/SRC/ssytri_3.f b/lapack-netlib/SRC/ssytri_3.f index 58d5df92a..bca01105d 100644 --- a/lapack-netlib/SRC/ssytri_3.f +++ b/lapack-netlib/SRC/ssytri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup singleSYcomputational +*> \ingroup hetri_3 * *> \par Contributors: * ================== @@ -190,7 +190,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SSYTRI_3X, XERBLA @@ -225,7 +226,7 @@ CALL XERBLA( 'SSYTRI_3', -INFO ) RETURN ELSE IF( LQUERY ) THEN - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * @@ -236,7 +237,7 @@ * CALL SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * diff --git a/lapack-netlib/SRC/ssytrs_aa.f b/lapack-netlib/SRC/ssytrs_aa.f index 3cfa2a206..12fca0c71 100644 --- a/lapack-netlib/SRC/ssytrs_aa.f +++ b/lapack-netlib/SRC/ssytrs_aa.f @@ -123,7 +123,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYcomputational +*> \ingroup hetrs_aa * * ===================================================================== SUBROUTINE SSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, @@ -155,7 +155,8 @@ * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + REAL SROUNDUP_LWORK + EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. 
External Subroutines .. EXTERNAL SGTSV, SSWAP, SLACPY, STRSM, XERBLA @@ -186,7 +187,7 @@ RETURN ELSE IF( LQUERY ) THEN LWKOPT = (3*N-2) - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) RETURN END IF * diff --git a/lapack-netlib/SRC/stgexc.f b/lapack-netlib/SRC/stgexc.f index d1ad79936..d68eb5fc7 100644 --- a/lapack-netlib/SRC/stgexc.f +++ b/lapack-netlib/SRC/stgexc.f @@ -195,7 +195,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup tgexc * *> \par Contributors: * ================== @@ -241,6 +241,10 @@ LOGICAL LQUERY INTEGER HERE, LWMIN, NBF, NBL, NBNEXT * .. +* .. External Functions .. + REAL SROUNDUP_LWORK + EXTERNAL SROUNDUP_LWORK +* .. * .. External Subroutines .. EXTERNAL STGEX2, XERBLA * .. @@ -533,7 +537,7 @@ $ GO TO 20 END IF ILST = HERE - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN * * End of STGEXC diff --git a/lapack-netlib/SRC/stgsen.f b/lapack-netlib/SRC/stgsen.f index f1103d740..ac9c4677a 100644 --- a/lapack-netlib/SRC/stgsen.f +++ b/lapack-netlib/SRC/stgsen.f @@ -304,7 +304,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup tgsen * *> \par Further Details: * ===================== @@ -490,8 +490,8 @@ $ XERBLA * .. * .. External Functions .. - REAL SLAMCH - EXTERNAL SLAMCH + REAL SLAMCH, SROUNDUP_LWORK + EXTERNAL SLAMCH, SROUNDUP_LWORK * .. * .. Intrinsic Functions .. INTRINSIC MAX, SIGN, SQRT @@ -571,7 +571,7 @@ LIWMIN = 1 END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -852,7 +852,7 @@ END IF 70 CONTINUE * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/stgsna.f b/lapack-netlib/SRC/stgsna.f index 430f3c4b7..e8cb28b95 100644 --- a/lapack-netlib/SRC/stgsna.f +++ b/lapack-netlib/SRC/stgsna.f @@ -230,7 +230,7 @@ *> \author Univ. 
of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup tgsna * *> \par Further Details: * ===================== @@ -416,8 +416,9 @@ * .. * .. External Functions .. LOGICAL LSAME - REAL SDOT, SLAMCH, SLAPY2, SNRM2 - EXTERNAL LSAME, SDOT, SLAMCH, SLAPY2, SNRM2 + REAL SDOT, SLAMCH, SLAPY2, SNRM2, SROUNDUP_LWORK + EXTERNAL LSAME, SDOT, SLAMCH, SLAPY2, SNRM2, + $ SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEMV, SLACPY, SLAG2, STGEXC, STGSYL, XERBLA @@ -490,7 +491,7 @@ ELSE LWMIN = N END IF - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * IF( MM.LT.M ) THEN INFO = -15 @@ -689,7 +690,7 @@ $ KS = KS + 1 * 20 CONTINUE - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) RETURN * * End of STGSNA diff --git a/lapack-netlib/SRC/stgsyl.f b/lapack-netlib/SRC/stgsyl.f index 733c8ab9c..07a82e380 100644 --- a/lapack-netlib/SRC/stgsyl.f +++ b/lapack-netlib/SRC/stgsyl.f @@ -261,7 +261,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realSYcomputational +*> \ingroup tgsyl * *> \par Contributors: * ================== @@ -331,7 +331,8 @@ * .. External Functions .. LOGICAL LSAME INTEGER ILAENV - EXTERNAL LSAME, ILAENV + REAL SROUNDUP_LWORK + EXTERNAL LSAME, ILAENV, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SGEMM, SLACPY, SLASET, SSCAL, STGSY2, XERBLA @@ -384,7 +385,7 @@ ELSE LWMIN = 1 END IF - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -20 @@ -670,7 +671,7 @@ * END IF * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) * RETURN * diff --git a/lapack-netlib/SRC/strsen.f b/lapack-netlib/SRC/strsen.f index c0f75fb12..f7a05ae8b 100644 --- a/lapack-netlib/SRC/strsen.f +++ b/lapack-netlib/SRC/strsen.f @@ -231,7 +231,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup trsen * *> \par Further Details: * ===================== @@ -346,8 +346,8 @@ * .. * .. 
External Functions .. LOGICAL LSAME - REAL SLANGE - EXTERNAL LSAME, SLANGE + REAL SLANGE, SROUNDUP_LWORK + EXTERNAL LSAME, SLANGE, SROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL SLACN2, SLACPY, STREXC, STRSYL, XERBLA @@ -427,7 +427,7 @@ END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN END IF * @@ -558,7 +558,7 @@ END IF 60 CONTINUE * - WORK( 1 ) = LWMIN + WORK( 1 ) = SROUNDUP_LWORK(LWMIN) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/stzrzf.f b/lapack-netlib/SRC/stzrzf.f index e8cbb56b6..516bea5d4 100644 --- a/lapack-netlib/SRC/stzrzf.f +++ b/lapack-netlib/SRC/stzrzf.f @@ -116,7 +116,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup tzrzf * *> \par Contributors: * ================== @@ -179,7 +179,8 @@ * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + REAL SROUNDUP_LWORK + EXTERNAL ILAENV, SROUNDUP_LWORK * .. * .. Executable Statements .. * @@ -207,7 +208,7 @@ LWKOPT = M*NB LWKMIN = MAX( 1, M ) END IF - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * IF( LWORK.LT.LWKMIN .AND. 
.NOT.LQUERY ) THEN INFO = -7 @@ -301,7 +302,7 @@ IF( MU.GT.0 ) $ CALL SLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) * - WORK( 1 ) = LWKOPT + WORK( 1 ) = SROUNDUP_LWORK(LWKOPT) * RETURN * From 3d38da2bc401d56162a528547430b0b69664ea38 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 16:50:52 +0100 Subject: [PATCH 122/125] Make vector orthogonalization more reliable (Reference-LAPACK PR 930) --- lapack-netlib/SRC/clarfgp.f | 7 +++--- lapack-netlib/SRC/cunbdb5.f | 50 ++++++++++++++++++++++++++----------- lapack-netlib/SRC/cunbdb6.f | 21 ++++++++-------- lapack-netlib/SRC/dlarfgp.f | 9 ++++--- lapack-netlib/SRC/dorbdb5.f | 50 ++++++++++++++++++++++++++----------- lapack-netlib/SRC/dorbdb6.f | 21 ++++++++-------- lapack-netlib/SRC/slarfgp.f | 7 +++--- lapack-netlib/SRC/sorbdb5.f | 50 ++++++++++++++++++++++++++----------- lapack-netlib/SRC/sorbdb6.f | 21 ++++++++-------- lapack-netlib/SRC/zlarfgp.f | 7 +++--- lapack-netlib/SRC/zunbdb5.f | 50 ++++++++++++++++++++++++++----------- lapack-netlib/SRC/zunbdb6.f | 21 ++++++++-------- 12 files changed, 201 insertions(+), 113 deletions(-) diff --git a/lapack-netlib/SRC/clarfgp.f b/lapack-netlib/SRC/clarfgp.f index b584484c7..47b5e47b0 100644 --- a/lapack-netlib/SRC/clarfgp.f +++ b/lapack-netlib/SRC/clarfgp.f @@ -97,7 +97,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERauxiliary +*> \ingroup larfgp * * ===================================================================== SUBROUTINE CLARFGP( N, ALPHA, X, INCX, TAU ) @@ -122,7 +122,7 @@ * .. * .. Local Scalars .. INTEGER J, KNT - REAL ALPHI, ALPHR, BETA, BIGNUM, SMLNUM, XNORM + REAL ALPHI, ALPHR, BETA, BIGNUM, EPS, SMLNUM, XNORM COMPLEX SAVEALPHA * .. * .. External Functions .. 
@@ -143,11 +143,12 @@ RETURN END IF * + EPS = SLAMCH( 'Precision' ) XNORM = SCNRM2( N-1, X, INCX ) ALPHR = REAL( ALPHA ) ALPHI = AIMAG( ALPHA ) * - IF( XNORM.EQ.ZERO ) THEN + IF( XNORM.LE.EPS*ABS(ALPHA) ) THEN * * H = [1-alpha/abs(alpha) 0; 0 I], sign chosen so ALPHA >= 0. * diff --git a/lapack-netlib/SRC/cunbdb5.f b/lapack-netlib/SRC/cunbdb5.f index d2ff4e700..22513cf8b 100644 --- a/lapack-netlib/SRC/cunbdb5.f +++ b/lapack-netlib/SRC/cunbdb5.f @@ -148,7 +148,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unbdb5 * * ===================================================================== SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, @@ -169,18 +169,21 @@ * ===================================================================== * * .. Parameters .. + REAL REALZERO + PARAMETER ( REALZERO = 0.0E0 ) COMPLEX ONE, ZERO PARAMETER ( ONE = (1.0E0,0.0E0), ZERO = (0.0E0,0.0E0) ) * .. * .. Local Scalars .. INTEGER CHILDINFO, I, J + REAL EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL CUNBDB6, XERBLA + EXTERNAL CLASSQ, CUNBDB6, CSCAL, XERBLA * .. * .. External Functions .. - REAL SCNRM2 - EXTERNAL SCNRM2 + REAL SLAMCH, SCNRM2 + EXTERNAL SLAMCH, SCNRM2 * .. * .. Intrinsic Function .. INTRINSIC MAX @@ -213,16 +216,33 @@ RETURN END IF * -* Project X onto the orthogonal complement of Q + EPS = SLAMCH( 'Precision' ) * - CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, - $ WORK, LWORK, CHILDINFO ) +* Project X onto the orthogonal complement of Q if X is nonzero * -* If the projection is nonzero, then return + SCL = REALZERO + SSQ = REALZERO + CALL CLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL CLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) * - IF( SCNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN - RETURN + IF( NORM .GT. N * EPS ) THEN +* Scale vector to unit norm to avoid problems in the caller code. 
+* Computing the reciprocal is undesirable but +* * xLASCL cannot be used because of the vector increments and +* * the round-off error has a negligible impact on +* orthogonalization. + CALL CSCAL( M1, ONE / NORM, X1, INCX1 ) + CALL CSCAL( M2, ONE / NORM, X2, INCX2 ) + CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( SCNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN + RETURN + END IF END IF * * Project each standard basis vector e_1,...,e_M1 in turn, stopping @@ -238,8 +258,8 @@ END DO CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( SCNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SCNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO @@ -257,8 +277,8 @@ X2(I) = ONE CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( SCNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SCNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO diff --git a/lapack-netlib/SRC/cunbdb6.f b/lapack-netlib/SRC/cunbdb6.f index cd14d9295..566fd76b7 100644 --- a/lapack-netlib/SRC/cunbdb6.f +++ b/lapack-netlib/SRC/cunbdb6.f @@ -41,9 +41,8 @@ *> with respect to the columns of *> Q = [ Q1 ] . *> [ Q2 ] -*> The Euclidean norm of X must be one and the columns of Q must be -*> orthonormal. The orthogonalized vector will be zero if and only if it -*> lies entirely in the range of Q. +*> The columns of Q must be orthonormal. The orthogonalized vector will +*> be zero if and only if it lies entirely in the range of Q. *> *> The projection is computed with at most two iterations of the *> classical Gram-Schmidt algorithm, see @@ -174,7 +173,7 @@ * * .. Parameters .. 
REAL ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.1E0, REALONE = 1.0E0, + PARAMETER ( ALPHA = 0.83E0, REALONE = 1.0E0, $ REALZERO = 0.0E0 ) COMPLEX NEGONE, ONE, ZERO PARAMETER ( NEGONE = (-1.0E0,0.0E0), ONE = (1.0E0,0.0E0), @@ -223,14 +222,16 @@ * EPS = SLAMCH( 'Precision' ) * +* Compute the Euclidean norm of X +* + SCL = REALZERO + SSQ = REALZERO + CALL CLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL CLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) +* * First, project X onto the orthogonal complement of Q's column * space -* -* Christoph Conrads: In debugging mode the norm should be computed -* and an assertion added comparing the norm with one. Alas, Fortran -* never made it into 1989 when assert() was introduced into the C -* programming language. - NORM = REALONE * IF( M1 .EQ. 0 ) THEN DO I = 1, N diff --git a/lapack-netlib/SRC/dlarfgp.f b/lapack-netlib/SRC/dlarfgp.f index 69845056d..a8cf1b31e 100644 --- a/lapack-netlib/SRC/dlarfgp.f +++ b/lapack-netlib/SRC/dlarfgp.f @@ -97,7 +97,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERauxiliary +*> \ingroup larfgp * * ===================================================================== SUBROUTINE DLARFGP( N, ALPHA, X, INCX, TAU ) @@ -122,7 +122,7 @@ * .. * .. Local Scalars .. INTEGER J, KNT - DOUBLE PRECISION BETA, BIGNUM, SAVEALPHA, SMLNUM, XNORM + DOUBLE PRECISION BETA, BIGNUM, EPS, SAVEALPHA, SMLNUM, XNORM * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 @@ -141,11 +141,12 @@ RETURN END IF * + EPS = DLAMCH( 'Precision' ) XNORM = DNRM2( N-1, X, INCX ) * - IF( XNORM.EQ.ZERO ) THEN + IF( XNORM.LE.EPS*ABS(ALPHA) ) THEN * -* H = [+/-1, 0; I], sign chosen so ALPHA >= 0 +* H = [+/-1, 0; I], sign chosen so ALPHA >= 0. 
* IF( ALPHA.GE.ZERO ) THEN * When TAU.eq.ZERO, the vector is special-cased to be diff --git a/lapack-netlib/SRC/dorbdb5.f b/lapack-netlib/SRC/dorbdb5.f index 6e057a05f..cbd58ae54 100644 --- a/lapack-netlib/SRC/dorbdb5.f +++ b/lapack-netlib/SRC/dorbdb5.f @@ -148,7 +148,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unbdb5 * * ===================================================================== SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, @@ -169,18 +169,21 @@ * ===================================================================== * * .. Parameters .. + DOUBLE PRECISION REALZERO + PARAMETER ( REALZERO = 0.0D0 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) * .. * .. Local Scalars .. INTEGER CHILDINFO, I, J + DOUBLE PRECISION EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL DORBDB6, XERBLA + EXTERNAL DLASSQ, DORBDB6, DSCAL, XERBLA * .. * .. External Functions .. - DOUBLE PRECISION DNRM2 - EXTERNAL DNRM2 + DOUBLE PRECISION DLAMCH, DNRM2 + EXTERNAL DLAMCH, DNRM2 * .. * .. Intrinsic Function .. INTRINSIC MAX @@ -213,16 +216,33 @@ RETURN END IF * -* Project X onto the orthogonal complement of Q + EPS = DLAMCH( 'Precision' ) * - CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, - $ WORK, LWORK, CHILDINFO ) +* Project X onto the orthogonal complement of Q if X is nonzero * -* If the projection is nonzero, then return + SCL = REALZERO + SSQ = REALZERO + CALL DLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL DLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) * - IF( DNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN - RETURN + IF( NORM .GT. N * EPS ) THEN +* Scale vector to unit norm to avoid problems in the caller code. +* Computing the reciprocal is undesirable but +* * xLASCL cannot be used because of the vector increments and +* * the round-off error has a negligible impact on +* orthogonalization. 
+ CALL DSCAL( M1, ONE / NORM, X1, INCX1 ) + CALL DSCAL( M2, ONE / NORM, X2, INCX2 ) + CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( DNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN + RETURN + END IF END IF * * Project each standard basis vector e_1,...,e_M1 in turn, stopping @@ -238,8 +258,8 @@ END DO CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( DNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO @@ -257,8 +277,8 @@ X2(I) = ONE CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( DNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO diff --git a/lapack-netlib/SRC/dorbdb6.f b/lapack-netlib/SRC/dorbdb6.f index 142887684..3e356d001 100644 --- a/lapack-netlib/SRC/dorbdb6.f +++ b/lapack-netlib/SRC/dorbdb6.f @@ -41,9 +41,8 @@ *> with respect to the columns of *> Q = [ Q1 ] . *> [ Q2 ] -*> The Euclidean norm of X must be one and the columns of Q must be -*> orthonormal. The orthogonalized vector will be zero if and only if it -*> lies entirely in the range of Q. +*> The columns of Q must be orthonormal. The orthogonalized vector will +*> be zero if and only if it lies entirely in the range of Q. *> *> The projection is computed with at most two iterations of the *> classical Gram-Schmidt algorithm, see @@ -174,7 +173,7 @@ * * .. Parameters .. 
DOUBLE PRECISION ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.1D0, REALONE = 1.0D0, + PARAMETER ( ALPHA = 0.83D0, REALONE = 1.0D0, $ REALZERO = 0.0D0 ) DOUBLE PRECISION NEGONE, ONE, ZERO PARAMETER ( NEGONE = -1.0D0, ONE = 1.0D0, ZERO = 0.0D0 ) @@ -222,14 +221,16 @@ * EPS = DLAMCH( 'Precision' ) * +* Compute the Euclidean norm of X +* + SCL = REALZERO + SSQ = REALZERO + CALL DLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL DLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) +* * First, project X onto the orthogonal complement of Q's column * space -* -* Christoph Conrads: In debugging mode the norm should be computed -* and an assertion added comparing the norm with one. Alas, Fortran -* never made it into 1989 when assert() was introduced into the C -* programming language. - NORM = REALONE * IF( M1 .EQ. 0 ) THEN DO I = 1, N diff --git a/lapack-netlib/SRC/slarfgp.f b/lapack-netlib/SRC/slarfgp.f index df42980c4..c28274c2c 100644 --- a/lapack-netlib/SRC/slarfgp.f +++ b/lapack-netlib/SRC/slarfgp.f @@ -97,7 +97,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERauxiliary +*> \ingroup larfgp * * ===================================================================== SUBROUTINE SLARFGP( N, ALPHA, X, INCX, TAU ) @@ -122,7 +122,7 @@ * .. * .. Local Scalars .. INTEGER J, KNT - REAL BETA, BIGNUM, SAVEALPHA, SMLNUM, XNORM + REAL BETA, BIGNUM, EPS, SAVEALPHA, SMLNUM, XNORM * .. * .. External Functions .. REAL SLAMCH, SLAPY2, SNRM2 @@ -141,9 +141,10 @@ RETURN END IF * + EPS = SLAMCH( 'Precision' ) XNORM = SNRM2( N-1, X, INCX ) * - IF( XNORM.EQ.ZERO ) THEN + IF( XNORM.LE.EPS*ABS(ALPHA) ) THEN * * H = [+/-1, 0; I], sign chosen so ALPHA >= 0. * diff --git a/lapack-netlib/SRC/sorbdb5.f b/lapack-netlib/SRC/sorbdb5.f index 8c67aedfb..8fb88876f 100644 --- a/lapack-netlib/SRC/sorbdb5.f +++ b/lapack-netlib/SRC/sorbdb5.f @@ -148,7 +148,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup realOTHERcomputational +*> \ingroup unbdb5 * * ===================================================================== SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, @@ -169,18 +169,21 @@ * ===================================================================== * * .. Parameters .. + REAL REALZERO + PARAMETER ( REALZERO = 0.0E0 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) * .. * .. Local Scalars .. INTEGER CHILDINFO, I, J + REAL EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL SORBDB6, XERBLA + EXTERNAL SLASSQ, SORBDB6, SSCAL, XERBLA * .. * .. External Functions .. - REAL SNRM2 - EXTERNAL SNRM2 + REAL SLAMCH, SNRM2 + EXTERNAL SLAMCH, SNRM2 * .. * .. Intrinsic Function .. INTRINSIC MAX @@ -213,16 +216,33 @@ RETURN END IF * -* Project X onto the orthogonal complement of Q + EPS = SLAMCH( 'Precision' ) * - CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, - $ WORK, LWORK, CHILDINFO ) +* Project X onto the orthogonal complement of Q if X is nonzero * -* If the projection is nonzero, then return + SCL = REALZERO + SSQ = REALZERO + CALL SLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL SLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) * - IF( SNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN - RETURN + IF( NORM .GT. N * EPS ) THEN +* Scale vector to unit norm to avoid problems in the caller code. +* Computing the reciprocal is undesirable but +* * xLASCL cannot be used because of the vector increments and +* * the round-off error has a negligible impact on +* orthogonalization. + CALL SSCAL( M1, ONE / NORM, X1, INCX1 ) + CALL SSCAL( M2, ONE / NORM, X2, INCX2 ) + CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( SNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. 
REALZERO ) THEN + RETURN + END IF END IF * * Project each standard basis vector e_1,...,e_M1 in turn, stopping @@ -238,8 +258,8 @@ END DO CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( SNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO @@ -257,8 +277,8 @@ X2(I) = ONE CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( SNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO diff --git a/lapack-netlib/SRC/sorbdb6.f b/lapack-netlib/SRC/sorbdb6.f index d320c9e46..eac177722 100644 --- a/lapack-netlib/SRC/sorbdb6.f +++ b/lapack-netlib/SRC/sorbdb6.f @@ -41,9 +41,8 @@ *> with respect to the columns of *> Q = [ Q1 ] . *> [ Q2 ] -*> The Euclidean norm of X must be one and the columns of Q must be -*> orthonormal. The orthogonalized vector will be zero if and only if it -*> lies entirely in the range of Q. +*> The columns of Q must be orthonormal. The orthogonalized vector will +*> be zero if and only if it lies entirely in the range of Q. *> *> The projection is computed with at most two iterations of the *> classical Gram-Schmidt algorithm, see @@ -174,7 +173,7 @@ * * .. Parameters .. 
REAL ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.1E0, REALONE = 1.0E0, + PARAMETER ( ALPHA = 0.83E0, REALONE = 1.0E0, $ REALZERO = 0.0E0 ) REAL NEGONE, ONE, ZERO PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0, ZERO = 0.0E0 ) @@ -222,14 +221,16 @@ * EPS = SLAMCH( 'Precision' ) * +* Compute the Euclidean norm of X +* + SCL = REALZERO + SSQ = REALZERO + CALL SLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL SLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) +* * First, project X onto the orthogonal complement of Q's column * space -* -* Christoph Conrads: In debugging mode the norm should be computed -* and an assertion added comparing the norm with one. Alas, Fortran -* never made it into 1989 when assert() was introduced into the C -* programming language. - NORM = REALONE * IF( M1 .EQ. 0 ) THEN DO I = 1, N diff --git a/lapack-netlib/SRC/zlarfgp.f b/lapack-netlib/SRC/zlarfgp.f index 77eba8e86..6c9efb04c 100644 --- a/lapack-netlib/SRC/zlarfgp.f +++ b/lapack-netlib/SRC/zlarfgp.f @@ -97,7 +97,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERauxiliary +*> \ingroup larfgp * * ===================================================================== SUBROUTINE ZLARFGP( N, ALPHA, X, INCX, TAU ) @@ -122,7 +122,7 @@ * .. * .. Local Scalars .. INTEGER J, KNT - DOUBLE PRECISION ALPHI, ALPHR, BETA, BIGNUM, SMLNUM, XNORM + DOUBLE PRECISION ALPHI, ALPHR, BETA, BIGNUM, EPS, SMLNUM, XNORM COMPLEX*16 SAVEALPHA * .. * .. External Functions .. @@ -143,11 +143,12 @@ RETURN END IF * + EPS = DLAMCH( 'Precision' ) XNORM = DZNRM2( N-1, X, INCX ) ALPHR = DBLE( ALPHA ) ALPHI = DIMAG( ALPHA ) * - IF( XNORM.EQ.ZERO ) THEN + IF( XNORM.LE.EPS*ABS(ALPHA) ) THEN * * H = [1-alpha/abs(alpha) 0; 0 I], sign chosen so ALPHA >= 0. * diff --git a/lapack-netlib/SRC/zunbdb5.f b/lapack-netlib/SRC/zunbdb5.f index 23174fe50..c451ae921 100644 --- a/lapack-netlib/SRC/zunbdb5.f +++ b/lapack-netlib/SRC/zunbdb5.f @@ -148,7 +148,7 @@ *> \author Univ. 
of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unbdb5 * * ===================================================================== SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, @@ -169,18 +169,21 @@ * ===================================================================== * * .. Parameters .. + DOUBLE PRECISION REALZERO + PARAMETER ( REALZERO = 0.0D0 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = (1.0D0,0.0D0), ZERO = (0.0D0,0.0D0) ) * .. * .. Local Scalars .. INTEGER CHILDINFO, I, J + DOUBLE PRECISION EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL ZUNBDB6, XERBLA + EXTERNAL ZLASSQ, ZUNBDB6, ZSCAL, XERBLA * .. * .. External Functions .. - DOUBLE PRECISION DZNRM2 - EXTERNAL DZNRM2 + DOUBLE PRECISION DLAMCH, DZNRM2 + EXTERNAL DLAMCH, DZNRM2 * .. * .. Intrinsic Function .. INTRINSIC MAX @@ -213,16 +216,33 @@ RETURN END IF * -* Project X onto the orthogonal complement of Q + EPS = DLAMCH( 'Precision' ) * - CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, - $ WORK, LWORK, CHILDINFO ) +* Project X onto the orthogonal complement of Q if X is nonzero * -* If the projection is nonzero, then return + SCL = REALZERO + SSQ = REALZERO + CALL ZLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL ZLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) * - IF( DZNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN - RETURN + IF( NORM .GT. N * EPS ) THEN +* Scale vector to unit norm to avoid problems in the caller code. +* Computing the reciprocal is undesirable but +* * xLASCL cannot be used because of the vector increments and +* * the round-off error has a negligible impact on +* orthogonalization. + CALL ZSCAL( M1, ONE / NORM, X1, INCX1 ) + CALL ZSCAL( M2, ONE / NORM, X2, INCX2 ) + CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( DZNRM2(M1,X1,INCX1) .NE. 
REALZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN + RETURN + END IF END IF * * Project each standard basis vector e_1,...,e_M1 in turn, stopping @@ -238,8 +258,8 @@ END DO CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( DZNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DZNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO @@ -257,8 +277,8 @@ X2(I) = ONE CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( DZNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DZNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO diff --git a/lapack-netlib/SRC/zunbdb6.f b/lapack-netlib/SRC/zunbdb6.f index ac7fa4be3..ddc9dfc61 100644 --- a/lapack-netlib/SRC/zunbdb6.f +++ b/lapack-netlib/SRC/zunbdb6.f @@ -41,9 +41,8 @@ *> with respect to the columns of *> Q = [ Q1 ] . *> [ Q2 ] -*> The Euclidean norm of X must be one and the columns of Q must be -*> orthonormal. The orthogonalized vector will be zero if and only if it -*> lies entirely in the range of Q. +*> The columns of Q must be orthonormal. The orthogonalized vector will +*> be zero if and only if it lies entirely in the range of Q. *> *> The projection is computed with at most two iterations of the *> classical Gram-Schmidt algorithm, see @@ -174,7 +173,7 @@ * * .. Parameters .. 
DOUBLE PRECISION ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.1D0, REALONE = 1.0D0, + PARAMETER ( ALPHA = 0.83D0, REALONE = 1.0D0, $ REALZERO = 0.0D0 ) COMPLEX*16 NEGONE, ONE, ZERO PARAMETER ( NEGONE = (-1.0D0,0.0D0), ONE = (1.0D0,0.0D0), @@ -223,14 +222,16 @@ * EPS = DLAMCH( 'Precision' ) * +* Compute the Euclidean norm of X +* + SCL = REALZERO + SSQ = REALZERO + CALL ZLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL ZLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) +* * First, project X onto the orthogonal complement of Q's column * space -* -* Christoph Conrads: In debugging mode the norm should be computed -* and an assertion added comparing the norm with one. Alas, Fortran -* never made it into 1989 when assert() was introduced into the C -* programming language. - NORM = REALONE * IF( M1 .EQ. 0 ) THEN DO I = 1, N From 9b5f8eb33a263afafb26746a9d188018bba3b3b2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 19:35:53 +0100 Subject: [PATCH 123/125] Fix empty function prototypes --- driver/others/blas_server_omp.c | 2 +- driver/others/dynamic_zarch.c | 2 +- driver/others/memory_qalloc.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index f7008fb08..213531057 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -126,7 +126,7 @@ void openblas_set_num_threads(int num_threads) { int blas_thread_init(void){ #if defined(__FreeBSD__) && defined(__clang__) -extern int openblas_omp_num_threads_env(); +extern int openblas_omp_num_threads_env(void); if(blas_omp_number_max <= 0) blas_omp_number_max= openblas_omp_num_threads_env(); diff --git a/driver/others/dynamic_zarch.c b/driver/others/dynamic_zarch.c index 5b45aae2f..dd26c8e80 100644 --- a/driver/others/dynamic_zarch.c +++ b/driver/others/dynamic_zarch.c @@ -13,7 +13,7 @@ extern gotoblas_t gotoblas_Z14; #define NUM_CORETYPES 4 -extern int openblas_verbose(); +extern int 
openblas_verbose(void); extern void openblas_warning(int verbose, const char* msg); char* gotoblas_corename(void) { diff --git a/driver/others/memory_qalloc.c b/driver/others/memory_qalloc.c index 6174d9b75..a2593e01f 100644 --- a/driver/others/memory_qalloc.c +++ b/driver/others/memory_qalloc.c @@ -288,7 +288,7 @@ int goto_get_num_procs (void) { return blas_cpu_number; } -void openblas_fork_handler() +void openblas_fork_handler(void) { // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is // built with "make USE_OPENMP=0". @@ -305,9 +305,9 @@ void openblas_fork_handler() #endif } -extern int openblas_num_threads_env(); -extern int openblas_goto_num_threads_env(); -extern int openblas_omp_num_threads_env(); +extern int openblas_num_threads_env(void); +extern int openblas_goto_num_threads_env(void); +extern int openblas_omp_num_threads_env(void); int blas_get_cpu_number(void){ #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) From c245c12dc232f8474e97b9ceda7a6e276f5c73b0 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 22:17:39 +0100 Subject: [PATCH 124/125] Update Changelog for 0.3.25 (#4314) * Update Changelog.txt for 0.3.25 --- Changelog.txt | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index 3937ef08c..e0fe0ca5a 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,50 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.3.25 + 12-Nov-2023 + +general: +- improved the error message shown on exceeding the maximum thread count +- improved the code to add supplementary thread buffers in case of overflow +- fixed a potential division by zero in ?ROTG +- improved the ?MATCOPY functions to accept zero-sized rows or columns +- corrected empty prototypes in 
function declarations +- cleaned up unused declarations in the f2c-converted versions of the LAPACK sources +- fixed compilation with the Cray CCE Compiler suite +- improved link line rewriting to avoid mixed libgomp/libomp builds with clang&gfortran +- worked around OPENMP builds with LLVM14's libomp hanging on FreeBSD +- improved the Makefiles to require less option duplication on "make install" +- imported the following changes from the upcoming release 3.12 of Reference-LAPACK + - deprecate utility functions ?GELQS and ?GEQRS (LAPACK PR 900) + - apply rounding up to workspace calculations done in floating point (LAPACK PR 904) + - avoid overflow in STGEX2/DTGEX2 (LAPACK PR 907) + - fix accumulation in ?LASSQ (LAPACK PR 909) + - fix handling of NaN values in ?GECON (LAPACK PR 926) + - avoid overflow in CBDSQR/ZBDSQR (LAPACK PR 927) + - fix poor vector orthogonalizations in ?ORBDB5/?UNBDB5 (LAPACK PR 928 & 930) + +x86-64: +- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4 cpus +- fixed capability-based fallback selection for unknown cpus in DYNAMIC_ARCH +- added AVX512 optimizations for ?ASUM on Sapphire Rapids and Cooper Lake + +ARM64: +- fixed building on Apple with homebrew gcc +- fixed building with XCODE 15 +- fixed building on A64FX and Cortex A710/X1/X2 +- increased the default buffer size for recent ARM server cpus + +POWER: +- fixed building with the IBM xlf 16.1.1 compiler +- fixed building with IBM XL C +- added support for DYNAMIC_ARCH builds with clang +- fixed union declaration in the BFLOAT16 test case +- enable optimizations for the AIX assembler on POWER10 + +LOONGARCH64: +- added an optimized SGEMV kernel +- added an optimized DTRSM kernel + ==================================================================== Version 0.3.24 03-Sep-2023 From e1f529d0247a4bc1dd9d1f86f4be10c2cfbd2990 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 12 Nov 2023 22:37:11 +0100 Subject: [PATCH 125/125] Add OSX hw.cpufamily value for Apple M3 --- 
cpuid_arm64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index e586f9a3c..8c5d04c14 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -270,6 +270,7 @@ int detect(void) sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0); if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1 if (value64 == 3660830781) return CPU_VORTEX; //A15/M2 + if (value64 == 2271604202) return CPU_VORTEX; //A16/M3 #endif return CPU_ARMV8; #endif