Merge develop into release-0.3.0 for 0.3.25tags/v0.3.25
| @@ -148,6 +148,16 @@ FreeBSD_task: | |||
| - ls -l /usr/local/lib | |||
| - gmake CC=gcc INTERFACE64=1 | |||
| FreeBSD_task: | |||
| name: FreeBSD-clang-openmp | |||
| freebsd_instance: | |||
| image_family: freebsd-13-2 | |||
| install_script: | |||
| - pkg update -f && pkg upgrade -y && pkg install -y gmake gcc | |||
| - ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so | |||
| compile_script: | |||
| - gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1 | |||
| #task: | |||
| # name: Windows/LLVM16 --- too slow --- | |||
| # windows_container: | |||
| @@ -0,0 +1,16 @@ | |||
| # Self-Hosted Github Action Runners on AWS via Cirun.io | |||
| # Reference: https://docs.cirun.io/reference/yaml | |||
| runners: | |||
| - name: "aws-runner-graviton" | |||
| # Cloud Provider: AWS | |||
| cloud: "aws" | |||
| region: "us-east-1" | |||
| # Cheapest VM on AWS | |||
| instance_type: "c7g.large" | |||
| # Ubuntu-22.04, ami image | |||
| machine_image: "ami-0a0c8eebcdd6dcbd0" | |||
| preemptible: false | |||
| # Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml | |||
| # So that this runner is created for running the workflow | |||
| labels: | |||
| - "cirun-aws-runner-graviton" | |||
| @@ -0,0 +1,139 @@ | |||
| name: arm64 graviton cirun | |||
| on: | |||
| push: | |||
| branches: | |||
| - develop | |||
| - release-** | |||
| pull_request: | |||
| branches: | |||
| - develop | |||
| - release-** | |||
| concurrency: | |||
| group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
| cancel-in-progress: true | |||
| permissions: | |||
| contents: read # to fetch code (actions/checkout) | |||
| jobs: | |||
| build: | |||
| if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
| runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}" | |||
| strategy: | |||
| fail-fast: false | |||
| matrix: | |||
| fortran: [gfortran] | |||
| build: [cmake, make] | |||
| steps: | |||
| - name: Checkout repository | |||
| uses: actions/checkout@v3 | |||
| - name: Print system information | |||
| run: | | |||
| if [ "$RUNNER_OS" == "Linux" ]; then | |||
| cat /proc/cpuinfo | |||
| else | |||
| echo "::error::$RUNNER_OS not supported" | |||
| exit 1 | |||
| fi | |||
| - name: Install Dependencies | |||
| run: | | |||
| if [ "$RUNNER_OS" == "Linux" ]; then | |||
| sudo apt update | |||
| sudo apt-get install -y gfortran cmake ccache libtinfo5 | |||
| else | |||
| echo "::error::$RUNNER_OS not supported" | |||
| exit 1 | |||
| fi | |||
| - name: Compilation cache | |||
| uses: actions/cache@v3 | |||
| with: | |||
| path: ~/.ccache | |||
| # We include the commit sha in the cache key, as new cache entries are | |||
| # only created if there is no existing entry for the key yet. | |||
| # GNU make and cmake call the compilers differently. It looks like | |||
| # that causes the cache to mismatch. Keep the ccache for both build | |||
| # tools separate to avoid polluting each other. | |||
| key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} | |||
| # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. | |||
| restore-keys: | | |||
| ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }} | |||
| ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }} | |||
| ccache-${{ runner.os }}-${{ matrix.build }} | |||
| - name: Configure ccache | |||
| run: | | |||
| if [ "${{ matrix.build }}" = "make" ]; then | |||
| # Add ccache to path | |||
| if [ "$RUNNER_OS" = "Linux" ]; then | |||
| echo "/usr/lib/ccache" >> $GITHUB_PATH | |||
| else | |||
| echo "::error::$RUNNER_OS not supported" | |||
| exit 1 | |||
| fi | |||
| fi | |||
| # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). | |||
| test -d ~/.ccache || mkdir -p ~/.ccache | |||
| echo "max_size = 300M" > ~/.ccache/ccache.conf | |||
| echo "compression = true" >> ~/.ccache/ccache.conf | |||
| ccache -s | |||
| - name: Build OpenBLAS | |||
| run: | | |||
| case "${{ matrix.build }}" in | |||
| "make") | |||
| make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" | |||
| ;; | |||
| "cmake") | |||
| mkdir build && cd build | |||
| cmake -DDYNAMIC_ARCH=1 \ | |||
| -DNOFORTRAN=0 \ | |||
| -DBUILD_WITHOUT_LAPACK=0 \ | |||
| -DCMAKE_VERBOSE_MAKEFILE=ON \ | |||
| -DCMAKE_BUILD_TYPE=Release \ | |||
| -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ | |||
| -DCMAKE_C_COMPILER_LAUNCHER=ccache \ | |||
| -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ | |||
| .. | |||
| cmake --build . | |||
| ;; | |||
| *) | |||
| echo "::error::Configuration not supported" | |||
| exit 1 | |||
| ;; | |||
| esac | |||
| - name: Show ccache status | |||
| continue-on-error: true | |||
| run: ccache -s | |||
| - name: Run tests | |||
| timeout-minutes: 60 | |||
| run: | | |||
| case "${{ matrix.build }}" in | |||
| "make") | |||
| MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' | |||
| echo "::group::Tests in 'test' directory" | |||
| make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
| echo "::endgroup::" | |||
| echo "::group::Tests in 'ctest' directory" | |||
| make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
| echo "::endgroup::" | |||
| echo "::group::Tests in 'utest' directory" | |||
| make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
| echo "::endgroup::" | |||
| ;; | |||
| "cmake") | |||
| cd build && ctest | |||
| ;; | |||
| *) | |||
| echo "::error::Configuration not supported" | |||
| exit 1 | |||
| ;; | |||
| esac | |||
| @@ -2,11 +2,16 @@ name: c910v qemu test | |||
| on: [push, pull_request] | |||
| concurrency: | |||
| group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
| cancel-in-progress: true | |||
| permissions: | |||
| contents: read # to fetch code (actions/checkout) | |||
| jobs: | |||
| TEST: | |||
| if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
| runs-on: ubuntu-latest | |||
| env: | |||
| xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282 | |||
| @@ -2,11 +2,16 @@ name: continuous build | |||
| on: [push, pull_request] | |||
| concurrency: | |||
| group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
| cancel-in-progress: true | |||
| permissions: | |||
| contents: read # to fetch code (actions/checkout) | |||
| jobs: | |||
| build: | |||
| if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
| runs-on: ${{ matrix.os }} | |||
| strategy: | |||
| @@ -146,18 +151,19 @@ jobs: | |||
| msys2: | |||
| if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
| runs-on: windows-latest | |||
| strategy: | |||
| fail-fast: false | |||
| matrix: | |||
| msystem: [MINGW64, MINGW32, CLANG64, CLANG32] | |||
| msystem: [UCRT64, MINGW32, CLANG64, CLANG32] | |||
| idx: [int32, int64] | |||
| build-type: [Release] | |||
| include: | |||
| - msystem: MINGW64 | |||
| - msystem: UCRT64 | |||
| idx: int32 | |||
| target-prefix: mingw-w64-x86_64 | |||
| target-prefix: mingw-w64-ucrt-x86_64 | |||
| fc-pkg: fc | |||
| - msystem: MINGW32 | |||
| idx: int32 | |||
| @@ -175,10 +181,10 @@ jobs: | |||
| target-prefix: mingw-w64-clang-i686 | |||
| fc-pkg: cc | |||
| c-lapack-flags: -DC_LAPACK=ON | |||
| - msystem: MINGW64 | |||
| - msystem: UCRT64 | |||
| idx: int64 | |||
| idx64-flags: -DBINARY=64 -DINTERFACE64=1 | |||
| target-prefix: mingw-w64-x86_64 | |||
| target-prefix: mingw-w64-ucrt-x86_64 | |||
| fc-pkg: fc | |||
| - msystem: CLANG64 | |||
| idx: int64 | |||
| @@ -188,9 +194,9 @@ jobs: | |||
| # Compiling with Flang 16 seems to cause test errors on machines | |||
| # with AVX512 instructions. Revisit after MSYS2 distributes Flang 17. | |||
| no-avx512-flags: -DNO_AVX512=1 | |||
| - msystem: MINGW64 | |||
| - msystem: UCRT64 | |||
| idx: int32 | |||
| target-prefix: mingw-w64-x86_64 | |||
| target-prefix: mingw-w64-ucrt-x86_64 | |||
| fc-pkg: fc | |||
| build-type: None | |||
| exclude: | |||
| @@ -312,6 +318,7 @@ jobs: | |||
| cross_build: | |||
| if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
| runs-on: ubuntu-22.04 | |||
| strategy: | |||
| @@ -2,8 +2,13 @@ name: loongarch64 qemu test | |||
| on: [push, pull_request] | |||
| concurrency: | |||
| group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
| cancel-in-progress: true | |||
| jobs: | |||
| TEST: | |||
| if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
| runs-on: ubuntu-latest | |||
| strategy: | |||
| fail-fast: false | |||
| @@ -18,6 +23,9 @@ jobs: | |||
| - target: LOONGSON2K1000 | |||
| triple: loongarch64-unknown-linux-gnu | |||
| opts: NO_SHARED=1 TARGET=LOONGSON2K1000 | |||
| - target: DYNAMIC_ARCH | |||
| triple: loongarch64-unknown-linux-gnu | |||
| opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC | |||
| steps: | |||
| - name: Checkout repository | |||
| @@ -2,11 +2,16 @@ name: mips64 qemu test | |||
| on: [push, pull_request] | |||
| concurrency: | |||
| group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
| cancel-in-progress: true | |||
| permissions: | |||
| contents: read # to fetch code (actions/checkout) | |||
| jobs: | |||
| TEST: | |||
| if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
| runs-on: ubuntu-latest | |||
| strategy: | |||
| fail-fast: false | |||
| @@ -18,11 +18,16 @@ on: | |||
| name: Nightly-Homebrew-Build | |||
| concurrency: | |||
| group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
| cancel-in-progress: true | |||
| permissions: | |||
| contents: read # to fetch code (actions/checkout) | |||
| jobs: | |||
| build-OpenBLAS-with-Homebrew: | |||
| if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
| runs-on: macos-latest | |||
| env: | |||
| DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer | |||
| @@ -1,4 +1,50 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.3.25 | |||
| 12-Nov-2023 | |||
| general: | |||
| - improved the error message shown on exceeding the maximum thread count | |||
| - improved the code to add supplementary thread buffers in case of overflow | |||
| - fixed a potential division by zero in ?ROTG | |||
| - improved the ?MATCOPY functions to accept zero-sized rows or columns | |||
| - corrected empty prototypes in function declarations | |||
| - cleaned up unused declarations in the f2c-converted versions of the LAPACK sources | |||
| - fixed compilation with the Cray CCE Compiler suite | |||
| - improved link line rewriting to avoid mixed libgomp/libomp builds with clang&gfortran | |||
| - worked around OPENMP builds with LLVM14's libomp hanging on FreeBSD | |||
| - improved the Makefiles to require less option duplication on "make install" | |||
| - imported the following changes from the upcoming release 3.12 of Reference-LAPACK | |||
| - deprecate utility functions ?GELQS and ?GEQRS (LAPACK PR 900) | |||
| - apply rounding up to workspace calculations done in floating point (LAPACK PR 904) | |||
| - avoid overflow in STGEX2/DTGEX2 (LAPACK PR 907) | |||
| - fix accumulation in ?LASSQ (LAPACK PR 909) | |||
| - fix handling of NaN values in ?GECON (LAPACK PR 926) | |||
| - avoid overflow in CBDSQR/ZBDSQR (LAPACK PR 927) | |||
| - fix poor vector orthogonalizations in ?ORBDB5/?UNBDB5 (LAPACK PR 928 & 930) | |||
| x86-64: | |||
| - fixed compile-time autodetection of AMD Ryzen3 and Ryzen4 cpus | |||
| - fixed capability-based fallback selection for unknown cpus in DYNAMIC_ARCH | |||
| - added AVX512 optimizations for ?ASUM on Sapphire Rapids and Cooper Lake | |||
| ARM64: | |||
| - fixed building on Apple with homebrew gcc | |||
| - fixed building with XCODE 15 | |||
| - fixed building on A64FX and Cortex A710/X1/X2 | |||
| - increased the default buffer size for recent ARM server cpus | |||
| POWER: | |||
| - fixed building with the IBM xlf 16.1.1 compiler | |||
| - fixed building with IBM XL C | |||
| - added support for DYNAMIC_ARCH builds with clang | |||
| - fixed union declaration in the BFLOAT16 test case | |||
| - enable optimizations for the AIX assembler on POWER10 | |||
| LOONGARCH64: | |||
| - added an optimized SGEMV kernel | |||
| - added an optimized DTRSM kernel | |||
| ==================================================================== | |||
| Version 0.3.24 | |||
| 03-Sep-2023 | |||
| @@ -35,7 +35,11 @@ export NO_LAPACK | |||
| export C_LAPACK | |||
| endif | |||
| ifeq ($(F_COMPILER),CRAY) | |||
| LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS)) | |||
| else | |||
| LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS)) | |||
| endif | |||
| SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test | |||
| @@ -206,9 +210,25 @@ ifeq ($(DYNAMIC_OLDER), 1) | |||
| @echo DYNAMIC_OLDER=1 >> Makefile.conf_last | |||
| endif | |||
| endif | |||
| @echo TARGET=$(CORE) >> Makefile.conf_last | |||
| ifdef USE_THREAD | |||
| @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last | |||
| endif | |||
| ifdef SMP | |||
| ifdef NUM_THREADS | |||
| @echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last | |||
| else | |||
| @echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last | |||
| endif | |||
| endif | |||
| ifeq ($(USE_OPENMP),1) | |||
| @echo USE_OPENMP=1 >> Makefile.conf_last | |||
| endif | |||
| ifeq ($(INTERFACE64),1) | |||
| @echo INTERFACE64=1 >> Makefile.conf_last | |||
| endif | |||
| @echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last | |||
| @echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last | |||
| @-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | |||
| @touch lib.grd | |||
| @@ -3,6 +3,14 @@ export GOTOBLAS_MAKEFILE = 1 | |||
| -include $(TOPDIR)/Makefile.conf_last | |||
| include ./Makefile.system | |||
| ifdef THELIBNAME | |||
| LIBNAME=$(THELIBNAME) | |||
| LIBSONAME=$(THELIBSONAME) | |||
| endif | |||
| ifeq ($(INTERFACE64),1) | |||
| USE_64BITINT=1 | |||
| endif | |||
| PREFIX ?= /opt/OpenBLAS | |||
| OPENBLAS_INCLUDE_DIR := $(PREFIX)/include | |||
| @@ -13,9 +13,9 @@ ifeq ($(CORE), POWER10) | |||
| ifneq ($(C_COMPILER), PGI) | |||
| CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math | |||
| ifeq ($(F_COMPILER), IBM) | |||
| FCOMMON_OPT += -O2 -qrecur -qnosave | |||
| FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize | |||
| else | |||
| FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math | |||
| FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math | |||
| endif | |||
| endif | |||
| endif | |||
| @@ -38,19 +38,18 @@ CCOMMON_OPT += -fast -Mvect=simd -Mcache_align | |||
| endif | |||
| ifneq ($(F_COMPILER), PGI) | |||
| ifeq ($(F_COMPILER), IBM) | |||
| FCOMMON_OPT += -O2 -qrecur -qnosave | |||
| FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize | |||
| else | |||
| FCOMMON_OPT += -O2 -frecursive -fno-fast-math | |||
| FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9 | |||
| endif | |||
| ifeq ($(C_COMPILER), GCC) | |||
| ifeq ($(F_COMPILER), GFORTRAN) | |||
| ifneq ($(GCCVERSIONGT4), 1) | |||
| $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) | |||
| FCOMMON_OPT += -mcpu=power8 -mtune=power8 | |||
| else | |||
| FCOMMON_OPT += -mcpu=power9 -mtune=power9 | |||
| endif | |||
| else | |||
| FCOMMON_OPT += -mcpu=power9 -mtune=power9 | |||
| endif | |||
| else | |||
| FCOMMON_OPT += -O2 -Mrecursive | |||
| @@ -66,12 +65,16 @@ endif | |||
| ifneq ($(F_COMPILER), PGI) | |||
| ifeq ($(OSNAME), AIX) | |||
| ifeq ($(F_COMPILER), IBM) | |||
| FCOMMON_OPT += -O2 -qrecur -qnosave | |||
| FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize | |||
| else | |||
| FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math | |||
| FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math | |||
| endif | |||
| else | |||
| FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math | |||
| ifeq ($(F_COMPILER), IBM) | |||
| FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize | |||
| else | |||
| FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math | |||
| endif | |||
| endif | |||
| else | |||
| FCOMMON_OPT += -O2 -Mrecursive | |||
| @@ -84,13 +87,20 @@ CCOMMON_OPT += -DUSE_OPENMP -fopenmp | |||
| else | |||
| CCOMMON_OPT += -DUSE_OPENMP -mp | |||
| endif | |||
| ifeq ($(F_COMPILER), IBM) | |||
| FCOMMON_OPT += -DUSE_OPENMP | |||
| else | |||
| ifneq ($(F_COMPILER), PGI) | |||
| FCOMMON_OPT += -DUSE_OPENMP -fopenmp | |||
| else | |||
| FCOMMON_OPT += -DUSE_OPENMP -mp | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq ($(C_COMPILER), CLANG) | |||
| CCOMMON_OPT += -fno-integrated-as | |||
| endif | |||
| # workaround for C->FORTRAN ABI violation in LAPACKE | |||
| ifeq ($(F_COMPILER), GFORTRAN) | |||
| FCOMMON_OPT += -fno-optimize-sibling-calls | |||
| @@ -125,8 +135,19 @@ endif | |||
| ifdef BINARY64 | |||
| ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX) | |||
| $(error Using GCC and XLF on AIX is not a supported combination.) | |||
| endif | |||
| ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX) | |||
| $(error Using Clang and gFortran on AIX is not a supported combination.) | |||
| endif | |||
| ifeq ($(OSNAME), AIX) | |||
| ifeq ($(C_COMPILER), GCC) | |||
| CCOMMON_OPT += -mpowerpc64 -maix64 | |||
| else | |||
| CCOMMON_OPT += -m64 | |||
| endif | |||
| ifeq ($(COMPILER_F77), g77) | |||
| FCOMMON_OPT += -mpowerpc64 -maix64 | |||
| endif | |||
| @@ -277,10 +277,6 @@ endif | |||
| ifndef GOTOBLAS_MAKEFILE | |||
| export GOTOBLAS_MAKEFILE = 1 | |||
| # Determine if the assembler is GNU Assembler | |||
| HAVE_GAS := $(shell $(AS) -v < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?) | |||
| GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS) | |||
| # Generating Makefile.conf and config.h | |||
| DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all) | |||
| @@ -397,11 +393,21 @@ ifeq ($(OSNAME), Darwin) | |||
| ifndef MACOSX_DEPLOYMENT_TARGET | |||
| ifeq ($(ARCH), arm64) | |||
| export MACOSX_DEPLOYMENT_TARGET=11.0 | |||
| ifeq ($(C_COMPILER), GCC) | |||
| export NO_SVE = 1 | |||
| endif | |||
| else | |||
| export MACOSX_DEPLOYMENT_TARGET=10.8 | |||
| endif | |||
| endif | |||
| MD5SUM = md5 -r | |||
| XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.Xcode |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.) | |||
| ifeq (x$(XCVER)x,xx) | |||
| XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.) | |||
| endif | |||
| ifeq (x$(XCVER), x 15) | |||
| CCOMMON_OPT += -Wl,-ld_classic | |||
| endif | |||
| endif | |||
| ifneq (,$(findstring $(OSNAME), FreeBSD OpenBSD DragonFly)) | |||
| @@ -602,6 +608,9 @@ endif | |||
| ifeq ($(C_COMPILER), CLANG) | |||
| CCOMMON_OPT += -fopenmp | |||
| ifeq ($(F_COMPILER), GFORTRAN) | |||
| FEXTRALIB := $(subst -lgomp,-lomp,$(FEXTRALIB)) | |||
| endif | |||
| endif | |||
| ifeq ($(C_COMPILER), INTEL) | |||
| @@ -750,7 +759,11 @@ DYNAMIC_CORE += POWER9 | |||
| else | |||
| $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) | |||
| endif | |||
| ifeq ($(OSNAME), AIX) | |||
| LDVERSIONGTEQ35 := 1 | |||
| else | |||
| LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35) | |||
| endif | |||
| ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11) | |||
| DYNAMIC_CORE += POWER10 | |||
| CCOMMON_OPT += -DHAVE_P10_SUPPORT | |||
| @@ -1164,6 +1177,10 @@ endif | |||
| ifeq ($(F_COMPILER), IBM) | |||
| CCOMMON_OPT += -DF_INTERFACE_IBM | |||
| FEXTRALIB += -lxlf90 | |||
| ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG)) | |||
| FCOMMON_OPT += -qextname | |||
| endif | |||
| # FCOMMON_OPT += -qarch=440 | |||
| ifdef BINARY64 | |||
| FCOMMON_OPT += -q64 | |||
| @@ -1360,6 +1377,8 @@ ifeq ($(F_COMPILER), SUN) | |||
| FCOMMON_OPT += -pic | |||
| else ifeq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -PIC | |||
| else ifeq ($(F_COMPILER), IBM) | |||
| FCOMMON_OPT += -qpic=large | |||
| else | |||
| FCOMMON_OPT += -fPIC | |||
| endif | |||
| @@ -1612,9 +1631,11 @@ override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF) | |||
| ifeq ($(NEED_PIC), 1) | |||
| ifeq (,$(findstring PIC,$(FFLAGS))) | |||
| ifneq ($(F_COMPILER),IBM) | |||
| override FFLAGS += -fPIC | |||
| endif | |||
| endif | |||
| endif | |||
| #For LAPACK Fortran codes. | |||
| #Disable -fopenmp for LAPACK Fortran codes on Windows. | |||
| @@ -1628,11 +1649,11 @@ endif | |||
| ifeq ($(F_COMPILER),NAG) | |||
| LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) | |||
| FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) | |||
| override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) | |||
| endif | |||
| ifeq ($(F_COMPILER),CRAY) | |||
| LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) | |||
| FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) | |||
| override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) | |||
| endif | |||
| LAPACK_CFLAGS = $(CFLAGS) | |||
| @@ -54,10 +54,15 @@ Building OpenBLAS requires the following to be installed: | |||
| Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically. | |||
| To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`. | |||
| The full target list is in the file `TargetList.txt`. For building with `cmake`, the | |||
| usual conventions apply, i.e. create a build directory either underneath the toplevel | |||
| OpenBLAS source directory or separate from it, and invoke `cmake` there with the path | |||
| to the source tree and any build options you plan to set. | |||
| The full target list is in the file `TargetList.txt`, other build optionss are documented in Makefile.rule and | |||
| can either be set there (typically by removing the comment character from the respective line), or used on the | |||
| `make` command line. | |||
| Note that when you run `make install` after building, you need to repeat all command line options you provided to `make` | |||
| in the build step, as some settings like the supported maximum number of threads are automatically derived from the | |||
| build host by default, which might not be what you want. | |||
| For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel | |||
| OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any | |||
| build options you plan to set. | |||
| ### Cross compile | |||
| @@ -117,7 +122,7 @@ Use `PREFIX=` when invoking `make`, for example | |||
| ```sh | |||
| make install PREFIX=your_installation_directory | |||
| ``` | |||
| (along with all options you added on the `make` command line in the preceding build step) | |||
| The default installation directory is `/opt/OpenBLAS`. | |||
| ## Supported CPUs and Operating Systems | |||
| @@ -137,7 +142,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th | |||
| - **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar) | |||
| - **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations. | |||
| - **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations. | |||
| - **AMD ZEN**: Uses Haswell codes with some optimizations. | |||
| - **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4) | |||
| #### MIPS32 | |||
| @@ -169,13 +174,16 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th | |||
| - **TSV110**: Optimized some Level-3 helper functions | |||
| - **EMAG 8180**: preliminary support based on A57 | |||
| - **Neoverse N1**: (AWS Graviton2) preliminary support | |||
| - **Apple Vortex**: preliminary support based on ARMV8 | |||
| - **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS | |||
| - **Apple Vortex**: preliminary support based on ThunderX2/3 | |||
| - **A64FX**: preliminary support, optimized Level-3 BLAS | |||
| - **ARMV8SVE**: any ARMV8 cpu with SVE extensions | |||
| #### PPC/PPC64 | |||
| - **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1` | |||
| - **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only. | |||
| - **POWER10**: | |||
| - **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2. | |||
| #### IBM zEnterprise System | |||
| @@ -167,11 +167,10 @@ jobs: | |||
| - job: OSX_OpenMP_Clang | |||
| pool: | |||
| vmImage: 'macOS-11' | |||
| vmImage: 'macOS-latest' | |||
| variables: | |||
| LD_LIBRARY_PATH: /usr/local/opt/llvm/lib | |||
| LIBRARY_PATH: /usr/local/opt/llvm/lib | |||
| MACOSX_DEPLOYMENT_TARGET: 11.0 | |||
| steps: | |||
| - script: | | |||
| brew update | |||
| @@ -180,7 +179,7 @@ jobs: | |||
| - job: OSX_OpenMP_Clang_cmake | |||
| pool: | |||
| vmImage: 'macOS-11' | |||
| vmImage: 'macOS-latest' | |||
| variables: | |||
| LD_LIBRARY_PATH: /usr/local/opt/llvm/lib | |||
| LIBRARY_PATH: /usr/local/opt/llvm/lib | |||
| @@ -210,7 +209,7 @@ jobs: | |||
| - job: OSX_Ifort_Clang | |||
| pool: | |||
| vmImage: 'macOS-11' | |||
| vmImage: 'macOS-latest' | |||
| variables: | |||
| LD_LIBRARY_PATH: /usr/local/opt/llvm/lib | |||
| MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg | |||
| @@ -96,11 +96,19 @@ esac | |||
| defined=0 | |||
| if [ "$os" = "AIX" ]; then | |||
| case "$BINARY" in | |||
| 32) compiler_name="$compiler_name -maix32" ;; | |||
| 64) compiler_name="$compiler_name -maix64" ;; | |||
| esac | |||
| defined=1 | |||
| if [ "$compiler" = "GCC" ]; then | |||
| case "$BINARY" in | |||
| 32) compiler_name="$compiler_name -maix32" ;; | |||
| 64) compiler_name="$compiler_name -maix64" ;; | |||
| esac | |||
| defined=1 | |||
| else | |||
| case "$BINARY" in | |||
| 32) compiler_name="$compiler_name -m32" ;; | |||
| 64) compiler_name="$compiler_name -m64" ;; | |||
| esac | |||
| defined=1 | |||
| fi | |||
| fi | |||
| case "$architecture" in | |||
| @@ -438,15 +438,19 @@ endif() | |||
| if(BUILD_LAPACK_DEPRECATED) | |||
| list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f | |||
| DEPRECATED/sgelqs.f DEPRECATED/sgeqrs.f | |||
| DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f | |||
| DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f) | |||
| list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f | |||
| DEPRECATED/dgelqs.f DEPRECATED/dgeqrs.f | |||
| DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f | |||
| DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f) | |||
| list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f | |||
| DEPRECATED/cgelqs.f DEPRECATED/cgeqrs.f | |||
| DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f | |||
| DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f) | |||
| list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f | |||
| DEPRECATED/zgelqs.f DEPRECATED/zgeqrs.f | |||
| DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f | |||
| DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f) | |||
| message(STATUS "Building deprecated routines") | |||
| @@ -935,15 +939,19 @@ endif() | |||
| if(BUILD_LAPACK_DEPRECATED) | |||
| list(APPEND SLASRC DEPRECATED/sgegs.c DEPRECATED/sgegv.c | |||
| DEPRECATED/sgelqs.c DEPRECATED/sgeqrs.c | |||
| DEPRECATED/sgeqpf.c DEPRECATED/sgelsx.c DEPRECATED/sggsvd.c | |||
| DEPRECATED/sggsvp.c DEPRECATED/slahrd.c DEPRECATED/slatzm.c DEPRECATED/stzrqf.c) | |||
| list(APPEND DLASRC DEPRECATED/dgegs.c DEPRECATED/dgegv.c | |||
| DEPRECATED/dgelqs.c DEPRECATED/dgeqrs.c | |||
| DEPRECATED/dgeqpf.c DEPRECATED/dgelsx.c DEPRECATED/dggsvd.c | |||
| DEPRECATED/dggsvp.c DEPRECATED/dlahrd.c DEPRECATED/dlatzm.c DEPRECATED/dtzrqf.c) | |||
| list(APPEND CLASRC DEPRECATED/cgegs.c DEPRECATED/cgegv.c | |||
| DEPRECATED/cgelqs.c DEPRECATED/cgeqrs.c | |||
| DEPRECATED/cgeqpf.c DEPRECATED/cgelsx.c DEPRECATED/cggsvd.c | |||
| DEPRECATED/cggsvp.c DEPRECATED/clahrd.c DEPRECATED/clatzm.c DEPRECATED/ctzrqf.c) | |||
| list(APPEND ZLASRC DEPRECATED/zgegs.c DEPRECATED/zgegv.c | |||
| DEPRECATED/zgelqs.c DEPRECATED/zgeqrs.c | |||
| DEPRECATED/zgeqpf.c DEPRECATED/zgelsx.c DEPRECATED/zggsvd.c | |||
| DEPRECATED/zggsvp.c DEPRECATED/zlahrd.c DEPRECATED/zlatzm.c DEPRECATED/ztzrqf.c) | |||
| message(STATUS "Building deprecated routines") | |||
| @@ -162,7 +162,11 @@ REALNAME: | |||
| #define HUGE_PAGESIZE ( 4 << 20) | |||
| #ifndef BUFFERSIZE | |||
| #if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE) | |||
| #define BUFFER_SIZE (32 << 22) | |||
| #else | |||
| #define BUFFER_SIZE (32 << 20) | |||
| #endif | |||
| #else | |||
| #define BUFFER_SIZE (32 << BUFFERSIZE) | |||
| #endif | |||
| @@ -192,27 +192,27 @@ int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer); | |||
| int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, | |||
| void *a, BLASLONG lda, | |||
| void *b, BLASLONG ldb, | |||
| void *c, BLASLONG ldc, int (*function)(), int threads); | |||
| void *c, BLASLONG ldc, int (*function)(void), int threads); | |||
| int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); | |||
| int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *, void *, BLASLONG); | |||
| int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); | |||
| int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG), void *, void *, BLASLONG); | |||
| int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); | |||
| int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG); | |||
| int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG); | |||
| int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG, BLASLONG); | |||
| int trsm_thread(int mode, BLASLONG m, BLASLONG n, | |||
| double alpha_r, double alpha_i, | |||
| void *a, BLASLONG lda, | |||
| void *c, BLASLONG ldc, int (*function)(), void *buffer); | |||
| void *c, BLASLONG ldc, int (*function)(void), void *buffer); | |||
| int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); | |||
| int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void*, void*, BLASLONG); | |||
| int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, | |||
| void *offsetA, BLASLONG lda, | |||
| void *offsetB, BLASLONG jb, | |||
| void *ipiv, BLASLONG offset, int (*function)(), void *buffer); | |||
| void *ipiv, BLASLONG offset, int (*function)(void), void *buffer); | |||
| #endif /* ENDIF ASSEMBLER */ | |||
| @@ -270,6 +270,7 @@ int detect(void) | |||
| sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0); | |||
| if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1 | |||
| if (value64 == 3660830781) return CPU_VORTEX; //A15/M2 | |||
| if (value64 == 2271604202) return CPU_VORTEX; //A16/M3 | |||
| #endif | |||
| return CPU_ARMV8; | |||
| #endif | |||
| @@ -194,7 +194,7 @@ static C_INLINE void xgetbv(int op, int * eax, int * edx){ | |||
| } | |||
| #endif | |||
| int support_avx(){ | |||
| int support_avx(void){ | |||
| #ifndef NO_AVX | |||
| int eax, ebx, ecx, edx; | |||
| int ret=0; | |||
| @@ -212,7 +212,7 @@ int support_avx(){ | |||
| #endif | |||
| } | |||
| int support_avx2(){ | |||
| int support_avx2(void){ | |||
| #ifndef NO_AVX2 | |||
| int eax, ebx, ecx=0, edx; | |||
| int ret=0; | |||
| @@ -228,7 +228,7 @@ int support_avx2(){ | |||
| #endif | |||
| } | |||
| int support_avx512(){ | |||
| int support_avx512(void){ | |||
| #if !defined(NO_AVX) && !defined(NO_AVX512) | |||
| int eax, ebx, ecx, edx; | |||
| int ret=0; | |||
| @@ -250,7 +250,7 @@ int support_avx512(){ | |||
| #endif | |||
| } | |||
| int support_avx512_bf16(){ | |||
| int support_avx512_bf16(void){ | |||
| #if !defined(NO_AVX) && !defined(NO_AVX512) | |||
| int eax, ebx, ecx, edx; | |||
| int ret=0; | |||
| @@ -271,7 +271,7 @@ int support_avx512_bf16(){ | |||
| #define BIT_AMX_BF16 0x00400000 | |||
| #define BIT_AMX_ENBD 0x00060000 | |||
| int support_amx_bf16() { | |||
| int support_amx_bf16(void) { | |||
| #if !defined(NO_AVX) && !defined(NO_AVX512) | |||
| int eax, ebx, ecx, edx; | |||
| int ret=0; | |||
| @@ -1660,7 +1660,13 @@ int get_cpuname(void){ | |||
| else | |||
| return CPUTYPE_BARCELONA; | |||
| } | |||
| case 10: // Zen3 | |||
| case 10: // Zen3/4 | |||
| #ifndef NO_AVX512 | |||
| if(support_avx512_bf16()) | |||
| return CPUTYPE_COOPERLAKE; | |||
| if(support_avx512()) | |||
| return CPUTYPE_SKYLAKEX; | |||
| #endif | |||
| if(support_avx()) | |||
| #ifndef NO_AVX2 | |||
| return CPUTYPE_ZEN; | |||
| @@ -2438,6 +2444,12 @@ int get_coretype(void){ | |||
| // Ryzen 2 | |||
| default: | |||
| // Matisse,Renoir Ryzen2 models | |||
| #ifndef NO_AVX512 | |||
| if(support_avx512_bf16()) | |||
| return CORE_COOPERLAKE; | |||
| if(support_avx512()) | |||
| return CORE_SKYLAKEX; | |||
| #endif | |||
| if(support_avx()) | |||
| #ifndef NO_AVX2 | |||
| return CORE_ZEN; | |||
| @@ -214,6 +214,11 @@ endif | |||
| ifeq ($(F_COMPILER), NAG) | |||
| CEXTRALIB = -lgomp | |||
| endif | |||
| ifeq ($(F_COMPILER), IBM) | |||
| ifeq ($(C_COMPILER), GCC) | |||
| CEXTRALIB += -lgomp | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| @@ -242,251 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| #if 0 | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| #endif | |||
| /* Common Block Declarations */ | |||
| @@ -503,16 +258,16 @@ static integer c__1 = 1; | |||
| static integer c__5 = 5; | |||
| static real c_b43 = (float)1.; | |||
| /* Main program */ int main() | |||
| /* Main program */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| static real sfac = (float)9.765625e-4; | |||
| /* Local variables */ | |||
| extern /* Subroutine */ int check1_(), check2_(); | |||
| extern /* Subroutine */ int check1_(real*), check2_(real*); | |||
| static integer ic; | |||
| extern /* Subroutine */ int header_(); | |||
| extern /* Subroutine */ int header_(void); | |||
| /* Test program for the COMPLEX Level 1 CBLAS. */ | |||
| /* Based upon the original CBLAS test routine together with: */ | |||
| @@ -553,7 +308,7 @@ static real c_b43 = (float)1.; | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int header_() | |||
| /* Subroutine */ int header_(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -564,7 +319,7 @@ static real c_b43 = (float)1.; | |||
| /* Format strings */ | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| integer s_wsfe(void), do_fio(void), e_wsfe(void); | |||
| /* .. Parameters .. */ | |||
| /* .. Scalars in Common .. */ | |||
| @@ -577,8 +332,7 @@ static real c_b43 = (float)1.; | |||
| } /* header_ */ | |||
| /* Subroutine */ int check1_(sfac) | |||
| real *sfac; | |||
| /* Subroutine */ int check1_(real* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -683,15 +437,15 @@ real *sfac; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| extern /* Subroutine */ int ctest_(); | |||
| extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*); | |||
| static complex mwpcs[5], mwpct[5]; | |||
| extern /* Subroutine */ int itest1_(), stest1_(); | |||
| extern /* Subroutine */ int itest1_(integer*, integer*), stest1_(real*,real*,real*,real*); | |||
| static complex cx[8]; | |||
| extern real scnrm2test_(); | |||
| extern real scnrm2test_(integer*, complex*, integer*); | |||
| static integer np1; | |||
| extern integer icamaxtest_(); | |||
| extern /* Subroutine */ int csscaltest_(); | |||
| extern real scasumtest_(); | |||
| extern integer icamaxtest_(integer*, complex*, integer*); | |||
| extern /* Subroutine */ int csscaltest_(integer*, real*, complex*, integer*); | |||
| extern real scasumtest_(integer*, complex*, integer*); | |||
| static integer len; | |||
| /* .. Parameters .. */ | |||
| @@ -808,8 +562,7 @@ real *sfac; | |||
| return 0; | |||
| } /* check1_ */ | |||
| /* Subroutine */ int check2_(sfac) | |||
| real *sfac; | |||
| /* Subroutine */ int check2_(real* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -981,10 +734,10 @@ real *sfac; | |||
| static complex cdot[1]; | |||
| static integer lenx, leny, i__; | |||
| static complex ctemp; | |||
| extern /* Subroutine */ int ctest_(); | |||
| extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*); | |||
| static integer ksize; | |||
| extern /* Subroutine */ int cdotctest_(), ccopytest_(), cdotutest_(), | |||
| cswaptest_(), caxpytest_(); | |||
| extern /* Subroutine */ int cdotctest_(integer*, complex*, integer*, complex*, integer*,complex*), ccopytest_(integer*, complex*, integer*, complex*, integer*), cdotutest_(integer*, complex*, integer*, complex*, integer*, complex*), | |||
| cswaptest_(integer*, complex*, integer*, complex*, integer*), caxpytest_(integer*, complex*, complex*, integer*, complex*, integer*); | |||
| static integer ki, kn; | |||
| static complex cx[7], cy[7]; | |||
| static integer mx, my; | |||
| @@ -1067,9 +820,7 @@ real *sfac; | |||
| return 0; | |||
| } /* check2_ */ | |||
| /* Subroutine */ int stest_(len, scomp, strue, ssize, sfac) | |||
| integer *len; | |||
| real *scomp, *strue, *ssize, *sfac; | |||
| /* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize,real* sfac) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1; | |||
| @@ -1077,7 +828,7 @@ real *scomp, *strue, *ssize, *sfac; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| extern doublereal sdiff_(); | |||
| extern doublereal sdiff_(real*, real*); | |||
| static real sd; | |||
| /* ********************************* STEST ************************** */ | |||
| @@ -1133,11 +884,10 @@ L40: | |||
| } /* stest_ */ | |||
| /* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac) | |||
| real *scomp1, *strue1, *ssize, *sfac; | |||
| /* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac) | |||
| { | |||
| static real scomp[1], strue[1]; | |||
| extern /* Subroutine */ int stest_(); | |||
| extern /* Subroutine */ int stest_(integer*, real*, real*, real*, real*); | |||
| /* ************************* STEST1 ***************************** */ | |||
| @@ -1164,8 +914,7 @@ real *scomp1, *strue1, *ssize, *sfac; | |||
| return 0; | |||
| } /* stest1_ */ | |||
| doublereal sdiff_(sa, sb) | |||
| real *sa, *sb; | |||
| doublereal sdiff_(real* sa, real* sb) | |||
| { | |||
| /* System generated locals */ | |||
| real ret_val; | |||
| @@ -1179,10 +928,7 @@ real *sa, *sb; | |||
| return ret_val; | |||
| } /* sdiff_ */ | |||
| /* Subroutine */ int ctest_(len, ccomp, ctrue, csize, sfac) | |||
| integer *len; | |||
| complex *ccomp, *ctrue, *csize; | |||
| real *sfac; | |||
| /* Subroutine */ int ctest_(integer* len, complex* ccomp, complex* ctrue, complex* csize, real* sfac) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1, i__2; | |||
| @@ -1193,7 +939,7 @@ real *sfac; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| static real scomp[20], ssize[20], strue[20]; | |||
| extern /* Subroutine */ int stest_(); | |||
| extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*); | |||
| /* **************************** CTEST ***************************** */ | |||
| @@ -1231,8 +977,7 @@ real *sfac; | |||
| return 0; | |||
| } /* ctest_ */ | |||
| /* Subroutine */ int itest1_(icomp, itrue) | |||
| integer *icomp, *itrue; | |||
| /* Subroutine */ int itest1_(integer* icomp, integer* itrue) | |||
| { | |||
| /* Local variables */ | |||
| static integer id; | |||
| @@ -242,129 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| /* Common Block Declarations */ | |||
| @@ -396,7 +273,7 @@ static integer c_n1 = -1; | |||
| static integer c__0 = 0; | |||
| static logical c_false = FALSE_; | |||
| /* Main program */ int main() | |||
| /* Main program */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -414,17 +291,21 @@ static logical c_false = FALSE_; | |||
| static logical same; | |||
| static integer ninc, nbet, ntra; | |||
| static logical rewi; | |||
| extern /* Subroutine */ int cchk1_(), cchk2_(), cchk3_(), cchk4_(), | |||
| cchk5_(), cchk6_(); | |||
| extern /* Subroutine */ int cchk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int cchk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int cchk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen); | |||
| extern /* Subroutine */ int cchk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen); | |||
| extern /* Subroutine */ int cchk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen); | |||
| extern /* Subroutine */ int cchk6_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen); | |||
| static complex a[4225] /* was [65][65] */; | |||
| static real g[65]; | |||
| static integer i__, j, n; | |||
| static logical fatal; | |||
| static complex x[65], y[65], z__[130]; | |||
| extern doublereal sdiff_(); | |||
| extern doublereal sdiff_(real*, real*); | |||
| static logical trace; | |||
| static integer nidim; | |||
| extern /* Subroutine */ int cmvch_(); | |||
| extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static char snaps[32], trans[1]; | |||
| static integer isnum; | |||
| static logical ltest[17]; | |||
| @@ -438,11 +319,11 @@ static logical c_false = FALSE_; | |||
| static char snamet[12]; | |||
| static real thresh; | |||
| static logical rorder; | |||
| extern /* Subroutine */ int cc2chke_(); | |||
| extern /* Subroutine */ void cc2chke_(char*, ftnlen); | |||
| static integer layout; | |||
| static logical ltestt, tsterr; | |||
| static complex alf[7]; | |||
| extern logical lce_(); | |||
| extern logical lce_(complex*, complex*, integer*); | |||
| static integer inc[7], nkb; | |||
| static complex bet[7]; | |||
| static real eps, err; | |||
| @@ -983,22 +864,7 @@ L240: | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int cchk1_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, | |||
| incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *nalf; | |||
| complex *alf; | |||
| integer *nbet; | |||
| complex *bet; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| real *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int cchk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1015,10 +881,10 @@ ftnlen sname_len; | |||
| static integer incx, incy; | |||
| static logical full, tran, null; | |||
| static integer i__, m, n; | |||
| extern /* Subroutine */ int cmake_(); | |||
| extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); | |||
| static complex alpha; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int cmvch_(); | |||
| extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static integer incxs, incys; | |||
| @@ -1026,14 +892,15 @@ ftnlen sname_len; | |||
| static integer ia, ib, ic; | |||
| static logical banded; | |||
| static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns; | |||
| extern /* Subroutine */ int ccgbmv_(), ccgemv_(); | |||
| extern logical lceres_(); | |||
| extern /* Subroutine */ int ccgbmv_(integer*, char*, integer*, integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen); | |||
| extern /* Subroutine */ void ccgemv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen); | |||
| extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); | |||
| static char ctrans[14]; | |||
| static real errmax; | |||
| static complex transl; | |||
| static char transs[1]; | |||
| static integer laa, lda; | |||
| extern logical lce_(); | |||
| extern logical lce_(complex*, complex*, integer*); | |||
| static complex als, bls; | |||
| static real err; | |||
| static integer iku, kls, kus; | |||
| @@ -1448,22 +1315,7 @@ L140: | |||
| } /* cchk1_ */ | |||
| /* Subroutine */ int cchk2_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, | |||
| incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *nalf; | |||
| complex *alf; | |||
| integer *nbet; | |||
| complex *bet; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| real *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int cchk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1481,10 +1333,10 @@ ftnlen sname_len; | |||
| static logical full, null; | |||
| static char uplo[1]; | |||
| static integer i__, k, n; | |||
| extern /* Subroutine */ int cmake_(); | |||
| extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); | |||
| static complex alpha; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int cmvch_(); | |||
| extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| @@ -1495,13 +1347,14 @@ ftnlen sname_len; | |||
| static integer nc, ik, in; | |||
| static logical packed; | |||
| static integer nk, ks, ix, iy, ns, lx, ly; | |||
| extern /* Subroutine */ int cchbmv_(), cchemv_(); | |||
| extern logical lceres_(); | |||
| extern /* Subroutine */ int cchpmv_(); | |||
| extern /* Subroutine */ void cchbmv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen); | |||
| extern /* Subroutine */ void cchemv_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen); | |||
| extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cchpmv_(integer*, char*, integer*, complex*, complex*, complex*, integer*, complex*, complex*, integer*, ftnlen); | |||
| static real errmax; | |||
| static complex transl; | |||
| static integer laa, lda; | |||
| extern logical lce_(); | |||
| extern logical lce_(complex*, complex*, integer*); | |||
| static complex als, bls; | |||
| static real err; | |||
| @@ -1906,19 +1759,7 @@ L130: | |||
| } /* cchk2_ */ | |||
| /* Subroutine */ int cchk3_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, xt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax; | |||
| complex *a, *aa, *as, *x, *xx, *xs, *xt; | |||
| real *g; | |||
| complex *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int cchk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* xt, real* g, complex* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1937,10 +1778,10 @@ ftnlen sname_len; | |||
| static logical full, null; | |||
| static char uplo[1], cdiag[14]; | |||
| static integer i__, k, n; | |||
| extern /* Subroutine */ int cmake_(); | |||
| extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); | |||
| static char diags[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int cmvch_(); | |||
| extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| @@ -1950,17 +1791,19 @@ ftnlen sname_len; | |||
| static integer nc, ik, in; | |||
| static logical packed; | |||
| static integer nk, ks, ix, ns, lx; | |||
| extern logical lceres_(); | |||
| extern /* Subroutine */ int cctbmv_(), cctbsv_(); | |||
| extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cctbmv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cctbsv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static char ctrans[14]; | |||
| extern /* Subroutine */ int cctpmv_(); | |||
| extern /* Subroutine */ void cctpmv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static real errmax; | |||
| extern /* Subroutine */ int cctrmv_(), cctpsv_(); | |||
| extern /* Subroutine */ void cctrmv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cctpsv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static complex transl; | |||
| extern /* Subroutine */ int cctrsv_(); | |||
| extern /* Subroutine */ void cctrsv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static char transs[1]; | |||
| static integer laa, icd, lda; | |||
| extern logical lce_(); | |||
| extern logical lce_(complex*, complex*, integer*); | |||
| static integer ict, icu; | |||
| static real err; | |||
| @@ -2418,21 +2261,7 @@ L130: | |||
| } /* cchk3_ */ | |||
| /* Subroutine */ int cchk4_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| complex *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| real *g; | |||
| complex *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int cchk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; | |||
| @@ -2444,21 +2273,21 @@ ftnlen sname_len; | |||
| static integer incx, incy; | |||
| static logical null; | |||
| static integer i__, j, m, n; | |||
| extern /* Subroutine */ int cmake_(); | |||
| extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); | |||
| static complex alpha, w[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int cmvch_(); | |||
| extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static integer incxs, incys, ia, nc, nd, im, in; | |||
| extern /* Subroutine */ int ccgerc_(); | |||
| extern /* Subroutine */ void ccgerc_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*); | |||
| static integer ms, ix, iy, ns, lx, ly; | |||
| extern /* Subroutine */ int ccgeru_(); | |||
| extern logical lceres_(); | |||
| extern /* Subroutine */ void ccgeru_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*); | |||
| extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); | |||
| static real errmax; | |||
| static complex transl; | |||
| static integer laa, lda; | |||
| extern logical lce_(); | |||
| extern logical lce_(complex*, complex*, integer*); | |||
| static complex als; | |||
| static real err; | |||
| @@ -2786,21 +2615,7 @@ L150: | |||
| } /* cchk4_ */ | |||
| /* Subroutine */ int cchk5_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| complex *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| real *g; | |||
| complex *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int cchk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2818,10 +2633,12 @@ ftnlen sname_len; | |||
| static logical full, null; | |||
| static char uplo[1]; | |||
| static integer i__, j, n; | |||
| extern /* Subroutine */ int cmake_(), ccher_(); | |||
| extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void ccher_(integer*, char*, integer*, real*, complex*, integer*, complex*, integer*, ftnlen); | |||
| static complex alpha, w[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int cchpr_(), cmvch_(); | |||
| extern /* Subroutine */ void cchpr_(integer*, char*, integer*, real*, complex*, integer*, complex*, ftnlen); | |||
| extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| @@ -2832,11 +2649,11 @@ ftnlen sname_len; | |||
| static logical packed; | |||
| static integer ix, ns, lx; | |||
| static real ralpha; | |||
| extern logical lceres_(); | |||
| extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); | |||
| static real errmax; | |||
| static complex transl; | |||
| static integer laa, lda; | |||
| extern logical lce_(); | |||
| extern logical lce_(complex*, complex*, integer*); | |||
| static real err; | |||
| /* Tests CHER and CHPR. */ | |||
| @@ -3160,21 +2977,7 @@ L130: | |||
| } /* cchk5_ */ | |||
| /* Subroutine */ int cchk6_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| complex *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| real *g; | |||
| complex *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int cchk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -3192,25 +2995,26 @@ ftnlen sname_len; | |||
| static logical full, null; | |||
| static char uplo[1]; | |||
| static integer i__, j, n; | |||
| extern /* Subroutine */ int cmake_(); | |||
| extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen); | |||
| static complex alpha, w[2]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int cmvch_(); | |||
| extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs, incys; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ int ccher2_(), cchpr2_(); | |||
| extern /* Subroutine */ void ccher2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*, ftnlen); | |||
| extern /* Subroutine */ void cchpr2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, ftnlen); | |||
| static integer ia, ja, ic, nc, jj, lj, in; | |||
| static logical packed; | |||
| static integer ix, iy, ns, lx, ly; | |||
| extern logical lceres_(); | |||
| extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen); | |||
| static real errmax; | |||
| static complex transl; | |||
| static integer laa, lda; | |||
| extern logical lce_(); | |||
| extern logical lce_(complex*, complex*, integer*); | |||
| static complex als; | |||
| static real err; | |||
| @@ -3597,24 +3401,7 @@ L170: | |||
| } /* cchk6_ */ | |||
| /* Subroutine */ int cmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y, | |||
| incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len) | |||
| char *trans; | |||
| integer *m, *n; | |||
| complex *alpha, *a; | |||
| integer *nmax; | |||
| complex *x; | |||
| integer *incx; | |||
| complex *beta, *y; | |||
| integer *incy; | |||
| complex *yt; | |||
| real *g; | |||
| complex *yy; | |||
| real *eps, *err; | |||
| logical *fatal; | |||
| integer *nout; | |||
| logical *mv; | |||
| ftnlen trans_len; | |||
| /* Subroutine */ int cmvch_(char* trans, integer* m, integer* n, complex* alpha, complex* a, integer* nmax, complex* x, integer* incx, complex* beta, complex* y, integer* incy, complex* yt, real* g, complex* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len) | |||
| { | |||
| /* System generated locals */ | |||
| @@ -3812,9 +3599,7 @@ L80: | |||
| } /* cmvch_ */ | |||
| logical lce_(ri, rj, lr) | |||
| complex *ri, *rj; | |||
| integer *lr; | |||
| logical lce_(complex* ri, complex* rj, integer* lr) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1, i__2, i__3; | |||
| @@ -3861,13 +3646,7 @@ L30: | |||
| } /* lce_ */ | |||
| logical lceres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) | |||
| char *type__, *uplo; | |||
| integer *m, *n; | |||
| complex *aa, *as; | |||
| integer *lda; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| logical lceres_(char* type__, char* uplo, integer* m, integer* n, complex* aa, complex* as, integer* lda, ftnlen type_len, ftnlen uplo_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; | |||
| @@ -3960,9 +3739,7 @@ L80: | |||
| } /* lceres_ */ | |||
| /* Complex */ VOID cbeg_( ret_val, reset) | |||
| complex * ret_val; | |||
| logical *reset; | |||
| /* Complex */ VOID cbeg_(complex* ret_val, logical* reset) | |||
| { | |||
| /* System generated locals */ | |||
| real r__1, r__2; | |||
| @@ -4023,8 +3800,7 @@ L10: | |||
| } /* cbeg_ */ | |||
| doublereal sdiff_(x, y) | |||
| real *x, *y; | |||
| doublereal sdiff_(real* x, real* y) | |||
| { | |||
| /* System generated locals */ | |||
| real ret_val; | |||
| @@ -4044,19 +3820,7 @@ real *x, *y; | |||
| } /* sdiff_ */ | |||
| /* Subroutine */ int cmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl, | |||
| ku, reset, transl, type_len, uplo_len, diag_len) | |||
| char *type__, *uplo, *diag; | |||
| integer *m, *n; | |||
| complex *a; | |||
| integer *nmax; | |||
| complex *aa; | |||
| integer *lda, *kl, *ku; | |||
| logical *reset; | |||
| complex *transl; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ int cmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, complex* a, integer* nmax, complex* aa, integer* lda, integer* kl, integer* ku, logical* reset, complex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4; | |||
| @@ -4064,7 +3828,7 @@ ftnlen diag_len; | |||
| complex q__1, q__2; | |||
| /* Local variables */ | |||
| extern /* Complex */ VOID cbeg_(); | |||
| extern /* Complex */ VOID cbeg_(complex*, logical*); | |||
| static integer ibeg, iend, ioff; | |||
| static logical unit; | |||
| static integer i__, j; | |||
| @@ -242,130 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| /* Common Block Declarations */ | |||
| @@ -21,19 +21,6 @@ typedef float real; | |||
| typedef double doublereal; | |||
| typedef struct { real r, i; } complex; | |||
| typedef struct { doublereal r, i; } doublecomplex; | |||
| #ifdef _MSC_VER | |||
| static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} | |||
| static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} | |||
| static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} | |||
| static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} | |||
| #else | |||
| static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
| static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
| static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
| static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
| #endif | |||
| #define pCf(z) (*_pCf(z)) | |||
| #define pCd(z) (*_pCd(z)) | |||
| typedef int logical; | |||
| typedef short int shortlogical; | |||
| typedef char logical1; | |||
| @@ -242,124 +229,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| /* Common Block Declarations */ | |||
| @@ -375,16 +244,16 @@ struct { | |||
| static integer c__1 = 1; | |||
| static doublereal c_b34 = 1.; | |||
| /* Main program */ int main() | |||
| /* Main program */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| static doublereal sfac = 9.765625e-4; | |||
| /* Local variables */ | |||
| extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_(); | |||
| extern /* Subroutine */ int check0_(doublereal*), check1_(doublereal*), check2_(doublereal*), check3_(doublereal*); | |||
| static integer ic; | |||
| extern /* Subroutine */ int header_(); | |||
| extern /* Subroutine */ int header_(void); | |||
| /* Test program for the DOUBLE PRECISION Level 1 CBLAS. */ | |||
| /* Based upon the original CBLAS test routine together with: */ | |||
| @@ -431,7 +300,7 @@ static doublereal c_b34 = 1.; | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int header_() | |||
| /* Subroutine */ int header_(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -450,8 +319,7 @@ static doublereal c_b34 = 1.; | |||
| } /* header_ */ | |||
| /* Subroutine */ int check0_(sfac) | |||
| doublereal *sfac; | |||
| /* Subroutine */ int check0_(doublereal* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -464,7 +332,7 @@ doublereal *sfac; | |||
| /* Local variables */ | |||
| static integer k; | |||
| extern /* Subroutine */ int drotgtest_(), stest1_(); | |||
| extern /* Subroutine */ int drotgtest_(doublereal*,doublereal*,doublereal*,doublereal*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); | |||
| static doublereal sa, sb, sc, ss; | |||
| /* .. Parameters .. */ | |||
| @@ -509,8 +377,7 @@ L40: | |||
| return 0; | |||
| } /* check0_ */ | |||
| /* Subroutine */ int check1_(sfac) | |||
| doublereal *sfac; | |||
| /* Subroutine */ int check1_(doublereal* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -535,14 +402,14 @@ doublereal *sfac; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| extern doublereal dnrm2test_(); | |||
| extern doublereal dnrm2test_(integer*, doublereal*, integer*); | |||
| static doublereal stemp[1], strue[8]; | |||
| extern /* Subroutine */ int stest_(), dscaltest_(); | |||
| extern doublereal dasumtest_(); | |||
| extern /* Subroutine */ int itest1_(), stest1_(); | |||
| extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(integer*,doublereal*,doublereal*,integer*); | |||
| extern doublereal dasumtest_(integer*,doublereal*,integer*); | |||
| extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); | |||
| static doublereal sx[8]; | |||
| static integer np1; | |||
| extern integer idamaxtest_(); | |||
| extern integer idamaxtest_(integer*,doublereal*,integer*); | |||
| static integer len; | |||
| /* .. Parameters .. */ | |||
| @@ -603,8 +470,7 @@ doublereal *sfac; | |||
| return 0; | |||
| } /* check1_ */ | |||
| /* Subroutine */ int check2_(sfac) | |||
| doublereal *sfac; | |||
| /* Subroutine */ int check2_(doublereal* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -649,10 +515,10 @@ doublereal *sfac; | |||
| /* Local variables */ | |||
| static integer lenx, leny; | |||
| extern doublereal ddottest_(); | |||
| extern doublereal ddottest_(integer*,doublereal*,integer*,doublereal*,integer*); | |||
| static integer i__, j, ksize; | |||
| extern /* Subroutine */ int stest_(), dcopytest_(), dswaptest_(), | |||
| daxpytest_(), stest1_(); | |||
| extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(integer*,doublereal*,integer*,doublereal*,integer*), dswaptest_(integer*,doublereal*,integer*,doublereal*,integer*), | |||
| daxpytest_(integer*,doublereal*,doublereal*,integer*,doublereal*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*); | |||
| static integer ki, kn, mx, my; | |||
| static doublereal sx[7], sy[7], stx[7], sty[7]; | |||
| @@ -733,8 +599,7 @@ doublereal *sfac; | |||
| return 0; | |||
| } /* check2_ */ | |||
| /* Subroutine */ int check3_(sfac) | |||
| doublereal *sfac; | |||
| /* Subroutine */ int check3_(doublereal* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -753,9 +618,9 @@ doublereal *sfac; | |||
| ; | |||
| /* Local variables */ | |||
| extern /* Subroutine */ int drottest_(); | |||
| extern /* Subroutine */ int drottest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*,doublereal*); | |||
| static integer i__, k, ksize; | |||
| extern /* Subroutine */int stest_(), drotmtest_(); | |||
| extern /* Subroutine */int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*); | |||
| static integer ki, kn; | |||
| static doublereal dparam[5], sx[10], sy[10], stx[10], sty[10]; | |||
| @@ -826,9 +691,7 @@ doublereal *sfac; | |||
| return 0; | |||
| } /* check3_ */ | |||
| /* Subroutine */ int stest_(len, scomp, strue, ssize, sfac) | |||
| integer *len; | |||
| doublereal *scomp, *strue, *ssize, *sfac; | |||
| /* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1; | |||
| @@ -836,7 +699,7 @@ doublereal *scomp, *strue, *ssize, *sfac; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| extern doublereal sdiff_(); | |||
| extern doublereal sdiff_(doublereal*,doublereal*); | |||
| static doublereal sd; | |||
| /* ********************************* STEST ************************** */ | |||
| @@ -892,11 +755,10 @@ L40: | |||
| } /* stest_ */ | |||
| /* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac) | |||
| doublereal *scomp1, *strue1, *ssize, *sfac; | |||
| /* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac) | |||
| { | |||
| static doublereal scomp[1], strue[1]; | |||
| extern /* Subroutine */ int stest_(); | |||
| extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*); | |||
| /* ************************* STEST1 ***************************** */ | |||
| @@ -923,8 +785,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac; | |||
| return 0; | |||
| } /* stest1_ */ | |||
| doublereal sdiff_(sa, sb) | |||
| doublereal *sa, *sb; | |||
| doublereal sdiff_(doublereal* sa, doublereal* sb) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal ret_val; | |||
| @@ -938,8 +799,7 @@ doublereal *sa, *sb; | |||
| return ret_val; | |||
| } /* sdiff_ */ | |||
| /* Subroutine */ int itest1_(icomp, itrue) | |||
| integer *icomp, *itrue; | |||
| /* Subroutine */ int itest1_(integer* icomp, integer* itrue) | |||
| { | |||
| /* Local variables */ | |||
| static integer id; | |||
| @@ -1188,4 +1048,4 @@ doublereal *dparam; | |||
| return 0; | |||
| } /* drotm_ */ | |||
| #endif | |||
| #endif | |||
| @@ -242,129 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| /* Common Block Declarations */ | |||
| @@ -395,7 +272,7 @@ static integer c_n1 = -1; | |||
| static integer c__0 = 0; | |||
| static logical c_false = FALSE_; | |||
| /* Main program */ int main() | |||
| /* Main program */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -413,17 +290,21 @@ static logical c_false = FALSE_; | |||
| static logical same; | |||
| static integer ninc, nbet, ntra; | |||
| static logical rewi; | |||
| extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(), | |||
| dchk5_(), dchk6_(); | |||
| extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int dchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int dchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| static doublereal a[4225] /* was [65][65] */, g[65]; | |||
| static integer i__, j; | |||
| extern doublereal ddiff_(); | |||
| extern doublereal ddiff_(doublereal*, doublereal*); | |||
| static integer n; | |||
| static logical fatal; | |||
| static doublereal x[65], y[65], z__[130]; | |||
| static logical trace; | |||
| static integer nidim; | |||
| extern /* Subroutine */ int dmvch_(); | |||
| extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static char snaps[32], trans[1]; | |||
| static integer isnum; | |||
| static logical ltest[16]; | |||
| @@ -437,11 +318,11 @@ static logical c_false = FALSE_; | |||
| static char snamet[12]; | |||
| static doublereal thresh; | |||
| static logical rorder; | |||
| extern /* Subroutine */ int cd2chke_(); | |||
| extern /* Subroutine */ void cd2chke_(char*, ftnlen); | |||
| static integer layout; | |||
| static logical ltestt, tsterr; | |||
| static doublereal alf[7]; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static integer inc[7], nkb; | |||
| static doublereal bet[7],eps,err; | |||
| char tmpchar; | |||
| @@ -977,21 +858,7 @@ L240: | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, | |||
| incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *nalf; | |||
| doublereal *alf; | |||
| integer *nbet; | |||
| doublereal *bet; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1007,10 +874,10 @@ ftnlen sname_len; | |||
| static integer incx, incy; | |||
| static logical full, tran, null; | |||
| static integer i__, m, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int dmvch_(); | |||
| extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static integer incxs, incys; | |||
| @@ -1018,13 +885,14 @@ ftnlen sname_len; | |||
| static integer ia, ib, ic; | |||
| static logical banded; | |||
| static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns; | |||
| extern /* Subroutine */ int cdgbmv_(), cdgemv_(); | |||
| extern logical lderes_(); | |||
| extern /* Subroutine */ void cdgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ void cdgemv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static char ctrans[14]; | |||
| static doublereal errmax, transl; | |||
| static char transs[1]; | |||
| static integer laa, lda; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static doublereal als, bls, err; | |||
| static integer iku, kls, kus; | |||
| @@ -1429,21 +1297,7 @@ L140: | |||
| } /* dchk1_ */ | |||
| /* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, | |||
| incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *nalf; | |||
| doublereal *alf; | |||
| integer *nbet; | |||
| doublereal *bet; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1460,10 +1314,10 @@ ftnlen sname_len; | |||
| static logical full, null; | |||
| static char uplo[1]; | |||
| static integer i__, k, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int dmvch_(); | |||
| extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| @@ -1474,12 +1328,13 @@ ftnlen sname_len; | |||
| static integer nc, ik, in; | |||
| static logical packed; | |||
| static integer nk, ks, ix, iy, ns, lx, ly; | |||
| extern logical lderes_(); | |||
| extern /* Subroutine */ int cdsbmv_(), cdspmv_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cdsbmv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ void cdspmv_(integer*, char*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); | |||
| static doublereal errmax, transl; | |||
| extern /* Subroutine */ int cdsymv_(); | |||
| extern /* Subroutine */ void cdsymv_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen); | |||
| static integer laa, lda; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static doublereal als, bls, err; | |||
| @@ -1882,17 +1737,7 @@ L130: | |||
| } /* dchk2_ */ | |||
| /* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, xt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax; | |||
| doublereal *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* xt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1911,10 +1756,10 @@ ftnlen sname_len; | |||
| static logical full, null; | |||
| static char uplo[1], cdiag[14]; | |||
| static integer i__, k, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static char diags[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int dmvch_(); | |||
| extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| @@ -1924,16 +1769,19 @@ ftnlen sname_len; | |||
| static integer nc, ik, in; | |||
| static logical packed; | |||
| static integer nk, ks, ix, ns, lx; | |||
| extern logical lderes_(); | |||
| extern /* Subroutine */ int cdtbmv_(), cdtbsv_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cdtbmv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cdtbsv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static char ctrans[14]; | |||
| static doublereal errmax; | |||
| extern /* Subroutine */ int cdtpmv_(), cdtrmv_(); | |||
| extern /* Subroutine */ void cdtpmv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cdtrmv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal transl; | |||
| extern /* Subroutine */ int cdtpsv_(), cdtrsv_(); | |||
| extern /* Subroutine */ void cdtpsv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cdtrsv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static char transs[1]; | |||
| static integer laa, icd, lda; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static integer ict, icu; | |||
| static doublereal err; | |||
| @@ -2388,19 +2236,7 @@ L130: | |||
| } /* dchk3_ */ | |||
| /* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublereal *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; | |||
| @@ -2411,17 +2247,18 @@ ftnlen sname_len; | |||
| static integer incx, incy; | |||
| static logical null; | |||
| static integer i__, j, m, n; | |||
| extern /* Subroutine */ int dmake_(), cdger_(); | |||
| extern /* Subroutine */ void cdger_(integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*); | |||
| extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha, w[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int dmvch_(); | |||
| extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly; | |||
| extern logical lderes_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static doublereal errmax, transl; | |||
| static integer laa, lda; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static doublereal als, err; | |||
| @@ -2727,19 +2564,7 @@ L150: | |||
| } /* dchk4_ */ | |||
| /* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublereal *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2757,25 +2582,25 @@ ftnlen sname_len; | |||
| static logical full, null; | |||
| static char uplo[1]; | |||
| static integer i__, j, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha, w[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int dmvch_(); | |||
| extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int cdspr_(); | |||
| extern /* Subroutine */ void cdspr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, ftnlen); | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs; | |||
| extern /* Subroutine */ int cdsyr_(); | |||
| extern /* Subroutine */ void cdsyr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen); | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| static integer ia, ja, ic, nc, jj, lj, in; | |||
| static logical packed; | |||
| static integer ix, ns, lx; | |||
| extern logical lderes_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static doublereal errmax, transl; | |||
| static integer laa, lda; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static doublereal als, err; | |||
| @@ -3096,19 +2921,7 @@ L130: | |||
| } /* dchk5_ */ | |||
| /* Subroutine */ int dchk6_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublereal *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -3125,24 +2938,25 @@ ftnlen sname_len; | |||
| static logical full, null; | |||
| static char uplo[1]; | |||
| static integer i__, j, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha, w[2]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int dmvch_(); | |||
| extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs, incys; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ int cdspr2_(), cdsyr2_(); | |||
| extern /* Subroutine */ void cdspr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, ftnlen); | |||
| extern /* Subroutine */ void cdsyr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen); | |||
| static integer ia, ja, ic, nc, jj, lj, in; | |||
| static logical packed; | |||
| static integer ix, iy, ns, lx, ly; | |||
| extern logical lderes_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static doublereal errmax, transl; | |||
| static integer laa, lda; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static doublereal als, err; | |||
| /* Tests DSYR2 and DSPR2. */ | |||
| @@ -3508,25 +3322,13 @@ L170: | |||
| } /* dchk6_ */ | |||
| /* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl, | |||
| ku, reset, transl, type_len, uplo_len, diag_len) | |||
| char *type__, *uplo, *diag; | |||
| integer *m, *n; | |||
| doublereal *a; | |||
| integer *nmax; | |||
| doublereal *aa; | |||
| integer *lda, *kl, *ku; | |||
| logical *reset; | |||
| doublereal *transl; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4; | |||
| /* Local variables */ | |||
| extern doublereal dbeg_(); | |||
| extern doublereal dbeg_(logical* ); | |||
| static integer ibeg, iend, ioff; | |||
| static logical unit; | |||
| static integer i__, j; | |||
| @@ -3752,28 +3554,14 @@ ftnlen diag_len; | |||
| } /* dmake_ */ | |||
| /* Subroutine */ int dmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y, | |||
| incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len) | |||
| char *trans; | |||
| integer *m, *n; | |||
| doublereal *alpha, *a; | |||
| integer *nmax; | |||
| doublereal *x; | |||
| integer *incx; | |||
| doublereal *beta, *y; | |||
| integer *incy; | |||
| doublereal *yt, *g, *yy, *eps, *err; | |||
| logical *fatal; | |||
| integer *nout; | |||
| logical *mv; | |||
| ftnlen trans_len; | |||
| /* Subroutine */ int dmvch_(char* trans, integer* m, integer* n, doublereal* alpha, doublereal* a, integer* nmax, doublereal* x, integer* incx, doublereal* beta, doublereal* y, integer* incy, doublereal* yt, doublereal* g, doublereal* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2; | |||
| doublereal d__1; | |||
| /* Builtin functions */ | |||
| double sqrt(); | |||
| double sqrt(double); | |||
| /* Local variables */ | |||
| static doublereal erri; | |||
| @@ -3902,9 +3690,7 @@ L70: | |||
| } /* dmvch_ */ | |||
| logical lde_(ri, rj, lr) | |||
| doublereal *ri, *rj; | |||
| integer *lr; | |||
| logical lde_(doublereal* ri, doublereal* rj, integer* lr) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1; | |||
| @@ -3949,13 +3735,7 @@ L30: | |||
| } /* lde_ */ | |||
| logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) | |||
| char *type__, *uplo; | |||
| integer *m, *n; | |||
| doublereal *aa, *as; | |||
| integer *lda; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; | |||
| @@ -4042,8 +3822,7 @@ L80: | |||
| } /* lderes_ */ | |||
| doublereal dbeg_(reset) | |||
| logical *reset; | |||
| doublereal dbeg_(logical* reset) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal ret_val; | |||
| @@ -4094,8 +3873,7 @@ L10: | |||
| } /* dbeg_ */ | |||
| doublereal ddiff_(x, y) | |||
| doublereal *x, *y; | |||
| doublereal ddiff_(doublereal* x, doublereal* y) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal ret_val; | |||
| @@ -242,129 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| /* Common Block Declarations */ | |||
| @@ -393,7 +270,7 @@ static logical c_true = TRUE_; | |||
| static integer c__0 = 0; | |||
| static logical c_false = FALSE_; | |||
| /* Main program MAIN__() */ int main() | |||
| /* Main program MAIN__() */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -403,25 +280,24 @@ static logical c_false = FALSE_; | |||
| integer i__1, i__2, i__3; | |||
| doublereal d__1; | |||
| /* Builtin functions */ | |||
| integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(), | |||
| e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe(); | |||
| integer f_clos(); | |||
| /* Local variables */ | |||
| static integer nalf, idim[9]; | |||
| static logical same; | |||
| static integer nbet, ntra; | |||
| static logical rewi; | |||
| extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(), | |||
| dchk5_(); | |||
| extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen); | |||
| /* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len); | |||
| static doublereal c__[4225] /* was [65][65] */, g[65]; | |||
| static integer i__, j; | |||
| extern doublereal ddiff_(); | |||
| extern doublereal ddiff_(doublereal*, doublereal*); | |||
| static integer n; | |||
| static logical fatal; | |||
| static doublereal w[130]; | |||
| extern /* Subroutine */ int dmmch_(); | |||
| extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical trace; | |||
| static integer nidim; | |||
| static char snaps[32]; | |||
| @@ -433,11 +309,11 @@ static logical c_false = FALSE_; | |||
| static char snamet[12], transa[1], transb[1]; | |||
| static doublereal thresh; | |||
| static logical rorder; | |||
| extern /* Subroutine */ int cd3chke_(); | |||
| extern /* Subroutine */ void cd3chke_(char*, ftnlen); | |||
| static integer layout; | |||
| static logical ltestt, tsterr; | |||
| static doublereal alf[7]; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static doublereal bet[7], eps, err; | |||
| char tmpchar; | |||
| @@ -907,21 +783,7 @@ L230: | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublereal *alf; | |||
| integer *nbet; | |||
| doublereal *bet; | |||
| integer *nmax; | |||
| doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -931,29 +793,27 @@ ftnlen sname_len; | |||
| integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, | |||
| i__3, i__4, i__5, i__6; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static doublereal beta; | |||
| static integer ldas, ldbs, ldcs; | |||
| static logical same, null; | |||
| static integer i__, k, m, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha; | |||
| extern /* Subroutine */ int dmmch_(); | |||
| extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical isame[13], trana, tranb; | |||
| static integer nargs; | |||
| static logical reset; | |||
| extern /* Subroutine */ void dprcn1_(); | |||
| extern /* Subroutine */ void dprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer ia, ib, ma, mb, na, nb, nc, ik, im, in; | |||
| extern /* Subroutine */ int cdgemm_(); | |||
| extern /* Subroutine */ void cdgemm_(integer*, char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static integer ks, ms, ns; | |||
| extern logical lderes_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static char tranas[1], tranbs[1], transa[1], transb[1]; | |||
| static doublereal errmax; | |||
| static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static doublereal als, bls, err; | |||
| /* Tests DGEMM. */ | |||
| @@ -1283,23 +1143,8 @@ L130: | |||
| } /* dchk1_ */ | |||
| /* Subroutine */ void dprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k, | |||
| alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *transa, *transb; | |||
| integer *m, *n, *k; | |||
| doublereal *alpha; | |||
| integer *lda, *ldb; | |||
| doublereal *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen transa_len; | |||
| ftnlen transb_len; | |||
| /* Subroutine */ void dprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char crc[14], cta[14], ctb[14]; | |||
| @@ -1328,21 +1173,7 @@ ftnlen transb_len; | |||
| } /* dprcn1_ */ | |||
| /* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublereal *alf; | |||
| integer *nbet; | |||
| doublereal *bet; | |||
| integer *nmax; | |||
| doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1353,8 +1184,6 @@ ftnlen sname_len; | |||
| integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, | |||
| i__3, i__4, i__5; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static doublereal beta; | |||
| @@ -1364,21 +1193,21 @@ ftnlen sname_len; | |||
| static logical left, null; | |||
| static char uplo[1]; | |||
| static integer i__, m, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha; | |||
| extern /* Subroutine */ int dmmch_(); | |||
| extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical isame[13]; | |||
| static char sides[1]; | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ void dprcn2_(); | |||
| extern /* Subroutine */ void dprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer ia, ib, na, nc, im, in, ms, ns; | |||
| extern logical lderes_(); | |||
| extern /* Subroutine */ int cdsymm_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cdsymm_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static doublereal errmax; | |||
| static integer laa, lbb, lda, lcc, ldb, ldc; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static integer ics; | |||
| static doublereal als, bls; | |||
| static integer icu; | |||
| @@ -1692,23 +1521,8 @@ L120: | |||
| } /* dchk2_ */ | |||
| /* Subroutine */ void dprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha, | |||
| lda, ldb, beta, ldc, sname_len, side_len, uplo_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *side, *uplo; | |||
| integer *m, *n; | |||
| doublereal *alpha; | |||
| integer *lda, *ldb; | |||
| doublereal *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen side_len; | |||
| ftnlen uplo_len; | |||
| /* Subroutine */ void dprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char cs[14], cu[14], crc[14]; | |||
| @@ -1733,19 +1547,7 @@ ftnlen uplo_len; | |||
| } /* dprcn2_ */ | |||
| /* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__, | |||
| iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublereal *alf; | |||
| integer *nmax; | |||
| doublereal *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* ct, doublereal* g, doublereal* c__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1766,24 +1568,24 @@ ftnlen sname_len; | |||
| static logical left, null; | |||
| static char uplo[1]; | |||
| static integer i__, j, m, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha; | |||
| static char diags[1]; | |||
| extern /* Subroutine */ int dmmch_(); | |||
| extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical isame[13]; | |||
| static char sides[1]; | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ void dprcn3_(); | |||
| extern /* Subroutine */ void dprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen); | |||
| static integer ia, na, nc, im, in, ms, ns; | |||
| extern logical lderes_(); | |||
| extern /* Subroutine */ int cdtrmm_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cdtrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); | |||
| static char tranas[1], transa[1]; | |||
| extern /* Subroutine */ int cdtrsm_(); | |||
| extern /* Subroutine */ void cdtrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); | |||
| static doublereal errmax; | |||
| static integer laa, icd, lbb, lda, ldb; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static integer ics; | |||
| static doublereal als; | |||
| static integer ict, icu; | |||
| @@ -2165,24 +1967,8 @@ L160: | |||
| } /* dchk3_ */ | |||
| /* Subroutine */ void dprcn3_(nout, nc, sname, iorder, side, uplo, transa, | |||
| diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len, | |||
| transa_len, diag_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *side, *uplo, *transa, *diag; | |||
| integer *m, *n; | |||
| doublereal *alpha; | |||
| integer *lda, *ldb; | |||
| ftnlen sname_len; | |||
| ftnlen side_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ void dprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char ca[14], cd[14], cs[14], cu[14], crc[14]; | |||
| @@ -2219,21 +2005,7 @@ ftnlen diag_len; | |||
| } /* dprcn3_ */ | |||
| /* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublereal *alf; | |||
| integer *nbet; | |||
| doublereal *bet; | |||
| integer *nmax; | |||
| doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2244,8 +2016,6 @@ ftnlen sname_len; | |||
| integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, | |||
| i__3, i__4, i__5; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static doublereal beta; | |||
| @@ -2255,23 +2025,23 @@ ftnlen sname_len; | |||
| static logical tran, null; | |||
| static char uplo[1]; | |||
| static integer i__, j, k, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha; | |||
| extern /* Subroutine */ int dmmch_(); | |||
| extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical isame[13]; | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char trans[1]; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ void dprcn4_(); | |||
| extern /* Subroutine */ void dprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns; | |||
| extern logical lderes_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static doublereal errmax; | |||
| extern /* Subroutine */ int cdsyrk_(); | |||
| extern /* Subroutine */ void cdsyrk_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static char transs[1]; | |||
| static integer laa, lda, lcc, ldc; | |||
| extern logical lde_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| static doublereal als; | |||
| static integer ict, icu; | |||
| static doublereal err; | |||
| @@ -2586,23 +2356,8 @@ L130: | |||
| } /* dchk4_ */ | |||
| /* Subroutine */ void dprcn4_(nout, nc, sname, iorder, uplo, transa, n, k, | |||
| alpha, lda, beta, ldc, sname_len, uplo_len, transa_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *uplo, *transa; | |||
| integer *n, *k; | |||
| doublereal *alpha; | |||
| integer *lda; | |||
| doublereal *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| /* Subroutine */ void dprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char ca[14], cu[14], crc[14]; | |||
| @@ -2629,21 +2384,7 @@ ftnlen transa_len; | |||
| } /* dprcn4_ */ | |||
| /* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs, | |||
| c__, cc, cs, ct, g, w, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublereal *alf; | |||
| integer *nbet; | |||
| doublereal *bet; | |||
| integer *nmax; | |||
| doublereal *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2653,8 +2394,6 @@ ftnlen sname_len; | |||
| /* System generated locals */ | |||
| integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static integer jjab; | |||
| @@ -2665,23 +2404,23 @@ ftnlen sname_len; | |||
| static logical tran, null; | |||
| static char uplo[1]; | |||
| static integer i__, j, k, n; | |||
| extern /* Subroutine */ int dmake_(); | |||
| extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen); | |||
| static doublereal alpha; | |||
| extern /* Subroutine */ int dmmch_(); | |||
| extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical isame[13]; | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char trans[1]; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ void dprcn5_(); | |||
| extern /* Subroutine */ void dprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns; | |||
| extern logical lderes_(); | |||
| extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static doublereal errmax; | |||
| static char transs[1]; | |||
| static integer laa, lbb, lda, lcc, ldb, ldc; | |||
| extern logical lde_(); | |||
| extern /* Subroutine */ int cdsyr2k_(); | |||
| extern logical lde_(doublereal*, doublereal*, integer*); | |||
| extern /* Subroutine */ void cdsyr2k_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen); | |||
| static doublereal als; | |||
| static integer ict, icu; | |||
| static doublereal err; | |||
| @@ -3048,23 +2787,8 @@ L160: | |||
| } /* dchk5_ */ | |||
| /* Subroutine */ void dprcn5_(nout, nc, sname, iorder, uplo, transa, n, k, | |||
| alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *uplo, *transa; | |||
| integer *n, *k; | |||
| doublereal *alpha; | |||
| integer *lda, *ldb; | |||
| doublereal *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| /* Subroutine */ void dprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char ca[14], cu[14], crc[14]; | |||
| @@ -3091,25 +2815,13 @@ ftnlen transa_len; | |||
| } /* dprcn5_ */ | |||
| /* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset, | |||
| transl, type_len, uplo_len, diag_len) | |||
| char *type__, *uplo, *diag; | |||
| integer *m, *n; | |||
| doublereal *a; | |||
| integer *nmax; | |||
| doublereal *aa; | |||
| integer *lda; | |||
| logical *reset; | |||
| doublereal *transl; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2; | |||
| /* Local variables */ | |||
| extern doublereal dbeg_(); | |||
| extern doublereal dbeg_(logical*); | |||
| static integer ibeg, iend; | |||
| static logical unit; | |||
| static integer i__, j; | |||
| @@ -3241,25 +2953,7 @@ ftnlen diag_len; | |||
| } /* dmake_ */ | |||
| /* Subroutine */ int dmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb, | |||
| beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv, | |||
| transa_len, transb_len) | |||
| char *transa, *transb; | |||
| integer *m, *n, *kk; | |||
| doublereal *alpha, *a; | |||
| integer *lda; | |||
| doublereal *b; | |||
| integer *ldb; | |||
| doublereal *beta, *c__; | |||
| integer *ldc; | |||
| doublereal *ct, *g, *cc; | |||
| integer *ldcc; | |||
| doublereal *eps, *err; | |||
| logical *fatal; | |||
| integer *nout; | |||
| logical *mv; | |||
| ftnlen transa_len; | |||
| ftnlen transb_len; | |||
| /* Subroutine */ int dmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublereal* alpha, doublereal* a, integer* lda, doublereal* b, integer* ldb, doublereal* beta, doublereal* c__, integer* ldc, doublereal* ct, doublereal* g, doublereal* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, | |||
| @@ -3267,8 +2961,7 @@ ftnlen transb_len; | |||
| doublereal d__1, d__2; | |||
| /* Builtin functions */ | |||
| double sqrt(); | |||
| integer s_wsfe(), e_wsfe(), do_fio(); | |||
| double sqrt(double); | |||
| /* Local variables */ | |||
| static doublereal erri; | |||
| @@ -3432,9 +3125,7 @@ L150: | |||
| } /* dmmch_ */ | |||
| logical lde_(ri, rj, lr) | |||
| doublereal *ri, *rj; | |||
| integer *lr; | |||
| logical lde_(doublereal* ri, doublereal* rj, integer* lr) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1; | |||
| @@ -3481,13 +3172,7 @@ L30: | |||
| } /* lde_ */ | |||
| logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) | |||
| char *type__, *uplo; | |||
| integer *m, *n; | |||
| doublereal *aa, *as; | |||
| integer *lda; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; | |||
| @@ -3576,8 +3261,7 @@ L80: | |||
| } /* lderes_ */ | |||
| doublereal dbeg_(reset) | |||
| logical *reset; | |||
| doublereal dbeg_(logical* reset) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal ret_val; | |||
| @@ -3629,8 +3313,7 @@ L10: | |||
| } /* dbeg_ */ | |||
| doublereal ddiff_(x, y) | |||
| doublereal *x, *y; | |||
| doublereal ddiff_(doublereal* x, doublereal* y) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal ret_val; | |||
| @@ -21,19 +21,6 @@ typedef float real; | |||
| typedef double doublereal; | |||
| typedef struct { real r, i; } complex; | |||
| typedef struct { doublereal r, i; } doublecomplex; | |||
| #ifdef _MSC_VER | |||
| static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} | |||
| static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} | |||
| static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} | |||
| static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} | |||
| #else | |||
| static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
| static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
| static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
| static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
| #endif | |||
| #define pCf(z) (*_pCf(z)) | |||
| #define pCd(z) (*_pCd(z)) | |||
| typedef int logical; | |||
| typedef short int shortlogical; | |||
| typedef char logical1; | |||
| @@ -242,250 +229,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| #if 0 | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| #endif | |||
| /* Common Block Declarations */ | |||
| @@ -502,16 +245,16 @@ struct { | |||
| static integer c__1 = 1; | |||
| static real c_b34 = (float)1.; | |||
| /* Main program */ int main () | |||
| /* Main program */ int main (void) | |||
| { | |||
| /* Initialized data */ | |||
| static real sfac = (float)9.765625e-4; | |||
| /* Local variables */ | |||
| extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_(); | |||
| extern /* Subroutine */ int check0_(real*), check1_(real*), check2_(real*), check3_(real*); | |||
| static integer ic; | |||
| extern /* Subroutine */ int header_(); | |||
| extern /* Subroutine */ int header_(void); | |||
| /* Test program for the REAL Level 1 CBLAS. */ | |||
| /* Based upon the original CBLAS test routine together with: */ | |||
| @@ -557,7 +300,7 @@ static real c_b34 = (float)1.; | |||
| exit(0); | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int header_() | |||
| /* Subroutine */ int header_(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -580,8 +323,7 @@ static real c_b34 = (float)1.; | |||
| } /* header_ */ | |||
| /* Subroutine */ int check0_(sfac) | |||
| real *sfac; | |||
| /* Subroutine */ int check0_(real *sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -600,7 +342,7 @@ real *sfac; | |||
| /* Local variables */ | |||
| static integer k; | |||
| extern /* Subroutine */ int srotgtest_(), stest1_(); | |||
| extern /* Subroutine */ int srotgtest_(real*,real*,real*,real*), stest1_(real*,real*,real*,real*); | |||
| static real sa, sb, sc, ss; | |||
| /* .. Parameters .. */ | |||
| @@ -645,8 +387,7 @@ L40: | |||
| return 0; | |||
| } /* check0_ */ | |||
| /* Subroutine */ int check1_(sfac) | |||
| real *sfac; | |||
| /* Subroutine */ int check1_(real* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -692,14 +433,14 @@ real *sfac; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| extern real snrm2test_(); | |||
| extern real snrm2test_(integer*,real*,integer*); | |||
| static real stemp[1], strue[8]; | |||
| extern /* Subroutine */ int stest_(), sscaltest_(); | |||
| extern real sasumtest_(); | |||
| extern /* Subroutine */ int itest1_(), stest1_(); | |||
| extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*), sscaltest_(integer*,real*,real*,integer*); | |||
| extern real sasumtest_(integer*,real*,integer*); | |||
| extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(real*,real*,real*,real*); | |||
| static real sx[8]; | |||
| static integer np1; | |||
| extern integer isamaxtest_(); | |||
| extern integer isamaxtest_(integer*,real*,integer*); | |||
| static integer len; | |||
| @@ -761,8 +502,7 @@ real *sfac; | |||
| return 0; | |||
| } /* check1_ */ | |||
| /* Subroutine */ int check2_(sfac) | |||
| real *sfac; | |||
| /* Subroutine */ int check2_(real* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -850,12 +590,12 @@ real *sfac; | |||
| /* Local variables */ | |||
| static integer lenx, leny; | |||
| extern real sdottest_(); | |||
| extern real sdottest_(integer*,real*,integer*,real*,integer*); | |||
| static integer i__, j, ksize; | |||
| extern /* Subroutine */ int stest_(), scopytest_(), sswaptest_(), | |||
| saxpytest_(); | |||
| extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), scopytest_(integer*,real*,integer*,real*,integer*), sswaptest_(integer*,real*,integer*,real*,integer*), | |||
| saxpytest_(integer*,real*,real*,integer*,real*,integer*); | |||
| static integer ki; | |||
| extern /* Subroutine */ int stest1_(); | |||
| extern /* Subroutine */ int stest1_(real*,real*,real*,real*); | |||
| static integer kn, mx, my; | |||
| static real sx[7], sy[7], stx[7], sty[7]; | |||
| @@ -936,8 +676,7 @@ real *sfac; | |||
| return 0; | |||
| } /* check2_ */ | |||
| /* Subroutine */ int check3_(sfac) | |||
| real *sfac; | |||
| /* Subroutine */ int check3_(real* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -969,9 +708,9 @@ real *sfac; | |||
| 1.17 }; | |||
| /* Local variables */ | |||
| extern /* Subroutine */ void srottest_(); | |||
| extern /* Subroutine */ void srottest_(integer*,real*,integer*,real*,integer*,real*,real*); | |||
| static integer i__, k, ksize; | |||
| extern /* Subroutine */ int stest_(), srotmtest_(); | |||
| extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), srotmtest_(integer*,real*,integer*,real*,integer*,real*); | |||
| static integer ki, kn; | |||
| static real sx[19], sy[19], sparam[5], stx[19], sty[19]; | |||
| @@ -1042,16 +781,14 @@ real *sfac; | |||
| return 0; | |||
| } /* check3_ */ | |||
| /* Subroutine */ int stest_(len, scomp, strue, ssize, sfac) | |||
| integer *len; | |||
| real *scomp, *strue, *ssize, *sfac; | |||
| /* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize, real* sfac) | |||
| { | |||
| integer i__1; | |||
| real r__1, r__2, r__3, r__4, r__5; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| extern doublereal sdiff_(); | |||
| extern doublereal sdiff_(real*,real*); | |||
| static real sd; | |||
| /* ********************************* STEST ************************** */ | |||
| @@ -1107,11 +844,10 @@ L40: | |||
| } /* stest_ */ | |||
| /* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac) | |||
| real *scomp1, *strue1, *ssize, *sfac; | |||
| /* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac) | |||
| { | |||
| static real scomp[1], strue[1]; | |||
| extern /* Subroutine */ int stest_(); | |||
| extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*); | |||
| /* ************************* STEST1 ***************************** */ | |||
| @@ -1138,8 +874,7 @@ real *scomp1, *strue1, *ssize, *sfac; | |||
| return 0; | |||
| } /* stest1_ */ | |||
| doublereal sdiff_(sa, sb) | |||
| real *sa, *sb; | |||
| doublereal sdiff_(real* sa, real* sb) | |||
| { | |||
| /* System generated locals */ | |||
| real ret_val; | |||
| @@ -1153,8 +888,7 @@ real *sa, *sb; | |||
| return ret_val; | |||
| } /* sdiff_ */ | |||
| /* Subroutine */ int itest1_(icomp, itrue) | |||
| integer *icomp, *itrue; | |||
| /* Subroutine */ int itest1_(integer* icomp, integer* itrue) | |||
| { | |||
| /* Local variables */ | |||
| static integer id; | |||
| @@ -242,255 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| #if 0 | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| /* Common Block Declarations */ | |||
| @@ -521,7 +272,7 @@ static integer c_n1 = -1; | |||
| static integer c__0 = 0; | |||
| static logical c_false = FALSE_; | |||
| /* Main program */ int main() | |||
| /* Main program */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -539,16 +290,20 @@ static logical c_false = FALSE_; | |||
| static logical same; | |||
| static integer ninc, nbet, ntra; | |||
| static logical rewi; | |||
| extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(), | |||
| schk5_(), schk6_(); | |||
| extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len); | |||
| static real a[4225] /* was [65][65] */, g[65]; | |||
| static integer i__, j, n; | |||
| static logical fatal; | |||
| static real x[65], y[65], z__[130]; | |||
| extern doublereal sdiff_(); | |||
| extern doublereal sdiff_(real*, real*); | |||
| static logical trace; | |||
| static integer nidim; | |||
| extern /* Subroutine */ int smvch_(); | |||
| extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static char snaps[32], trans[1]; | |||
| static integer isnum; | |||
| static logical ltest[16]; | |||
| @@ -564,12 +319,12 @@ static logical c_false = FALSE_; | |||
| static logical rorder; | |||
| static integer layout; | |||
| static logical ltestt; | |||
| extern /* Subroutine */ int cs2chke_(); | |||
| extern /* Subroutine */ int cs2chke_(char*, ftnlen); | |||
| static logical tsterr; | |||
| static real alf[7]; | |||
| static integer inc[7], nkb; | |||
| static real bet[7]; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real eps, err; | |||
| char tmpchar; | |||
| @@ -1098,21 +853,7 @@ L240: | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, | |||
| incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *nalf; | |||
| real *alf; | |||
| integer *nbet; | |||
| real *bet; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, real* alf, integer* nbet, real* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1130,24 +871,25 @@ ftnlen sname_len; | |||
| static integer i__, m, n; | |||
| static real alpha; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int smake_(); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int smvch_(); | |||
| extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static logical reset; | |||
| static integer incxs, incys; | |||
| static char trans[1]; | |||
| static integer ia, ib, ic; | |||
| static logical banded; | |||
| static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns; | |||
| extern /* Subroutine */ int csgbmv_(), csgemv_(); | |||
| extern /* Subroutine */ void csgbmv_(integer*, char*, integer*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ void csgemv_(integer*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen); | |||
| static char ctrans[14]; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len); | |||
| static real transl; | |||
| static char transs[1]; | |||
| static integer laa, lda; | |||
| static real als, bls; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| static integer iku, kls, kus; | |||
| @@ -1552,21 +1294,7 @@ L140: | |||
| } /* schk1_ */ | |||
| /* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, | |||
| incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *nalf; | |||
| real *alf; | |||
| integer *nbet; | |||
| real *bet; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, real* alf, integer* nbet, real* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1585,9 +1313,9 @@ ftnlen sname_len; | |||
| static integer i__, k, n; | |||
| static real alpha; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int smake_(); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int smvch_(); | |||
| extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs, incys; | |||
| @@ -1598,13 +1326,14 @@ ftnlen sname_len; | |||
| static logical packed; | |||
| static integer nk, ks, ix, iy, ns, lx, ly; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern /* Subroutine */ int cssbmv_(); | |||
| extern logical lseres_(char* , char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cssbmv_(integer*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen); | |||
| static real transl; | |||
| extern /* Subroutine */ int csspmv_(), cssymv_(); | |||
| extern /* Subroutine */ void csspmv_(integer*, char*, integer*, real*, real*, real*, integer*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ void cssymv_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen); | |||
| static integer laa, lda; | |||
| static real als, bls; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| /* Tests SSYMV, SSBMV and SSPMV. */ | |||
| @@ -2003,17 +1732,7 @@ L130: | |||
| } /* schk2_ */ | |||
| /* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, xt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax; | |||
| real *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* xt, real* g, real* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2034,9 +1753,9 @@ ftnlen sname_len; | |||
| static integer i__, k, n; | |||
| static char diags[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int smake_(); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int smvch_(); | |||
| extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs; | |||
| @@ -2047,14 +1766,17 @@ ftnlen sname_len; | |||
| static integer nk, ks, ix, ns, lx; | |||
| static char ctrans[14]; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern /* Subroutine */ int cstbmv_(); | |||
| extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cstbmv_(integer*, char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static real transl; | |||
| extern /* Subroutine */ int cstbsv_(); | |||
| extern /* Subroutine */ void cstbsv_(integer*, char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static char transs[1]; | |||
| extern /* Subroutine */ int cstpmv_(), cstrmv_(), cstpsv_(), cstrsv_(); | |||
| extern /* Subroutine */ void cstpmv_(integer*, char*, char*, char*, integer*, real*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cstrmv_(integer*, char*, char*, char*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cstpsv_(integer*, char*, char*, char*, integer*, real*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cstrsv_(integer*, char*, char*, char*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer laa, icd, lda, ict, icu; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| /* Tests STRMV, STBMV, STPMV, STRSV, STBSV and STPSV. */ | |||
| @@ -2508,19 +2230,7 @@ L130: | |||
| } /* schk3_ */ | |||
| /* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| real *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; | |||
| @@ -2533,17 +2243,18 @@ ftnlen sname_len; | |||
| static integer i__, j, m, n; | |||
| static real alpha, w[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int smake_(), csger_(); | |||
| /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void csger_(integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, integer*); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int smvch_(); | |||
| extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static logical reset; | |||
| static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern logical lseres_(char* , char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| static real transl; | |||
| static integer laa, lda; | |||
| static real als; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| /* Tests SGER. */ | |||
| @@ -2848,19 +2559,7 @@ L150: | |||
| } /* schk4_ */ | |||
| /* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| real *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2880,25 +2579,25 @@ ftnlen sname_len; | |||
| static integer i__, j, n; | |||
| static real alpha, w[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int smake_(); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int smvch_(); | |||
| extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs; | |||
| extern /* Subroutine */ int csspr_(); | |||
| extern /* Subroutine */ void csspr_(integer*, char*, integer*, real*, real*, integer*, real*, ftnlen); | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ int cssyr_(); | |||
| extern /* Subroutine */ void cssyr_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, ftnlen); | |||
| static integer ia, ja, ic, nc, jj, lj, in; | |||
| static logical packed; | |||
| static integer ix, ns, lx; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| static real transl; | |||
| static integer laa, lda; | |||
| static real als; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| /* Tests SSYR and SSPR. */ | |||
| @@ -3218,19 +2917,7 @@ L130: | |||
| } /* schk5_ */ | |||
| /* Subroutine */ int schk6_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| real *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -3249,26 +2936,26 @@ ftnlen sname_len; | |||
| static integer i__, j, n; | |||
| static real alpha, w[2]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int smake_(); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int smvch_(); | |||
| extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen); | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs, incys; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| static integer ia, ja, ic; | |||
| extern /* Subroutine */ int csspr2_(); | |||
| extern /* Subroutine */ void csspr2_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, ftnlen); | |||
| static integer nc, jj, lj, in; | |||
| static logical packed; | |||
| extern /* Subroutine */ int cssyr2_(); | |||
| extern /* Subroutine */ void cssyr2_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, integer*, ftnlen); | |||
| static integer ix, iy, ns, lx, ly; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len); | |||
| static real transl; | |||
| static integer laa, lda; | |||
| static real als; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| /* Tests SSYR2 and SSPR2. */ | |||
| @@ -3634,26 +3321,14 @@ L170: | |||
| } /* schk6_ */ | |||
| /* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl, | |||
| ku, reset, transl, type_len, uplo_len, diag_len) | |||
| char *type__, *uplo, *diag; | |||
| integer *m, *n; | |||
| real *a; | |||
| integer *nmax; | |||
| real *aa; | |||
| integer *lda, *kl, *ku; | |||
| logical *reset; | |||
| real *transl; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| ftnlen diag_len; | |||
| { | |||
| /* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, integer* kl, integer* ku, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4; | |||
| /* Local variables */ | |||
| static integer ibeg, iend; | |||
| extern doublereal sbeg_(); | |||
| extern doublereal sbeg_(logical*); | |||
| static integer ioff; | |||
| static logical unit; | |||
| static integer i__, j; | |||
| @@ -3879,28 +3554,14 @@ ftnlen diag_len; | |||
| } /* smake_ */ | |||
| /* Subroutine */ int smvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y, | |||
| incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len) | |||
| char *trans; | |||
| integer *m, *n; | |||
| real *alpha, *a; | |||
| integer *nmax; | |||
| real *x; | |||
| integer *incx; | |||
| real *beta, *y; | |||
| integer *incy; | |||
| real *yt, *g, *yy, *eps, *err; | |||
| logical *fatal; | |||
| integer *nout; | |||
| logical *mv; | |||
| ftnlen trans_len; | |||
| /* Subroutine */ int smvch_(char* trans, integer* m, integer* n, real* alpha, real* a, integer* nmax, real* x, integer* incx, real* beta, real* y, integer* incy, real* yt, real* g, real* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2; | |||
| real r__1; | |||
| /* Builtin functions */ | |||
| double sqrt(); | |||
| double sqrt(double); | |||
| /* Local variables */ | |||
| static real erri; | |||
| @@ -4029,9 +3690,7 @@ L70: | |||
| } /* smvch_ */ | |||
| logical lse_(ri, rj, lr) | |||
| real *ri, *rj; | |||
| integer *lr; | |||
| logical lse_(real* ri, real* rj, integer* lr) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1; | |||
| @@ -4076,13 +3735,7 @@ L30: | |||
| } /* lse_ */ | |||
| logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) | |||
| char *type__, *uplo; | |||
| integer *m, *n; | |||
| real *aa, *as; | |||
| integer *lda; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; | |||
| @@ -4169,8 +3822,7 @@ L80: | |||
| } /* lseres_ */ | |||
| doublereal sbeg_(reset) | |||
| logical *reset; | |||
| doublereal sbeg_(logical* reset) | |||
| { | |||
| /* System generated locals */ | |||
| real ret_val; | |||
| @@ -4221,8 +3873,7 @@ L10: | |||
| } /* sbeg_ */ | |||
| doublereal sdiff_(x, y) | |||
| real *x, *y; | |||
| doublereal sdiff_(real* x, real* y) | |||
| { | |||
| /* System generated locals */ | |||
| real ret_val; | |||
| @@ -242,129 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| /* Common Block Declarations */ | |||
| @@ -393,7 +270,7 @@ static logical c_true = TRUE_; | |||
| static integer c__0 = 0; | |||
| static logical c_false = FALSE_; | |||
| /* Main program MAIN__() */ int main() | |||
| /* Main program MAIN__() */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -402,26 +279,25 @@ static logical c_false = FALSE_; | |||
| /* System generated locals */ | |||
| integer i__1, i__2, i__3; | |||
| real r__1; | |||
| /* Builtin functions */ | |||
| integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(), | |||
| e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe(); | |||
| integer f_clos(); | |||
| /* Local variables */ | |||
| static integer nalf, idim[9]; | |||
| static logical same; | |||
| static integer nbet, ntra; | |||
| static logical rewi; | |||
| extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(), | |||
| schk5_(); | |||
| extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen); | |||
| static real c__[4225] /* was [65][65] */, g[65]; | |||
| static integer i__, j, n; | |||
| static logical fatal; | |||
| static real w[130]; | |||
| extern doublereal sdiff_(); | |||
| extern doublereal sdiff_(real*, real*); | |||
| static logical trace; | |||
| static integer nidim; | |||
| extern /* Subroutine */ int smmch_(); | |||
| extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static char snaps[32]; | |||
| static integer isnum; | |||
| static logical ltest[6]; | |||
| @@ -433,9 +309,9 @@ static logical c_false = FALSE_; | |||
| static logical rorder; | |||
| static integer layout; | |||
| static logical ltestt, tsterr; | |||
| extern /* Subroutine */ int cs3chke_(); | |||
| extern /* Subroutine */ void cs3chke_(char*, ftnlen); | |||
| static real alf[7], bet[7]; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real eps, err; | |||
| char tmpchar; | |||
| @@ -899,21 +775,7 @@ L230: | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| real *alf; | |||
| integer *nbet; | |||
| real *bet; | |||
| integer *nmax; | |||
| real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -923,8 +785,6 @@ ftnlen sname_len; | |||
| integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, | |||
| i__3, i__4, i__5, i__6; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static real beta; | |||
| @@ -936,18 +796,17 @@ ftnlen sname_len; | |||
| static logical trana, tranb; | |||
| static integer nargs; | |||
| static logical reset; | |||
| extern /* Subroutine */ void sprcn1_(); | |||
| extern /* Subroutine */ int smake_(); | |||
| extern /* Subroutine */ int smmch_(); | |||
| extern /* Subroutine */ void sprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static integer ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns; | |||
| extern /* Subroutine */ int csgemm_(); | |||
| extern /* Subroutine */ void csgemm_(integer*, char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| static char tranas[1], tranbs[1], transa[1], transb[1]; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern logical lse_(); | |||
| extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc; | |||
| static real als, bls; | |||
| extern logical lse_(); | |||
| static real err; | |||
| /* Tests SGEMM. */ | |||
| @@ -1278,23 +1137,8 @@ L130: | |||
| /* Subroutine */ void sprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k, | |||
| alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *transa, *transb; | |||
| integer *m, *n, *k; | |||
| real *alpha; | |||
| integer *lda, *ldb; | |||
| real *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen transa_len; | |||
| ftnlen transb_len; | |||
| /* Subroutine */ void sprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char crc[14], cta[14], ctb[14]; | |||
| @@ -1324,21 +1168,7 @@ ftnlen transb_len; | |||
| } /* sprcn1_ */ | |||
| /* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| real *alf; | |||
| integer *nbet; | |||
| real *bet; | |||
| integer *nmax; | |||
| real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1349,8 +1179,6 @@ ftnlen sname_len; | |||
| integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, | |||
| i__3, i__4, i__5; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static real beta; | |||
| @@ -1368,15 +1196,15 @@ ftnlen sname_len; | |||
| static char uplos[1]; | |||
| static integer ia, ib, na, nc, im, in, ms, ns; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern /* Subroutine */ int cssymm_(); | |||
| extern void sprcn2_(); | |||
| extern int smake_(); | |||
| extern int smmch_(); | |||
| extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cssymm_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| extern void sprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static integer laa, lbb, lda, lcc, ldb, ldc, ics; | |||
| static real als, bls; | |||
| static integer icu; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| /* Tests SSYMM. */ | |||
| @@ -1685,23 +1513,8 @@ L120: | |||
| } /* schk2_ */ | |||
| /* Subroutine */ void sprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha, | |||
| lda, ldb, beta, ldc, sname_len, side_len, uplo_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *side, *uplo; | |||
| integer *m, *n; | |||
| real *alpha; | |||
| integer *lda, *ldb; | |||
| real *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen side_len; | |||
| ftnlen uplo_len; | |||
| /* Subroutine */ void sprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char cs[14], cu[14], crc[14]; | |||
| @@ -1726,19 +1539,7 @@ ftnlen uplo_len; | |||
| } /* sprcn2_ */ | |||
| /* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__, | |||
| iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| real *alf; | |||
| integer *nmax; | |||
| real *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* ct, real* g, real* c__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1751,8 +1552,6 @@ ftnlen sname_len; | |||
| integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, | |||
| i__3, i__4, i__5; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static char diag[1]; | |||
| @@ -1769,18 +1568,19 @@ ftnlen sname_len; | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ void sprcn3_(); | |||
| extern /* Subroutine */ void sprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, real*, integer*, integer*, ftnlen , ftnlen, ftnlen, ftnlen, ftnlen); | |||
| static integer ia, na, nc, im, in, ms, ns; | |||
| static char tranas[1], transa[1]; | |||
| static real errmax; | |||
| extern int smake_(); | |||
| extern int smmch_(); | |||
| extern logical lseres_(); | |||
| extern /* Subroutine */ int cstrmm_(), cstrsm_(); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cstrmm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cstrsm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); | |||
| static integer laa, icd, lbb, lda, ldb, ics; | |||
| static real als; | |||
| static integer ict, icu; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| /* Tests STRMM and STRSM. */ | |||
| @@ -2155,24 +1955,8 @@ L160: | |||
| } /* schk3_ */ | |||
| /* Subroutine */ void sprcn3_(nout, nc, sname, iorder, side, uplo, transa, | |||
| diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len, | |||
| transa_len, diag_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *side, *uplo, *transa, *diag; | |||
| integer *m, *n; | |||
| real *alpha; | |||
| integer *lda, *ldb; | |||
| ftnlen sname_len; | |||
| ftnlen side_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ void sprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char ca[14], cd[14], cs[14], cu[14], crc[14]; | |||
| @@ -2210,21 +1994,7 @@ ftnlen diag_len; | |||
| } /* sprcn3_ */ | |||
| /* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| real *alf; | |||
| integer *nbet; | |||
| real *bet; | |||
| integer *nmax; | |||
| real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2235,8 +2005,6 @@ ftnlen sname_len; | |||
| integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, | |||
| i__3, i__4, i__5; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static real beta; | |||
| @@ -2253,18 +2021,18 @@ ftnlen sname_len; | |||
| static char trans[1]; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ void sprcn4_(); | |||
| extern /* Subroutine */ int smake_(); | |||
| extern /* Subroutine */ int smmch_(); | |||
| extern /* Subroutine */ void sprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| static char transs[1]; | |||
| extern /* Subroutine */ int cssyrk_(); | |||
| extern /* Subroutine */ void cssyrk_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| static integer laa, lda, lcc, ldc; | |||
| static real als; | |||
| static integer ict, icu; | |||
| extern logical lse_(); | |||
| extern logical lse_(real*, real*, integer*); | |||
| static real err; | |||
| /* Tests SSYRK. */ | |||
| @@ -2575,23 +2343,8 @@ L130: | |||
| } /* schk4_ */ | |||
| /* Subroutine */ void sprcn4_(nout, nc, sname, iorder, uplo, transa, n, k, | |||
| alpha, lda, beta, ldc, sname_len, uplo_len, transa_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *uplo, *transa; | |||
| integer *n, *k; | |||
| real *alpha; | |||
| integer *lda; | |||
| real *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| /* Subroutine */ void sprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char ca[14], cu[14], crc[14]; | |||
| @@ -2619,21 +2372,7 @@ ftnlen transa_len; | |||
| } /* sprcn4_ */ | |||
| /* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs, | |||
| c__, cc, cs, ct, g, w, iorder, sname_len) | |||
| char *sname; | |||
| real *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| real *alf; | |||
| integer *nbet; | |||
| real *bet; | |||
| integer *nmax; | |||
| real *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* ab, real* aa, real* as, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, real* w, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2643,8 +2382,6 @@ ftnlen sname_len; | |||
| /* System generated locals */ | |||
| integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; | |||
| /* Builtin functions */ | |||
| integer f_rew(), s_wsfe(), e_wsfe(), do_fio(); | |||
| /* Local variables */ | |||
| static integer jjab; | |||
| @@ -2663,18 +2400,18 @@ ftnlen sname_len; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| static integer ia, ib; | |||
| extern /* Subroutine */ void sprcn5_(); | |||
| extern /* Subroutine */ void sprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer jc, ma, na, nc, ik, in, jj, lj, ks, ns; | |||
| static real errmax; | |||
| extern logical lseres_(); | |||
| extern int smake_(); | |||
| extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen); | |||
| static char transs[1]; | |||
| static integer laa, lbb, lda, lcc, ldb, ldc; | |||
| static real als; | |||
| static integer ict, icu; | |||
| extern /* Subroutine */ int cssyr2k_(); | |||
| extern logical lse_(); | |||
| extern int smmch_(); | |||
| extern /* Subroutine */ void cssyr2k_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen); | |||
| extern logical lse_(real*, real*, integer*); | |||
| extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static real err; | |||
| /* Tests SSYR2K. */ | |||
| @@ -3037,23 +2774,8 @@ L160: | |||
| } /* schk5_ */ | |||
| /* Subroutine */ void sprcn5_(nout, nc, sname, iorder, uplo, transa, n, k, | |||
| alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *uplo, *transa; | |||
| integer *n, *k; | |||
| real *alpha; | |||
| integer *lda, *ldb; | |||
| real *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| /* Subroutine */ void sprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) | |||
| { | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), do_fio(), e_wsfe(); | |||
| /* Local variables */ | |||
| static char ca[14], cu[14], crc[14]; | |||
| @@ -3081,19 +2803,7 @@ ftnlen transa_len; | |||
| } /* sprcn5_ */ | |||
| /* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset, | |||
| transl, type_len, uplo_len, diag_len) | |||
| char *type__, *uplo, *diag; | |||
| integer *m, *n; | |||
| real *a; | |||
| integer *nmax; | |||
| real *aa; | |||
| integer *lda; | |||
| logical *reset; | |||
| real *transl; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2; | |||
| @@ -3102,7 +2812,7 @@ ftnlen diag_len; | |||
| /* Local variables */ | |||
| static integer ibeg, iend; | |||
| extern doublereal sbeg_(); | |||
| extern doublereal sbeg_(logical*); | |||
| static logical unit; | |||
| static integer i__, j; | |||
| static logical lower, upper, gen, tri, sym; | |||
| @@ -3233,25 +2943,7 @@ ftnlen diag_len; | |||
| } /* smake_ */ | |||
| /* Subroutine */ int smmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb, | |||
| beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv, | |||
| transa_len, transb_len) | |||
| char *transa, *transb; | |||
| integer *m, *n, *kk; | |||
| real *alpha, *a; | |||
| integer *lda; | |||
| real *b; | |||
| integer *ldb; | |||
| real *beta, *c__; | |||
| integer *ldc; | |||
| real *ct, *g, *cc; | |||
| integer *ldcc; | |||
| real *eps, *err; | |||
| logical *fatal; | |||
| integer *nout; | |||
| logical *mv; | |||
| ftnlen transa_len; | |||
| ftnlen transb_len; | |||
| /* Subroutine */ int smmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, real* alpha, real* a, integer* lda, real* b, integer* ldb, real* beta, real* c__, integer* ldc, real* ct, real* g, real* cc, integer* ldcc, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len) | |||
| { | |||
| /* System generated locals */ | |||
| @@ -3260,8 +2952,7 @@ ftnlen transb_len; | |||
| real r__1, r__2; | |||
| /* Builtin functions */ | |||
| double sqrt(); | |||
| integer s_wsfe(), e_wsfe(), do_fio(); | |||
| double sqrt(double); | |||
| /* Local variables */ | |||
| static real erri; | |||
| @@ -3426,9 +3117,7 @@ L150: | |||
| } /* smmch_ */ | |||
| logical lse_(ri, rj, lr) | |||
| real *ri, *rj; | |||
| integer *lr; | |||
| logical lse_(real* ri, real* rj, integer* lr) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1; | |||
| @@ -3475,13 +3164,7 @@ L30: | |||
| } /* lse_ */ | |||
| logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) | |||
| char *type__, *uplo; | |||
| integer *m, *n; | |||
| real *aa, *as; | |||
| integer *lda; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen type_len, ftnlen uplo_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; | |||
| @@ -3572,8 +3255,7 @@ L80: | |||
| } /* lseres_ */ | |||
| doublereal sbeg_(reset) | |||
| logical *reset; | |||
| doublereal sbeg_(logical* reset) | |||
| { | |||
| /* System generated locals */ | |||
| real ret_val; | |||
| @@ -3625,8 +3307,7 @@ L10: | |||
| } /* sbeg_ */ | |||
| doublereal sdiff_(x, y) | |||
| real *x, *y; | |||
| doublereal sdiff_(real* x, real* y) | |||
| { | |||
| /* System generated locals */ | |||
| real ret_val; | |||
| @@ -242,250 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| #if 0 | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| #endif | |||
| /* Common Block Declarations */ | |||
| @@ -502,16 +258,16 @@ static integer c__1 = 1; | |||
| static integer c__5 = 5; | |||
| static doublereal c_b43 = 1.; | |||
| /* Main program */ int main() | |||
| /* Main program */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| static doublereal sfac = 9.765625e-4; | |||
| /* Local variables */ | |||
| extern /* Subroutine */ int check1_(), check2_(); | |||
| extern /* Subroutine */ int check1_(doublereal*), check2_(doublereal*); | |||
| static integer ic; | |||
| extern /* Subroutine */ int header_(); | |||
| extern /* Subroutine */ int header_(void); | |||
| /* Test program for the COMPLEX*16 Level 1 CBLAS. */ | |||
| /* Based upon the original CBLAS test routine together with: */ | |||
| @@ -551,7 +307,7 @@ static doublereal c_b43 = 1.; | |||
| exit(0); | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int header_() | |||
| /* Subroutine */ int header_(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -570,8 +326,7 @@ static doublereal c_b43 = 1.; | |||
| } /* header_ */ | |||
| /* Subroutine */ int check1_(sfac) | |||
| doublereal *sfac; | |||
| /* Subroutine */ int check1_(doublereal* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -623,15 +378,15 @@ doublereal *sfac; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| extern /* Subroutine */ int ctest_(); | |||
| extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*); | |||
| static doublecomplex mwpcs[5], mwpct[5]; | |||
| extern /* Subroutine */ int zscaltest_(), itest1_(), stest1_(); | |||
| extern /* Subroutine */ int zscaltest_(integer*, doublereal*, doublecomplex*, integer*), itest1_(integer*, integer*), stest1_(doublereal*, doublereal*, doublereal*, doublereal*); | |||
| static doublecomplex cx[8]; | |||
| extern doublereal dznrm2test_(); | |||
| extern doublereal dznrm2test_(integer*, doublecomplex*, integer*); | |||
| static integer np1; | |||
| extern /* Subroutine */ int zdscaltest_(); | |||
| extern integer izamaxtest_(); | |||
| extern doublereal dzasumtest_(); | |||
| extern /* Subroutine */ int zdscaltest_(integer*, doublereal*, doublecomplex*, integer*); | |||
| extern integer izamaxtest_(integer*, doublecomplex*, integer*); | |||
| extern doublereal dzasumtest_(integer*, doublecomplex*, integer*); | |||
| static integer len; | |||
| /* .. Parameters .. */ | |||
| @@ -748,8 +503,7 @@ doublereal *sfac; | |||
| return 0; | |||
| } /* check1_ */ | |||
| /* Subroutine */ int check2_(sfac) | |||
| doublereal *sfac; | |||
| /* Subroutine */ int check2_(doublereal* sfac) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -834,14 +588,14 @@ doublereal *sfac; | |||
| /* Local variables */ | |||
| static doublecomplex cdot[1]; | |||
| static integer lenx, leny, i__; | |||
| extern /* Subroutine */ int ctest_(); | |||
| extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*); | |||
| static integer ksize; | |||
| static doublecomplex ztemp; | |||
| extern /* Subroutine */ int zdotctest_(), zcopytest_(); | |||
| extern /* Subroutine */ int zdotctest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zcopytest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*); | |||
| static integer ki; | |||
| extern /* Subroutine */ int zdotutest_(), zswaptest_(); | |||
| extern /* Subroutine */ int zdotutest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zswaptest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*); | |||
| static integer kn; | |||
| extern /* Subroutine */ int zaxpytest_(); | |||
| extern /* Subroutine */ int zaxpytest_(integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*); | |||
| static doublecomplex cx[7], cy[7]; | |||
| static integer mx, my; | |||
| @@ -923,20 +677,18 @@ doublereal *sfac; | |||
| return 0; | |||
| } /* check2_ */ | |||
| /* Subroutine */ int stest_(len, scomp, strue, ssize, sfac) | |||
| integer *len; | |||
| doublereal *scomp, *strue, *ssize, *sfac; | |||
| /* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1; | |||
| doublereal d__1, d__2, d__3, d__4, d__5; | |||
| /* Builtin functions */ | |||
| integer s_wsfe(), e_wsfe(), do_fio(); | |||
| integer s_wsfe(void), e_wsfe(void), do_fio(void); | |||
| /* Local variables */ | |||
| static integer i__; | |||
| extern doublereal sdiff_(); | |||
| extern doublereal sdiff_(doublereal*, doublereal*); | |||
| static doublereal sd; | |||
| /* ********************************* STEST ************************** */ | |||
| @@ -992,11 +744,10 @@ L40: | |||
| } /* stest_ */ | |||
| /* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac) | |||
| doublereal *scomp1, *strue1, *ssize, *sfac; | |||
| /* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac) | |||
| { | |||
| static doublereal scomp[1], strue[1]; | |||
| extern /* Subroutine */ int stest_(); | |||
| extern /* Subroutine */ int stest_(integer*,doublereal*, doublereal*, doublereal*, doublereal*); | |||
| /* ************************* STEST1 ***************************** */ | |||
| @@ -1023,8 +774,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac; | |||
| return 0; | |||
| } /* stest1_ */ | |||
| doublereal sdiff_(sa, sb) | |||
| doublereal *sa, *sb; | |||
| doublereal sdiff_(doublereal* sa, doublereal* sb) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal ret_val; | |||
| @@ -1038,10 +788,7 @@ doublereal *sa, *sb; | |||
| return ret_val; | |||
| } /* sdiff_ */ | |||
| /* Subroutine */ int ctest_(len, ccomp, ctrue, csize, sfac) | |||
| integer *len; | |||
| doublecomplex *ccomp, *ctrue, *csize; | |||
| doublereal *sfac; | |||
| /* Subroutine */ int ctest_(integer* len, doublecomplex* ccomp, doublecomplex* ctrue, doublecomplex* csize, doublereal* sfac) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1, i__2; | |||
| @@ -1049,7 +796,7 @@ doublereal *sfac; | |||
| /* Local variables */ | |||
| static integer i__; | |||
| static doublereal scomp[20], ssize[20], strue[20]; | |||
| extern /* Subroutine */ int stest_(); | |||
| extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*); | |||
| /* **************************** CTEST ***************************** */ | |||
| @@ -1087,8 +834,7 @@ doublereal *sfac; | |||
| return 0; | |||
| } /* ctest_ */ | |||
| /* Subroutine */ int itest1_(icomp, itrue) | |||
| integer *icomp, *itrue; | |||
| /* Subroutine */ int itest1_(integer* icomp, integer* itrue) | |||
| { | |||
| static integer id; | |||
| @@ -242,129 +242,6 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| /* Common Block Declarations */ | |||
| @@ -396,7 +273,7 @@ static integer c_n1 = -1; | |||
| static integer c__0 = 0; | |||
| static logical c_false = FALSE_; | |||
| /* Main program */ int main() | |||
| /* Main program */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -414,19 +291,23 @@ static logical c_false = FALSE_; | |||
| static logical same; | |||
| static integer ninc, nbet, ntra; | |||
| static logical rewi; | |||
| extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(), | |||
| zchk5_(), zchk6_(); | |||
| extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); | |||
| static doublecomplex a[4225] /* was [65][65] */; | |||
| static doublereal g[65]; | |||
| static integer i__, j; | |||
| extern doublereal ddiff_(); | |||
| extern doublereal ddiff_(doublereal*, doublereal*); | |||
| static integer n; | |||
| static logical fatal; | |||
| static doublecomplex x[65], y[65], z__[130]; | |||
| static logical trace; | |||
| static integer nidim; | |||
| static char snaps[32], trans[1]; | |||
| extern /* Subroutine */ int zmvch_(); | |||
| extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer isnum; | |||
| static logical ltest[17]; | |||
| static doublecomplex aa[4225]; | |||
| @@ -441,12 +322,12 @@ static logical c_false = FALSE_; | |||
| static logical rorder; | |||
| static integer layout; | |||
| static logical ltestt, tsterr; | |||
| extern /* Subroutine */ int cz2chke_(); | |||
| extern /* Subroutine */ void cz2chke_(char*, ftnlen); | |||
| static doublecomplex alf[7]; | |||
| static integer inc[7], nkb; | |||
| static doublecomplex bet[7]; | |||
| static doublereal eps, err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| char tmpchar; | |||
| /* Test program for the DOUBLE PRECISION COMPLEX Level 2 Blas. */ | |||
| @@ -984,22 +865,7 @@ L240: | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, | |||
| incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *nalf; | |||
| doublecomplex *alf; | |||
| integer *nbet; | |||
| doublecomplex *bet; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| doublereal *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1018,27 +884,27 @@ ftnlen sname_len; | |||
| static integer i__, m, n; | |||
| static doublecomplex alpha; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static integer incxs, incys; | |||
| static char trans[1]; | |||
| extern /* Subroutine */ int zmvch_(); | |||
| extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer ia, ib, ic; | |||
| static logical banded; | |||
| static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns; | |||
| extern /* Subroutine */ int czgbmv_(); | |||
| extern /* Subroutine */ void czgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); | |||
| static char ctrans[14]; | |||
| extern /* Subroutine */ int czgemv_(); | |||
| extern /* Subroutine */ void czgemv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); | |||
| static doublereal errmax; | |||
| static doublecomplex transl; | |||
| extern logical lzeres_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static char transs[1]; | |||
| static integer laa, lda; | |||
| static doublecomplex als, bls; | |||
| static doublereal err; | |||
| static integer iku, kls; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| static integer kus; | |||
| @@ -1451,22 +1317,7 @@ L140: | |||
| } /* zchk1_ */ | |||
| /* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax, | |||
| incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *nalf; | |||
| doublecomplex *alf; | |||
| integer *nbet; | |||
| doublecomplex *bet; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| doublereal *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1486,27 +1337,28 @@ ftnlen sname_len; | |||
| static integer i__, k, n; | |||
| static doublecomplex alpha; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs, incys; | |||
| extern /* Subroutine */ int zmvch_(); | |||
| extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static char uplos[1]; | |||
| static integer ia, ib, ic; | |||
| static logical banded; | |||
| static integer nc, ik, in; | |||
| static logical packed; | |||
| static integer nk, ks, ix, iy, ns, lx, ly; | |||
| extern /* Subroutine */ int czhbmv_(), czhemv_(); | |||
| extern /* Subroutine */ void czhbmv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); | |||
| extern /* Subroutine */ void czhemv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); | |||
| static doublereal errmax; | |||
| static doublecomplex transl; | |||
| extern logical lzeres_(); | |||
| extern /* Subroutine */ int czhpmv_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void czhpmv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen); | |||
| static integer laa, lda; | |||
| static doublecomplex als, bls; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| /* Tests CHEMV, CHBMV and CHPMV. */ | |||
| @@ -1909,19 +1761,7 @@ L130: | |||
| } /* zchk2_ */ | |||
| /* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, xt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax; | |||
| doublecomplex *a, *aa, *as, *x, *xx, *xs, *xt; | |||
| doublereal *g; | |||
| doublecomplex *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* xt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1942,13 +1782,13 @@ ftnlen sname_len; | |||
| static integer i__, k, n; | |||
| static char diags[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs; | |||
| static char trans[1]; | |||
| extern /* Subroutine */ int zmvch_(); | |||
| extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static char uplos[1]; | |||
| static logical banded; | |||
| static integer nc, ik, in; | |||
| @@ -1957,14 +1797,17 @@ ftnlen sname_len; | |||
| static char ctrans[14]; | |||
| static doublereal errmax; | |||
| static doublecomplex transl; | |||
| extern logical lzeres_(); | |||
| extern /* Subroutine */ int cztbmv_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cztbmv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static char transs[1]; | |||
| extern /* Subroutine */ int cztbsv_(), cztpmv_(), cztrmv_(), cztpsv_(), | |||
| cztrsv_(); | |||
| extern /* Subroutine */ void cztbsv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cztpmv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cztpsv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cztrmv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cztrsv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer laa, icd, lda, ict, icu; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| @@ -2422,21 +2265,7 @@ L130: | |||
| } /* zchk3_ */ | |||
| /* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublecomplex *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| doublereal *g; | |||
| doublecomplex *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; | |||
| @@ -2450,21 +2279,21 @@ ftnlen sname_len; | |||
| static integer i__, j, m, n; | |||
| static doublecomplex alpha, w[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static integer incxs, incys; | |||
| extern /* Subroutine */ int zmvch_(); | |||
| extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly; | |||
| extern /* Subroutine */ int czgerc_(); | |||
| extern /* Subroutine */ void czgerc_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*); | |||
| static doublereal errmax; | |||
| extern /* Subroutine */ int czgeru_(); | |||
| extern /* Subroutine */ void czgeru_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*); | |||
| static doublecomplex transl; | |||
| extern logical lzeres_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static integer laa, lda; | |||
| static doublecomplex als; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| @@ -2793,21 +2622,7 @@ L150: | |||
| } /* zchk4_ */ | |||
| /* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublecomplex *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| doublereal *g; | |||
| doublecomplex *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2827,13 +2642,14 @@ ftnlen sname_len; | |||
| static integer i__, j, n; | |||
| static doublecomplex alpha, w[1]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int czher_(); | |||
| extern /* Subroutine */ void czher_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen); | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs; | |||
| extern /* Subroutine */ int czhpr_(), zmvch_(); | |||
| extern /* Subroutine */ void czhpr_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, ftnlen); | |||
| extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| static integer ia, ja, ic, nc, jj, lj, in; | |||
| @@ -2841,10 +2657,10 @@ ftnlen sname_len; | |||
| static integer ix, ns, lx; | |||
| static doublereal ralpha, errmax; | |||
| static doublecomplex transl; | |||
| extern logical lzeres_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static integer laa, lda; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| /* Tests ZHER and ZHPR. */ | |||
| @@ -3167,21 +2983,7 @@ L130: | |||
| } /* zchk5_ */ | |||
| /* Subroutine */ int zchk6_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x, | |||
| xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublecomplex *alf; | |||
| integer *ninc, *inc, *nmax, *incmax; | |||
| doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt; | |||
| doublereal *g; | |||
| doublecomplex *z__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -3201,25 +3003,26 @@ ftnlen sname_len; | |||
| static integer i__, j, n; | |||
| static doublecomplex alpha, w[2]; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| static logical reset; | |||
| static char cuplo[14]; | |||
| static integer incxs, incys; | |||
| extern /* Subroutine */ int zmvch_(); | |||
| extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen); | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| extern /* Subroutine */ int czher2_(), czhpr2_(); | |||
| extern /* Subroutine */ void czher2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen); | |||
| extern /* Subroutine */ void czhpr2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, ftnlen); | |||
| static integer ia, ja, ic, nc, jj, lj, in; | |||
| static logical packed; | |||
| static integer ix, iy, ns, lx, ly; | |||
| static doublereal errmax; | |||
| static doublecomplex transl; | |||
| extern logical lzeres_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static integer laa, lda; | |||
| static doublecomplex als; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| /* Tests ZHER2 and ZHPR2. */ | |||
| @@ -3604,24 +3407,7 @@ L170: | |||
| } /* zchk6_ */ | |||
| /* Subroutine */ int zmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y, | |||
| incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len) | |||
| char *trans; | |||
| integer *m, *n; | |||
| doublecomplex *alpha, *a; | |||
| integer *nmax; | |||
| doublecomplex *x; | |||
| integer *incx; | |||
| doublecomplex *beta, *y; | |||
| integer *incy; | |||
| doublecomplex *yt; | |||
| doublereal *g; | |||
| doublecomplex *yy; | |||
| doublereal *eps, *err; | |||
| logical *fatal; | |||
| integer *nout; | |||
| logical *mv; | |||
| ftnlen trans_len; | |||
| /* Subroutine */ int zmvch_(char* trans, integer* m, integer* n, doublecomplex* alpha, doublecomplex* a, integer* nmax, doublecomplex* x, integer* incx, doublecomplex* beta, doublecomplex* y, integer* incy, doublecomplex* yt, doublereal* g, doublecomplex* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len) | |||
| { | |||
| /* System generated locals */ | |||
| @@ -3819,9 +3605,7 @@ L80: | |||
| } /* zmvch_ */ | |||
| logical lze_(ri, rj, lr) | |||
| doublecomplex *ri, *rj; | |||
| integer *lr; | |||
| logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1, i__2, i__3; | |||
| @@ -3868,13 +3652,7 @@ L30: | |||
| } /* lze_ */ | |||
| logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) | |||
| char *type__, *uplo; | |||
| integer *m, *n; | |||
| doublecomplex *aa, *as; | |||
| integer *lda; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex* aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; | |||
| @@ -3967,9 +3745,7 @@ L80: | |||
| } /* lzeres_ */ | |||
| /* Double Complex */ VOID zbeg_( ret_val, reset) | |||
| doublecomplex * ret_val; | |||
| logical *reset; | |||
| /* Double Complex */ VOID zbeg_( doublecomplex* ret_val, logical* reset) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal d__1, d__2; | |||
| @@ -4030,8 +3806,7 @@ L10: | |||
| } /* zbeg_ */ | |||
| doublereal ddiff_(x, y) | |||
| doublereal *x, *y; | |||
| doublereal ddiff_(doublereal* x, doublereal* y) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal ret_val; | |||
| @@ -4051,19 +3826,7 @@ doublereal *x, *y; | |||
| } /* ddiff_ */ | |||
| /* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl, | |||
| ku, reset, transl, type_len, uplo_len, diag_len) | |||
| char *type__, *uplo, *diag; | |||
| integer *m, *n; | |||
| doublecomplex *a; | |||
| integer *nmax; | |||
| doublecomplex *aa; | |||
| integer *lda, *kl, *ku; | |||
| logical *reset; | |||
| doublecomplex *transl; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4; | |||
| @@ -4072,7 +3835,7 @@ ftnlen diag_len; | |||
| /* Local variables */ | |||
| static integer ibeg, iend, ioff; | |||
| extern /* Double Complex */ VOID zbeg_(); | |||
| extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*); | |||
| static logical unit; | |||
| static integer i__, j; | |||
| static logical lower; | |||
| @@ -22,14 +22,11 @@ typedef double doublereal; | |||
| typedef struct { real r, i; } complex; | |||
| typedef struct { doublereal r, i; } doublecomplex; | |||
| #ifdef _MSC_VER | |||
| static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} | |||
| static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} | |||
| static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} | |||
| static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} | |||
| #else | |||
| static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
| static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
| static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
| static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
| #endif | |||
| #define pCf(z) (*_pCf(z)) | |||
| @@ -242,124 +239,7 @@ typedef struct Namelist Namelist; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| #if 0 | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| #endif | |||
| /* Common Block Declarations */ | |||
| @@ -388,7 +268,7 @@ static logical c_true = TRUE_; | |||
| static integer c__0 = 0; | |||
| static logical c_false = FALSE_; | |||
| /* Main program MAIN__() */ int main() | |||
| /* Main program MAIN__() */ int main(void) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -400,26 +280,29 @@ static logical c_false = FALSE_; | |||
| doublereal d__1; | |||
| /* Builtin functions */ | |||
| integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(), | |||
| e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe(); | |||
| integer s_rsle(void), do_lio(void), e_rsle(void), f_open(void), s_wsfe(void), do_fio(void), | |||
| e_wsfe(void), s_wsle(void), e_wsle(void), s_rsfe(void), e_rsfe(void); | |||
| /* Local variables */ | |||
| static integer nalf, idim[9]; | |||
| static logical same; | |||
| static integer nbet, ntra; | |||
| static logical rewi; | |||
| extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(), | |||
| zchk5_(); | |||
| extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen); | |||
| extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen); | |||
| static doublecomplex c__[4225] /* was [65][65] */; | |||
| static doublereal g[65]; | |||
| static integer i__, j; | |||
| extern doublereal ddiff_(); | |||
| extern doublereal ddiff_(doublereal*, doublereal*); | |||
| static integer n; | |||
| static logical fatal; | |||
| static doublecomplex w[130]; | |||
| static logical trace; | |||
| static integer nidim; | |||
| extern /* Subroutine */ int zmmch_(); | |||
| extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static char snaps[32]; | |||
| static integer isnum; | |||
| static logical ltest[9]; | |||
| @@ -431,10 +314,10 @@ static logical c_false = FALSE_; | |||
| static logical rorder; | |||
| static integer layout; | |||
| static logical ltestt, tsterr; | |||
| extern /* Subroutine */ int cz3chke_(); | |||
| extern /* Subroutine */ int cz3chke_(char*, ftnlen); | |||
| static doublecomplex alf[7], bet[7]; | |||
| static doublereal eps, err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| char tmpchar; | |||
| /* Test program for the COMPLEX*16 Level 3 Blas. */ | |||
| @@ -924,22 +807,7 @@ L230: | |||
| } /* MAIN__ */ | |||
| /* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublecomplex *alf; | |||
| integer *nbet; | |||
| doublecomplex *bet; | |||
| integer *nmax; | |||
| doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct; | |||
| doublereal *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -956,21 +824,21 @@ ftnlen sname_len; | |||
| static integer i__, k, m, n; | |||
| static doublecomplex alpha; | |||
| static logical isame[13], trana, tranb; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int zmmch_(); | |||
| extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical reset; | |||
| static integer ia, ib; | |||
| extern /* Subroutine */ int zprcn1_(); | |||
| extern /* Subroutine */ int zprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer ma, mb, na, nb, nc, ik, im, in, ks, ms, ns; | |||
| extern /* Subroutine */ int czgemm_(); | |||
| extern /* Subroutine */ void czgemm_(integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static char tranas[1], tranbs[1], transa[1], transb[1]; | |||
| static doublereal errmax; | |||
| extern logical lzeres_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc; | |||
| static doublecomplex als, bls; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| /* Tests ZGEMM. */ | |||
| @@ -1313,20 +1181,7 @@ L130: | |||
| } /* zchk1_ */ | |||
| /* Subroutine */ int zprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k, | |||
| alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *transa, *transb; | |||
| integer *m, *n, *k; | |||
| doublecomplex *alpha; | |||
| integer *lda, *ldb; | |||
| doublecomplex *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen transa_len; | |||
| ftnlen transb_len; | |||
| /* Subroutine */ int zprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len) | |||
| { | |||
| /* Local variables */ | |||
| static char crc[14], cta[14], ctb[14]; | |||
| @@ -1357,22 +1212,7 @@ return 0; | |||
| } /* zprcn1_ */ | |||
| /* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublecomplex *alf; | |||
| integer *nbet; | |||
| doublecomplex *bet; | |||
| integer *nmax; | |||
| doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct; | |||
| doublereal *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1394,23 +1234,23 @@ ftnlen sname_len; | |||
| static doublecomplex alpha; | |||
| static logical isame[13]; | |||
| static char sides[1]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int zmmch_(); | |||
| extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical reset; | |||
| static char uplos[1]; | |||
| static integer ia, ib; | |||
| extern /* Subroutine */ int zprcn2_(); | |||
| extern /* Subroutine */ int zprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer na, nc, im, in, ms, ns; | |||
| extern /* Subroutine */ int czhemm_(); | |||
| extern /* Subroutine */ void czhemm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static doublereal errmax; | |||
| extern logical lzeres_(); | |||
| extern /* Subroutine */ int czsymm_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void czsymm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static integer laa, lbb, lda, lcc, ldb, ldc, ics; | |||
| static doublecomplex als, bls; | |||
| static integer icu; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| /* Tests ZHEMM and ZSYMM. */ | |||
| @@ -1737,20 +1577,7 @@ L120: | |||
| } /* zchk2_ */ | |||
| /* Subroutine */ int zprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha, | |||
| lda, ldb, beta, ldc, sname_len, side_len, uplo_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *side, *uplo; | |||
| integer *m, *n; | |||
| doublecomplex *alpha; | |||
| integer *lda, *ldb; | |||
| doublecomplex *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen side_len; | |||
| ftnlen uplo_len; | |||
| /* Subroutine */ int zprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len) | |||
| { | |||
| /* Local variables */ | |||
| static char cs[14], cu[14], crc[14]; | |||
| @@ -1777,21 +1604,7 @@ return 0; | |||
| } /* zprcn2_ */ | |||
| /* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__, | |||
| iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublecomplex *alf; | |||
| integer *nmax; | |||
| doublecomplex *a, *aa, *as, *b, *bb, *bs, *ct; | |||
| doublereal *g; | |||
| doublecomplex *c__; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* ct, doublereal* g, doublecomplex* c__, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -1817,23 +1630,24 @@ ftnlen sname_len; | |||
| static char diags[1]; | |||
| static logical isame[13]; | |||
| static char sides[1]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int zmmch_(); | |||
| extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static logical reset; | |||
| static char uplos[1]; | |||
| static integer ia, na; | |||
| extern /* Subroutine */ int zprcn3_(); | |||
| extern /* Subroutine */ int zprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen); | |||
| static integer nc, im, in, ms, ns; | |||
| static char tranas[1], transa[1]; | |||
| static doublereal errmax; | |||
| extern logical lzeres_(); | |||
| extern /* Subroutine */ int cztrmm_(), cztrsm_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cztrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ void cztrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen); | |||
| static integer laa, icd, lbb, lda, ldb, ics; | |||
| static doublecomplex als; | |||
| static integer ict, icu; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| /* Tests ZTRMM and ZTRSM. */ | |||
| @@ -2227,21 +2041,7 @@ L160: | |||
| } /* zchk3_ */ | |||
| /* Subroutine */ int zprcn3_(nout, nc, sname, iorder, side, uplo, transa, | |||
| diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len, | |||
| transa_len, diag_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *side, *uplo, *transa, *diag; | |||
| integer *m, *n; | |||
| doublecomplex *alpha; | |||
| integer *lda, *ldb; | |||
| ftnlen sname_len; | |||
| ftnlen side_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ int zprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) | |||
| { | |||
| /* Local variables */ | |||
| @@ -2281,22 +2081,7 @@ return 0; | |||
| } /* zprcn3_ */ | |||
| /* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs, | |||
| c__, cc, cs, ct, g, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublecomplex *alf; | |||
| integer *nbet; | |||
| doublecomplex *bet; | |||
| integer *nmax; | |||
| doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct; | |||
| doublereal *g; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2320,30 +2105,30 @@ ftnlen sname_len; | |||
| static doublecomplex alpha; | |||
| static doublereal rbeta; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int zmmch_(); | |||
| extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static doublereal rbets; | |||
| static logical reset; | |||
| static char trans[1]; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| static integer ia, ib, jc, ma, na; | |||
| extern /* Subroutine */ int zprcn4_(); | |||
| extern /* Subroutine */ int zprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer nc; | |||
| extern /* Subroutine */ int zprcn6_(); | |||
| extern /* Subroutine */ int zprcn6_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer ik, in, jj, lj, ks, ns; | |||
| static doublereal ralpha; | |||
| extern /* Subroutine */ int czherk_(); | |||
| extern /* Subroutine */ int czherk_(integer*, char*, char*, integer*, integer*, doublereal*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static doublereal errmax; | |||
| extern logical lzeres_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static char transs[1], transt[1]; | |||
| extern /* Subroutine */ int czsyrk_(); | |||
| extern /* Subroutine */ int czsyrk_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static integer laa, lda, lcc, ldc; | |||
| static doublecomplex als; | |||
| static integer ict, icu; | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| /* Tests ZHERK and ZSYRK. */ | |||
| @@ -2732,20 +2517,7 @@ L130: | |||
| } /* zchk4_ */ | |||
| /* Subroutine */ int zprcn4_(nout, nc, sname, iorder, uplo, transa, n, k, | |||
| alpha, lda, beta, ldc, sname_len, uplo_len, transa_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *uplo, *transa; | |||
| integer *n, *k; | |||
| doublecomplex *alpha; | |||
| integer *lda; | |||
| doublecomplex *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| /* Subroutine */ int zprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) | |||
| { | |||
| /* Local variables */ | |||
| static char ca[14], cu[14], crc[14]; | |||
| @@ -2775,20 +2547,7 @@ return 0; | |||
| /* Subroutine */ int zprcn6_(nout, nc, sname, iorder, uplo, transa, n, k, | |||
| alpha, lda, beta, ldc, sname_len, uplo_len, transa_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *uplo, *transa; | |||
| integer *n, *k; | |||
| doublereal *alpha; | |||
| integer *lda; | |||
| doublereal *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| /* Subroutine */ int zprcn6_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) | |||
| { | |||
| /* Local variables */ | |||
| @@ -2818,23 +2577,7 @@ return 0; | |||
| } /* zprcn6_ */ | |||
| /* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi, | |||
| fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs, | |||
| c__, cc, cs, ct, g, w, iorder, sname_len) | |||
| char *sname; | |||
| doublereal *eps, *thresh; | |||
| integer *nout, *ntra; | |||
| logical *trace, *rewi, *fatal; | |||
| integer *nidim, *idim, *nalf; | |||
| doublecomplex *alf; | |||
| integer *nbet; | |||
| doublecomplex *bet; | |||
| integer *nmax; | |||
| doublecomplex *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct; | |||
| doublereal *g; | |||
| doublecomplex *w; | |||
| integer *iorder; | |||
| ftnlen sname_len; | |||
| /* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* ab, doublecomplex* aa, doublecomplex* as, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, doublecomplex* w, integer* iorder, ftnlen sname_len) | |||
| { | |||
| /* Initialized data */ | |||
| @@ -2857,27 +2600,28 @@ ftnlen sname_len; | |||
| static doublecomplex alpha; | |||
| static doublereal rbeta; | |||
| static logical isame[13]; | |||
| extern /* Subroutine */ int zmake_(); | |||
| extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen); | |||
| static integer nargs; | |||
| extern /* Subroutine */ int zmmch_(); | |||
| extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen); | |||
| static doublereal rbets; | |||
| static logical reset; | |||
| static char trans[1]; | |||
| static logical upper; | |||
| static char uplos[1]; | |||
| static integer ia, ib, jc, ma, na, nc; | |||
| extern /* Subroutine */ int zprcn5_(), zprcn7_(); | |||
| extern /* Subroutine */ int zprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen); | |||
| extern /* Subroutine */ int zprcn7_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen); | |||
| static integer ik, in, jj, lj, ks, ns; | |||
| static doublereal errmax; | |||
| extern logical lzeres_(); | |||
| extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static char transs[1], transt[1]; | |||
| extern /* Subroutine */ int czher2k_(); | |||
| extern /* Subroutine */ int czher2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static integer laa, lbb, lda, lcc, ldb, ldc; | |||
| static doublecomplex als; | |||
| static integer ict, icu; | |||
| extern /* Subroutine */ int czsyr2k_(); | |||
| extern /* Subroutine */ int czsyr2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen); | |||
| static doublereal err; | |||
| extern logical lze_(); | |||
| extern logical lze_(doublecomplex*, doublecomplex*, integer*); | |||
| /* Tests ZHER2K and ZSYR2K. */ | |||
| @@ -3349,20 +3093,7 @@ L160: | |||
| } /* zchk5_ */ | |||
| /* Subroutine */ int zprcn5_(nout, nc, sname, iorder, uplo, transa, n, k, | |||
| alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *uplo, *transa; | |||
| integer *n, *k; | |||
| doublecomplex *alpha; | |||
| integer *lda, *ldb; | |||
| doublecomplex *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| /* Subroutine */ int zprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) | |||
| { | |||
| /* Local variables */ | |||
| static char ca[14], cu[14], crc[14]; | |||
| @@ -3392,20 +3123,7 @@ return 0; | |||
| /* Subroutine */ int zprcn7_(nout, nc, sname, iorder, uplo, transa, n, k, | |||
| alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len) | |||
| integer *nout, *nc; | |||
| char *sname; | |||
| integer *iorder; | |||
| char *uplo, *transa; | |||
| integer *n, *k; | |||
| doublecomplex *alpha; | |||
| integer *lda, *ldb; | |||
| doublereal *beta; | |||
| integer *ldc; | |||
| ftnlen sname_len; | |||
| ftnlen uplo_len; | |||
| ftnlen transa_len; | |||
| /* Subroutine */ int zprcn7_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len) | |||
| { | |||
| /* Local variables */ | |||
| @@ -3435,19 +3153,7 @@ return 0; | |||
| } /* zprcn7_ */ | |||
| /* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset, | |||
| transl, type_len, uplo_len, diag_len) | |||
| char *type__, *uplo, *diag; | |||
| integer *m, *n; | |||
| doublecomplex *a; | |||
| integer *nmax; | |||
| doublecomplex *aa; | |||
| integer *lda; | |||
| logical *reset; | |||
| doublecomplex *transl; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| ftnlen diag_len; | |||
| /* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, i__1, i__2, i__3, i__4; | |||
| @@ -3456,7 +3162,7 @@ ftnlen diag_len; | |||
| /* Local variables */ | |||
| static integer ibeg, iend; | |||
| extern /* Double Complex */ VOID zbeg_(); | |||
| extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*); | |||
| static logical unit; | |||
| static integer i__, j; | |||
| static logical lower, upper; | |||
| @@ -3629,27 +3335,7 @@ ftnlen diag_len; | |||
| } /* zmake_ */ | |||
| /* Subroutine */ int zmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb, | |||
| beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv, | |||
| transa_len, transb_len) | |||
| char *transa, *transb; | |||
| integer *m, *n, *kk; | |||
| doublecomplex *alpha, *a; | |||
| integer *lda; | |||
| doublecomplex *b; | |||
| integer *ldb; | |||
| doublecomplex *beta, *c__; | |||
| integer *ldc; | |||
| doublecomplex *ct; | |||
| doublereal *g; | |||
| doublecomplex *cc; | |||
| integer *ldcc; | |||
| doublereal *eps, *err; | |||
| logical *fatal; | |||
| integer *nout; | |||
| logical *mv; | |||
| ftnlen transa_len; | |||
| ftnlen transb_len; | |||
| /* Subroutine */ int zmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublecomplex* alpha, doublecomplex* a, integer* lda, doublecomplex* b, integer* ldb, doublecomplex* beta, doublecomplex* c__, integer* ldc, doublecomplex* ct, doublereal* g, doublecomplex* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len) | |||
| { | |||
| /* System generated locals */ | |||
| @@ -3658,7 +3344,7 @@ ftnlen transb_len; | |||
| doublereal d__1, d__2, d__3, d__4, d__5, d__6; | |||
| doublecomplex z__1, z__2, z__3, z__4; | |||
| double sqrt(); | |||
| double sqrt(double); | |||
| /* Local variables */ | |||
| static doublereal erri; | |||
| static integer i__, j, k; | |||
| @@ -4031,9 +3717,7 @@ L250: | |||
| } /* zmmch_ */ | |||
| logical lze_(ri, rj, lr) | |||
| doublecomplex *ri, *rj; | |||
| integer *lr; | |||
| logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr) | |||
| { | |||
| /* System generated locals */ | |||
| integer i__1, i__2, i__3; | |||
| @@ -4082,13 +3766,7 @@ L30: | |||
| } /* lze_ */ | |||
| logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len) | |||
| char *type__, *uplo; | |||
| integer *m, *n; | |||
| doublecomplex *aa, *as; | |||
| integer *lda; | |||
| ftnlen type_len; | |||
| ftnlen uplo_len; | |||
| logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex *aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len) | |||
| { | |||
| /* System generated locals */ | |||
| integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; | |||
| @@ -4184,9 +3862,7 @@ L80: | |||
| } /* lzeres_ */ | |||
| /* Double Complex */ VOID zbeg_( ret_val, reset) | |||
| doublecomplex * ret_val; | |||
| logical *reset; | |||
| /* Double Complex */ VOID zbeg_(doublecomplex* ret_val, logical* reset) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal d__1, d__2; | |||
| @@ -4249,8 +3925,7 @@ L10: | |||
| } /* zbeg_ */ | |||
| doublereal ddiff_(x, y) | |||
| doublereal *x, *y; | |||
| doublereal ddiff_(doublereal* x, doublereal* y) | |||
| { | |||
| /* System generated locals */ | |||
| doublereal ret_val; | |||
| @@ -40,7 +40,7 @@ | |||
| #include <stdlib.h> | |||
| #include "common.h" | |||
| int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) { | |||
| int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) { | |||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||
| BLASLONG range[MAX_CPU_NUMBER + 1]; | |||
| @@ -60,7 +60,7 @@ static const int divide_rule[][2] = | |||
| { 1, 61}, { 2, 31}, { 7, 9}, { 8, 8}, | |||
| }; | |||
| int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) { | |||
| int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) { | |||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||
| @@ -40,7 +40,7 @@ | |||
| #include <stdlib.h> | |||
| #include "common.h" | |||
| int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) { | |||
| int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) { | |||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||
| BLASLONG range[MAX_CPU_NUMBER + 1]; | |||
| @@ -42,7 +42,7 @@ | |||
| int CNAME(int mode, | |||
| blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | |||
| int (*function)(), void *sa, void *sb, BLASLONG divM, BLASLONG divN) { | |||
| int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG divM, BLASLONG divN) { | |||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||
| @@ -41,7 +41,7 @@ | |||
| #include <math.h> | |||
| #include "common.h" | |||
| int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) { | |||
| int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) { | |||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||
| BLASLONG range[MAX_CPU_NUMBER + 1]; | |||
| @@ -43,7 +43,7 @@ | |||
| int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, | |||
| void *a, BLASLONG lda, | |||
| void *b, BLASLONG ldb, | |||
| void *c, BLASLONG ldc, int (*function)(), int nthreads){ | |||
| void *c, BLASLONG ldc, int (*function)(void), int nthreads){ | |||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||
| blas_arg_t args [MAX_CPU_NUMBER]; | |||
| @@ -141,7 +141,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha | |||
| int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, | |||
| void *a, BLASLONG lda, | |||
| void *b, BLASLONG ldb, | |||
| void *c, BLASLONG ldc, int (*function)(), int nthreads){ | |||
| void *c, BLASLONG ldc, int (*function)(void), int nthreads){ | |||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||
| blas_arg_t args [MAX_CPU_NUMBER]; | |||
| @@ -93,7 +93,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #endif | |||
| #endif | |||
| extern unsigned int openblas_thread_timeout(); | |||
| extern unsigned int openblas_thread_timeout(void); | |||
| #ifdef SMP_SERVER | |||
| @@ -70,7 +70,7 @@ | |||
| int blas_server_avail = 0; | |||
| int blas_omp_number_max = 0; | |||
| extern int openblas_omp_adaptive_env(); | |||
| extern int openblas_omp_adaptive_env(void); | |||
| static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; | |||
| #ifdef HAVE_C11 | |||
| @@ -79,7 +79,7 @@ static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; | |||
| static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; | |||
| #endif | |||
| static void adjust_thread_buffers() { | |||
| static void adjust_thread_buffers(void) { | |||
| int i=0, j=0; | |||
| @@ -124,8 +124,17 @@ void openblas_set_num_threads(int num_threads) { | |||
| } | |||
| int blas_thread_init(void){ | |||
| if(blas_omp_number_max <= 0) | |||
| blas_omp_number_max = omp_get_max_threads(); | |||
| #if defined(__FreeBSD__) && defined(__clang__) | |||
| extern int openblas_omp_num_threads_env(void); | |||
| if(blas_omp_number_max <= 0) | |||
| blas_omp_number_max= openblas_omp_num_threads_env(); | |||
| if (blas_omp_number_max <= 0) | |||
| blas_omp_number_max=MAX_CPU_NUMBER; | |||
| #else | |||
| blas_omp_number_max = omp_get_max_threads(); | |||
| #endif | |||
| blas_get_cpu_number(); | |||
| @@ -805,7 +805,8 @@ static gotoblas_t *get_coretype(void){ | |||
| } | |||
| return NULL; | |||
| } | |||
| case 0xf: | |||
| break; | |||
| case 0xf: | |||
| if (model <= 0x2) return &gotoblas_NORTHWOOD; | |||
| return &gotoblas_PRESCOTT; | |||
| } | |||
| @@ -3,7 +3,7 @@ | |||
| extern gotoblas_t gotoblas_POWER6; | |||
| extern gotoblas_t gotoblas_POWER8; | |||
| #if (!defined __GNUC__) || ( __GNUC__ >= 6) | |||
| #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) | |||
| extern gotoblas_t gotoblas_POWER9; | |||
| #endif | |||
| #ifdef HAVE_P10_SUPPORT | |||
| @@ -20,14 +20,14 @@ static char *corename[] = { | |||
| "POWER10" | |||
| }; | |||
| #define NUM_CORETYPES 4 | |||
| #define NUM_CORETYPES 5 | |||
| char *gotoblas_corename(void) { | |||
| #ifndef C_PGI | |||
| if (gotoblas == &gotoblas_POWER6) return corename[1]; | |||
| #endif | |||
| if (gotoblas == &gotoblas_POWER8) return corename[2]; | |||
| #if (!defined __GNUC__) || ( __GNUC__ >= 6) | |||
| #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) | |||
| if (gotoblas == &gotoblas_POWER9) return corename[3]; | |||
| #endif | |||
| #ifdef HAVE_P10_SUPPORT | |||
| @@ -36,13 +36,37 @@ char *gotoblas_corename(void) { | |||
| return corename[0]; | |||
| } | |||
| #if defined(__clang__) | |||
| static int __builtin_cpu_supports(char* arg) | |||
| #define CPU_UNKNOWN 0 | |||
| #define CPU_POWER5 5 | |||
| #define CPU_POWER6 6 | |||
| #define CPU_POWER8 8 | |||
| #define CPU_POWER9 9 | |||
| #define CPU_POWER10 10 | |||
| #ifdef _AIX | |||
| #include <sys/systemcfg.h> | |||
| static int cpuid(void) | |||
| { | |||
| return 0; | |||
| } | |||
| int arch = _system_configuration.implementation; | |||
| #ifdef POWER_6 | |||
| if (arch == POWER_6) return CPU_POWER6; | |||
| #endif | |||
| #ifdef POWER_7 | |||
| else if (arch == POWER_7) return CPU_POWER6; | |||
| #endif | |||
| #ifdef POWER_8 | |||
| else if (arch == POWER_8) return CPU_POWER8; | |||
| #endif | |||
| #ifdef POWER_9 | |||
| else if (arch == POWER_9) return CPU_POWER9; | |||
| #endif | |||
| #ifdef POWER_10 | |||
| else if (arch == POWER_10) return CPU_POWER10; | |||
| #endif | |||
| return CPU_UNKNOWN; | |||
| } | |||
| #else | |||
| #if defined(C_PGI) || defined(__clang__) | |||
| /* | |||
| * NV HPC compilers do not yet implement __builtin_cpu_is(). | |||
| @@ -53,21 +77,12 @@ static int __builtin_cpu_supports(char* arg) | |||
| * what was requested. | |||
| */ | |||
| #include <string.h> | |||
| /* | |||
| * Define POWER processor version table. | |||
| * | |||
| * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time | |||
| */ | |||
| #define CPU_UNKNOWN 0 | |||
| #define CPU_POWER5 5 | |||
| #define CPU_POWER6 6 | |||
| #define CPU_POWER8 8 | |||
| #define CPU_POWER9 9 | |||
| #define CPU_POWER10 10 | |||
| static struct { | |||
| uint32_t pvr_mask; | |||
| uint32_t pvr_value; | |||
| @@ -160,7 +175,8 @@ static struct { | |||
| }, | |||
| }; | |||
| static int __builtin_cpu_is(const char *cpu) { | |||
| static int cpuid(void) | |||
| { | |||
| int i; | |||
| uint32_t pvr; | |||
| uint32_t cpu_type; | |||
| @@ -178,15 +194,46 @@ static int __builtin_cpu_is(const char *cpu) { | |||
| pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); | |||
| #endif | |||
| cpu_type = pvrPOWER[i].cpu_type; | |||
| return (int)(cpu_type); | |||
| } | |||
| #endif /* C_PGI */ | |||
| #endif /* _AIX */ | |||
| if (!strcmp(cpu, "power8")) | |||
| return cpu_type == CPU_POWER8; | |||
| if (!strcmp(cpu, "power9")) | |||
| return cpu_type == CPU_POWER9; | |||
| return 0; | |||
| #ifndef __BUILTIN_CPU_SUPPORTS__ | |||
| #include <string.h> | |||
| #if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_is)) | |||
| static int __builtin_cpu_is(const char *arg) | |||
| { | |||
| static int ipinfo = -1; | |||
| if (ipinfo < 0) { | |||
| ipinfo = cpuid(); | |||
| } | |||
| #ifdef HAVE_P10_SUPPORT | |||
| if (ipinfo == CPU_POWER10) { | |||
| if (!strcmp(arg, "power10")) return 1; | |||
| } | |||
| #endif | |||
| if (ipinfo == CPU_POWER9) { | |||
| if (!strcmp(arg, "power9")) return 1; | |||
| } else if (ipinfo == CPU_POWER8) { | |||
| if (!strcmp(arg, "power8")) return 1; | |||
| #ifndef C_PGI | |||
| } else if (ipinfo == CPU_POWER6) { | |||
| if (!strcmp(arg, "power6")) return 1; | |||
| #endif | |||
| } | |||
| return 0; | |||
| } | |||
| #endif | |||
| #endif /* C_PGI */ | |||
| #if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_supports)) | |||
| static int __builtin_cpu_supports(const char *arg) | |||
| { | |||
| return 0; | |||
| } | |||
| #endif | |||
| #endif | |||
| static gotoblas_t *get_coretype(void) { | |||
| @@ -196,19 +243,23 @@ static gotoblas_t *get_coretype(void) { | |||
| #endif | |||
| if (__builtin_cpu_is("power8")) | |||
| return &gotoblas_POWER8; | |||
| #if (!defined __GNUC__) || ( __GNUC__ >= 6) | |||
| #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) | |||
| if (__builtin_cpu_is("power9")) | |||
| return &gotoblas_POWER9; | |||
| #endif | |||
| #ifdef HAVE_P10_SUPPORT | |||
| #if defined(_AIX) || defined(__clang__) | |||
| if (__builtin_cpu_is("power10")) | |||
| #else | |||
| if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) | |||
| #endif | |||
| return &gotoblas_POWER10; | |||
| #endif | |||
| /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ | |||
| #if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) | |||
| if (__builtin_cpu_is("power10")) | |||
| return &gotoblas_POWER9; | |||
| #endif | |||
| #endif | |||
| return NULL; | |||
| } | |||
| @@ -233,7 +284,7 @@ static gotoblas_t *force_coretype(char * coretype) { | |||
| case 1: return (&gotoblas_POWER6); | |||
| #endif | |||
| case 2: return (&gotoblas_POWER8); | |||
| #if (!defined __GNUC__) || ( __GNUC__ >= 6) | |||
| #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__) | |||
| case 3: return (&gotoblas_POWER9); | |||
| #endif | |||
| #ifdef HAVE_P10_SUPPORT | |||
| @@ -13,7 +13,7 @@ extern gotoblas_t gotoblas_Z14; | |||
| #define NUM_CORETYPES 4 | |||
| extern int openblas_verbose(); | |||
| extern int openblas_verbose(void); | |||
| extern void openblas_warning(int verbose, const char* msg); | |||
| char* gotoblas_corename(void) { | |||
| @@ -73,6 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include "common.h" | |||
| #define NEW_BUFFERS 512 | |||
| #ifndef likely | |||
| #ifdef __GNUC__ | |||
| #define likely(x) __builtin_expect(!!(x), 1) | |||
| @@ -426,9 +427,9 @@ int goto_get_num_procs (void) { | |||
| return blas_cpu_number; | |||
| } | |||
| static void blas_memory_init(); | |||
| static void blas_memory_init(void); | |||
| void openblas_fork_handler() | |||
| void openblas_fork_handler(void) | |||
| { | |||
| // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is | |||
| // built with "make USE_OPENMP=0". | |||
| @@ -445,9 +446,9 @@ void openblas_fork_handler() | |||
| #endif | |||
| } | |||
| extern int openblas_num_threads_env(); | |||
| extern int openblas_goto_num_threads_env(); | |||
| extern int openblas_omp_num_threads_env(); | |||
| extern int openblas_num_threads_env(void); | |||
| extern int openblas_goto_num_threads_env(void); | |||
| extern int openblas_omp_num_threads_env(void); | |||
| int blas_get_cpu_number(void){ | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU) | |||
| @@ -591,7 +592,7 @@ static BLASULONG key_lock = 0UL; | |||
| #endif | |||
| /* Returns a pointer to the start of the per-thread memory allocation data */ | |||
| static __inline struct alloc_t ** get_memory_table() { | |||
| static __inline struct alloc_t ** get_memory_table(void) { | |||
| #if defined(SMP) | |||
| LOCK_COMMAND(&key_lock); | |||
| lsk=local_storage_key; | |||
| @@ -1144,7 +1145,7 @@ static void blas_memory_cleanup(void* ptr){ | |||
| } | |||
| } | |||
| static void blas_memory_init(){ | |||
| static void blas_memory_init(void){ | |||
| #if defined(SMP) | |||
| # if defined(OS_WINDOWS) | |||
| local_storage_key = TlsAlloc(); | |||
| @@ -1501,7 +1502,7 @@ static void gotoblas_memory_init(void) { | |||
| /* Initialization for all function; this function should be called before main */ | |||
| static int gotoblas_initialized = 0; | |||
| extern void openblas_read_env(); | |||
| extern void openblas_read_env(void); | |||
| void CONSTRUCTOR gotoblas_init(void) { | |||
| @@ -1998,7 +1999,7 @@ int goto_get_num_procs (void) { | |||
| return blas_cpu_number; | |||
| } | |||
| void openblas_fork_handler() | |||
| void openblas_fork_handler(void) | |||
| { | |||
| // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is | |||
| // built with "make USE_OPENMP=0". | |||
| @@ -2015,9 +2016,9 @@ void openblas_fork_handler() | |||
| #endif | |||
| } | |||
| extern int openblas_num_threads_env(); | |||
| extern int openblas_goto_num_threads_env(); | |||
| extern int openblas_omp_num_threads_env(); | |||
| extern int openblas_num_threads_env(void); | |||
| extern int openblas_goto_num_threads_env(void); | |||
| extern int openblas_omp_num_threads_env(void); | |||
| int blas_get_cpu_number(void){ | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU) | |||
| @@ -2897,7 +2898,7 @@ void *blas_memory_alloc(int procpos){ | |||
| #endif | |||
| position ++; | |||
| } while (position < 512+NUM_BUFFERS); | |||
| } while (position < NEW_BUFFERS + NUM_BUFFERS); | |||
| } | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| @@ -3012,11 +3013,12 @@ void *blas_memory_alloc(int procpos){ | |||
| if (memory_overflowed) goto terminate; | |||
| fprintf(stderr,"OpenBLAS warning: precompiled NUM_THREADS exceeded, adding auxiliary array for thread metadata.\n"); | |||
| fprintf(stderr,"To avoid this warning, please rebuild your copy of OpenBLAS with a larger NUM_THREADS setting\n"); | |||
| fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", NUM_BUFFERS); | |||
| fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", MAX_CPU_NUMBER); | |||
| memory_overflowed=1; | |||
| new_release_info = (struct release_t*) malloc(512*sizeof(struct release_t)); | |||
| newmemory = (struct newmemstruct*) malloc(512*sizeof(struct newmemstruct)); | |||
| for (i = 0; i < 512; i++) { | |||
| MB; | |||
| new_release_info = (struct release_t*) malloc(NEW_BUFFERS * sizeof(struct release_t)); | |||
| newmemory = (struct newmemstruct*) malloc(NEW_BUFFERS * sizeof(struct newmemstruct)); | |||
| for (i = 0; i < NEW_BUFFERS; i++) { | |||
| newmemory[i].addr = (void *)0; | |||
| #if defined(WHEREAMI) && !defined(USE_OPENMP) | |||
| newmemory[i].pos = -1; | |||
| @@ -3129,12 +3131,12 @@ void blas_memory_free(void *free_area){ | |||
| printf(" Position : %d\n", position); | |||
| #endif | |||
| if (unlikely(memory_overflowed && position >= NUM_BUFFERS)) { | |||
| while ((position < NUM_BUFFERS+512) && (newmemory[position-NUM_BUFFERS].addr != free_area)) | |||
| while ((position < NUM_BUFFERS+NEW_BUFFERS) && (newmemory[position-NUM_BUFFERS].addr != free_area)) | |||
| position++; | |||
| // arm: ensure all writes are finished before other thread takes this memory | |||
| WMB; | |||
| newmemory[position].used = 0; | |||
| if (position - NUM_BUFFERS >= NEW_BUFFERS) goto error; | |||
| newmemory[position-NUM_BUFFERS].used = 0; | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| @@ -3213,7 +3215,7 @@ void blas_shutdown(void){ | |||
| memory[pos].lock = 0; | |||
| } | |||
| if (memory_overflowed) | |||
| for (pos = 0; pos < 512; pos ++){ | |||
| for (pos = 0; pos < NEW_BUFFERS; pos ++){ | |||
| newmemory[pos].addr = (void *)0; | |||
| newmemory[pos].used = 0; | |||
| #if defined(WHEREAMI) && !defined(USE_OPENMP) | |||
| @@ -3337,7 +3339,7 @@ static void gotoblas_memory_init(void) { | |||
| /* Initialization for all function; this function should be called before main */ | |||
| static int gotoblas_initialized = 0; | |||
| extern void openblas_read_env(); | |||
| extern void openblas_read_env(void); | |||
| void CONSTRUCTOR gotoblas_init(void) { | |||
| @@ -288,7 +288,7 @@ int goto_get_num_procs (void) { | |||
| return blas_cpu_number; | |||
| } | |||
| void openblas_fork_handler() | |||
| void openblas_fork_handler(void) | |||
| { | |||
| // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is | |||
| // built with "make USE_OPENMP=0". | |||
| @@ -305,9 +305,9 @@ void openblas_fork_handler() | |||
| #endif | |||
| } | |||
| extern int openblas_num_threads_env(); | |||
| extern int openblas_goto_num_threads_env(); | |||
| extern int openblas_omp_num_threads_env(); | |||
| extern int openblas_num_threads_env(void); | |||
| extern int openblas_goto_num_threads_env(void); | |||
| extern int openblas_omp_num_threads_env(void); | |||
| int blas_get_cpu_number(void){ | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| @@ -41,15 +41,15 @@ static int openblas_env_goto_num_threads=0; | |||
| static int openblas_env_omp_num_threads=0; | |||
| static int openblas_env_omp_adaptive=0; | |||
| int openblas_verbose() { return openblas_env_verbose;} | |||
| unsigned int openblas_thread_timeout() { return openblas_env_thread_timeout;} | |||
| int openblas_block_factor() { return openblas_env_block_factor;} | |||
| int openblas_num_threads_env() { return openblas_env_openblas_num_threads;} | |||
| int openblas_goto_num_threads_env() { return openblas_env_goto_num_threads;} | |||
| int openblas_omp_num_threads_env() { return openblas_env_omp_num_threads;} | |||
| int openblas_omp_adaptive_env() { return openblas_env_omp_adaptive;} | |||
| void openblas_read_env() { | |||
| int openblas_verbose(void) { return openblas_env_verbose;} | |||
| unsigned int openblas_thread_timeout(void) { return openblas_env_thread_timeout;} | |||
| int openblas_block_factor(void) { return openblas_env_block_factor;} | |||
| int openblas_num_threads_env(void) { return openblas_env_openblas_num_threads;} | |||
| int openblas_goto_num_threads_env(void) { return openblas_env_goto_num_threads;} | |||
| int openblas_omp_num_threads_env(void) { return openblas_env_omp_num_threads;} | |||
| int openblas_omp_adaptive_env(void) { return openblas_env_omp_adaptive;} | |||
| void openblas_read_env(void) { | |||
| int ret=0; | |||
| env_var_t p; | |||
| if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p); | |||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include "common.h" | |||
| extern int openblas_verbose(); | |||
| extern int openblas_verbose(void); | |||
| void openblas_warning(int verbose, const char * msg) { | |||
| int current_verbose; | |||
| @@ -69,13 +69,13 @@ static char* openblas_config_str="" | |||
| ; | |||
| #ifdef DYNAMIC_ARCH | |||
| char *gotoblas_corename(); | |||
| char *gotoblas_corename(void); | |||
| #endif | |||
| static char tmp_config_str[256]; | |||
| int openblas_get_parallel(); | |||
| int openblas_get_parallel(void); | |||
| char* CNAME() { | |||
| char* CNAME(void) { | |||
| char tmpstr[20]; | |||
| strcpy(tmp_config_str, openblas_config_str); | |||
| #ifdef DYNAMIC_ARCH | |||
| @@ -90,7 +90,7 @@ char tmpstr[20]; | |||
| } | |||
| char* openblas_get_corename() { | |||
| char* openblas_get_corename(void) { | |||
| #ifndef DYNAMIC_ARCH | |||
| return CHAR_CORENAME; | |||
| #else | |||
| @@ -42,17 +42,17 @@ static int parallel = 0; | |||
| #ifdef NEEDBUNDERSCORE | |||
| int CNAME() { | |||
| int CNAME(void) { | |||
| return parallel; | |||
| } | |||
| int NAME() { | |||
| int NAME(void) { | |||
| return parallel; | |||
| } | |||
| #else | |||
| //The CNAME and NAME are the same. | |||
| int NAME() { | |||
| int NAME(void) { | |||
| return parallel; | |||
| } | |||
| #endif | |||
| @@ -40,7 +40,7 @@ | |||
| #include <string.h> | |||
| #include "common.h" | |||
| extern int openblas_block_factor(); | |||
| extern int openblas_block_factor(void); | |||
| int get_L2_size(void); | |||
| #define DEFAULT_GEMM_P 128 | |||
| @@ -272,23 +272,23 @@ static : ../$(LIBNAME) | |||
| rm -f goto.$(SUFFIX) | |||
| osx.def : $(GENSYM) ../Makefile.system ../getarch.c | |||
| ./$(GENSYM) osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
| ./$(GENSYM) osx $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
| aix.def : $(GENSYM) ../Makefile.system ../getarch.c | |||
| ./$(GENSYM) aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
| ./$(GENSYM) aix $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
| objcopy.def : $(GENSYM) ../Makefile.system ../getarch.c | |||
| ./$(GENSYM) objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
| ./$(GENSYM) objcopy $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
| objconv.def : $(GENSYM) ../Makefile.system ../getarch.c | |||
| ./$(GENSYM) objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
| ./$(GENSYM) objconv $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
| test : linktest.c | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | |||
| rm -f linktest | |||
| linktest.c : $(GENSYM) ../Makefile.system ../getarch.c | |||
| ./$(GENSYM) linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c | |||
| ./$(GENSYM) linktest $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c | |||
| clean :: | |||
| @rm -f *.def *.dylib __.SYMDEF* *.renamed | |||
| @@ -117,6 +117,9 @@ else | |||
| vendor=PGI | |||
| openmp='-mp' | |||
| ;; | |||
| *xlf*) | |||
| vendor=IBM | |||
| ;; | |||
| *) | |||
| vendor=G77 | |||
| openmp='' | |||
| @@ -155,6 +158,10 @@ else | |||
| *'IBM XL'*) | |||
| vendor=IBM | |||
| openmp='-openmp' | |||
| case "$CC" in *gcc*) | |||
| bu=_ | |||
| ;; | |||
| esac | |||
| ;; | |||
| *NAG*) | |||
| vendor=NAG | |||
| @@ -223,6 +230,10 @@ else | |||
| *ppuf*|*xlf*) | |||
| vendor=IBM | |||
| openmp='-openmp' | |||
| case "$CC" in *gcc*) | |||
| bu=_ | |||
| ;; | |||
| esac | |||
| ;; | |||
| *open64*) | |||
| vendor=OPEN64 | |||
| @@ -362,13 +373,6 @@ if [ -n "$link" ]; then | |||
| ;; | |||
| esac | |||
| case "$flag" in *-lgomp*) | |||
| case "$CC" in *clang*) | |||
| flag="-lomp" | |||
| ;; | |||
| esac | |||
| esac | |||
| case "$flag" in -l*) | |||
| case "$flag" in | |||
| *ibrary*|*gfortranbegin*|*flangmain*|*frtbegin*|*pathfstart*|\ | |||
| @@ -1301,7 +1301,7 @@ xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h | |||
| $(CC) -c $(CFLAGS) $< -o $(@F) | |||
| sbgemmt.$(SUFFIX) sbgemm.$(PSUFFIX) : gemmt.c ../param.h | |||
| sbgemmt.$(SUFFIX) sbgemmt.$(PSUFFIX) : gemmt.c ../param.h | |||
| $(CC) -c $(CFLAGS) $< -o $(@F) | |||
| endif | |||
| @@ -100,27 +100,29 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||
| if ( order == BlasColMajor) | |||
| { | |||
| if ( trans == BlasNoTrans && *ldb < *rows ) info = 8; | |||
| if ( trans == BlasTrans && *ldb < *cols ) info = 8; | |||
| if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 8; | |||
| if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 8; | |||
| } | |||
| if ( order == BlasRowMajor) | |||
| { | |||
| if ( trans == BlasNoTrans && *ldb < *cols ) info = 8; | |||
| if ( trans == BlasTrans && *ldb < *rows ) info = 8; | |||
| if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 8; | |||
| if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 8; | |||
| } | |||
| if ( order == BlasColMajor && *lda < *rows ) info = 7; | |||
| if ( order == BlasRowMajor && *lda < *cols ) info = 7; | |||
| if ( *cols <= 0 ) info = 4; | |||
| if ( *rows <= 0 ) info = 3; | |||
| if ( trans < 0 ) info = 2; | |||
| if ( order < 0 ) info = 1; | |||
| if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7; | |||
| if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7; | |||
| if ( *cols < 0 ) info = 4; | |||
| if ( *rows < 0 ) info = 3; | |||
| if ( trans < 0 ) info = 2; | |||
| if ( order < 0 ) info = 1; | |||
| if (info >= 0) { | |||
| BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| if ((*rows == 0) || (*cols == 0)) return; | |||
| #ifdef NEW_IMATCOPY | |||
| if ( *lda == *ldb ) { | |||
| if ( order == BlasColMajor ) | |||
| @@ -97,7 +97,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint * | |||
| blas_level1_thread(mode, n, k1, k2, dummyalpha, | |||
| a, lda, NULL, 0, ipiv, incx, | |||
| (int(*)())laswp[flag], nthreads); | |||
| (int(*)(void))laswp[flag], nthreads); | |||
| } | |||
| #endif | |||
| @@ -96,7 +96,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint * | |||
| mode = BLAS_SINGLE | BLAS_COMPLEX; | |||
| #endif | |||
| blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)())laswp[flag], nthreads); | |||
| blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)(void))laswp[flag], nthreads); | |||
| } | |||
| #endif | |||
| @@ -90,27 +90,29 @@ void CNAME(enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||
| #endif | |||
| if ( order == BlasColMajor) | |||
| { | |||
| if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9; | |||
| } | |||
| if ( order == BlasRowMajor) | |||
| { | |||
| if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9; | |||
| } | |||
| if ( order == BlasColMajor && *lda < *rows ) info = 7; | |||
| if ( order == BlasRowMajor && *lda < *cols ) info = 7; | |||
| if ( *cols <= 0 ) info = 4; | |||
| if ( *rows <= 0 ) info = 3; | |||
| if ( trans < 0 ) info = 2; | |||
| if ( order < 0 ) info = 1; | |||
| if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7; | |||
| if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7; | |||
| if ( *cols < 0 ) info = 4; | |||
| if ( *rows < 0 ) info = 3; | |||
| if ( trans < 0 ) info = 2; | |||
| if ( order < 0 ) info = 1; | |||
| if (info >= 0) { | |||
| BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| if ((*rows == 0) || (*cols == 0)) return; | |||
| if ( order == BlasColMajor ) | |||
| { | |||
| if ( trans == BlasNoTrans ) | |||
| @@ -66,13 +66,8 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | |||
| c = da / r; | |||
| s = db / r; | |||
| z = ONE; | |||
| if (da != ZERO) { | |||
| if (ada > adb){ | |||
| z = s; | |||
| } else { | |||
| z = ONE / c; | |||
| } | |||
| } | |||
| if (ada > adb) z = s; | |||
| if ((ada <= adb) && (c != ZERO)) z = ONE / c; | |||
| *C = c; | |||
| *S = s; | |||
| @@ -101,31 +101,33 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||
| if ( order == BlasColMajor) | |||
| { | |||
| if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasConj && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasTransConj && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9; | |||
| if ( trans == BlasConj && *ldb < MAX(1,*rows) ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9; | |||
| if ( trans == BlasTransConj && *ldb < MAX(1,*cols) ) info = 9; | |||
| } | |||
| if ( order == BlasRowMajor) | |||
| { | |||
| if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasConj && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasTransConj && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9; | |||
| if ( trans == BlasConj && *ldb < MAX(1,*cols) ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9; | |||
| if ( trans == BlasTransConj && *ldb < MAX(1,*rows) ) info = 9; | |||
| } | |||
| if ( order == BlasColMajor && *lda < *rows ) info = 7; | |||
| if ( order == BlasRowMajor && *lda < *cols ) info = 7; | |||
| if ( *cols <= 0 ) info = 4; | |||
| if ( *rows <= 0 ) info = 3; | |||
| if ( trans < 0 ) info = 2; | |||
| if ( order < 0 ) info = 1; | |||
| if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7; | |||
| if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7; | |||
| if ( *cols < 0 ) info = 4; | |||
| if ( *rows < 0 ) info = 3; | |||
| if ( trans < 0 ) info = 2; | |||
| if ( order < 0 ) info = 1; | |||
| if (info >= 0) { | |||
| BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| if ((*rows == 0) || (*cols == 0)) return; | |||
| #ifdef NEW_IMATCOPY | |||
| if (*lda == *ldb ) { | |||
| if ( order == BlasColMajor ) | |||
| @@ -92,31 +92,33 @@ void CNAME(enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||
| #endif | |||
| if ( order == BlasColMajor) | |||
| { | |||
| if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasConj && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasTransConj && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9; | |||
| if ( trans == BlasConj && *ldb < MAX(1,*rows) ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9; | |||
| if ( trans == BlasTransConj && *ldb < MAX(1,*cols) ) info = 9; | |||
| } | |||
| if ( order == BlasRowMajor) | |||
| { | |||
| if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasConj && *ldb < *cols ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasTransConj && *ldb < *rows ) info = 9; | |||
| if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9; | |||
| if ( trans == BlasConj && *ldb < MAX(1,*cols) ) info = 9; | |||
| if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9; | |||
| if ( trans == BlasTransConj && *ldb < MAX(1,*rows) ) info = 9; | |||
| } | |||
| if ( order == BlasColMajor && *lda < *rows ) info = 7; | |||
| if ( order == BlasRowMajor && *lda < *cols ) info = 7; | |||
| if ( *cols <= 0 ) info = 4; | |||
| if ( *rows <= 0 ) info = 3; | |||
| if ( trans < 0 ) info = 2; | |||
| if ( order < 0 ) info = 1; | |||
| if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7; | |||
| if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7; | |||
| if ( *cols < 0 ) info = 4; | |||
| if ( *rows < 0 ) info = 3; | |||
| if ( trans < 0 ) info = 2; | |||
| if ( order < 0 ) info = 1; | |||
| if (info >= 0) { | |||
| BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| if ((*rows == 0) || (*cols == 0)) return; | |||
| if ( order == BlasColMajor ) | |||
| { | |||
| @@ -30,14 +30,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { | |||
| FLOAT db_r = *(DB+0); | |||
| FLOAT db_i = *(DB+1); | |||
| //long double r; | |||
| FLOAT *r, *S1=(FLOAT *)malloc(2*sizeof(FLOAT)); | |||
| FLOAT *R=(FLOAT *)malloc(2*sizeof(FLOAT)); | |||
| FLOAT S1[2]; | |||
| FLOAT R[2]; | |||
| long double d; | |||
| FLOAT ada = da_r * da_r + da_i * da_i; | |||
| FLOAT adb = db_r * db_r + db_i * db_i; | |||
| FLOAT adart = sqrt( da_r * da_r + da_i * da_i); | |||
| FLOAT adbrt = sqrt( db_r * db_r + db_i * db_i); | |||
| PRINT_DEBUG_NAME; | |||
| @@ -61,16 +59,16 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { | |||
| *(S1 + 0) = *(DB + 0); | |||
| *(S1 + 1) = *(DB + 1) *-1; | |||
| if (da_r == ZERO && da_i == ZERO) { | |||
| *C = ZERO; | |||
| *C = ZERO; | |||
| if (db_r == ZERO) { | |||
| (*DA) = fabsl(db_i); | |||
| *S = *S1 /da_r; | |||
| *(S+1) = *(S1+1) /da_r; | |||
| *S = *S1 /(*DA); | |||
| *(S+1) = *(S1+1) /(*DA); | |||
| return; | |||
| } else if ( db_i == ZERO) { | |||
| *DA = fabsl(db_r); | |||
| *S = *S1 /da_r; | |||
| *(S+1) = *(S1+1) /da_r; | |||
| *S = *S1 /(*DA); | |||
| *(S+1) = *(S1+1) /(*DA); | |||
| return; | |||
| } else { | |||
| long double g1 = MAX( fabsl(db_r), fabsl(db_i)); | |||
| @@ -115,10 +113,13 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { | |||
| } | |||
| } else { | |||
| *C = ada / adahsq; | |||
| if (*C >= safmin) | |||
| if (*C >= safmin) { | |||
| *R = *DA / *C; | |||
| else | |||
| *(R+1) = *(DA+1) / *(C+1); | |||
| } else { | |||
| *R = *DA * (h / adahsq); | |||
| *(R+1) = *(DA+1) * (h / adahsq); | |||
| } | |||
| *S = *S1 * ada / adahsq; | |||
| *(S+1) = *(S1+1) * ada / adahsq; | |||
| } | |||
| @@ -178,4 +179,4 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { | |||
| } | |||
| } | |||
| } | |||
| @@ -5,12 +5,6 @@ endif | |||
| TOPDIR = .. | |||
| include $(TOPDIR)/Makefile.system | |||
| ifeq ($(ARCH), power) | |||
| ifeq ($(C_COMPILER), CLANG) | |||
| override CFLAGS += -fno-integrated-as | |||
| endif | |||
| endif | |||
| AVX2OPT = | |||
| ifeq ($(C_COMPILER), GCC) | |||
| # AVX2 support was added in 4.7.0 | |||
| @@ -61,6 +61,15 @@ ifeq ($(CORE), ZEN) | |||
| USE_TRMM = 1 | |||
| endif | |||
| ifeq ($(OS), AIX) | |||
| M4VERSION := $(shell m4 --version < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?) | |||
| ifeq ($(M4VERSION), 0) | |||
| M4_AIX := m4 -l16384 | |||
| else | |||
| M4_AIX := m4 -B16384 | |||
| endif | |||
| $(info $$var is [${$(M4_AIX)}]) | |||
| endif | |||
| ifeq ($(CORE), POWER8) | |||
| ifeq ($(BINARY64),1) | |||
| USE_TRMM = 1 | |||
| @@ -173,7 +182,7 @@ ifeq ($(BUILD_BFLOAT16),1) | |||
| SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | |||
| SBLASOBJS += \ | |||
| sgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -182,7 +191,7 @@ SBLASOBJS += \ | |||
| strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
| DBLASOBJS += \ | |||
| dgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -198,7 +207,7 @@ QBLASOBJS += \ | |||
| qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS += \ | |||
| ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -246,7 +255,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | |||
| SBLASOBJS += \ | |||
| strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -255,10 +264,7 @@ SBLASOBJS += \ | |||
| strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS += \ | |||
| strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -266,10 +272,7 @@ SBLASOBJS += \ | |||
| strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS += \ | |||
| strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| @@ -391,7 +394,7 @@ XBLASOBJS += \ | |||
| ifeq ($(USE_GEMM3M), 1) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS += \ | |||
| cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | |||
| cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -634,15 +637,7 @@ $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY) | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s | |||
| m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@ | |||
| rm sbgemmotcopy.s sbgemmotcopy_nomacros.s | |||
| else | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) | |||
| @@ -650,14 +645,7 @@ $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s | |||
| m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@ | |||
| rm sbgemmitcopy.s sbgemmitcopy_nomacros.s | |||
| else | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| endif | |||
| endif | |||
| @@ -668,7 +656,7 @@ $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) | |||
| $(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s | |||
| m4 sgemmotcopy.s > sgemmotcopy_nomacros.s | |||
| $(M4_AIX) sgemmotcopy.s > sgemmotcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@ | |||
| rm sgemmotcopy.s sgemmotcopy_nomacros.s | |||
| else | |||
| @@ -684,7 +672,7 @@ $(KDIR)$(SGEMMINCOPYOBJ) : $(KERNELDIR)/$(SGEMMINCOPY) | |||
| $(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s | |||
| m4 sgemmitcopy.s > sgemmitcopy_nomacros.s | |||
| $(M4_AIX) sgemmitcopy.s > sgemmitcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@ | |||
| rm sgemmitcopy.s sgemmitcopy_nomacros.s | |||
| else | |||
| @@ -696,7 +684,7 @@ endif | |||
| $(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s | |||
| m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s | |||
| $(M4_AIX) dgemm_ncopy.s > dgemm_ncopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@ | |||
| rm dgemm_ncopy.s dgemm_ncopy_nomacros.s | |||
| else | |||
| @@ -714,7 +702,7 @@ $(KDIR)$(DGEMMINCOPYOBJ) : $(KERNELDIR)/$(DGEMMINCOPY) | |||
| $(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s | |||
| m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s | |||
| $(M4_AIX) dgemm_itcopy.s > dgemm_itcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@ | |||
| rm dgemm_itcopy.s dgemm_itcopy_nomacros.s | |||
| else | |||
| @@ -757,7 +745,7 @@ $(KDIR)$(CGEMMINCOPYOBJ) : $(KERNELDIR)/$(CGEMMINCOPY) | |||
| $(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -S $< -o - > cgemm_itcopy.s | |||
| m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s | |||
| $(M4_AIX) cgemm_itcopy.s > cgemm_itcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@ | |||
| rm cgemm_itcopy.s cgemm_itcopy_nomacros.s | |||
| else | |||
| @@ -780,7 +768,7 @@ $(KDIR)$(ZGEMMINCOPYOBJ) : $(KERNELDIR)/$(ZGEMMINCOPY) | |||
| $(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s | |||
| m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s | |||
| $(M4_AIX) zgemm_itcopy.s > zgemm_itcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@ | |||
| rm zgemm_itcopy.s zgemm_itcopy_nomacros.s | |||
| else | |||
| @@ -812,7 +800,7 @@ endif | |||
| $(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s | |||
| m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s | |||
| $(M4_AIX) sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@ | |||
| rm sgemm_kernel$(TSUFFIX).s sgemm_kernel$(TSUFFIX)_nomacros.s | |||
| else | |||
| @@ -829,20 +817,13 @@ endif | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| $(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s | |||
| m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@ | |||
| rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s | |||
| else | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| endif | |||
| $(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s | |||
| m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s | |||
| $(M4_AIX) dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@ | |||
| rm dgemm_kernel$(TSUFFIX).s dgemm_kernel$(TSUFFIX)_nomacros.s | |||
| else | |||
| @@ -855,7 +836,7 @@ $(KDIR)qgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEP | |||
| $(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s | |||
| m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s | |||
| $(M4_AIX) cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@ | |||
| rm cgemm_kernel_n.s cgemm_kernel_n_nomacros.s | |||
| else | |||
| @@ -865,7 +846,7 @@ endif | |||
| $(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s | |||
| m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s | |||
| $(M4_AIX) cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@ | |||
| rm cgemm_kernel_l.s cgemm_kernel_l_nomacros.s | |||
| else | |||
| @@ -875,7 +856,7 @@ endif | |||
| $(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s | |||
| m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s | |||
| $(M4_AIX) cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ | |||
| rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s | |||
| else | |||
| @@ -885,7 +866,7 @@ endif | |||
| $(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s | |||
| m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s | |||
| $(M4_AIX) cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@ | |||
| rm cgemm_kernel_b.s cgemm_kernel_b_nomacros.s | |||
| else | |||
| @@ -895,7 +876,7 @@ endif | |||
| $(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s | |||
| m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s | |||
| $(M4_AIX) zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@ | |||
| rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s | |||
| else ifeq ($(CORE),SANDYBRIDGE) | |||
| @@ -907,7 +888,7 @@ endif | |||
| $(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s | |||
| m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s | |||
| $(M4_AIX) zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@ | |||
| rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s | |||
| else ifeq ($(CORE),SANDYBRIDGE) | |||
| @@ -919,7 +900,7 @@ endif | |||
| $(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s | |||
| m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s | |||
| $(M4_AIX) zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@ | |||
| rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s | |||
| else ifeq ($(CORE),SANDYBRIDGE) | |||
| @@ -931,7 +912,7 @@ endif | |||
| $(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s | |||
| m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s | |||
| $(M4_AIX) zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@ | |||
| rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s | |||
| else ifeq ($(CORE),SANDYBRIDGE) | |||
| @@ -957,7 +938,7 @@ ifdef USE_TRMM | |||
| $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s | |||
| m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s | |||
| $(M4_AIX) strmmkernel_ln.s > strmmkernel_ln_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@ | |||
| rm strmmkernel_ln.s strmmkernel_ln_nomacros.s | |||
| else | |||
| @@ -967,7 +948,7 @@ endif | |||
| $(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s | |||
| m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s | |||
| $(M4_AIX) strmmkernel_lt.s > strmmkernel_lt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@ | |||
| rm strmmkernel_lt.s strmmkernel_lt_nomacros.s | |||
| else | |||
| @@ -977,7 +958,7 @@ endif | |||
| $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s | |||
| m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s | |||
| $(M4_AIX) strmmkernel_rn.s > strmmkernel_rn_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@ | |||
| rm strmmkernel_rn.s strmmkernel_rn_nomacros.s | |||
| else | |||
| @@ -987,7 +968,7 @@ endif | |||
| $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s | |||
| m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s | |||
| $(M4_AIX) strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ | |||
| rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s | |||
| else | |||
| @@ -997,7 +978,7 @@ endif | |||
| $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s | |||
| m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s | |||
| $(M4_AIX) dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@ | |||
| rm dtrmm_kernel_ln.s dtrmm_kernel_ln_nomacros.s | |||
| else | |||
| @@ -1007,7 +988,7 @@ endif | |||
| $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s | |||
| m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s | |||
| $(M4_AIX) dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@ | |||
| rm dtrmm_kernel_lt.s dtrmm_kernel_lt_nomacros.s | |||
| else | |||
| @@ -1017,7 +998,7 @@ endif | |||
| $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s | |||
| m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s | |||
| $(M4_AIX) dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@ | |||
| rm dtrmm_kernel_rn.s dtrmm_kernel_rn_nomacros.s | |||
| else | |||
| @@ -1027,7 +1008,7 @@ endif | |||
| $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s | |||
| m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s | |||
| $(M4_AIX) dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@ | |||
| rm dtrmm_kernel_rt.s dtrmm_kernel_rt_nomacros.s | |||
| else | |||
| @@ -1049,7 +1030,7 @@ $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) | |||
| $(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s | |||
| m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s | |||
| $(M4_AIX) ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@ | |||
| rm ctrmm_kernel_ln.s ctrmm_kernel_ln_nomacros.s | |||
| else | |||
| @@ -1059,7 +1040,7 @@ endif | |||
| $(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s | |||
| m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s | |||
| $(M4_AIX) ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@ | |||
| rm ctrmm_kernel_lt.s ctrmm_kernel_lt_nomacros.s | |||
| else | |||
| @@ -1069,7 +1050,7 @@ endif | |||
| $(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s | |||
| m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s | |||
| $(M4_AIX) ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@ | |||
| rm ctrmm_kernel_lr.s ctrmm_kernel_lr_nomacros.s | |||
| else | |||
| @@ -1079,7 +1060,7 @@ endif | |||
| $(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s | |||
| m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s | |||
| $(M4_AIX) ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@ | |||
| rm ctrmm_kernel_lc_nomacros.s ctrmm_kernel_lc.s | |||
| else | |||
| @@ -1089,7 +1070,7 @@ endif | |||
| $(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s | |||
| m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s | |||
| $(M4_AIX) ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@ | |||
| rm ctrmm_kernel_rn.s ctrmm_kernel_rn_nomacros.s | |||
| else | |||
| @@ -1099,7 +1080,7 @@ endif | |||
| $(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s | |||
| m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s | |||
| $(M4_AIX) ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@ | |||
| rm ctrmm_kernel_rt.s ctrmm_kernel_rt_nomacros.s | |||
| else | |||
| @@ -1109,7 +1090,7 @@ endif | |||
| $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s | |||
| m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s | |||
| $(M4_AIX) ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@ | |||
| rm ctrmm_kernel_rr.s ctrmm_kernel_rr_nomacros.s | |||
| else | |||
| @@ -1119,7 +1100,7 @@ endif | |||
| $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s | |||
| m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s | |||
| $(M4_AIX) ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@ | |||
| rm ctrmm_kernel_RC.s ctrmm_kernel_RC_nomacros.s | |||
| else | |||
| @@ -1129,7 +1110,7 @@ endif | |||
| $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s | |||
| m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s | |||
| $(M4_AIX) ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@ | |||
| rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s | |||
| else ifeq ($(CORE), SANDYBRIDGE) | |||
| @@ -1141,7 +1122,7 @@ endif | |||
| $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s | |||
| m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s | |||
| $(M4_AIX) ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@ | |||
| rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s | |||
| else ifeq ($(CORE), SANDYBRIDGE) | |||
| @@ -1153,7 +1134,7 @@ endif | |||
| $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s | |||
| m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s | |||
| $(M4_AIX) ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@ | |||
| rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s | |||
| else ifeq ($(CORE), SANDYBRIDGE) | |||
| @@ -1165,7 +1146,7 @@ endif | |||
| $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s | |||
| m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s | |||
| $(M4_AIX) ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@ | |||
| rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s | |||
| else ifeq ($(CORE), SANDYBRIDGE) | |||
| @@ -1177,7 +1158,7 @@ endif | |||
| $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s | |||
| m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s | |||
| $(M4_AIX) ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@ | |||
| rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s | |||
| else ifeq ($(CORE), SANDYBRIDGE) | |||
| @@ -1189,7 +1170,7 @@ endif | |||
| $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s | |||
| m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s | |||
| $(M4_AIX) ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@ | |||
| rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s | |||
| else ifeq ($(CORE), SANDYBRIDGE) | |||
| @@ -1201,7 +1182,7 @@ endif | |||
| $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s | |||
| m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s | |||
| $(M4_AIX) ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@ | |||
| rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s | |||
| else ifeq ($(CORE), SANDYBRIDGE) | |||
| @@ -1213,7 +1194,7 @@ endif | |||
| $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s | |||
| m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s | |||
| $(M4_AIX) ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@ | |||
| rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s | |||
| else ifeq ($(CORE), SANDYBRIDGE) | |||
| @@ -1235,7 +1216,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) | |||
| $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s | |||
| m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s | |||
| $(M4_AIX) strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ | |||
| rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s | |||
| else | |||
| @@ -1395,7 +1376,7 @@ $(KDIR)dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LN) $(DT | |||
| $(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s | |||
| m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s | |||
| $(M4_AIX) dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s | |||
| $(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@ | |||
| rm dtrsm_kernel_lt.s dtrsm_kernel_lt_nomacros.s | |||
| else | |||
| @@ -2987,7 +2968,7 @@ $(KDIR)cgemm_kernel_l$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMM | |||
| $(KDIR)cgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(PFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s | |||
| m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s | |||
| $(M4_AIX) cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s | |||
| $(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@ | |||
| rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s | |||
| else | |||
| @@ -3033,7 +3014,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) | |||
| $(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s | |||
| m4 strmmkernel_rn.s > strmm_kernel_rt_nomacros.s | |||
| $(M4_AIX) strmmkernel_rn.s > strmm_kernel_rt_nomacros.s | |||
| $(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@ | |||
| rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s | |||
| else | |||
| @@ -57,7 +57,7 @@ CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| SAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = daxpy_thunderx2t99.S | |||
| CAXPYKERNEL = zaxpy.S | |||
| ZAXPYKERNEL = zaxpy.S | |||
| @@ -81,45 +81,35 @@ DGEMVTKERNEL = gemv_t.S | |||
| CGEMVTKERNEL = zgemv_t.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| SASUMKERNEL = asum.S | |||
| DASUMKERNEL = asum.S | |||
| CASUMKERNEL = casum.S | |||
| ZASUMKERNEL = zasum.S | |||
| SCOPYKERNEL = copy.S | |||
| DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| SSWAPKERNEL = swap.S | |||
| DSWAPKERNEL = swap.S | |||
| CSWAPKERNEL = swap.S | |||
| ZSWAPKERNEL = swap.S | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| SNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| DDOTKERNEL = dot.S | |||
| ifneq ($(C_COMPILER), PGI) | |||
| SDOTKERNEL = ../generic/dot.c | |||
| else | |||
| SDOTKERNEL = dot.S | |||
| endif | |||
| ifneq ($(C_COMPILER), PGI) | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| else | |||
| CDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| endif | |||
| SASUMKERNEL = sasum_thunderx2t99.c | |||
| DASUMKERNEL = dasum_thunderx2t99.c | |||
| CASUMKERNEL = casum_thunderx2t99.c | |||
| ZASUMKERNEL = zasum_thunderx2t99.c | |||
| SCOPYKERNEL = copy_thunderx2t99.c | |||
| DCOPYKERNEL = copy_thunderx2t99.c | |||
| CCOPYKERNEL = copy_thunderx2t99.c | |||
| ZCOPYKERNEL = copy_thunderx2t99.c | |||
| SSWAPKERNEL = swap_thunderx2t99.S | |||
| DSWAPKERNEL = swap_thunderx2t99.S | |||
| CSWAPKERNEL = swap_thunderx2t99.S | |||
| ZSWAPKERNEL = swap_thunderx2t99.S | |||
| ISAMAXKERNEL = iamax_thunderx2t99.c | |||
| IDAMAXKERNEL = iamax_thunderx2t99.c | |||
| ICAMAXKERNEL = izamax_thunderx2t99.c | |||
| IZAMAXKERNEL = izamax_thunderx2t99.c | |||
| SNRM2KERNEL = scnrm2_thunderx2t99.c | |||
| DNRM2KERNEL = dznrm2_thunderx2t99.c | |||
| CNRM2KERNEL = scnrm2_thunderx2t99.c | |||
| ZNRM2KERNEL = dznrm2_thunderx2t99.c | |||
| DDOTKERNEL = dot.c | |||
| SDOTKERNEL = dot.c | |||
| CDOTKERNEL = zdot_thunderx2t99.c | |||
| ZDOTKERNEL = zdot_thunderx2t99.c | |||
| DSDOTKERNEL = dot.S | |||
| DGEMM_BETA = dgemm_beta.S | |||
| @@ -128,10 +118,10 @@ SGEMM_BETA = sgemm_beta.S | |||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMINCOPY = gemm_ncopy_sve_v1x$(SGEMM_UNROLL_N).c | |||
| SGEMMITCOPY = gemm_tcopy_sve_v1x$(SGEMM_UNROLL_N).c | |||
| SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
| SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| @@ -149,8 +139,8 @@ SSYMMLCOPY_M = symm_lcopy_sve.c | |||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
| DGEMMINCOPY = gemm_ncopy_sve_v1x$(DGEMM_UNROLL_N).c | |||
| DGEMMITCOPY = gemm_tcopy_sve_v1x$(DGEMM_UNROLL_N).c | |||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| @@ -170,8 +160,8 @@ DSYMMLCOPY_M = symm_lcopy_sve.c | |||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
| CGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c | |||
| CGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| @@ -194,8 +184,8 @@ CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
| ZGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c | |||
| ZGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| @@ -1,216 +1 @@ | |||
| SAMINKERNEL = ../arm/amin.c | |||
| DAMINKERNEL = ../arm/amin.c | |||
| CAMINKERNEL = ../arm/zamin.c | |||
| ZAMINKERNEL = ../arm/zamin.c | |||
| SMAXKERNEL = ../arm/max.c | |||
| DMAXKERNEL = ../arm/max.c | |||
| SMINKERNEL = ../arm/min.c | |||
| DMINKERNEL = ../arm/min.c | |||
| ISAMINKERNEL = ../arm/iamin.c | |||
| IDAMINKERNEL = ../arm/iamin.c | |||
| ICAMINKERNEL = ../arm/izamin.c | |||
| IZAMINKERNEL = ../arm/izamin.c | |||
| ISMAXKERNEL = ../arm/imax.c | |||
| IDMAXKERNEL = ../arm/imax.c | |||
| ISMINKERNEL = ../arm/imin.c | |||
| IDMINKERNEL = ../arm/imin.c | |||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
| SAMAXKERNEL = amax.S | |||
| DAMAXKERNEL = amax.S | |||
| CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| SAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = axpy.S | |||
| CAXPYKERNEL = zaxpy.S | |||
| ZAXPYKERNEL = zaxpy.S | |||
| SROTKERNEL = rot.S | |||
| DROTKERNEL = rot.S | |||
| CROTKERNEL = zrot.S | |||
| ZROTKERNEL = zrot.S | |||
| SSCALKERNEL = scal.S | |||
| DSCALKERNEL = scal.S | |||
| CSCALKERNEL = zscal.S | |||
| ZSCALKERNEL = zscal.S | |||
| SGEMVNKERNEL = gemv_n.S | |||
| DGEMVNKERNEL = gemv_n.S | |||
| CGEMVNKERNEL = zgemv_n.S | |||
| ZGEMVNKERNEL = zgemv_n.S | |||
| SGEMVTKERNEL = gemv_t.S | |||
| DGEMVTKERNEL = gemv_t.S | |||
| CGEMVTKERNEL = zgemv_t.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| SASUMKERNEL = asum.S | |||
| DASUMKERNEL = asum.S | |||
| CASUMKERNEL = casum.S | |||
| ZASUMKERNEL = zasum.S | |||
| SCOPYKERNEL = copy.S | |||
| DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| SSWAPKERNEL = swap.S | |||
| DSWAPKERNEL = swap.S | |||
| CSWAPKERNEL = swap.S | |||
| ZSWAPKERNEL = swap.S | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| SNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| DDOTKERNEL = dot.S | |||
| ifneq ($(C_COMPILER), PGI) | |||
| SDOTKERNEL = ../generic/dot.c | |||
| else | |||
| SDOTKERNEL = dot.S | |||
| endif | |||
| ifneq ($(C_COMPILER), PGI) | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| else | |||
| CDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| endif | |||
| DSDOTKERNEL = dot.S | |||
| DGEMM_BETA = dgemm_beta.S | |||
| SGEMM_BETA = sgemm_beta.S | |||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| include $(KERNELDIR)/KERNEL.ARMV8SVE | |||
| @@ -1,216 +1 @@ | |||
| SAMINKERNEL = ../arm/amin.c | |||
| DAMINKERNEL = ../arm/amin.c | |||
| CAMINKERNEL = ../arm/zamin.c | |||
| ZAMINKERNEL = ../arm/zamin.c | |||
| SMAXKERNEL = ../arm/max.c | |||
| DMAXKERNEL = ../arm/max.c | |||
| SMINKERNEL = ../arm/min.c | |||
| DMINKERNEL = ../arm/min.c | |||
| ISAMINKERNEL = ../arm/iamin.c | |||
| IDAMINKERNEL = ../arm/iamin.c | |||
| ICAMINKERNEL = ../arm/izamin.c | |||
| IZAMINKERNEL = ../arm/izamin.c | |||
| ISMAXKERNEL = ../arm/imax.c | |||
| IDMAXKERNEL = ../arm/imax.c | |||
| ISMINKERNEL = ../arm/imin.c | |||
| IDMINKERNEL = ../arm/imin.c | |||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
| SAMAXKERNEL = amax.S | |||
| DAMAXKERNEL = amax.S | |||
| CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| SAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = axpy.S | |||
| CAXPYKERNEL = zaxpy.S | |||
| ZAXPYKERNEL = zaxpy.S | |||
| SROTKERNEL = rot.S | |||
| DROTKERNEL = rot.S | |||
| CROTKERNEL = zrot.S | |||
| ZROTKERNEL = zrot.S | |||
| SSCALKERNEL = scal.S | |||
| DSCALKERNEL = scal.S | |||
| CSCALKERNEL = zscal.S | |||
| ZSCALKERNEL = zscal.S | |||
| SGEMVNKERNEL = gemv_n.S | |||
| DGEMVNKERNEL = gemv_n.S | |||
| CGEMVNKERNEL = zgemv_n.S | |||
| ZGEMVNKERNEL = zgemv_n.S | |||
| SGEMVTKERNEL = gemv_t.S | |||
| DGEMVTKERNEL = gemv_t.S | |||
| CGEMVTKERNEL = zgemv_t.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| SASUMKERNEL = asum.S | |||
| DASUMKERNEL = asum.S | |||
| CASUMKERNEL = casum.S | |||
| ZASUMKERNEL = zasum.S | |||
| SCOPYKERNEL = copy.S | |||
| DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| SSWAPKERNEL = swap.S | |||
| DSWAPKERNEL = swap.S | |||
| CSWAPKERNEL = swap.S | |||
| ZSWAPKERNEL = swap.S | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| SNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| DDOTKERNEL = dot.S | |||
| ifneq ($(C_COMPILER), PGI) | |||
| SDOTKERNEL = ../generic/dot.c | |||
| else | |||
| SDOTKERNEL = dot.S | |||
| endif | |||
| ifneq ($(C_COMPILER), PGI) | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| else | |||
| CDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| endif | |||
| DSDOTKERNEL = dot.S | |||
| DGEMM_BETA = dgemm_beta.S | |||
| SGEMM_BETA = sgemm_beta.S | |||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| include $(KERNELDIR)/KERNEL.ARMV8SVE | |||
| @@ -1,216 +1 @@ | |||
| SAMINKERNEL = ../arm/amin.c | |||
| DAMINKERNEL = ../arm/amin.c | |||
| CAMINKERNEL = ../arm/zamin.c | |||
| ZAMINKERNEL = ../arm/zamin.c | |||
| SMAXKERNEL = ../arm/max.c | |||
| DMAXKERNEL = ../arm/max.c | |||
| SMINKERNEL = ../arm/min.c | |||
| DMINKERNEL = ../arm/min.c | |||
| ISAMINKERNEL = ../arm/iamin.c | |||
| IDAMINKERNEL = ../arm/iamin.c | |||
| ICAMINKERNEL = ../arm/izamin.c | |||
| IZAMINKERNEL = ../arm/izamin.c | |||
| ISMAXKERNEL = ../arm/imax.c | |||
| IDMAXKERNEL = ../arm/imax.c | |||
| ISMINKERNEL = ../arm/imin.c | |||
| IDMINKERNEL = ../arm/imin.c | |||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
| SAMAXKERNEL = amax.S | |||
| DAMAXKERNEL = amax.S | |||
| CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| SAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = axpy.S | |||
| CAXPYKERNEL = zaxpy.S | |||
| ZAXPYKERNEL = zaxpy.S | |||
| SROTKERNEL = rot.S | |||
| DROTKERNEL = rot.S | |||
| CROTKERNEL = zrot.S | |||
| ZROTKERNEL = zrot.S | |||
| SSCALKERNEL = scal.S | |||
| DSCALKERNEL = scal.S | |||
| CSCALKERNEL = zscal.S | |||
| ZSCALKERNEL = zscal.S | |||
| SGEMVNKERNEL = gemv_n.S | |||
| DGEMVNKERNEL = gemv_n.S | |||
| CGEMVNKERNEL = zgemv_n.S | |||
| ZGEMVNKERNEL = zgemv_n.S | |||
| SGEMVTKERNEL = gemv_t.S | |||
| DGEMVTKERNEL = gemv_t.S | |||
| CGEMVTKERNEL = zgemv_t.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| SASUMKERNEL = asum.S | |||
| DASUMKERNEL = asum.S | |||
| CASUMKERNEL = casum.S | |||
| ZASUMKERNEL = zasum.S | |||
| SCOPYKERNEL = copy.S | |||
| DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| SSWAPKERNEL = swap.S | |||
| DSWAPKERNEL = swap.S | |||
| CSWAPKERNEL = swap.S | |||
| ZSWAPKERNEL = swap.S | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| SNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| DDOTKERNEL = dot.S | |||
| ifneq ($(C_COMPILER), PGI) | |||
| SDOTKERNEL = ../generic/dot.c | |||
| else | |||
| SDOTKERNEL = dot.S | |||
| endif | |||
| ifneq ($(C_COMPILER), PGI) | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| else | |||
| CDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| endif | |||
| DSDOTKERNEL = dot.S | |||
| DGEMM_BETA = dgemm_beta.S | |||
| SGEMM_BETA = sgemm_beta.S | |||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| include $(KERNELDIR)/KERNEL.ARMV8SVE | |||
| @@ -21,12 +21,15 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| SGEMVNKERNEL = sgemv_n_8_lasx.S | |||
| SGEMVTKERNEL = sgemv_t_8_lasx.S | |||
| DTRSMKERNEL_LN = dtrsm_kernel_LN_16x4_lasx.S | |||
| DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_lasx.S | |||
| DTRSMKERNEL_RN = dtrsm_kernel_RN_16x4_lasx.S | |||
| DTRSMKERNEL_RT = dtrsm_kernel_RT_16x4_lasx.S | |||
| endif | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| @@ -341,7 +341,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| fmadd.d $f10, $f12, $f2, $f10 | |||
| .endm | |||
| .macro DGEMV_N XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req | |||
| .macro DGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req | |||
| PTR_SRLI J, N, 3 | |||
| beqz J, .L_\XW\()_N_7 | |||
| PTR_SLLI K_LDA, LDA, 3 | |||
| @@ -541,13 +541,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .hword .L_GAP_1_0 - .L_GAP_TABLE | |||
| .hword .L_GAP_1_1 - .L_GAP_TABLE | |||
| .L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */ | |||
| DGEMV_N GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1 | |||
| DGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1 | |||
| .L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */ | |||
| DGEMV_N GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1 | |||
| DGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1 | |||
| .L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */ | |||
| DGEMV_N GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1 | |||
| DGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1 | |||
| .L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */ | |||
| DGEMV_N GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1 | |||
| DGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1 | |||
| .L_END: | |||
| pop_if_used 17 + 7, 24 + 4 | |||
| jirl $r0, $r1, 0x0 | |||
| @@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| GMADD xvf, d, TP0, A0, X0, TP0, TP1, A2, X0, TP1 | |||
| .endm | |||
| .macro DGEMV_T XW:req X8:req, X4:req | |||
| .macro DGEMV_T_LASX XW:req X8:req, X4:req | |||
| PTR_SRLI J, N, 3 | |||
| beqz J, .L_\XW\()_N_7 | |||
| PTR_SLLI K_LDA, LDA, 3 | |||
| @@ -472,9 +472,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .hword .L_GAP_0 - .L_GAP_TABLE | |||
| .hword .L_GAP_1 - .L_GAP_TABLE | |||
| .L_GAP_0: /* if (incx == 1) */ | |||
| DGEMV_T GAP_0, X8, X4 | |||
| DGEMV_T_LASX GAP_0, X8, X4 | |||
| .L_GAP_1: /* if (incx != 1) */ | |||
| DGEMV_T GAP_1, X8_GAP, X4_GAP | |||
| DGEMV_T_LASX GAP_1, X8_GAP, X4_GAP | |||
| .L_END: | |||
| pop_if_used 17 + 8, 24 + 3 | |||
| jirl $r0, $r1, 0x0 | |||
| @@ -0,0 +1,959 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2023, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #include "loongarch64_asm.S" | |||
| /********************************************************************* | |||
| * 2023/08/26 guxiwei | |||
| * UTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| * | |||
| *********************************************************************/ | |||
| /* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b, | |||
| * FLOAT *c, BLASLONG ldc, BLASLONG offset) | |||
| */ | |||
| #define M $r4 // param 1: bm | |||
| #define N $r5 // param 2: bn | |||
| #define K $r6 // param 3: bk | |||
| #define A $r7 // param 5: ba | |||
| #define B $r8 // param 6: bb | |||
| #define C $r9 // param 7: bc | |||
| #define LDC $r10 // param 8: ldc | |||
| #define OFFSET $r11 // param 9: offset | |||
| /* Cycle control parameters */ | |||
| #define I $r13 | |||
| #define J $r14 | |||
| #define L $r15 | |||
| #define TL $r16 | |||
| /* Matrix address */ | |||
| #define A0 $r17 | |||
| #define B0 $r18 | |||
| #define C0 $r19 | |||
| #define C1 $r20 | |||
| #define C2 $r23 | |||
| #define C3 $r24 | |||
| #define T0 $r25 | |||
| #define T1 $r26 | |||
| #define T2 $r27 | |||
| #define KK $r28 | |||
| #define AA $r29 | |||
| #define CC $r30 | |||
| #define BB B0 | |||
| #undef ZERO | |||
| #define ZERO $r0 | |||
| #define U0 $xr0 | |||
| #define U1 $xr1 | |||
| #define U2 $xr2 | |||
| #define U3 $xr3 | |||
| #define U4 $xr4 | |||
| #define U5 $xr5 | |||
| #define U6 $xr6 | |||
| #define U7 $xr7 | |||
| #define U8 $xr8 | |||
| #define U9 $xr9 | |||
| #define U10 $xr10 | |||
| #define U11 $xr11 | |||
| #define U12 $xr12 | |||
| #define U13 $xr13 | |||
| #define U14 $xr14 | |||
| #define U15 $xr15 | |||
| #define D0 $xr16 | |||
| #define D1 $xr17 | |||
| #define D2 $xr18 | |||
| #define D3 $xr19 | |||
| #define D4 $xr20 | |||
| #define D5 $xr21 | |||
| #define D6 $xr22 | |||
| #define D7 $xr23 | |||
| #define D8 $xr24 | |||
| #define D9 $xr25 | |||
| #define D10 $xr26 | |||
| #define D11 $xr27 | |||
| #define D12 $xr28 | |||
| #define D13 $xr29 | |||
| #define D14 $xr30 | |||
| #define D15 $xr31 | |||
| #define G0 D0 | |||
| #define G1 D1 | |||
| #define G2 D2 | |||
| #define G3 D3 | |||
| #define G4 D4 | |||
| #define G5 D5 | |||
| #define G6 D6 | |||
| #define G7 D7 | |||
| #define G8 D8 | |||
| #define G9 D9 | |||
| #define G10 D10 | |||
| #define G11 D11 | |||
| #define G12 D12 | |||
| #define G13 D13 | |||
| #define G14 D14 | |||
| #define G15 D15 | |||
| /* Prefetch interval */ | |||
| #define A_PRE 0x400 | |||
| #define B_PRE 0x100 | |||
| #include "dtrsm_kernel_macro.S" | |||
| .macro ldrepl_macro start, end, stride | |||
| // Load Ux (x = 0...15) | |||
| .if \start <= \end | |||
| GLDREPL xv, d, $xr\start, A0, \stride * 8 | |||
| ldrepl_macro %start + 1, \end, %stride + 1 | |||
| .endif | |||
| .endm | |||
| .macro nmsub_macro start0, end0, start1, reg | |||
| // Gx -= reg * Ux | |||
| .if \start0 <= \end0 | |||
| xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0 | |||
| nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg | |||
| .endif | |||
| .endm | |||
| .macro B_st_macro start, end, stride, N | |||
| // Store Gx(x = 16...31) | |||
| .if \start <= \end | |||
| .if \N == 4 | |||
| xvst $xr\start, B0, \stride * 0x20 | |||
| .elseif \N == 2 | |||
| vst $vr\start, B0, \stride * 0x10 | |||
| .elseif \N == 1 | |||
| fst.d $f\start, B0, \stride * 0x08 | |||
| .endif | |||
| B_st_macro %start + 1, \end, %stride + 1, \N | |||
| .endif | |||
| .endm | |||
| .macro dsolve_16 N | |||
| // The data layout of C (4x16) is as follows (store 4 data in each register): | |||
| // U0 U1 U2 U3 | |||
| // U4 U5 U6 U7 | |||
| // U8 U9 U10 U11 | |||
| // U12 U13 U14 U15 | |||
| // The first step is to transpose the result of C | |||
| GTRANSPOSE4x4_D U3, U7, U11, U15, G12, G13, G14, G15, D0, D1 | |||
| GTRANSPOSE4x4_D U2, U6, U10, U14, G8, G9, G10, G11, D0, D1 | |||
| GTRANSPOSE4x4_D U1, U5, U9, U13, G4, G5, G6, G7, U3, U7 | |||
| GTRANSPOSE4x4_D U0, U4, U8, U12, G0, G1, G2, G3, U3, U7 | |||
| // Now we have the following memory layout of C: | |||
| // 0 1 2 3 ... 15 | |||
| // 0 | | | | | | | | |||
| // 1 | G0 | G1 | G2 | G3 | ... | G15 | | |||
| // 2 | | | | | | | | |||
| // 3 | | | | | | | | |||
| // Next we are going to process matrix A with a size of 16x16, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix A is as follows, quite large. | |||
| //0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | |||
| // 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | |||
| // 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | |||
| // 51 52 53 54 55 56 57 58 59 60 61 62 63 | |||
| // 68 69 70 71 72 73 74 75 76 77 78 79 | |||
| // 85 86 87 88 89 90 91 92 93 94 95 | |||
| // 102 103 104 105 106 107 108 109 110 111 | |||
| // 119 120 121 122 123 124 125 126 127 | |||
| // 136 137 138 139 140 141 142 143 | |||
| // 153 154 155 156 157 158 159 | |||
| // 170 171 172 173 174 175 | |||
| // 187 188 189 190 191 | |||
| // 204 205 206 207 | |||
| // 221 222 223 | |||
| // 238 239 | |||
| // 255 | |||
| // Sequentially extract data from A in row order | |||
| // Load 0 | |||
| ldrepl_macro 0, 15, 0 | |||
| GMUL xvf, d, G0, G0, U0 | |||
| nmsub_macro 17, 31, 1, G0 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 1 | |||
| ldrepl_macro 1, 15, 0 | |||
| GMUL xvf, d, G1, G1, U1 | |||
| nmsub_macro 18, 31, 2, G1 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 2 | |||
| ldrepl_macro 2, 15, 0 | |||
| GMUL xvf, d, G2, G2, U2 | |||
| nmsub_macro 19, 31, 3, G2 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 3 | |||
| ldrepl_macro 3, 15, 0 | |||
| GMUL xvf, d, G3, G3, U3 | |||
| nmsub_macro 20, 31, 4, G3 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 4 | |||
| ldrepl_macro 4, 15, 0 | |||
| GMUL xvf, d, G4, G4, U4 | |||
| nmsub_macro 21, 31, 5, G4 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 5 | |||
| ldrepl_macro 5, 15, 0 | |||
| GMUL xvf, d, G5, G5, U5 | |||
| nmsub_macro 22, 31, 6, G5 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 6 | |||
| ldrepl_macro 6, 15, 0 | |||
| GMUL xvf, d, G6, G6, U6 | |||
| nmsub_macro 23, 31, 7, G6 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 7 | |||
| ldrepl_macro 7, 15, 0 | |||
| GMUL xvf, d, G7, G7, U7 | |||
| nmsub_macro 24, 31, 8, G7 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 8 | |||
| ldrepl_macro 8, 15, 0 | |||
| GMUL xvf, d, G8, G8, U8 | |||
| nmsub_macro 25, 31, 9, G8 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 9 | |||
| ldrepl_macro 9, 15, 0 | |||
| GMUL xvf, d, G9, G9, U9 | |||
| nmsub_macro 26, 31, 10, G9 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 10 | |||
| ldrepl_macro 10, 15, 0 | |||
| GMUL xvf, d, G10, G10, U10 | |||
| nmsub_macro 27, 31, 11, G10 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 11 | |||
| ldrepl_macro 11, 15, 0 | |||
| GMUL xvf, d, G11, G11, U11 | |||
| nmsub_macro 28, 31, 12, G11 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 12 | |||
| ldrepl_macro 12, 15, 0 | |||
| GMUL xvf, d, G12, G12, U12 | |||
| nmsub_macro 29, 31, 13, G12 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 13 | |||
| ldrepl_macro 13, 15, 0 | |||
| GMUL xvf, d, G13, G13, U13 | |||
| nmsub_macro 30, 31, 14, G13 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 14 | |||
| ldrepl_macro 14, 15, 0 | |||
| GMUL xvf, d, G14, G14, U14 | |||
| nmsub_macro 31, 31, 15, G14 | |||
| PTR_ADDI A0, A0, 17 * 8 | |||
| // Load 15 | |||
| ldrepl_macro 15, 15, 0 | |||
| GMUL xvf, d, G15, G15, U15 | |||
| // Finally, We can store the result. | |||
| // For B, stored sequentially, and C, first transpose and then store | |||
| B_st_macro 16, 31, 0, \N | |||
| GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1 | |||
| GTRANSPOSE4x4_D G4, G5, G6, G7, G4, G5, G6, G7, U0, U1 | |||
| GTRANSPOSE4x4_D G8, G9, G10, G11, G8, G9, G10, G11, U0, U1 | |||
| GTRANSPOSE4x4_D G12, G13, G14, G15, G12, G13, G14, G15, U0, U1 | |||
| .if \N == 4 | |||
| GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60, \ | |||
| G1, C1, 0x00, G5, C1, 0x20, G9, C1, 0x40, G13, C1, 0x60, \ | |||
| G2, C2, 0x00, G6, C2, 0x20, G10, C2, 0x40, G14, C2, 0x60, \ | |||
| G3, C3, 0x00, G7, C3, 0x20, G11, C3, 0x40, G15, C3, 0x60 | |||
| .elseif \N == 2 | |||
| GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60, \ | |||
| G1, C1, 0x00, G5, C1, 0x20, G9, C1, 0x40, G13, C1, 0x60 | |||
| .elseif \N == 1 | |||
| GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60 | |||
| .endif | |||
| .endm | |||
| .macro dgemm_dsolve_16x4 | |||
| bge ZERO, KK, .L_dsolve_16x4_load | |||
| dgemm_16x4 | |||
| b .L_dsolve_16x4 | |||
| .L_dsolve_16x4_load: | |||
| // Load C | |||
| GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 | |||
| GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 | |||
| GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60 | |||
| GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 | |||
| /********************** solver ******************/ | |||
| .L_dsolve_16x4: | |||
| dsolve_16 4 | |||
| .endm | |||
| .macro dsolve_8 N | |||
| // The data layout of C (4x8) is as follows (store 4 data in each register): | |||
| // U0 U1 | |||
| // U2 U3 | |||
| // U4 U5 | |||
| // U6 U7 | |||
| // The first step is to transpose the result of C | |||
| GTRANSPOSE4x4_D U1, U3, U5, U7, G4, G5, G6, G7, G8, G9 | |||
| GTRANSPOSE4x4_D U0, U2, U4, U6, G0, G1, G2, G3, G8, G9 | |||
| // Now we have the following memory layout of C: | |||
| // 0 1 2 3 ... 7 | |||
| // 0 | | | | | | | | |||
| // 1 | G0 | G1 | G2 | G3 | ... | G7 | | |||
| // 2 | | | | | | | | |||
| // 3 | | | | | | | | |||
| // Next we are going to process matrix A with a size of 8x8, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix A is as follows: | |||
| //0 1 2 3 4 5 6 7 | |||
| // 9 10 11 12 13 14 15 | |||
| // 18 19 20 21 22 23 | |||
| // 27 28 29 30 31 | |||
| // 36 37 38 39 | |||
| // 45 46 47 | |||
| // 54 55 | |||
| // 63 | |||
| // Sequentially extract data from A in row order | |||
| // Load 0 | |||
| ldrepl_macro 0, 7, 0 | |||
| GMUL xvf, d, G0, G0, U0 | |||
| nmsub_macro 17, 23, 1, G0 | |||
| PTR_ADDI A0, A0, 9 * 8 | |||
| // Load 1 | |||
| ldrepl_macro 1, 7, 0 | |||
| GMUL xvf, d, G1, G1, U1 | |||
| nmsub_macro 18, 23, 2, G1 | |||
| PTR_ADDI A0, A0, 9 * 8 | |||
| // Load 2 | |||
| ldrepl_macro 2, 7, 0 | |||
| GMUL xvf, d, G2, G2, U2 | |||
| nmsub_macro 19, 23, 3, G2 | |||
| PTR_ADDI A0, A0, 9 * 8 | |||
| // Load 3 | |||
| ldrepl_macro 3, 7, 0 | |||
| GMUL xvf, d, G3, G3, U3 | |||
| nmsub_macro 20, 23, 4, G3 | |||
| PTR_ADDI A0, A0, 9 * 8 | |||
| // Load 4 | |||
| ldrepl_macro 4, 7, 0 | |||
| GMUL xvf, d, G4, G4, U4 | |||
| nmsub_macro 21, 23, 5, G4 | |||
| PTR_ADDI A0, A0, 9 * 8 | |||
| // Load 5 | |||
| ldrepl_macro 5, 7, 0 | |||
| GMUL xvf, d, G5, G5, U5 | |||
| nmsub_macro 22, 23, 6, G5 | |||
| PTR_ADDI A0, A0, 9 * 8 | |||
| // Load 6 | |||
| ldrepl_macro 6, 7, 0 | |||
| GMUL xvf, d, G6, G6, U6 | |||
| nmsub_macro 23, 23, 7, G6 | |||
| PTR_ADDI A0, A0, 9 * 8 | |||
| // Load 7 | |||
| ldrepl_macro 7, 7, 0 | |||
| GMUL xvf, d, G7, G7, U7 | |||
| // Finally, We can store the result. | |||
| // For B, stored sequentially, and C, first transpose and then store | |||
| B_st_macro 16, 23, 0, \N | |||
| GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1 | |||
| GTRANSPOSE4x4_D G4, G5, G6, G7, G4, G5, G6, G7, U0, U1 | |||
| .if \N == 4 | |||
| GST xv, , G0, C0, 0x00, G4, C0, 0x20, \ | |||
| G1, C1, 0x00, G5, C1, 0x20, \ | |||
| G2, C2, 0x00, G6, C2, 0x20, \ | |||
| G3, C3, 0x00, G7, C3, 0x20 | |||
| .elseif \N == 2 | |||
| GST xv, , G0, C0, 0x00, G4, C0, 0x20, \ | |||
| G1, C1, 0x00, G5, C1, 0x20 | |||
| .elseif \N == 1 | |||
| GST xv, , G0, C0, 0x00, G4, C0, 0x20 | |||
| .endif | |||
| .endm | |||
| .macro dgemm_dsolve_8x4 | |||
| bge ZERO, L, .L_dsolve_8x4_load | |||
| dgemm_8x4 | |||
| b .L_dsolve_8x4 | |||
| .L_dsolve_8x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| /* Load C1 */ | |||
| xvld U2, C1, 0x00 | |||
| xvld U3, C1, 0x20 | |||
| /* Load C2 */ | |||
| xvld U4, C2, 0x00 | |||
| xvld U5, C2, 0x20 | |||
| /* Load C3 */ | |||
| xvld U6, C3, 0x00 | |||
| xvld U7, C3, 0x20 | |||
| /********* solver *********/ | |||
| .L_dsolve_8x4: | |||
| dsolve_8 4 | |||
| .endm | |||
| .macro dsolve_4 N | |||
| // The data layout of C (4x4) is as follows (store 4 data in each register): | |||
| // U0 | |||
| // U1 | |||
| // U2 | |||
| // U3 | |||
| // The first step is to transpose the result of C | |||
| GTRANSPOSE4x4_D U0, U1, U2, U3, G0, G1, G2, G3, G4, G5 | |||
| // Now we have the following memory layout of C: | |||
| // 0 1 2 3 | |||
| // 0 | | | | | | |||
| // 1 | G0 | G1 | G2 | G3 | | |||
| // 2 | | | | | | |||
| // 3 | | | | | | |||
| // Next we are going to process matrix A with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix A is as follows: | |||
| //0 1 2 3 | |||
| // 5 6 7 | |||
| // 10 11 | |||
| // 15 | |||
| // Sequentially extract data from A in row order | |||
| // Load 0 | |||
| ldrepl_macro 0, 3, 0 | |||
| GMUL xvf, d, G0, G0, U0 | |||
| nmsub_macro 17, 19, 1, G0 | |||
| PTR_ADDI A0, A0, 5 * 8 | |||
| // Load 1 | |||
| ldrepl_macro 1, 3, 0 | |||
| GMUL xvf, d, G1, G1, U1 | |||
| nmsub_macro 18, 19, 2, G1 | |||
| PTR_ADDI A0, A0, 5 * 8 | |||
| // Load 2 | |||
| ldrepl_macro 2, 3, 0 | |||
| GMUL xvf, d, G2, G2, U2 | |||
| nmsub_macro 19, 19, 3, G2 | |||
| PTR_ADDI A0, A0, 5 * 8 | |||
| // Load 3 | |||
| ldrepl_macro 3, 3, 0 | |||
| GMUL xvf, d, G3, G3, U3 | |||
| // Finally, We can store the result. | |||
| // For B, stored sequentially, and C, first transpose and then store | |||
| B_st_macro 16, 19, 0, \N | |||
| GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1 | |||
| .if \N == 4 | |||
| GST xv, , G0, C0, 0x00, G1, C1, 0x00, G2, C2, 0x00, G3, C3, 0x00 | |||
| .elseif \N == 2 | |||
| GST xv, , G0, C0, 0x00, G1, C1, 0x00 | |||
| .elseif \N == 1 | |||
| GST xv, , G0, C0, 0x00 | |||
| .endif | |||
| .endm | |||
| .macro dgemm_dsolve_4x4 | |||
| bge ZERO, L, .L_dsolve_4x4_load | |||
| dgemm_4x4 | |||
| b .L_dsolve_4x4 | |||
| .L_dsolve_4x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| /* Load C2 */ | |||
| xvld U2, C2, 0x00 | |||
| /* Load C3 */ | |||
| xvld U3, C3, 0x00 | |||
| /************** solver *****************/ | |||
| .L_dsolve_4x4: | |||
| dsolve_4 4 | |||
| .endm | |||
| .macro dsolve_2 N | |||
| // Transpose | |||
| GSBUTTERFLY xv, d, G0, G1, U1, U0 | |||
| // Now we have the following memory layout of C: | |||
| // 0 1 | |||
| // 0 | | | | |||
| // 1 | G0 | G1 | | |||
| // 2 | | | | |||
| // 3 | | | | |||
| // Next we are going to process matrix A with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix A is as follows: | |||
| //0 1 | |||
| // 3 | |||
| // Sequentially extract data from A in row order | |||
| // Load 0 | |||
| ldrepl_macro 0, 1, 0 | |||
| GMUL xvf, d, G0, G0, U0 | |||
| nmsub_macro 17, 17, 1, G0 | |||
| PTR_ADDI A0, A0, 3 * 8 | |||
| // Load 1 | |||
| ldrepl_macro 1, 1, 0 | |||
| GMUL xvf, d, G1, G1, U1 | |||
| // Finally, We can store the result. | |||
| // For B, stored sequentially, and C, first transpose and then store | |||
| B_st_macro 16, 17, 0, \N | |||
| GSBUTTERFLY xv, d, U0, U1, G1, G0 | |||
| .if \N == 4 | |||
| vst $vr0, C0, 0x00 | |||
| vst $vr1, C1, 0x00 | |||
| xvstelm.d U0, C2, 0x00, 0x02 | |||
| xvstelm.d U1, C3, 0x00, 0x02 | |||
| xvstelm.d U0, C2, 0x08, 0x03 | |||
| xvstelm.d U1, C3, 0x08, 0x03 | |||
| .elseif \N == 2 | |||
| vst $vr0, C0, 0x00 | |||
| vst $vr1, C1, 0x00 | |||
| .elseif \N == 1 | |||
| vst $vr0, C0, 0x00 | |||
| .endif | |||
| .endm | |||
| .macro dgemm_dsolve_2x4 | |||
| bge ZERO, L, .L_dsolve_2x4_load | |||
| dgemm_2x4 | |||
| b .L_dsolve_2x4 | |||
| .L_dsolve_2x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| /* Load C2 */ | |||
| xvld U2, C2, 0x00 | |||
| /* Load C3 */ | |||
| xvld U3, C3, 0x00 | |||
| xvpermi.q U0, U2, 0x02 | |||
| xvpermi.q U1, U3, 0x02 | |||
| /********************** solver ******************/ | |||
| .L_dsolve_2x4: | |||
| dsolve_2 4 | |||
| .endm | |||
| .macro dgemm_dsolve_1x4 | |||
| bge ZERO, L, .L_dsolve_1x4_load | |||
| dgemm_1x4 | |||
| b .L_dsolve_1x4 | |||
| .L_dsolve_1x4_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| fld.d $f1, C1, 0x00 | |||
| fld.d $f2, C2, 0x00 | |||
| fld.d $f3, C3, 0x00 | |||
| xvinsve0.d U0, U1, 0x01 | |||
| xvinsve0.d U0, U2, 0x02 | |||
| xvinsve0.d U0, U3, 0x03 | |||
| .L_dsolve_1x4: | |||
| GLDREPL xv, d, D0, A0, 0x00 | |||
| GMUL xvf, d, U0, U0, D0 | |||
| // Store C | |||
| xvstelm.d U0, C0, 0x00, 0x00 | |||
| xvstelm.d U0, C1, 0x00, 0x01 | |||
| xvstelm.d U0, C2, 0x00, 0x02 | |||
| xvstelm.d U0, C3, 0x00, 0x03 | |||
| // Store B | |||
| xvst U0, B0, 0x00 | |||
| .endm | |||
| .macro dgemm_dsolve_16x2 | |||
| bge ZERO, L, .L_dsolve_16x2_load | |||
| dgemm_16x2 | |||
| b .L_dsolve_16x2 | |||
| .L_dsolve_16x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| xvld U2, C0, 0x40 | |||
| xvld U3, C0, 0x60 | |||
| /* Load C1 */ | |||
| xvld U4, C1, 0x00 | |||
| xvld U5, C1, 0x20 | |||
| xvld U6, C1, 0x40 | |||
| xvld U7, C1, 0x60 | |||
| .L_dsolve_16x2: | |||
| dsolve_16 2 | |||
| .endm | |||
| .macro dgemm_dsolve_8x2 | |||
| bge ZERO, L, .L_dsolve_8x2_load | |||
| dgemm_8x2 | |||
| b .L_dsolve_8x2 | |||
| .L_dsolve_8x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| /* Load C1 */ | |||
| xvld U2, C1, 0x00 | |||
| xvld U3, C1, 0x20 | |||
| .L_dsolve_8x2: | |||
| dsolve_8 2 | |||
| .endm | |||
| .macro dgemm_dsolve_4x2 | |||
| bge ZERO, L, .L_dsolve_4x2_load | |||
| dgemm_4x2 | |||
| b .L_dsolve_4x2 | |||
| .L_dsolve_4x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| .L_dsolve_4x2: | |||
| dsolve_4 2 | |||
| .endm | |||
| .macro dgemm_dsolve_1x2 | |||
| bge ZERO, L, .L_dsolve_1x2_load | |||
| dgemm_1x2 | |||
| b .L_dsolve_1x2 | |||
| .L_dsolve_1x2_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| fld.d $f1, C1, 0x00 | |||
| xvinsve0.d U0, U1, 0x01 | |||
| .L_dsolve_1x2: | |||
| GLDREPL xv, d, D0, A0, 0x00 | |||
| GMUL xvf, d, U0, U0, D0 | |||
| // Store C | |||
| xvstelm.d U0, C0, 0x00, 0x00 | |||
| xvstelm.d U0, C1, 0x00, 0x01 | |||
| // Store B | |||
| vst $vr0, B0, 0x00 | |||
| .endm | |||
| .macro dgemm_dsolve_2x2 | |||
| bge ZERO, L, .L_dsolve_2x2_load | |||
| dgemm_2x2 | |||
| b .L_dsolve_2x2 | |||
| .L_dsolve_2x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| .L_dsolve_2x2: | |||
| dsolve_2 2 | |||
| .endm | |||
| .macro dgemm_dsolve_16x1 | |||
| bge ZERO, L, .L_dsolve_16x1_load | |||
| dgemm_16x1 | |||
| b .L_dsolve_16x1 | |||
| .L_dsolve_16x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| xvld U2, C0, 0x40 | |||
| xvld U3, C0, 0x60 | |||
| .L_dsolve_16x1: | |||
| dsolve_16 1 | |||
| .endm | |||
| .macro dgemm_dsolve_8x1 | |||
| bge ZERO, L, .L_dsolve_8x1_load | |||
| dgemm_8x1 | |||
| b .L_dsolve_8x1 | |||
| .L_dsolve_8x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| .L_dsolve_8x1: | |||
| dsolve_8 1 | |||
| .endm | |||
| .macro dgemm_dsolve_4x1 | |||
| bge ZERO, L, .L_dsolve_4x1_load | |||
| dgemm_4x1 | |||
| b .L_dsolve_4x1 | |||
| .L_dsolve_4x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| .L_dsolve_4x1: | |||
| dsolve_4 1 | |||
| .endm | |||
| .macro dgemm_dsolve_2x1 | |||
| bge ZERO, L, .L_dsolve_2x1_load | |||
| dgemm_2x1 | |||
| b .L_dsolve_2x1 | |||
| .L_dsolve_2x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| .L_dsolve_2x1: | |||
| dsolve_2 1 | |||
| .endm | |||
| .macro dgemm_dsolve_1x1 | |||
| bge ZERO, L, .L_dsolve_1x1_load | |||
| dgemm_1x1 | |||
| b .L_dsolve_1x1 | |||
| .L_dsolve_1x1_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| .L_dsolve_1x1: | |||
| GLDREPL xv, d, D0, A0, 0x00 | |||
| GMUL xvf, d, U0, U0, D0 | |||
| // Store C | |||
| xvstelm.d U0, C0, 0x00, 0x00 | |||
| // Store B | |||
| xvstelm.d U0, B0, 0x00, 0x00 | |||
| .endm | |||
| PROLOGUE | |||
| push_if_used 26, 32 | |||
| PTR_SLLI LDC, LDC, 3 | |||
| /* if (!(N >> 2)) goto L_N3 */ | |||
| PTR_SRAI J, N, 2 /* J = bn >> 2 */ | |||
| andi N, N, 0x03 | |||
| beq ZERO, J, .L_N3 | |||
| .align 5 | |||
| .L_J1: | |||
| PTR_ADDI J, J, -1 | |||
| move KK, OFFSET | |||
| move AA, A | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_M15 | |||
| .align 4 | |||
| .L_I1: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_16x4 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADDI KK, KK, 0x10 // kk += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_I1 | |||
| .L_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_M7 | |||
| .L_M8: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_8x4 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADDI KK, KK, 0x08 // kk += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_M3 | |||
| .L_M4: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_4x4 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADDI KK, KK, 0x04 // kk += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_M1 | |||
| .L_M2: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_2x4 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADDI KK, KK, 0x02 // kk += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_M0 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_1x4 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADDI KK, KK, 0x01 // kk += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_M0: | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_SLLI T1, LDC, 2 | |||
| PTR_ADD B, B, T0 // b += 4 * k | |||
| PTR_ADD C, C, T1 // c += 4 * ldc | |||
| bnez J, .L_J1 | |||
| .L_N3: | |||
| andi J, N, 2 | |||
| beq ZERO, J, .L_N1 | |||
| .L_N2: | |||
| move KK, OFFSET | |||
| move AA, A | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_N2_M15 | |||
| .align 4 | |||
| .L_N2_I1: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_16x2 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADDI KK, KK, 0x10 // kk += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_N2_I1 | |||
| .L_N2_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_N2_M7 | |||
| .L_N2_M8: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_8x2 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADDI KK, KK, 0x08 // kk += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_N2_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_N2_M3 | |||
| .L_N2_M4: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_4x2 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADDI KK, KK, 0x04 // kk += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_N2_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_N2_M1 | |||
| .L_N2_M2: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_2x2 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADDI KK, KK, 0x02 // kk += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_N2_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_N2_M0 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_1x2 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADDI KK, KK, 0x01 // kk += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_N2_M0: | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_SLLI T1, LDC, 1 | |||
| PTR_ADD B, B, T0 // b += 2 * k | |||
| PTR_ADD C, C, T1 // c += 2 * ldc | |||
| .L_N1: | |||
| andi J, N, 1 | |||
| beq ZERO, J, .L_N0 | |||
| move KK, OFFSET | |||
| move AA, A | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_N1_M15 | |||
| .align 4 | |||
| .L_N1_I1: | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_16x1 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADDI KK, KK, 0x10 // kk += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_N1_I1 | |||
| .L_N1_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_N1_M7 | |||
| .L_N1_M8: | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_8x1 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADDI KK, KK, 0x08 // kk += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_N1_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_N1_M3 | |||
| .L_N1_M4: | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_4x1 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADDI KK, KK, 0x04 // kk += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_N1_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_N1_M1 | |||
| .L_N1_M2: | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_2x1 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADDI KK, KK, 0x02 // kk += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_N1_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_N1_M0 | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_1x1 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADDI KK, KK, 0x01 // kk += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_N1_M0: | |||
| .L_N0: | |||
| pop_if_used 26, 32 | |||
| jirl $r0, $r1, 0x0 | |||
| EPILOGUE | |||
| @@ -0,0 +1,882 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2023, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #include "loongarch64_asm.S" | |||
| /********************************************************************* | |||
| * 2023/09/26 guxiwei | |||
| * UTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| * | |||
| *********************************************************************/ | |||
| /* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b, | |||
| * FLOAT *c, BLASLONG ldc, BLASLONG offset) | |||
| */ | |||
| #define M $r4 // param 1: bm | |||
| #define N $r5 // param 2: bn | |||
| #define K $r6 // param 3: bk | |||
| #define A $r7 // param 5: ba | |||
| #define B $r8 // param 6: bb | |||
| #define C $r9 // param 7: bc | |||
| #define LDC $r10 // param 8: ldc | |||
| #define OFFSET $r11 // param 9: offset | |||
| /* Cycle control parameters */ | |||
| #define I $r13 | |||
| #define J $r14 | |||
| #define L $r15 | |||
| #define TL $r16 | |||
| /* Matrix address */ | |||
| #define A0 $r17 | |||
| #define B0 $r18 | |||
| #define C0 $r19 | |||
| #define C1 $r20 | |||
| #define C2 $r23 | |||
| #define C3 $r24 | |||
| #define T0 $r25 | |||
| #define T1 $r26 | |||
| #define T2 $r27 | |||
| #define KK $r28 | |||
| #define AA $r29 | |||
| #define CC $r30 | |||
| #define BB B0 | |||
| #undef ZERO | |||
| #define ZERO $r0 | |||
| #define U0 $xr0 | |||
| #define U1 $xr1 | |||
| #define U2 $xr2 | |||
| #define U3 $xr3 | |||
| #define U4 $xr4 | |||
| #define U5 $xr5 | |||
| #define U6 $xr6 | |||
| #define U7 $xr7 | |||
| #define U8 $xr8 | |||
| #define U9 $xr9 | |||
| #define U10 $xr10 | |||
| #define U11 $xr11 | |||
| #define U12 $xr12 | |||
| #define U13 $xr13 | |||
| #define U14 $xr14 | |||
| #define U15 $xr15 | |||
| #define D0 $xr16 | |||
| #define D1 $xr17 | |||
| #define D2 $xr18 | |||
| #define D3 $xr19 | |||
| #define D4 $xr20 | |||
| #define D5 $xr21 | |||
| #define D6 $xr22 | |||
| #define D7 $xr23 | |||
| #define D8 $xr24 | |||
| #define D9 $xr25 | |||
| #define D10 $xr26 | |||
| #define D11 $xr27 | |||
| #define D12 $xr28 | |||
| #define D13 $xr29 | |||
| #define D14 $xr30 | |||
| #define D15 $xr31 | |||
| #define G0 D0 | |||
| #define G1 D1 | |||
| #define G2 D2 | |||
| #define G3 D3 | |||
| #define G4 D4 | |||
| #define G5 D5 | |||
| #define G6 D6 | |||
| #define G7 D7 | |||
| #define G8 D8 | |||
| #define G9 D9 | |||
| #define G10 D10 | |||
| #define G11 D11 | |||
| #define G12 D12 | |||
| #define G13 D13 | |||
| #define G14 D14 | |||
| #define G15 D15 | |||
| /* Prefetch interval */ | |||
| #define A_PRE 0x400 | |||
| #define B_PRE 0x100 | |||
| #include "dtrsm_kernel_macro.S" | |||
| .macro ldrepl_macro start, end, stride | |||
| // Load Ux (x = 0...15) | |||
| .if \start <= \end | |||
| GLDREPL xv, d, $xr\start, B0, \stride * 8 | |||
| ldrepl_macro %start + 1, \end, %stride + 1 | |||
| .endif | |||
| .endm | |||
| .macro nmsub_macro start0, end0, start1, reg | |||
| // Ux -= reg * Dx | |||
| .if \start0 <= \end0 | |||
| xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0 | |||
| nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg | |||
| .endif | |||
| .endm | |||
| .macro A_st_macro start, end, stride, N | |||
| // Store Ux(x = 0...15) | |||
| .if \start <= \end | |||
| .if \N == 4 | |||
| xvst $xr\start, A0, \stride * 0x20 | |||
| .elseif \N == 2 | |||
| vst $vr\start, A0, \stride * 0x10 | |||
| .elseif \N == 1 | |||
| fst.d $f\start, A0, \stride * 0x08 | |||
| .endif | |||
| A_st_macro %start + 1, \end, %stride + 1, \N | |||
| .endif | |||
| .endm | |||
| .macro dsolve_16x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 2 3 | |||
| // 5 6 7 | |||
| // 10 11 | |||
| // 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 19, 0 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 | |||
| ldrepl_macro 20, 22, 5 | |||
| nmsub_macro 4, 7, 0, D1 | |||
| ldrepl_macro 23, 24, 10 | |||
| GMUL xvf, d, U4, D4, U4, U5, D4, U5, U6, D4, U6, U7, D4, U7 | |||
| ldrepl_macro 25, 25, 15 | |||
| nmsub_macro 8, 11, 0, D2 | |||
| nmsub_macro 8, 11, 4, D5 | |||
| GMUL xvf, d, U8, D7, U8, U9, D7, U9, U10, D7, U10, U11, D7, U11 | |||
| nmsub_macro 12, 15, 0, D3 | |||
| nmsub_macro 12, 15, 4, D6 | |||
| nmsub_macro 12, 15, 8, D8 | |||
| GMUL xvf, d, U12, D9, U12, U13, D9, U13, U14, D9, U14, U15, D9, U15 | |||
| // Store A | |||
| A_st_macro 0, 15, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \ | |||
| U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60, \ | |||
| U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60, \ | |||
| U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 | |||
| .endm | |||
| .macro dsolve_16x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 | |||
| // 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 17, 0 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 | |||
| ldrepl_macro 18, 18, 3 | |||
| nmsub_macro 4, 7, 0, D1 | |||
| GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7 | |||
| // Store A | |||
| A_st_macro 0, 7, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \ | |||
| U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 | |||
| .endm | |||
| .macro dsolve_8x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 2 3 | |||
| // 5 6 7 | |||
| // 10 11 | |||
| // 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 19, 0 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1 | |||
| ldrepl_macro 20, 22, 5 | |||
| nmsub_macro 2, 3, 0, D1 | |||
| ldrepl_macro 23, 24, 10 | |||
| GMUL xvf, d, U2, D4, U2, U3, D4, U3 | |||
| ldrepl_macro 25, 25, 15 | |||
| nmsub_macro 4, 5, 0, D2 | |||
| nmsub_macro 4, 5, 2, D5 | |||
| GMUL xvf, d, U4, D7, U4, U5, D7, U5 | |||
| nmsub_macro 6, 7, 0, D3 | |||
| nmsub_macro 6, 7, 2, D6 | |||
| nmsub_macro 6, 7, 4, D8 | |||
| GMUL xvf, d, U6, D9, U6, U7, D9, U7 | |||
| // Store A | |||
| A_st_macro 0, 7, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, \ | |||
| U2, C1, 0x00, U3, C1, 0x20, \ | |||
| U4, C2, 0x00, U5, C2, 0x20, \ | |||
| U6, C3, 0x00, U7, C3, 0x20 | |||
| .endm | |||
| .macro dsolve_8x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 | |||
| // 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 17, 0 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1 | |||
| ldrepl_macro 18, 18, 3 | |||
| nmsub_macro 2, 3, 0, D1 | |||
| GMUL xvf, d, U2, D2, U2, U3, D2, U3 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, \ | |||
| U2, C1, 0x00, U3, C1, 0x20 | |||
| .endm | |||
| .macro dsolve_4x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 2 3 | |||
| // 5 6 7 | |||
| // 10 11 | |||
| // 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 19, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| ldrepl_macro 20, 22, 5 | |||
| nmsub_macro 1, 1, 0, D1 | |||
| ldrepl_macro 23, 24, 10 | |||
| GMUL xvf, d, U1, D4, U1 | |||
| ldrepl_macro 25, 25, 15 | |||
| nmsub_macro 2, 2, 0, D2 | |||
| nmsub_macro 2, 2, 1, D5 | |||
| GMUL xvf, d, U2, D7, U2 | |||
| nmsub_macro 3, 3, 0, D3 | |||
| nmsub_macro 3, 3, 1, D6 | |||
| nmsub_macro 3, 3, 2, D8 | |||
| GMUL xvf, d, U3, D9, U3 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C1, 0x00, U2, C2, 0x00, U3, C3, 0x00 | |||
| .endm | |||
| .macro dsolve_4x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 | |||
| // 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 17, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| ldrepl_macro 18, 18, 3 | |||
| nmsub_macro 1, 1, 0, D1 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| // Store A | |||
| A_st_macro 0, 1, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C1, 0x00 | |||
| .endm | |||
| .macro dsolve_2x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 2 3 | |||
| // 5 6 7 | |||
| // 10 11 | |||
| // 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 19, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| ldrepl_macro 20, 22, 5 | |||
| nmsub_macro 1, 1, 0, D1 | |||
| ldrepl_macro 23, 24, 10 | |||
| GMUL xvf, d, U1, D4, U1 | |||
| ldrepl_macro 25, 25, 15 | |||
| nmsub_macro 2, 2, 0, D2 | |||
| nmsub_macro 2, 2, 1, D5 | |||
| GMUL xvf, d, U2, D7, U2 | |||
| nmsub_macro 3, 3, 0, D3 | |||
| nmsub_macro 3, 3, 1, D6 | |||
| nmsub_macro 3, 3, 2, D8 | |||
| GMUL xvf, d, U3, D9, U3 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 2 | |||
| // Store C | |||
| GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00, $vr2, C2, 0x00, $vr3, C3, 0x00, | |||
| .endm | |||
| .macro dsolve_2x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 | |||
| // 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 17, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| ldrepl_macro 18, 18, 3 | |||
| nmsub_macro 1, 1, 0, D1 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| // Store A | |||
| A_st_macro 0, 1, 0, 2 | |||
| // Store C | |||
| GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00 | |||
| .endm | |||
| .macro dsolve_1x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 2 3 | |||
| // 5 6 7 | |||
| // 10 11 | |||
| // 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 19, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| ldrepl_macro 20, 22, 5 | |||
| nmsub_macro 1, 1, 0, D1 | |||
| ldrepl_macro 23, 24, 10 | |||
| GMUL xvf, d, U1, D4, U1 | |||
| ldrepl_macro 25, 25, 15 | |||
| nmsub_macro 2, 2, 0, D2 | |||
| nmsub_macro 2, 2, 1, D5 | |||
| GMUL xvf, d, U2, D7, U2 | |||
| nmsub_macro 3, 3, 0, D3 | |||
| nmsub_macro 3, 3, 1, D6 | |||
| nmsub_macro 3, 3, 2, D8 | |||
| GMUL xvf, d, U3, D9, U3 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 1 | |||
| // Store C | |||
| GST f, d, $f0, C0, 0x00, $f1, C1, 0x00, $f2, C2, 0x00, $f3, C3, 0x00, | |||
| .endm | |||
| .macro dsolve_1x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 1 | |||
| // 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 17, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| ldrepl_macro 18, 18, 3 | |||
| nmsub_macro 1, 1, 0, D1 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| // Store A | |||
| A_st_macro 0, 1, 0, 1 | |||
| // Store C | |||
| GST f, d, $f0, C0, 0x00, $f1, C1, 0x00 | |||
| .endm | |||
| .macro dgemm_dsolve_16x4 | |||
| bge ZERO, L, .L_dsolve_16x4_load | |||
| dgemm_16x4 | |||
| b .L_dsolve_16x4 | |||
| .L_dsolve_16x4_load: | |||
| // Load C | |||
| GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 | |||
| GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 | |||
| GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60 | |||
| GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 | |||
| /********************** solver ******************/ | |||
| .L_dsolve_16x4: | |||
| dsolve_16x4 | |||
| .endm | |||
| .macro dgemm_dsolve_8x4 | |||
| bge ZERO, L, .L_dsolve_8x4_load | |||
| dgemm_8x4 | |||
| b .L_dsolve_8x4 | |||
| .L_dsolve_8x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| /* Load C1 */ | |||
| xvld U2, C1, 0x00 | |||
| xvld U3, C1, 0x20 | |||
| /* Load C2 */ | |||
| xvld U4, C2, 0x00 | |||
| xvld U5, C2, 0x20 | |||
| /* Load C3 */ | |||
| xvld U6, C3, 0x00 | |||
| xvld U7, C3, 0x20 | |||
| /********* solver *********/ | |||
| .L_dsolve_8x4: | |||
| dsolve_8x4 | |||
| .endm | |||
| .macro dgemm_dsolve_4x4 | |||
| bge ZERO, L, .L_dsolve_4x4_load | |||
| dgemm_4x4 | |||
| b .L_dsolve_4x4 | |||
| .L_dsolve_4x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| /* Load C2 */ | |||
| xvld U2, C2, 0x00 | |||
| /* Load C3 */ | |||
| xvld U3, C3, 0x00 | |||
| /************** solver *****************/ | |||
| .L_dsolve_4x4: | |||
| dsolve_4x4 | |||
| .endm | |||
| .macro dgemm_dsolve_2x4 | |||
| bge ZERO, L, .L_dsolve_2x4_load | |||
| dgemm_2x4 | |||
| xvpermi.q U2, U0, 0x01 | |||
| xvpermi.q U3, U1, 0x01 | |||
| b .L_dsolve_2x4 | |||
| .L_dsolve_2x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| /* Load C2 */ | |||
| xvld U2, C2, 0x00 | |||
| /* Load C3 */ | |||
| xvld U3, C3, 0x00 | |||
| /********************** solver ******************/ | |||
| .L_dsolve_2x4: | |||
| dsolve_2x4 | |||
| .endm | |||
| .macro dgemm_dsolve_1x4 | |||
| bge ZERO, L, .L_dsolve_1x4_load | |||
| dgemm_1x4 | |||
| xvpackod.d U1, U0, U0 | |||
| xvpermi.q U2, U0, 0x01 | |||
| xvpermi.q U3, U1, 0x01 | |||
| b .L_dsolve_1x4 | |||
| .L_dsolve_1x4_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| fld.d $f1, C1, 0x00 | |||
| fld.d $f2, C2, 0x00 | |||
| fld.d $f3, C3, 0x00 | |||
| .L_dsolve_1x4: | |||
| dsolve_1x4 | |||
| .endm | |||
| .macro dgemm_dsolve_16x2 | |||
| bge ZERO, L, .L_dsolve_16x2_load | |||
| dgemm_16x2 | |||
| b .L_dsolve_16x2 | |||
| .L_dsolve_16x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| xvld U2, C0, 0x40 | |||
| xvld U3, C0, 0x60 | |||
| /* Load C1 */ | |||
| xvld U4, C1, 0x00 | |||
| xvld U5, C1, 0x20 | |||
| xvld U6, C1, 0x40 | |||
| xvld U7, C1, 0x60 | |||
| .L_dsolve_16x2: | |||
| dsolve_16x2 | |||
| .endm | |||
| .macro dgemm_dsolve_8x2 | |||
| bge ZERO, L, .L_dsolve_8x2_load | |||
| dgemm_8x2 | |||
| b .L_dsolve_8x2 | |||
| .L_dsolve_8x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| /* Load C1 */ | |||
| xvld U2, C1, 0x00 | |||
| xvld U3, C1, 0x20 | |||
| .L_dsolve_8x2: | |||
| dsolve_8x2 | |||
| .endm | |||
| .macro dgemm_dsolve_4x2 | |||
| bge ZERO, L, .L_dsolve_4x2_load | |||
| dgemm_4x2 | |||
| b .L_dsolve_4x2 | |||
| .L_dsolve_4x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| .L_dsolve_4x2: | |||
| dsolve_4x2 | |||
| .endm | |||
| .macro dgemm_dsolve_2x2 | |||
| bge ZERO, L, .L_dsolve_2x2_load | |||
| dgemm_2x2 | |||
| b .L_dsolve_2x2 | |||
| .L_dsolve_2x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| .L_dsolve_2x2: | |||
| dsolve_2x2 | |||
| .endm | |||
| .macro dgemm_dsolve_1x2 | |||
| bge ZERO, L, .L_dsolve_1x2_load | |||
| dgemm_1x2 | |||
| xvpackod.d U1, U0, U0 | |||
| b .L_dsolve_1x2 | |||
| .L_dsolve_1x2_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| fld.d $f1, C1, 0x00 | |||
| .L_dsolve_1x2: | |||
| dsolve_1x2 | |||
| .endm | |||
| .macro dgemm_dsolve_16x1 | |||
| bge ZERO, L, .L_dsolve_16x1_load | |||
| dgemm_16x1 | |||
| b .L_dsolve_16x1 | |||
| .L_dsolve_16x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| xvld U2, C0, 0x40 | |||
| xvld U3, C0, 0x60 | |||
| .L_dsolve_16x1: | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 4 | |||
| // Strore C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 | |||
| .endm | |||
| .macro dgemm_dsolve_8x1 | |||
| bge ZERO, L, .L_dsolve_8x1_load | |||
| dgemm_8x1 | |||
| b .L_dsolve_8x1 | |||
| .L_dsolve_8x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| .L_dsolve_8x1: | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1 | |||
| // Store A | |||
| A_st_macro 0, 1, 0, 4 | |||
| // Strore C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20 | |||
| .endm | |||
| .macro dgemm_dsolve_4x1 | |||
| bge ZERO, L, .L_dsolve_4x1_load | |||
| dgemm_4x1 | |||
| b .L_dsolve_4x1 | |||
| .L_dsolve_4x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| .L_dsolve_4x1: | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 0, 0, 4 | |||
| // Strore C | |||
| GST xv, , U0, C0, 0x00 | |||
| .endm | |||
| .macro dgemm_dsolve_2x1 | |||
| bge ZERO, L, .L_dsolve_2x1_load | |||
| dgemm_2x1 | |||
| b .L_dsolve_2x1 | |||
| .L_dsolve_2x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| .L_dsolve_2x1: | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 0, 0, 2 | |||
| // Strore C | |||
| GST v, , $vr0, C0, 0x00 | |||
| .endm | |||
| .macro dgemm_dsolve_1x1 | |||
| bge ZERO, L, .L_dsolve_1x1_load | |||
| dgemm_1x1 | |||
| b .L_dsolve_1x1 | |||
| .L_dsolve_1x1_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| .L_dsolve_1x1: | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 0, 0, 1 | |||
| // Strore C | |||
| GST f, d, $f0, C0, 0x00 | |||
| .endm | |||
| PROLOGUE | |||
| push_if_used 26, 32 | |||
| PTR_SLLI LDC, LDC, 3 | |||
| PTR_SUB KK, ZERO, OFFSET | |||
| /* if (!(N >> 2)) goto L_N3 */ | |||
| PTR_SRAI J, N, 2 /* J = bn >> 2 */ | |||
| andi N, N, 0x03 | |||
| beq ZERO, J, .L_N3 | |||
| .align 5 | |||
| .L_J1: | |||
| PTR_ADDI J, J, -1 | |||
| move AA, A | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_M15 | |||
| .align 4 | |||
| .L_I1: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_16x4 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_I1 | |||
| .L_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_M7 | |||
| .L_M8: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_8x4 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_M3 | |||
| .L_M4: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_4x4 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_M1 | |||
| .L_M2: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_2x4 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_M0 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_1x4 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_M0: | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_SLLI T1, LDC, 2 | |||
| PTR_ADD B, B, T0 // b += 4 * k | |||
| PTR_ADD C, C, T1 // c += 4 * ldc | |||
| PTR_ADDI KK, KK, 4 // kk += 4 | |||
| bnez J, .L_J1 | |||
| .L_N3: | |||
| andi J, N, 2 | |||
| beq ZERO, J, .L_N1 | |||
| .L_N2: | |||
| move AA, A | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_N2_M15 | |||
| .align 4 | |||
| .L_N2_I1: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_16x2 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_N2_I1 | |||
| .L_N2_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_N2_M7 | |||
| .L_N2_M8: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_8x2 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_N2_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_N2_M3 | |||
| .L_N2_M4: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_4x2 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_N2_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_N2_M1 | |||
| .L_N2_M2: | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_2x2 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_N2_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_N2_M0 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_1x2 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_N2_M0: | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_SLLI T1, LDC, 1 | |||
| PTR_ADD B, B, T0 // b += 2 * k | |||
| PTR_ADD C, C, T1 // c += 2 * ldc | |||
| PTR_ADDI KK, KK, 2 // kk += 2 | |||
| .L_N1: | |||
| andi J, N, 1 | |||
| beq ZERO, J, .L_N0 | |||
| move AA, A | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_N1_M15 | |||
| .align 4 | |||
| .L_N1_I1: | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_16x1 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_N1_I1 | |||
| .L_N1_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_N1_M7 | |||
| .L_N1_M8: | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_8x1 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_N1_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_N1_M3 | |||
| .L_N1_M4: | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_4x1 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_N1_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_N1_M1 | |||
| .L_N1_M2: | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_2x1 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_N1_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_N1_M0 | |||
| GADD , d, C0, CC, ZERO | |||
| move A0, AA | |||
| move B0, B | |||
| move L, KK | |||
| dgemm_dsolve_1x1 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_N1_M0: | |||
| .L_N0: | |||
| pop_if_used 26, 32 | |||
| jirl $r0, $r1, 0x0 | |||
| EPILOGUE | |||
| @@ -0,0 +1,953 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2023, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #include "loongarch64_asm.S" | |||
| /********************************************************************* | |||
| * 2023/09/26 guxiwei | |||
| * UTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| * | |||
| *********************************************************************/ | |||
| /* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b, | |||
| * FLOAT *c, BLASLONG ldc, BLASLONG offset) | |||
| */ | |||
| #define M $r4 // param 1: bm | |||
| #define N $r5 // param 2: bn | |||
| #define K $r6 // param 3: bk | |||
| #define A $r7 // param 5: ba | |||
| #define B $r8 // param 6: bb | |||
| #define C $r9 // param 7: bc | |||
| #define LDC $r10 // param 8: ldc | |||
| #define OFFSET $r11 // param 9: offset | |||
| /* Cycle control parameters */ | |||
| #define I $r13 | |||
| #define J $r14 | |||
| #define L $r15 | |||
| #define TL $r16 | |||
| /* Matrix address */ | |||
| #define A0 $r17 | |||
| #define B0 $r18 | |||
| #define C0 $r19 | |||
| #define C1 $r20 | |||
| #define C2 $r23 | |||
| #define C3 $r24 | |||
| #define T0 $r25 | |||
| #define T1 $r26 | |||
| #define T2 $r27 | |||
| #define KK $r28 | |||
| #define AA $r29 | |||
| #define CC $r30 | |||
| #define BB $r31 | |||
| #undef ZERO | |||
| #define ZERO $r0 | |||
| #define U0 $xr0 | |||
| #define U1 $xr1 | |||
| #define U2 $xr2 | |||
| #define U3 $xr3 | |||
| #define U4 $xr4 | |||
| #define U5 $xr5 | |||
| #define U6 $xr6 | |||
| #define U7 $xr7 | |||
| #define U8 $xr8 | |||
| #define U9 $xr9 | |||
| #define U10 $xr10 | |||
| #define U11 $xr11 | |||
| #define U12 $xr12 | |||
| #define U13 $xr13 | |||
| #define U14 $xr14 | |||
| #define U15 $xr15 | |||
| #define D0 $xr16 | |||
| #define D1 $xr17 | |||
| #define D2 $xr18 | |||
| #define D3 $xr19 | |||
| #define D4 $xr20 | |||
| #define D5 $xr21 | |||
| #define D6 $xr22 | |||
| #define D7 $xr23 | |||
| #define D8 $xr24 | |||
| #define D9 $xr25 | |||
| #define D10 $xr26 | |||
| #define D11 $xr27 | |||
| #define D12 $xr28 | |||
| #define D13 $xr29 | |||
| #define D14 $xr30 | |||
| #define D15 $xr31 | |||
| /* Prefetch interval */ | |||
| #define A_PRE 0x400 | |||
| #define B_PRE 0x100 | |||
| #include "dtrsm_kernel_macro.S" | |||
| .macro ldrepl_macro start, end, stride | |||
| // Load Ux (x = 0...15) | |||
| .if \start <= \end | |||
| GLDREPL xv, d, $xr\start, B0, \stride * 8 | |||
| ldrepl_macro %start + 1, \end, %stride + 1 | |||
| .endif | |||
| .endm | |||
| .macro nmsub_macro start0, end0, start1, reg | |||
| // Ux -= reg * Dx | |||
| .if \start0 <= \end0 | |||
| xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0 | |||
| nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg | |||
| .endif | |||
| .endm | |||
| .macro A_st_macro start, end, stride, N | |||
| // Store Ux(x = 0...15) | |||
| .if \start <= \end | |||
| .if \N == 4 | |||
| xvst $xr\start, A0, \stride * 0x20 | |||
| .elseif \N == 2 | |||
| vst $vr\start, A0, \stride * 0x10 | |||
| .elseif \N == 1 | |||
| fst.d $f\start, A0, \stride * 0x08 | |||
| .endif | |||
| A_st_macro %start + 1, \end, %stride + 1, \N | |||
| .endif | |||
| .endm | |||
| .macro dsolve_16x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //2 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 16, 0 | |||
| ldrepl_macro 17, 18, 2 | |||
| GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7 | |||
| nmsub_macro 0, 3, 4, D1 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 | |||
| // Store A | |||
| A_st_macro 0, 7, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \ | |||
| U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 | |||
| .endm | |||
| .macro dsolve_8x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //2 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 16, 0 | |||
| ldrepl_macro 17, 18, 2 | |||
| GMUL xvf, d, U2, D2, U2, U3, D2, U3 | |||
| nmsub_macro 0, 1, 2, D1 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, \ | |||
| U2, C1, 0x00, U3, C1, 0x20 | |||
| .endm | |||
| .macro dsolve_4x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //2 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 16, 0 | |||
| ldrepl_macro 17, 18, 2 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| nmsub_macro 0, 0, 1, D1 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 1, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C1, 0x00 | |||
| .endm | |||
| .macro dsolve_2x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //2 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 16, 0 | |||
| ldrepl_macro 17, 18, 2 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| nmsub_macro 0, 0, 1, D1 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 1, 0, 2 | |||
| // Store C | |||
| GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00 | |||
| .endm | |||
| .macro dsolve_1x2 | |||
| // We are going to process matrix B with a size of 2x2, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //2 3 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 16, 16, 0 | |||
| ldrepl_macro 17, 18, 2 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| nmsub_macro 0, 0, 1, D1 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 1, 0, 1 | |||
| // Store C | |||
| GST f, d, $f0, C0, 0x00, $f1, C1, 0x00 | |||
| .endm | |||
| .macro dsolve_16x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //4 5 | |||
| //8 9 10 | |||
| //12 13 14 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 22, 25, 12 | |||
| GMUL xvf, d, U12, D9, U12, U13, D9, U13, U14, D9, U14, U15, D9, U15 | |||
| ldrepl_macro 19, 21, 8 | |||
| nmsub_macro 8, 11, 12, D8 | |||
| ldrepl_macro 17, 18, 4 | |||
| GMUL xvf, d, U8, D5, U8, U9, D5, U9, U10, D5, U10, U11, D5, U11 | |||
| ldrepl_macro 16, 16, 0 | |||
| nmsub_macro 4, 7, 12, D7 | |||
| nmsub_macro 4, 7, 8, D4 | |||
| GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7 | |||
| nmsub_macro 0, 3, 12, D6 | |||
| nmsub_macro 0, 3, 8, D3 | |||
| nmsub_macro 0, 3, 4, D1 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 | |||
| // Store A | |||
| A_st_macro 0, 15, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \ | |||
| U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60, \ | |||
| U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60, \ | |||
| U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 | |||
| .endm | |||
| .macro dsolve_8x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //4 5 | |||
| //8 9 10 | |||
| //12 13 14 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 22, 25, 12 | |||
| GMUL xvf, d, U6, D9, U6, U7, D9, U7 | |||
| ldrepl_macro 19, 21, 8 | |||
| nmsub_macro 4, 5, 6, D8 | |||
| ldrepl_macro 17, 18, 4 | |||
| GMUL xvf, d, U4, D5, U4, U5, D5, U5 | |||
| ldrepl_macro 16, 16, 0 | |||
| nmsub_macro 2, 3, 6, D7 | |||
| nmsub_macro 2, 3, 4, D4 | |||
| GMUL xvf, d, U2, D2, U2, U3, D2, U3 | |||
| nmsub_macro 0, 1, 6, D6 | |||
| nmsub_macro 0, 1, 4, D3 | |||
| nmsub_macro 0, 1, 2, D1 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1 | |||
| // Store A | |||
| A_st_macro 0, 7, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, \ | |||
| U2, C1, 0x00, U3, C1, 0x20, \ | |||
| U4, C2, 0x00, U5, C2, 0x20, \ | |||
| U6, C3, 0x00, U7, C3, 0x20 | |||
| .endm | |||
| .macro dsolve_4x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //4 5 | |||
| //8 9 10 | |||
| //12 13 14 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 22, 25, 12 | |||
| GMUL xvf, d, U3, D9, U3 | |||
| ldrepl_macro 19, 21, 8 | |||
| nmsub_macro 2, 2, 3, D8 | |||
| ldrepl_macro 17, 18, 4 | |||
| GMUL xvf, d, U2, D5, U2 | |||
| ldrepl_macro 16, 16, 0 | |||
| nmsub_macro 1, 1, 3, D7 | |||
| nmsub_macro 1, 1, 2, D4 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| nmsub_macro 0, 0, 3, D6 | |||
| nmsub_macro 0, 0, 2, D3 | |||
| nmsub_macro 0, 0, 1, D1 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 4 | |||
| // Store C | |||
| GST xv, , U0, C0, 0x00, U1, C1, 0x00, U2, C2, 0x00, U3, C3, 0x00 | |||
| .endm | |||
| .macro dsolve_2x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //4 5 | |||
| //8 9 10 | |||
| //12 13 14 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 22, 25, 12 | |||
| GMUL xvf, d, U3, D9, U3 | |||
| ldrepl_macro 19, 21, 8 | |||
| nmsub_macro 2, 2, 3, D8 | |||
| ldrepl_macro 17, 18, 4 | |||
| GMUL xvf, d, U2, D5, U2 | |||
| ldrepl_macro 16, 16, 0 | |||
| nmsub_macro 1, 1, 3, D7 | |||
| nmsub_macro 1, 1, 2, D4 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| nmsub_macro 0, 0, 3, D6 | |||
| nmsub_macro 0, 0, 2, D3 | |||
| nmsub_macro 0, 0, 1, D1 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 2 | |||
| // Store C | |||
| GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00, $vr2, C2, 0x00, $vr3, C3, 0x00 | |||
| .endm | |||
| .macro dsolve_1x4 | |||
| // We are going to process matrix B with a size of 4x4, | |||
| // using only the upper triangular portion. The memory layout of | |||
| // matrix B is as follows: | |||
| //0 | |||
| //4 5 | |||
| //8 9 10 | |||
| //12 13 14 15 | |||
| // Sequentially extract data from B in row order | |||
| ldrepl_macro 22, 25, 12 | |||
| GMUL xvf, d, U3, D9, U3 | |||
| ldrepl_macro 19, 21, 8 | |||
| nmsub_macro 2, 2, 3, D8 | |||
| ldrepl_macro 17, 18, 4 | |||
| GMUL xvf, d, U2, D5, U2 | |||
| ldrepl_macro 16, 16, 0 | |||
| nmsub_macro 1, 1, 3, D7 | |||
| nmsub_macro 1, 1, 2, D4 | |||
| GMUL xvf, d, U1, D2, U1 | |||
| nmsub_macro 0, 0, 3, D6 | |||
| nmsub_macro 0, 0, 2, D3 | |||
| nmsub_macro 0, 0, 1, D1 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 1 | |||
| // Store C | |||
| GST f, d, $f0, C0, 0x00, $f1, C1, 0x00, $f2, C2, 0x00, $f3, C3, 0x00, | |||
| .endm | |||
| .macro dgemm_dsolve_16x1 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_16x1_load | |||
| dgemm_16x1 | |||
| b .L_dsolve_16x1 | |||
| .L_dsolve_16x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| xvld U2, C0, 0x40 | |||
| xvld U3, C0, 0x60 | |||
| .L_dsolve_16x1: | |||
| PTR_ADDI A0, T1, -16 * 8 | |||
| PTR_ADDI B0, T2, -1 * 8 | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3 | |||
| // Store A | |||
| A_st_macro 0, 3, 0, 4 | |||
| // Strore C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 | |||
| .endm | |||
| .macro dgemm_dsolve_8x1 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_8x1_load | |||
| dgemm_8x1 | |||
| b .L_dsolve_8x1 | |||
| .L_dsolve_8x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| .L_dsolve_8x1: | |||
| PTR_ADDI A0, T1, -8 * 8 | |||
| PTR_ADDI B0, T2, -1 * 8 | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0, U1, D0, U1 | |||
| // Store A | |||
| A_st_macro 0, 1, 0, 4 | |||
| // Strore C | |||
| GST xv, , U0, C0, 0x00, U1, C0, 0x20 | |||
| .endm | |||
| .macro dgemm_dsolve_4x1 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_4x1_load | |||
| dgemm_4x1 | |||
| b .L_dsolve_4x1 | |||
| .L_dsolve_4x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| .L_dsolve_4x1: | |||
| PTR_ADDI A0, T1, -4 * 8 | |||
| PTR_ADDI B0, T2, -1 * 8 | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 0, 0, 4 | |||
| // Strore C | |||
| GST xv, , U0, C0, 0x00 | |||
| .endm | |||
| .macro dgemm_dsolve_2x1 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_2x1_load | |||
| dgemm_2x1 | |||
| b .L_dsolve_2x1 | |||
| .L_dsolve_2x1_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| .L_dsolve_2x1: | |||
| PTR_ADDI A0, T1, -2 * 8 | |||
| PTR_ADDI B0, T2, -1 * 8 | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 0, 0, 2 | |||
| // Strore C | |||
| GST v, , $vr0, C0, 0x00 | |||
| .endm | |||
| .macro dgemm_dsolve_1x1 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_1x1_load | |||
| dgemm_1x1 | |||
| b .L_dsolve_1x1 | |||
| .L_dsolve_1x1_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| .L_dsolve_1x1: | |||
| PTR_ADDI A0, T1, -1 * 8 | |||
| PTR_ADDI B0, T2, -1 * 8 | |||
| ldrepl_macro 16, 16, 0 | |||
| GMUL xvf, d, U0, D0, U0 | |||
| // Store A | |||
| A_st_macro 0, 0, 0, 1 | |||
| // Strore C | |||
| GST f, d, $f0, C0, 0x00 | |||
| .endm | |||
| .macro dgemm_dsolve_16x2 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_16x2_load | |||
| dgemm_16x2 | |||
| b .L_dsolve_16x2 | |||
| .L_dsolve_16x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| xvld U2, C0, 0x40 | |||
| xvld U3, C0, 0x60 | |||
| /* Load C1 */ | |||
| xvld U4, C1, 0x00 | |||
| xvld U5, C1, 0x20 | |||
| xvld U6, C1, 0x40 | |||
| xvld U7, C1, 0x60 | |||
| .L_dsolve_16x2: | |||
| PTR_ADDI A0, T1, -(16 * 2) * 8 | |||
| PTR_ADDI B0, T2, -(2 * 2) * 8 | |||
| dsolve_16x2 | |||
| .endm | |||
| .macro dgemm_dsolve_8x2 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_8x2_load | |||
| dgemm_8x2 | |||
| b .L_dsolve_8x2 | |||
| .L_dsolve_8x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| /* Load C1 */ | |||
| xvld U2, C1, 0x00 | |||
| xvld U3, C1, 0x20 | |||
| .L_dsolve_8x2: | |||
| PTR_ADDI A0, T1, -(8 * 2) * 8 | |||
| PTR_ADDI B0, T2, -(2 * 2) * 8 | |||
| dsolve_8x2 | |||
| .endm | |||
| .macro dgemm_dsolve_4x2 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_4x2_load | |||
| dgemm_4x2 | |||
| b .L_dsolve_4x2 | |||
| .L_dsolve_4x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| .L_dsolve_4x2: | |||
| PTR_ADDI A0, T1, -(4 * 2) * 8 | |||
| PTR_ADDI B0, T2, -(2 * 2) * 8 | |||
| dsolve_4x2 | |||
| .endm | |||
| .macro dgemm_dsolve_2x2 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_2x2_load | |||
| dgemm_2x2 | |||
| b .L_dsolve_2x2 | |||
| .L_dsolve_2x2_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| .L_dsolve_2x2: | |||
| PTR_ADDI A0, T1, -(2 * 2) * 8 | |||
| PTR_ADDI B0, T2, -(2 * 2) * 8 | |||
| dsolve_2x2 | |||
| .endm | |||
| .macro dgemm_dsolve_1x2 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_1x2_load | |||
| dgemm_1x2 | |||
| xvpackod.d U1, U0, U0 | |||
| b .L_dsolve_1x2 | |||
| .L_dsolve_1x2_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| fld.d $f1, C1, 0x00 | |||
| .L_dsolve_1x2: | |||
| PTR_ADDI A0, T1, -(1 * 2) * 8 | |||
| PTR_ADDI B0, T2, -(2 * 2) * 8 | |||
| dsolve_1x2 | |||
| .endm | |||
| .macro dgemm_dsolve_16x4 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_16x4_load | |||
| dgemm_16x4 | |||
| b .L_dsolve_16x4 | |||
| .L_dsolve_16x4_load: | |||
| // Load C | |||
| GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60 | |||
| GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60 | |||
| GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60 | |||
| GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60 | |||
| /********************** solver ******************/ | |||
| .L_dsolve_16x4: | |||
| PTR_ADDI A0, T1, -(16 * 4) * 8 | |||
| PTR_ADDI B0, T2, -(4 * 4) * 8 | |||
| dsolve_16x4 | |||
| .endm | |||
| .macro dgemm_dsolve_8x4 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_8x4_load | |||
| dgemm_8x4 | |||
| b .L_dsolve_8x4 | |||
| .L_dsolve_8x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| xvld U1, C0, 0x20 | |||
| /* Load C1 */ | |||
| xvld U2, C1, 0x00 | |||
| xvld U3, C1, 0x20 | |||
| /* Load C2 */ | |||
| xvld U4, C2, 0x00 | |||
| xvld U5, C2, 0x20 | |||
| /* Load C3 */ | |||
| xvld U6, C3, 0x00 | |||
| xvld U7, C3, 0x20 | |||
| /********* solver *********/ | |||
| .L_dsolve_8x4: | |||
| PTR_ADDI A0, T1, -(8 * 4) * 8 | |||
| PTR_ADDI B0, T2, -(4 * 4) * 8 | |||
| dsolve_8x4 | |||
| .endm | |||
| .macro dgemm_dsolve_4x4 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_4x4_load | |||
| dgemm_4x4 | |||
| b .L_dsolve_4x4 | |||
| .L_dsolve_4x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| /* Load C2 */ | |||
| xvld U2, C2, 0x00 | |||
| /* Load C3 */ | |||
| xvld U3, C3, 0x00 | |||
| /************** solver *****************/ | |||
| .L_dsolve_4x4: | |||
| PTR_ADDI A0, T1, -(4 * 4) * 8 | |||
| PTR_ADDI B0, T2, -(4 * 4) * 8 | |||
| dsolve_4x4 | |||
| .endm | |||
| .macro dgemm_dsolve_2x4 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_2x4_load | |||
| dgemm_2x4 | |||
| xvpermi.q U2, U0, 0x01 | |||
| xvpermi.q U3, U1, 0x01 | |||
| b .L_dsolve_2x4 | |||
| .L_dsolve_2x4_load: | |||
| /* Load C0 */ | |||
| xvld U0, C0, 0x00 | |||
| /* Load C1 */ | |||
| xvld U1, C1, 0x00 | |||
| /* Load C2 */ | |||
| xvld U2, C2, 0x00 | |||
| /* Load C3 */ | |||
| xvld U3, C3, 0x00 | |||
| /********************** solver ******************/ | |||
| .L_dsolve_2x4: | |||
| PTR_ADDI A0, T1, -(2 * 4) * 8 | |||
| PTR_ADDI B0, T2, -(4 * 4) * 8 | |||
| dsolve_2x4 | |||
| .endm | |||
| .macro dgemm_dsolve_1x4 | |||
| or T1, A0, A0 | |||
| or T2, B0, B0 | |||
| bge ZERO, L, .L_dsolve_1x4_load | |||
| dgemm_1x4 | |||
| xvpackod.d U1, U0, U0 | |||
| xvpermi.q U2, U0, 0x01 | |||
| xvpermi.q U3, U1, 0x01 | |||
| b .L_dsolve_1x4 | |||
| .L_dsolve_1x4_load: | |||
| // Load C | |||
| fld.d $f0, C0, 0x00 | |||
| fld.d $f1, C1, 0x00 | |||
| fld.d $f2, C2, 0x00 | |||
| fld.d $f3, C3, 0x00 | |||
| .L_dsolve_1x4: | |||
| PTR_ADDI A0, T1, -(1 * 4) * 8 | |||
| PTR_ADDI B0, T2, -(4 * 4) * 8 | |||
| dsolve_1x4 | |||
| .endm | |||
| PROLOGUE | |||
| push_if_used 26, 32 | |||
| PTR_SLLI LDC, LDC, 3 | |||
| PTR_SUB KK, N, OFFSET | |||
| PTR_MUL T0, N, LDC | |||
| PTR_MUL T1, N, K | |||
| PTR_ADD C, C, T0 // c += n * ldc | |||
| PTR_SLLI T1, T1, 3 | |||
| PTR_ADD B, B, T1 | |||
| andi J, N, 1 | |||
| beqz J, .L_N2 | |||
| .L_N1: | |||
| move AA, A | |||
| PTR_SUB C, C, LDC // c -= ldc | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_SLLI T1, KK, 3 | |||
| PTR_SUB B, B, T0 // b -= k | |||
| PTR_ADD BB, B, T1 // bb = b + kk | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_N1_M15 | |||
| .align 4 | |||
| .L_N1_I1: | |||
| PTR_SLLI T1, KK, 7 | |||
| GADD , d, C0, CC, ZERO | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_16x1 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_N1_I1 | |||
| .L_N1_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_N1_M7 | |||
| .L_N1_M8: | |||
| PTR_SLLI T1, KK, 6 | |||
| GADD , d, C0, CC, ZERO | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_8x1 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_N1_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_N1_M3 | |||
| .L_N1_M4: | |||
| PTR_SLLI T1, KK, 5 | |||
| GADD , d, C0, CC, ZERO | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_4x1 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_N1_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_N1_M1 | |||
| .L_N1_M2: | |||
| PTR_SLLI T1, KK, 4 | |||
| GADD , d, C0, CC, ZERO | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_2x1 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_N1_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_N1_M0 | |||
| PTR_SLLI T1, KK, 3 | |||
| GADD , d, C0, CC, ZERO | |||
| PTR_ADD A0, AA, T1 // a0 = aa + kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_1x1 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_N1_M0: | |||
| PTR_ADDI KK, KK, -1 | |||
| .L_N2: | |||
| andi J, N, 2 | |||
| beq ZERO, J, .L_N4 | |||
| move AA, A | |||
| PTR_SLLI T0, LDC, 1 | |||
| PTR_SLLI T1, K, 4 | |||
| PTR_SLLI T2, KK, 4 | |||
| PTR_SUB B, B, T1 | |||
| PTR_SUB C, C, T0 | |||
| PTR_ADD BB, B, T2 | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_N2_M15 | |||
| .align 4 | |||
| .L_N2_I1: | |||
| PTR_SLLI T1, KK, 7 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_16x2 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_N2_I1 | |||
| .L_N2_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_N2_M7 | |||
| .L_N2_M8: | |||
| PTR_SLLI T1, KK, 6 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_8x2 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_N2_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_N2_M3 | |||
| .L_N2_M4: | |||
| PTR_SLLI T1, KK, 5 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_4x2 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_N2_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_N2_M1 | |||
| .L_N2_M2: | |||
| PTR_SLLI T1, KK, 4 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_2x2 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_N2_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_N2_M0 | |||
| PTR_SLLI T1, KK, 3 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_1x2 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_N2_M0: | |||
| PTR_ADDI KK, KK, -2 | |||
| .L_N4: | |||
| PTR_SRAI J, N, 2 /* J = bn >> 2 */ | |||
| beq ZERO, J, .L_N0 | |||
| .align 5 | |||
| .L_J1: | |||
| PTR_ADDI J, J, -1 | |||
| move AA, A | |||
| PTR_SLLI T0, LDC, 2 | |||
| PTR_SLLI T1, K, 5 | |||
| PTR_SLLI T2, KK, 5 | |||
| PTR_SUB B, B, T1 | |||
| PTR_SUB C, C, T0 | |||
| PTR_ADD BB, B, T2 | |||
| move CC, C | |||
| PTR_SRAI I, M, 4 // M >> 4 | |||
| beqz I, .L_M15 | |||
| .align 4 | |||
| .L_I1: | |||
| PTR_SLLI T1, KK, 7 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_16x4 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_SLLI T0, K, 7 | |||
| PTR_ADDI CC, CC, 0x80 // cc += 16 | |||
| PTR_ADD AA, AA, T0 // aa += 16 * k | |||
| bnez I, .L_I1 | |||
| .L_M15: | |||
| andi I, M, 8 | |||
| beqz I, .L_M7 | |||
| .L_M8: | |||
| PTR_SLLI T1, KK, 6 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_8x4 | |||
| PTR_SLLI T0, K, 6 | |||
| PTR_ADDI CC, CC, 0x40 // cc += 8 | |||
| PTR_ADD AA, AA, T0 // aa += 8 * k | |||
| .L_M7: | |||
| andi I, M, 4 | |||
| beqz I, .L_M3 | |||
| .L_M4: | |||
| PTR_SLLI T1, KK, 5 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_4x4 | |||
| PTR_SLLI T0, K, 5 | |||
| PTR_ADDI CC, CC, 0x20 // cc += 4 | |||
| PTR_ADD AA, AA, T0 // aa += 4 * k | |||
| .L_M3: | |||
| andi I, M, 2 | |||
| beqz I, .L_M1 | |||
| .L_M2: | |||
| PTR_SLLI T1, KK, 4 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_2x4 | |||
| PTR_SLLI T0, K, 4 | |||
| PTR_ADDI CC, CC, 0x10 // cc += 2 | |||
| PTR_ADD AA, AA, T0 // aa += 2 * k | |||
| .L_M1: | |||
| andi I, M, 1 | |||
| beqz I, .L_M0 | |||
| PTR_SLLI T1, KK, 3 | |||
| GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC | |||
| PTR_ADD A0, AA, T1 // a0 = aa + kk | |||
| move B0, BB | |||
| PTR_SUB L, K, KK // L = K - KK | |||
| dgemm_dsolve_1x4 | |||
| PTR_SLLI T0, K, 3 | |||
| PTR_ADDI CC, CC, 0x08 // cc += 1 | |||
| PTR_ADD AA, AA, T0 // aa += 1 * k | |||
| .L_M0: | |||
| PTR_ADDI KK, KK, -4 | |||
| bnez J, .L_J1 | |||
| .L_N0: | |||
| pop_if_used 26, 32 | |||
| jirl $r0, $r1, 0x0 | |||
| EPILOGUE | |||
| @@ -0,0 +1,463 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2023, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #include "loongarch64_asm.S" | |||
| /********************************************************************* | |||
| * 2023/08/30 guxiwei | |||
| * UTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| * | |||
| *********************************************************************/ | |||
| /* int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, | |||
| * FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| */ | |||
| #define M $r4 | |||
| #define N $r5 | |||
| #define ALPHA $f0 | |||
| #define A $r7 | |||
| #define LDA $r8 | |||
| #define X $r9 | |||
| #define INC_X $r10 | |||
| #define Y $r11 | |||
| #define INC_Y $r6 | |||
| #define J $r12 | |||
| #define I $r13 | |||
| #define K $r14 | |||
| #define Y_ORG $r15 | |||
| #define OFFSET $r16 | |||
| #define K_LDA $r17 | |||
| #define M4 $r18 | |||
| #define T0 $r19 | |||
| #define PA0 $r20 | |||
| #define PA1 $r23 | |||
| #define PA2 $r24 | |||
| #define PA3 $r25 | |||
| #define PA4 $r26 | |||
| #define PA5 $r27 | |||
| #define PA6 $r28 | |||
| #define PA7 $r29 | |||
| #define VALPHA $xr1 | |||
| #define X0 $xr2 | |||
| #define X1 $xr3 | |||
| #define X2 $xr4 | |||
| #define X3 $xr5 | |||
| #define X4 $xr6 | |||
| #define X5 $xr7 | |||
| #define X6 $xr8 | |||
| #define X7 $xr9 | |||
| #define Y0 $xr10 | |||
| #define A0 $xr11 | |||
| #define A1 $xr12 | |||
| #define A2 $xr13 | |||
| #define A3 $xr14 | |||
| #define A4 $xr15 | |||
| #define A5 $xr16 | |||
| #define A6 $xr17 | |||
| #define A7 $xr18 | |||
| #define X0_F $f2 | |||
| #define X1_F $f3 | |||
| #define X2_F $f4 | |||
| #define X3_F $f5 | |||
| #define X4_F $f6 | |||
| #define X5_F $f7 | |||
| #define X6_F $f8 | |||
| #define X7_F $f9 | |||
| #define Y0_F $f10 | |||
| #define A0_F $f11 | |||
| #define A1_F $f12 | |||
| #define A2_F $f13 | |||
| #define A3_F $f14 | |||
| #define A4_F $f15 | |||
| #define A5_F $f16 | |||
| #define A6_F $f17 | |||
| #define A7_F $f18 | |||
| .macro SLOAD_X_8 | |||
| GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04, X2, X, 0x08, X3, X, 0x0C, \ | |||
| X4, X, 0x10, X5, X, 0x14, X6, X, 0x18, X7, X, 0x1C | |||
| GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA, \ | |||
| X4, X4, VALPHA, X5, X5, VALPHA, X6, X6, VALPHA, X7, X7, VALPHA | |||
| .endm | |||
| .macro SLOAD_X_8_GAP | |||
| xvldrepl.w X0, X, 0x00 | |||
| PTR_ADD T0, X, INC_X | |||
| xvldrepl.w X1, T0, 0x00 | |||
| PTR_ADD T0, T0, INC_X | |||
| xvldrepl.w X2, T0, 0x00 | |||
| PTR_ADD T0, T0, INC_X | |||
| xvldrepl.w X3, T0, 0x00 | |||
| PTR_ADD T0, T0, INC_X | |||
| xvldrepl.w X4, T0, 0x00 | |||
| PTR_ADD T0, T0, INC_X | |||
| xvldrepl.w X5, T0, 0x00 | |||
| PTR_ADD T0, T0, INC_X | |||
| xvldrepl.w X6, T0, 0x00 | |||
| PTR_ADD T0, T0, INC_X | |||
| xvldrepl.w X7, T0, 0x00 | |||
| GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA, \ | |||
| X4, X4, VALPHA, X5, X5, VALPHA, X6, X6, VALPHA, X7, X7, VALPHA | |||
| .endm | |||
| .macro SLOAD_X_4 | |||
| GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04, X2, X, 0x08, X3, X, 0x0C | |||
| GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA | |||
| .endm | |||
| .macro SLOAD_X_4_GAP | |||
| xvldrepl.w X0, X, 0x00 | |||
| PTR_ADD T0, X, INC_X | |||
| xvldrepl.w X1, T0, 0x00 | |||
| PTR_ADD T0, T0, INC_X | |||
| xvldrepl.w X2, T0, 0x00 | |||
| PTR_ADD T0, T0, INC_X | |||
| xvldrepl.w X3, T0, 0x00 | |||
| GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA | |||
| .endm | |||
| .macro SLOAD_X_2 | |||
| GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04 | |||
| GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA | |||
| .endm | |||
| .macro SLOAD_X_2_GAP | |||
| xvldrepl.w X0, X, 0x00 | |||
| PTR_ADD T0, X, INC_X | |||
| xvldrepl.w X1, T0, 0x00 | |||
| GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA | |||
| .endm | |||
| .macro SLOAD_X_1 | |||
| GLDREPL xv, w, X0, X, 0x00 | |||
| GMUL xvf, s, X0, X0, VALPHA | |||
| .endm | |||
| .macro SLOAD_Y_8 | |||
| GLD xv, , Y0, Y, 0 | |||
| .endm | |||
| .macro SLOAD_Y_8_GAP | |||
| fld.s Y0_F, Y, 0 | |||
| fldx.s A0_F, Y, INC_Y | |||
| PTR_ALSL T0, INC_Y, Y, 1 | |||
| fld.s A1_F, T0, 0 | |||
| fldx.s A2_F, T0, INC_Y | |||
| PTR_ALSL T0, INC_Y, Y, 2 | |||
| fld.s A3_F, T0, 0 | |||
| fldx.s A4_F, T0, INC_Y | |||
| PTR_ADD T0, T0, INC_Y | |||
| PTR_ADD T0, T0, INC_Y | |||
| fld.s A5_F, T0, 0 | |||
| fldx.s A6_F, T0, INC_Y | |||
| GINSVE0 xv, w, Y0, A0, 1, Y0, A1, 2, Y0, A2, 3, Y0, A3, 4, \ | |||
| Y0, A4, 5, Y0, A5, 6, Y0, A6, 7 | |||
| .endm | |||
| .macro SLOAD_Y_1 | |||
| GLD f, s, Y0_F, Y, 0 | |||
| .endm | |||
| .macro SGEMV_N_8x8 | |||
| GLD_INC xv, , 0x20, \ | |||
| A0, PA0, 0, A1, PA1, 0, \ | |||
| A2, PA2, 0, A3, PA3, 0, \ | |||
| A4, PA4, 0, A5, PA5, 0, \ | |||
| A6, PA6, 0, A7, PA7, 0 | |||
| GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0, \ | |||
| Y0, A2, X2, Y0, Y0, A3, X3, Y0, \ | |||
| Y0, A4, X4, Y0, Y0, A5, X5, Y0, \ | |||
| Y0, A6, X6, Y0, Y0, A7, X7, Y0 | |||
| .endm | |||
| .macro SGEMV_N_1x8 | |||
| GLD_INC f, s, 0x04, \ | |||
| A0_F, PA0, 0, A1_F, PA1, 0, \ | |||
| A2_F, PA2, 0, A3_F, PA3, 0, \ | |||
| A4_F, PA4, 0, A5_F, PA5, 0, \ | |||
| A6_F, PA6, 0, A7_F, PA7, 0 | |||
| GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F, \ | |||
| Y0_F, A2_F, X2_F, Y0_F, Y0_F, A3_F, X3_F, Y0_F, \ | |||
| Y0_F, A4_F, X4_F, Y0_F, Y0_F, A5_F, X5_F, Y0_F, \ | |||
| Y0_F, A6_F, X6_F, Y0_F, Y0_F, A7_F, X7_F, Y0_F | |||
| .endm | |||
| .macro SGEMV_N_8x4 | |||
| GLD_INC xv, , 0x20, \ | |||
| A0, PA0, 0, A1, PA1, 0, \ | |||
| A2, PA2, 0, A3, PA3, 0 | |||
| GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0, \ | |||
| Y0, A2, X2, Y0, Y0, A3, X3, Y0 | |||
| .endm | |||
| .macro SGEMV_N_1x4 | |||
| GLD_INC f, s, 0x04, \ | |||
| A0_F, PA0, 0, A1_F, PA1, 0, \ | |||
| A2_F, PA2, 0, A3_F, PA3, 0 | |||
| GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F, \ | |||
| Y0_F, A2_F, X2_F, Y0_F, Y0_F, A3_F, X3_F, Y0_F | |||
| .endm | |||
| .macro SGEMV_N_8x2 | |||
| GLD_INC xv, , 0x20, \ | |||
| A0, PA0, 0, A1, PA1, 0 | |||
| GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0 | |||
| .endm | |||
| .macro SGEMV_N_1x2 | |||
| GLD_INC f, s, 0x04, \ | |||
| A0_F, PA0, 0, A1_F, PA1, 0 | |||
| GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F | |||
| .endm | |||
| .macro SGEMV_N_1x1 | |||
| GLD_INC f, s, 0x04, A0_F, PA0, 0 | |||
| GMADD f, s, Y0_F, A0_F, X0_F, Y0_F | |||
| .endm | |||
| .macro SSTORE_Y_8 | |||
| GST xv, , Y0, Y, 0 | |||
| .endm | |||
| .macro SSTORE_Y_8_GAP | |||
| xvstelm.w Y0, Y, 0, 0 | |||
| PTR_ADD T0, Y, INC_Y | |||
| xvstelm.w Y0, T0, 0, 1 | |||
| PTR_ADD T0, T0, INC_Y | |||
| xvstelm.w Y0, T0, 0, 2 | |||
| PTR_ADD T0, T0, INC_Y | |||
| xvstelm.w Y0, T0, 0, 3 | |||
| PTR_ADD T0, T0, INC_Y | |||
| xvstelm.w Y0, T0, 0, 4 | |||
| PTR_ADD T0, T0, INC_Y | |||
| xvstelm.w Y0, T0, 0, 5 | |||
| PTR_ADD T0, T0, INC_Y | |||
| xvstelm.w Y0, T0, 0, 6 | |||
| PTR_ADD T0, T0, INC_Y | |||
| xvstelm.w Y0, T0, 0, 7 | |||
| .endm | |||
| .macro SSTORE_Y_1 | |||
| GST f, s, Y0_F, Y, 0 | |||
| .endm | |||
| .macro SGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req | |||
| PTR_SRLI J, N, 3 | |||
| beqz J, .L_\XW\()_N_7 | |||
| PTR_SLLI K_LDA, LDA, 3 | |||
| PTR_SUB K_LDA, K_LDA, M4 | |||
| .L_\XW\()_N_L8: | |||
| SLOAD_\X_8 | |||
| xor K, K, K | |||
| move Y, Y_ORG | |||
| PTR_SRLI I, M, 3 | |||
| beqz I, .L_\XW\()_M_7 | |||
| .align 5 | |||
| .L_\XW\()_M_L8: | |||
| SLOAD_\Y_8 | |||
| SGEMV_N_8x8 | |||
| SSTORE_\Y_8 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ALSL Y, INC_Y, Y, 3 | |||
| PTR_ADDI K, K, 8 | |||
| bnez I, .L_\XW\()_M_L8 | |||
| .L_\XW\()_M_7: | |||
| andi I, M, 7 | |||
| beqz I, .L_\XW\()_M_END | |||
| .align 5 | |||
| .L_\XW\()_M_L1: | |||
| SLOAD_\Y_1 | |||
| SGEMV_N_1x8 | |||
| SSTORE_\Y_1 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADD Y, Y, INC_Y | |||
| PTR_ADDI K, K, 1 | |||
| bnez I, .L_\XW\()_M_L1 | |||
| .L_\XW\()_M_END: | |||
| PTR_ADDI J, J, -1 | |||
| #if __loongarch_grlen == 64 | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ | |||
| PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA | |||
| #elif __loongarch_grlen == 32 | |||
| GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ | |||
| PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA | |||
| #else | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ | |||
| PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA | |||
| #endif | |||
| PTR_ALSL X, INC_X, X, 3 | |||
| bnez J, .L_\XW\()_N_L8 | |||
| .L_\XW\()_N_7: | |||
| andi J, N, 4 | |||
| beqz J, .L_\XW\()_N_3 | |||
| SLOAD_\X_4 | |||
| xor K, K, K | |||
| move Y, Y_ORG | |||
| PTR_SRLI I, M, 3 | |||
| beqz I, .L_\XW\()_N_4_M_7 | |||
| .align 5 | |||
| .L_\XW\()_N_4_M_L8: | |||
| SLOAD_\Y_8 | |||
| SGEMV_N_8x4 | |||
| SSTORE_\Y_8 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADDI K, K, 8 | |||
| PTR_ALSL Y, INC_Y, Y, 3 | |||
| bnez I, .L_\XW\()_N_4_M_L8 | |||
| .L_\XW\()_N_4_M_7: | |||
| andi I, M, 7 | |||
| beqz I, .L_\XW\()_N_4_M_END | |||
| .align 5 | |||
| .L_\XW\()_N_4_M_L1: | |||
| SLOAD_\Y_1 | |||
| SGEMV_N_1x4 | |||
| SSTORE_\Y_1 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADD Y, Y, INC_Y | |||
| PTR_ADDI K, K, 1 | |||
| bnez I, .L_\XW\()_N_4_M_L1 | |||
| .L_\XW\()_N_4_M_END: | |||
| PTR_SLLI K_LDA, LDA, 2 | |||
| PTR_SUB K_LDA, K_LDA, M4 | |||
| #if __loongarch_grlen == 64 | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA | |||
| #elif __loongarch_grlen == 32 | |||
| GADD , w PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA | |||
| #else | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA | |||
| #endif | |||
| PTR_ALSL X, INC_X, X, 2 | |||
| .L_\XW\()_N_3: | |||
| andi J, N, 2 | |||
| beqz J, .L_\XW\()_N_1 | |||
| SLOAD_\X_2 | |||
| xor K, K, K | |||
| move Y, Y_ORG | |||
| PTR_SRLI I, M, 3 | |||
| beqz I, .L_\XW\()_N_2_M_7 | |||
| .align 5 | |||
| .L_\XW\()_N_2_M_L8: | |||
| SLOAD_\Y_8 | |||
| SGEMV_N_8x2 | |||
| SSTORE_\Y_8 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADDI K, K, 8 | |||
| PTR_ALSL Y, INC_Y, Y, 3 | |||
| bnez I, .L_\XW\()_N_2_M_L8 | |||
| .L_\XW\()_N_2_M_7: | |||
| andi I, M, 7 | |||
| beqz I, .L_\XW\()_N_2_M_END | |||
| .align 5 | |||
| .L_\XW\()_N_2_M_L1: | |||
| SLOAD_\Y_1 | |||
| SGEMV_N_1x2 | |||
| SSTORE_\Y_1 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADD Y, Y, INC_Y | |||
| PTR_ADDI K, K, 1 | |||
| bnez I, .L_\XW\()_N_2_M_L1 | |||
| .L_\XW\()_N_2_M_END: | |||
| PTR_SLLI K_LDA, LDA, 1 | |||
| PTR_SUB K_LDA, K_LDA, M4 | |||
| PTR_ADD PA0, PA0, K_LDA | |||
| PTR_ADD PA1, PA1, K_LDA | |||
| PTR_ALSL X, INC_X, X, 1 | |||
| .L_\XW\()_N_1: | |||
| andi J, N, 1 | |||
| beqz J, .L_END | |||
| SLOAD_\X_1 | |||
| xor K, K, K | |||
| move Y, Y_ORG | |||
| move I, M | |||
| beqz I, .L_END | |||
| .align 5 | |||
| .L_\XW\()_N_1_M_L1: | |||
| SLOAD_\Y_1 | |||
| SGEMV_N_1x1 | |||
| SSTORE_\Y_1 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADD Y, Y, INC_Y | |||
| PTR_ADDI K, K, 1 | |||
| bnez I, .L_\XW\()_N_1_M_L1 | |||
| b .L_END | |||
| .endm | |||
| PROLOGUE | |||
| PTR_LD INC_Y, $sp, 0 | |||
| push_if_used 17 + 7, 19 | |||
| PTR_ADDI K, $r0, 0x01 | |||
| PTR_SUB I, INC_X, K | |||
| PTR_SUB J, INC_Y, K | |||
| maskeqz I, K, I /* if(inc_x == 1) I = 0; else I = 1; */ | |||
| maskeqz J, K, J /* if(inc_y == 1) j = 0; else j = 1; */ | |||
| PTR_ALSL I, I, J, 1 | |||
| GSLLI , d, LDA, LDA, 2, INC_X, INC_X, 2, INC_Y, INC_Y, 2, M4, M, 2 | |||
| xvreplve0.w VALPHA, $xr0 | |||
| move Y_ORG, Y | |||
| move PA0, A | |||
| #if __loongarch_grlen == 64 | |||
| GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ | |||
| PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA | |||
| #elif __loongarch_grlen == 32 | |||
| GADD , w, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ | |||
| PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA | |||
| #else | |||
| GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ | |||
| PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA | |||
| #endif | |||
| la.local T0, .L_GAP_TABLE | |||
| PTR_ALSL I, I, T0, 1 | |||
| ld.h K, I, 0 | |||
| PTR_ADD T0, T0, K | |||
| jirl $r0, T0, 0 | |||
| .L_GAP_TABLE: | |||
| .hword .L_GAP_0_0 - .L_GAP_TABLE | |||
| .hword .L_GAP_0_1 - .L_GAP_TABLE | |||
| .hword .L_GAP_1_0 - .L_GAP_TABLE | |||
| .hword .L_GAP_1_1 - .L_GAP_TABLE | |||
| .L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */ | |||
| SGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1 | |||
| .L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */ | |||
| SGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1 | |||
| .L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */ | |||
| SGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1 | |||
| .L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */ | |||
| SGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1 | |||
| .L_END: | |||
| pop_if_used 17 + 7, 19 | |||
| jirl $r0, $r1, 0x0 | |||
| EPILOGUE | |||
| @@ -0,0 +1,405 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2023, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #include "loongarch64_asm.S" | |||
| /********************************************************************* | |||
| * 2023/08/30 guxiwei | |||
| * UTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| * | |||
| *********************************************************************/ | |||
| /* int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, | |||
| * FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| */ | |||
| #define M $r4 | |||
| #define N $r5 | |||
| #define ALPHA $f0 | |||
| #define A $r7 | |||
| #define LDA $r8 | |||
| #define X $r9 | |||
| #define INC_X $r10 | |||
| #define Y $r11 | |||
| #define INC_Y $r6 | |||
| #define J $r12 | |||
| #define I $r13 | |||
| #define K $r14 | |||
| #define PY0 $r14 | |||
| #define X_ORG $r15 | |||
| #define PY1 $r16 | |||
| #define K_LDA $r17 | |||
| #define PY2 $r18 | |||
| #define T0 $r19 | |||
| #define PA0 $r20 | |||
| #define PA1 $r23 | |||
| #define PA2 $r24 | |||
| #define PA3 $r25 | |||
| #define PA4 $r26 | |||
| #define PA5 $r27 | |||
| #define PA6 $r28 | |||
| #define PA7 $r29 | |||
| #define M4 $r30 | |||
| #define VALPHA $xr0 | |||
| #define X0 $xr1 | |||
| #define A0 $xr2 | |||
| #define A1 $xr3 | |||
| #define A2 $xr4 | |||
| #define A3 $xr5 | |||
| #define A4 $xr6 | |||
| #define A5 $xr7 | |||
| #define A6 $xr8 | |||
| #define A7 $xr9 | |||
| #define TP0 $xr10 | |||
| #define TP1 $xr11 | |||
| #define TP2 $xr12 | |||
| #define TP3 $xr13 | |||
| #define TP4 $xr14 | |||
| #define TP5 $xr15 | |||
| #define TP6 $xr16 | |||
| #define TP7 $xr17 | |||
| #define Y0 $xr2 | |||
| #define Y1 $xr3 | |||
| #define Y2 $xr4 | |||
| #define Y3 $xr5 | |||
| #define Y4 $xr6 | |||
| #define Y5 $xr7 | |||
| #define Y6 $xr8 | |||
| #define Y7 $xr9 | |||
| .macro ZERO_Y8 | |||
| GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1, TP2, TP2, TP2, TP3, TP3, TP3, \ | |||
| TP4, TP4, TP4, TP5, TP5, TP5, TP6, TP6, TP6, TP7, TP7, TP7 | |||
| .endm | |||
| .macro ZERO_Y4 | |||
| GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1, TP2, TP2, TP2, TP3, TP3, TP3 | |||
| .endm | |||
| .macro ZERO_Y2 | |||
| GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1 | |||
| .endm | |||
| .macro ZERO_Y1 | |||
| GXOR xv, v, TP0, TP0, TP0 | |||
| .endm | |||
| .macro SLOAD_X8 | |||
| GLD xv, , X0, X, 0x00 | |||
| .endm | |||
| .macro SLOAD_X8_GAP | |||
| fld.s $f1, X, 0x00 | |||
| fldx.s $f2, X, INC_X | |||
| PTR_ALSL T0, INC_X, X, 1 | |||
| fld.s $f3, T0, 0x00 | |||
| fldx.s $f4, T0, INC_X | |||
| GINSVE0 xv, w, X0, A0, 1, X0, A1, 2, X0, A2, 3 | |||
| PTR_ALSL T0, INC_X, X, 2 | |||
| fld.s $f2, T0, 0x00 | |||
| fldx.s $f3, T0, INC_X | |||
| PTR_ALSL T0, INC_X, T0, 1 | |||
| fld.s $f4, T0, 0x00 | |||
| fldx.s $f5, T0, INC_X | |||
| GINSVE0 xv, w, X0, A0, 4, X0, A1, 5, X0, A2, 6, X0, A3, 7 | |||
| .endm | |||
| .macro SGEMV_T_8x8 | |||
| GLD_INC xv, , 0x20, \ | |||
| A0, PA0, 0, A1, PA1, 0, \ | |||
| A2, PA2, 0, A3, PA3, 0, \ | |||
| A4, PA4, 0, A5, PA5, 0, \ | |||
| A6, PA6, 0, A7, PA7, 0 | |||
| GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1, \ | |||
| TP2, A2, X0, TP2, TP3, A3, X0, TP3, \ | |||
| TP4, A4, X0, TP4, TP5, A5, X0, TP5, \ | |||
| TP6, A6, X0, TP6, TP7, A7, X0, TP7 | |||
| .endm | |||
| .macro SGEMV_T_4x8 | |||
| GLD_INC xv, , 0x20, \ | |||
| A0, PA0, 0, A1, PA1, 0, \ | |||
| A2, PA2, 0, A3, PA3, 0 | |||
| GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1, \ | |||
| TP2, A2, X0, TP2, TP3, A3, X0, TP3 | |||
| .endm | |||
| .macro SGEMV_T_2x8 | |||
| GLD_INC xv, , 0x20, \ | |||
| A0, PA0, 0, A1, PA1, 0 | |||
| GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1 | |||
| .endm | |||
| .macro SGEMV_T_LASX XW:req X8:req, X4:req | |||
| PTR_SRLI J, N, 3 | |||
| beqz J, .L_\XW\()_N_7 | |||
| PTR_SLLI K_LDA, LDA, 3 | |||
| PTR_SUB K_LDA, K_LDA, M4 | |||
| .L_\XW\()_N_L8: | |||
| ZERO_Y8 | |||
| move X, X_ORG | |||
| PTR_SRLI I, M, 3 | |||
| beqz I, .L_\XW\()_M_7 | |||
| .align 5 | |||
| .L_\XW\()_M_L8: | |||
| SLOAD_\X8 | |||
| SGEMV_T_8x8 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ALSL X, INC_X, X, 3 | |||
| bnez I, .L_\XW\()_M_L8 | |||
| .L_\XW\()_M_7: | |||
| // Accumulated | |||
| GACC xvf, s, Y0, TP0, Y1, TP1, Y2, TP2, Y3, TP3, Y4, TP4, \ | |||
| Y5, TP5, Y6, TP6, Y7, TP7 | |||
| andi I, M, 7 | |||
| beqz I, .L_\XW\()_M_END | |||
| .align 5 | |||
| .L_\XW\()_M_L1: | |||
| fld.s $f1, X, 0x00 | |||
| fld.s $f10, PA0, 0x00 | |||
| fld.s $f11, PA1, 0x00 | |||
| fld.s $f12, PA2, 0x00 | |||
| fld.s $f13, PA3, 0x00 | |||
| fld.s $f14, PA4, 0x00 | |||
| fld.s $f15, PA5, 0x00 | |||
| fld.s $f16, PA6, 0x00 | |||
| fld.s $f17, PA7, 0x00 | |||
| #if __loongarch_grlen == 64 | |||
| GADDI , d, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \ | |||
| PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04 | |||
| #elif __loongarch_grlen == 32 | |||
| GADDI , w, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \ | |||
| PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04 | |||
| #else | |||
| GADDI , d, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \ | |||
| PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04 | |||
| #endif | |||
| GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3, $f4, $f12, $f1, $f4, $f5, $f13, $f1, $f5, \ | |||
| $f6, $f14, $f1, $f6, $f7, $f15, $f1, $f7, $f8, $f16, $f1, $f8, $f9, $f17, $f1, $f9, | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADD X, X, INC_X | |||
| bnez I, .L_\XW\()_M_L1 | |||
| .L_\XW\()_M_END: | |||
| fld.s $f10, Y, 0x00 | |||
| fldx.s $f11, Y, INC_Y | |||
| PTR_ALSL PY0, INC_Y, Y, 1 | |||
| fld.s $f12, PY0, 0x00 | |||
| fldx.s $f13, PY0, INC_Y | |||
| PTR_ALSL PY1, INC_Y, Y, 2 | |||
| fld.s $f14, PY1, 0x00 | |||
| fldx.s $f15, PY1, INC_Y | |||
| PTR_ALSL PY2, INC_Y, PY1, 1 | |||
| fld.s $f16, PY2, 0x00 | |||
| fldx.s $f17, PY2, INC_Y | |||
| GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11, $f12, ALPHA, $f4, $f12, $f13, ALPHA, $f5, $f13, \ | |||
| $f14, ALPHA, $f6, $f14, $f15, ALPHA, $f7, $f15, $f16, ALPHA, $f8, $f16, $f17, ALPHA, $f9, $f17 | |||
| PTR_ADDI J, J, -1 | |||
| #if __loongarch_grlen == 64 | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ | |||
| PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA | |||
| #elif __loongarch_grlen == 32 | |||
| GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ | |||
| PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA | |||
| #else | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \ | |||
| PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA | |||
| #endif | |||
| fst.s $f10, Y, 0x00 | |||
| fstx.s $f11, Y, INC_Y | |||
| fst.s $f12, PY0, 0x00 | |||
| fstx.s $f13, PY0, INC_Y | |||
| fst.s $f14, PY1, 0x00 | |||
| fstx.s $f15, PY1, INC_Y | |||
| fst.s $f16, PY2, 0x00 | |||
| fstx.s $f17, PY2, INC_Y | |||
| PTR_ALSL Y, INC_Y, Y, 3 | |||
| bnez J, .L_\XW\()_N_L8 | |||
| .L_\XW\()_N_7: | |||
| andi J, N, 4 | |||
| beqz J, .L_\XW\()_N_3 | |||
| ZERO_Y4 | |||
| move X, X_ORG | |||
| PTR_SRLI I, M, 3 | |||
| beqz I, .L_\XW\()_N_4_M_7 | |||
| .align 5 | |||
| .L_\XW\()_N_4_M_L8: | |||
| SLOAD_\X8 | |||
| SGEMV_T_4x8 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ALSL X, INC_X, X, 3 | |||
| bnez I, .L_\XW\()_N_4_M_L8 | |||
| .L_\XW\()_N_4_M_7: | |||
| // Accumulated | |||
| GACC xvf, s, Y0, TP0, Y1, TP1, Y2, TP2, Y3, TP3 | |||
| andi I, M, 7 | |||
| beqz I, .L_\XW\()_N_4_M_END | |||
| .align 5 | |||
| .L_\XW\()_N_4_M_L1: | |||
| fld.s $f1, X, 0x00 | |||
| GLD_INC f, s, 0x04, $f10, PA0, 0x00, $f11, PA1, 0x00, $f12, PA2, 0x00, $f13, PA3, 0x00 | |||
| GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3, $f4, $f12, $f1, $f4, $f5, $f13, $f1, $f5 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADD X, X, INC_X | |||
| bnez I, .L_\XW\()_N_4_M_L1 | |||
| .L_\XW\()_N_4_M_END: | |||
| fld.s $f10, Y, 0x00 | |||
| fldx.s $f11, Y, INC_Y | |||
| PTR_ALSL PY0, INC_Y, Y, 1 | |||
| fld.s $f12, PY0, 0x00 | |||
| fldx.s $f13, PY0, INC_Y | |||
| GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11, $f12, ALPHA, $f4, $f12, $f13, ALPHA, $f5, $f13 | |||
| PTR_SLLI K_LDA, LDA, 2 | |||
| PTR_SUB K_LDA, K_LDA, M4 | |||
| #if __loongarch_grlen == 64 | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA | |||
| #elif __loongarch_grlen == 32 | |||
| GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA | |||
| #else | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA | |||
| #endif | |||
| fst.s $f10, Y, 0x00 | |||
| fstx.s $f11, Y, INC_Y | |||
| fst.s $f12, PY0, 0x00 | |||
| fstx.s $f13, PY0, INC_Y | |||
| PTR_ALSL Y, INC_Y, Y, 2 | |||
| .L_\XW\()_N_3: | |||
| andi J, N, 2 | |||
| beqz J, .L_\XW\()_N_1 | |||
| ZERO_Y2 | |||
| move X, X_ORG | |||
| PTR_SRLI I, M, 3 | |||
| beqz I, .L_\XW\()_N_2_M_7 | |||
| .align 5 | |||
| .L_\XW\()_N_2_M_L8: | |||
| SLOAD_\X8 | |||
| SGEMV_T_2x8 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ALSL X, INC_X, X, 3 | |||
| bnez I, .L_\XW\()_N_2_M_L8 | |||
| .L_\XW\()_N_2_M_7: | |||
| // Accumulated | |||
| GACC xvf, s, Y0, TP0, Y1, TP1 | |||
| andi I, M, 7 | |||
| beqz I, .L_\XW\()_N_2_M_END | |||
| .align 5 | |||
| .L_\XW\()_N_2_M_L1: | |||
| fld.s $f1, X, 0x00 | |||
| GLD_INC f, s, 0x04, $f10, PA0, 0x00, $f11, PA1, 0x00 | |||
| GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADD X, X, INC_X | |||
| bnez I, .L_\XW\()_N_2_M_L1 | |||
| .L_\XW\()_N_2_M_END: | |||
| fld.s $f10, Y, 0x00 | |||
| fldx.s $f11, Y, INC_Y | |||
| GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11 | |||
| PTR_SLLI K_LDA, LDA, 1 | |||
| PTR_SUB K_LDA, K_LDA, M4 | |||
| #if __loongarch_grlen == 64 | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA | |||
| #elif __loongarch_grlen == 32 | |||
| GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA | |||
| #else | |||
| GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA | |||
| #endif | |||
| fst.s $f10, Y, 0x00 | |||
| fstx.s $f11, Y, INC_Y | |||
| PTR_ALSL Y, INC_Y, Y, 1 | |||
| .L_\XW\()_N_1: | |||
| andi J, N, 1 | |||
| beqz J, .L_END | |||
| ZERO_Y1 | |||
| move X, X_ORG | |||
| move I, M | |||
| beqz I, .L_END | |||
| .align 5 | |||
| .L_\XW\()_N_1_M_L1: | |||
| fld.s $f2, PA0, 0x00 | |||
| fld.s $f1, X, 0x00 | |||
| fmadd.s $f10, $f2, $f1, $f10 | |||
| PTR_ADDI I, I, -1 | |||
| PTR_ADD X, X, INC_X | |||
| PTR_ADDI PA0, PA0, 0x04 | |||
| bnez I, .L_\XW\()_N_1_M_L1 | |||
| fld.s $f2, Y, 0x00 | |||
| fmadd.s $f2, ALPHA, $f10, $f2 | |||
| fst.s $f2, Y, 0x00 | |||
| b .L_END | |||
| .endm | |||
| PROLOGUE | |||
| PTR_LD INC_Y, $sp, 0 | |||
| push_if_used 17 + 8, 18 | |||
| PTR_ADDI K, $r0, 0x01 | |||
| PTR_SUB I, INC_X, K | |||
| maskeqz I, K, I /* if(inc_x == 1) I = 0; else I = 1; */ | |||
| GSLLI , d, LDA, LDA, 2, INC_X, INC_X, 2, INC_Y, INC_Y, 2, M4, M, 2 | |||
| xvreplve0.w VALPHA, $xr0 | |||
| move X_ORG, X | |||
| move PA0, A | |||
| #if __loongarch_grlen == 64 | |||
| GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ | |||
| PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA | |||
| #elif __loongarch_grlen == 32 | |||
| GADD , w, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ | |||
| PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA | |||
| #else | |||
| GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \ | |||
| PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA | |||
| #endif | |||
| la.local T0, .L_GAP_TABLE | |||
| PTR_ALSL I, I, T0, 1 | |||
| ld.h K, I, 0 | |||
| PTR_ADD T0, T0, K | |||
| jirl $r0, T0, 0 | |||
| .L_GAP_TABLE: | |||
| .hword .L_GAP_0 - .L_GAP_TABLE | |||
| .hword .L_GAP_1 - .L_GAP_TABLE | |||
| .L_GAP_0: /* if (incx == 1) */ | |||
| SGEMV_T_LASX GAP_0, X8, X4 | |||
| .L_GAP_1: /* if (incx != 1) */ | |||
| SGEMV_T_LASX GAP_1, X8_GAP, X4_GAP | |||
| .L_END: | |||
| pop_if_used 17 + 8, 18 | |||
| jirl $r0, $r1, 0x0 | |||
| EPILOGUE | |||
| @@ -1,6 +1,3 @@ | |||
| ifeq ($(HAVE_GAS), 1) | |||
| include $(KERNELDIR)/KERNEL.POWER8 | |||
| else | |||
| #SGEMM_BETA = ../generic/gemm_beta.c | |||
| #DGEMM_BETA = ../generic/gemm_beta.c | |||
| #CGEMM_BETA = ../generic/zgemm_beta.c | |||
| @@ -19,8 +16,13 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRMMKERNEL = sgemm_kernel_power10.c | |||
| DTRMMKERNEL = dgemm_kernel_power10.c | |||
| ifeq ($(OSNAME), AIX) | |||
| CTRMMKERNEL = ctrmm_kernel_8x4_power8.S | |||
| ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S | |||
| else | |||
| CTRMMKERNEL = cgemm_kernel_power10.S | |||
| ZTRMMKERNEL = zgemm_kernel_power10.S | |||
| endif | |||
| SGEMMKERNEL = sgemm_kernel_power10.c | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| @@ -62,10 +64,18 @@ DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_power10.c | |||
| DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c | |||
| DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c | |||
| ifeq ($(OSNAME), AIX) | |||
| CGEMMKERNEL = cgemm_kernel_8x4_power8.S | |||
| else | |||
| CGEMMKERNEL = cgemm_kernel_power10.S | |||
| endif | |||
| #CGEMMKERNEL = cgemm_kernel_8x4_power8.S | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
| ifeq ($(OSNAME), AIX) | |||
| CGEMMITCOPY = cgemm_tcopy_8_power8.S | |||
| else | |||
| CGEMMITCOPY = ../generic/zgemm_tcopy_8.c | |||
| endif | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| @@ -73,7 +83,11 @@ CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(OSNAME), AIX) | |||
| ZGEMMKERNEL = zgemm_kernel_8x2_power8.S | |||
| else | |||
| ZGEMMKERNEL = zgemm_kernel_power10.S | |||
| endif | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
| @@ -124,6 +138,7 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| #SMINKERNEL = ../arm/min.c | |||
| #DMINKERNEL = ../arm/min.c | |||
| # | |||
| ifeq ($(C_COMPILER), GCC) | |||
| ifneq ($(GCCVERSIONGTEQ9),1) | |||
| ISAMAXKERNEL = isamax_power9.S | |||
| else | |||
| @@ -148,6 +163,15 @@ ICAMINKERNEL = icamin_power9.S | |||
| else | |||
| ICAMINKERNEL = icamin.c | |||
| endif | |||
| else | |||
| ISAMAXKERNEL = isamax.c | |||
| IDAMAXKERNEL = idamax.c | |||
| ICAMAXKERNEL = icamax.c | |||
| IZAMAXKERNEL = izamax.c | |||
| ISAMINKERNEL = isamin.c | |||
| IDAMINKERNEL = idamin.c | |||
| ICAMINKERNEL = icamin.c | |||
| endif | |||
| IZAMINKERNEL = izamin.c | |||
| # | |||
| #ISMAXKERNEL = ../arm/imax.c | |||
| @@ -238,4 +262,3 @@ QCABS_KERNEL = ../generic/cabs.c | |||
| #Dump kernel | |||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | |||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | |||
| endif | |||
| @@ -4,7 +4,7 @@ | |||
| #define ABS_K(a) ((a) > 0 ? (a) : (-(a))) | |||
| #endif | |||
| #if defined(SKYLAKEX) | |||
| #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||
| #include "casum_microk_skylakex-2.c" | |||
| #endif | |||
| @@ -4,7 +4,7 @@ | |||
| #define ABS_K(a) ((a) > 0 ? (a) : (-(a))) | |||
| #endif | |||
| #if defined(SKYLAKEX) | |||
| #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||
| #include "dasum_microk_skylakex-2.c" | |||
| #elif defined(HASWELL) || defined(ZEN) | |||
| #include "dasum_microk_haswell-2.c" | |||
| @@ -159,7 +159,7 @@ static int dot_thread_function(BLASLONG n, BLASLONG dummy0, | |||
| extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, | |||
| BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, | |||
| void *c, BLASLONG ldc, int (*function)(), int nthreads); | |||
| void *c, BLASLONG ldc, int (*function)(void), int nthreads); | |||
| #endif | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| @@ -169,7 +169,7 @@ static int rot_thread_function(blas_arg_t *args) | |||
| return 0; | |||
| } | |||
| extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads); | |||
| extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads); | |||
| #endif | |||
| int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) | |||
| { | |||
| @@ -9,7 +9,7 @@ | |||
| #endif | |||
| #if defined(SKYLAKEX) | |||
| #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||
| #include "sasum_microk_skylakex-2.c" | |||
| #elif defined(HASWELL) || defined(ZEN) | |||
| #include "sasum_microk_haswell-2.c" | |||
| @@ -171,7 +171,7 @@ static int rot_thread_function(blas_arg_t *args) | |||
| return 0; | |||
| } | |||
| extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads); | |||
| extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads); | |||
| #endif | |||
| int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) | |||
| { | |||
| @@ -4,7 +4,7 @@ | |||
| #define ABS_K(a) ((a) > 0 ? (a) : (-(a))) | |||
| #endif | |||
| #if defined(SKYLAKEX) | |||
| #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||
| #include "zasum_microk_skylakex-2.c" | |||
| #endif | |||
| @@ -92,7 +92,7 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) | |||
| #if defined(SMP) | |||
| extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, | |||
| BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, | |||
| void *c, BLASLONG ldc, int (*function)(), int nthreads); | |||
| void *c, BLASLONG ldc, int (*function)(void), int nthreads); | |||
| #endif | |||
| @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -261,24 +260,7 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| @@ -291,217 +273,7 @@ static double dpow_ui(double x, integer n) { | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| @@ -223,7 +223,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -237,145 +236,5 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| @@ -223,7 +223,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -237,149 +236,10 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| /* | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -261,248 +260,8 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| /* | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| @@ -223,7 +223,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -237,149 +236,10 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| /* | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -261,11 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| @@ -279,229 +273,6 @@ static float spow_ui(float x, integer n) { | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| @@ -223,7 +223,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -237,145 +236,5 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| @@ -39,7 +39,7 @@ void LAPACKE_set_nancheck( int flag ) | |||
| nancheck_flag = ( flag ) ? 1 : 0; | |||
| } | |||
| int LAPACKE_get_nancheck( ) | |||
| int LAPACKE_get_nancheck( void ) | |||
| { | |||
| char* env; | |||
| if ( nancheck_flag != -1 ) { | |||
| @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -261,247 +260,7 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| @@ -261,247 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| static float spow_ui(float x, integer n) { | |||
| float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static double dpow_ui(double x, integer n) { | |||
| double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #ifdef _MSC_VER | |||
| static _Fcomplex cpow_ui(complex x, integer n) { | |||
| complex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow.r *= x.r, pow.i *= x.i; | |||
| if(u >>= 1) x.r *= x.r, x.i *= x.i; | |||
| else break; | |||
| } | |||
| } | |||
| _Fcomplex p={pow.r, pow.i}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex float cpow_ui(_Complex float x, integer n) { | |||
| _Complex float pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| static _Dcomplex zpow_ui(_Dcomplex x, integer n) { | |||
| _Dcomplex pow={1.0,0.0}; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; | |||
| if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; | |||
| else break; | |||
| } | |||
| } | |||
| _Dcomplex p = {pow._Val[0], pow._Val[1]}; | |||
| return p; | |||
| } | |||
| #else | |||
| static _Complex double zpow_ui(_Complex double x, integer n) { | |||
| _Complex double pow=1.0; unsigned long int u; | |||
| if(n != 0) { | |||
| if(n < 0) n = -n, x = 1/x; | |||
| for(u = n; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| #endif | |||
| static integer pow_ii(integer x, integer n) { | |||
| integer pow; unsigned long int u; | |||
| if (n <= 0) { | |||
| if (n == 0 || x == 1) pow = 1; | |||
| else if (x != -1) pow = x == 0 ? 1/x : 0; | |||
| else n = -n; | |||
| } | |||
| if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { | |||
| u = n; | |||
| for(pow = 1; ; ) { | |||
| if(u & 01) pow *= x; | |||
| if(u >>= 1) x *= x; | |||
| else break; | |||
| } | |||
| } | |||
| return pow; | |||
| } | |||
| static integer dmaxloc_(double *w, integer s, integer e, integer *n) | |||
| { | |||
| double m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static integer smaxloc_(float *w, integer s, integer e, integer *n) | |||
| { | |||
| float m; integer i, mi; | |||
| for(m=w[s-1], mi=s, i=s+1; i<=e; i++) | |||
| if (w[i-1]>m) mi=i ,m=w[i-1]; | |||
| return mi-s+1; | |||
| } | |||
| static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i])) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i])) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Fcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex float zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i]) * Cf(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]); | |||
| } | |||
| } | |||
| pCf(z) = zdotc; | |||
| } | |||
| #endif | |||
| static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { | |||
| integer n = *n_, incx = *incx_, incy = *incy_, i; | |||
| #ifdef _MSC_VER | |||
| _Dcomplex zdotc = {0.0, 0.0}; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]; | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]; | |||
| zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]; | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #else | |||
| _Complex double zdotc = 0.0; | |||
| if (incx == 1 && incy == 1) { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i]) * Cd(&y[i]); | |||
| } | |||
| } else { | |||
| for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */ | |||
| zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]); | |||
| } | |||
| } | |||
| pCd(z) = zdotc; | |||
| } | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| @@ -0,0 +1,479 @@ | |||
| #include <math.h> | |||
| #include <stdlib.h> | |||
| #include <string.h> | |||
| #include <stdio.h> | |||
| #include <complex.h> | |||
| #ifdef complex | |||
| #undef complex | |||
| #endif | |||
| #ifdef I | |||
| #undef I | |||
| #endif | |||
| #if defined(_WIN64) | |||
| typedef long long BLASLONG; | |||
| typedef unsigned long long BLASULONG; | |||
| #else | |||
| typedef long BLASLONG; | |||
| typedef unsigned long BLASULONG; | |||
| #endif | |||
| #ifdef LAPACK_ILP64 | |||
| typedef BLASLONG blasint; | |||
| #if defined(_WIN64) | |||
| #define blasabs(x) llabs(x) | |||
| #else | |||
| #define blasabs(x) labs(x) | |||
| #endif | |||
| #else | |||
| typedef int blasint; | |||
| #define blasabs(x) abs(x) | |||
| #endif | |||
| typedef blasint integer; | |||
| typedef unsigned int uinteger; | |||
| typedef char *address; | |||
| typedef short int shortint; | |||
| typedef float real; | |||
| typedef double doublereal; | |||
| typedef struct { real r, i; } complex; | |||
| typedef struct { doublereal r, i; } doublecomplex; | |||
| #ifdef _MSC_VER | |||
| static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} | |||
| static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} | |||
| static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} | |||
| static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} | |||
| #else | |||
| static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
| static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
| static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
| static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
| #endif | |||
| #define pCf(z) (*_pCf(z)) | |||
| #define pCd(z) (*_pCd(z)) | |||
| typedef int logical; | |||
| typedef short int shortlogical; | |||
| typedef char logical1; | |||
| typedef char integer1; | |||
| #define TRUE_ (1) | |||
| #define FALSE_ (0) | |||
| /* Extern is for use with -E */ | |||
| #ifndef Extern | |||
| #define Extern extern | |||
| #endif | |||
| /* I/O stuff */ | |||
| typedef int flag; | |||
| typedef int ftnlen; | |||
| typedef int ftnint; | |||
| /*external read, write*/ | |||
| typedef struct | |||
| { flag cierr; | |||
| ftnint ciunit; | |||
| flag ciend; | |||
| char *cifmt; | |||
| ftnint cirec; | |||
| } cilist; | |||
| /*internal read, write*/ | |||
| typedef struct | |||
| { flag icierr; | |||
| char *iciunit; | |||
| flag iciend; | |||
| char *icifmt; | |||
| ftnint icirlen; | |||
| ftnint icirnum; | |||
| } icilist; | |||
| /*open*/ | |||
| typedef struct | |||
| { flag oerr; | |||
| ftnint ounit; | |||
| char *ofnm; | |||
| ftnlen ofnmlen; | |||
| char *osta; | |||
| char *oacc; | |||
| char *ofm; | |||
| ftnint orl; | |||
| char *oblnk; | |||
| } olist; | |||
| /*close*/ | |||
| typedef struct | |||
| { flag cerr; | |||
| ftnint cunit; | |||
| char *csta; | |||
| } cllist; | |||
| /*rewind, backspace, endfile*/ | |||
| typedef struct | |||
| { flag aerr; | |||
| ftnint aunit; | |||
| } alist; | |||
| /* inquire */ | |||
| typedef struct | |||
| { flag inerr; | |||
| ftnint inunit; | |||
| char *infile; | |||
| ftnlen infilen; | |||
| ftnint *inex; /*parameters in standard's order*/ | |||
| ftnint *inopen; | |||
| ftnint *innum; | |||
| ftnint *innamed; | |||
| char *inname; | |||
| ftnlen innamlen; | |||
| char *inacc; | |||
| ftnlen inacclen; | |||
| char *inseq; | |||
| ftnlen inseqlen; | |||
| char *indir; | |||
| ftnlen indirlen; | |||
| char *infmt; | |||
| ftnlen infmtlen; | |||
| char *inform; | |||
| ftnint informlen; | |||
| char *inunf; | |||
| ftnlen inunflen; | |||
| ftnint *inrecl; | |||
| ftnint *innrec; | |||
| char *inblank; | |||
| ftnlen inblanklen; | |||
| } inlist; | |||
| #define VOID void | |||
| union Multitype { /* for multiple entry points */ | |||
| integer1 g; | |||
| shortint h; | |||
| integer i; | |||
| /* longint j; */ | |||
| real r; | |||
| doublereal d; | |||
| complex c; | |||
| doublecomplex z; | |||
| }; | |||
| typedef union Multitype Multitype; | |||
| struct Vardesc { /* for Namelist */ | |||
| char *name; | |||
| char *addr; | |||
| ftnlen *dims; | |||
| int type; | |||
| }; | |||
| typedef struct Vardesc Vardesc; | |||
| struct Namelist { | |||
| char *name; | |||
| Vardesc **vars; | |||
| int nvars; | |||
| }; | |||
| typedef struct Namelist Namelist; | |||
| #define abs(x) ((x) >= 0 ? (x) : -(x)) | |||
| #define dabs(x) (fabs(x)) | |||
| #define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) | |||
| #define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) | |||
| #define dmin(a,b) (f2cmin(a,b)) | |||
| #define dmax(a,b) (f2cmax(a,b)) | |||
| #define bit_test(a,b) ((a) >> (b) & 1) | |||
| #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) | |||
| #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) | |||
| #define abort_() { sig_die("Fortran abort routine called", 1); } | |||
| #define c_abs(z) (cabsf(Cf(z))) | |||
| #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } | |||
| #ifdef _MSC_VER | |||
| #define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} | |||
| #define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} | |||
| #else | |||
| #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} | |||
| #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} | |||
| #endif | |||
| #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
| #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
| #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
| //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
| #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
| #define d_abs(x) (fabs(*(x))) | |||
| #define d_acos(x) (acos(*(x))) | |||
| #define d_asin(x) (asin(*(x))) | |||
| #define d_atan(x) (atan(*(x))) | |||
| #define d_atn2(x, y) (atan2(*(x),*(y))) | |||
| #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } | |||
| #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } | |||
| #define d_cos(x) (cos(*(x))) | |||
| #define d_cosh(x) (cosh(*(x))) | |||
| #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) | |||
| #define d_exp(x) (exp(*(x))) | |||
| #define d_imag(z) (cimag(Cd(z))) | |||
| #define r_imag(z) (cimagf(Cf(z))) | |||
| #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
| #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
| #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
| #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
| #define d_log(x) (log(*(x))) | |||
| #define d_mod(x, y) (fmod(*(x), *(y))) | |||
| #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
| #define d_nint(x) u_nint(*(x)) | |||
| #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
| #define d_sign(a,b) u_sign(*(a),*(b)) | |||
| #define r_sign(a,b) u_sign(*(a),*(b)) | |||
| #define d_sin(x) (sin(*(x))) | |||
| #define d_sinh(x) (sinh(*(x))) | |||
| #define d_sqrt(x) (sqrt(*(x))) | |||
| #define d_tan(x) (tan(*(x))) | |||
| #define d_tanh(x) (tanh(*(x))) | |||
| #define i_abs(x) abs(*(x)) | |||
| #define i_dnnt(x) ((integer)u_nint(*(x))) | |||
| #define i_len(s, n) (n) | |||
| #define i_nint(x) ((integer)u_nint(*(x))) | |||
| #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
| #define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
| #define pow_si(B,E) spow_ui(*(B),*(E)) | |||
| #define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
| #define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
| #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
| #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
| #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} | |||
| #define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } | |||
| #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) | |||
| #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
| #define sig_die(s, kill) { exit(1); } | |||
| #define s_stop(s, n) {exit(0);} | |||
| #define z_abs(z) (cabs(Cd(z))) | |||
| #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
| #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
| #define myexit_() break; | |||
| #define mycycle_() continue; | |||
| #define myceiling_(w) {ceil(w)} | |||
| #define myhuge_(w) {HUGE_VAL} | |||
| #define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) | |||
| /* procedure parameter types for -A and -C++ */ | |||
| #define F2C_proc_par_types 1 | |||
| #ifdef __cplusplus | |||
| typedef logical (*L_fp)(...); | |||
| #else | |||
| typedef logical (*L_fp)(); | |||
| #endif | |||
| /* -- translated by f2c (version 20000121). | |||
| You must link the resulting object file with the libraries: | |||
| -lf2c -lm (in that order) | |||
| */ | |||
| /* Table of constant values */ | |||
| static complex c_b1 = {0.f,0.f}; | |||
| static complex c_b2 = {1.f,0.f}; | |||
| /* > \brief \b CGELQS */ | |||
| /* =========== DOCUMENTATION =========== */ | |||
| /* Online html documentation available at */ | |||
| /* http://www.netlib.org/lapack/explore-html/ */ | |||
| /* Definition: */ | |||
| /* =========== */ | |||
| /* SUBROUTINE CGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */ | |||
| /* INFO ) */ | |||
| /* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ | |||
| /* COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), */ | |||
| /* $ WORK( LWORK ) */ | |||
| /* > \par Purpose: */ | |||
| /* ============= */ | |||
| /* > */ | |||
| /* > \verbatim */ | |||
| /* > */ | |||
| /* > Compute a minimum-norm solution */ | |||
| /* > f2cmin || A*X - B || */ | |||
| /* > using the LQ factorization */ | |||
| /* > A = L*Q */ | |||
| /* > computed by CGELQF. */ | |||
| /* > \endverbatim */ | |||
| /* Arguments: */ | |||
| /* ========== */ | |||
| /* > \param[in] M */ | |||
| /* > \verbatim */ | |||
| /* > M is INTEGER */ | |||
| /* > The number of rows of the matrix A. M >= 0. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[in] N */ | |||
| /* > \verbatim */ | |||
| /* > N is INTEGER */ | |||
| /* > The number of columns of the matrix A. N >= M >= 0. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[in] NRHS */ | |||
| /* > \verbatim */ | |||
| /* > NRHS is INTEGER */ | |||
| /* > The number of columns of B. NRHS >= 0. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[in] A */ | |||
| /* > \verbatim */ | |||
| /* > A is COMPLEX array, dimension (LDA,N) */ | |||
| /* > Details of the LQ factorization of the original matrix A as */ | |||
| /* > returned by CGELQF. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[in] LDA */ | |||
| /* > \verbatim */ | |||
| /* > LDA is INTEGER */ | |||
| /* > The leading dimension of the array A. LDA >= M. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[in] TAU */ | |||
| /* > \verbatim */ | |||
| /* > TAU is COMPLEX array, dimension (M) */ | |||
| /* > Details of the orthogonal matrix Q. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[in,out] B */ | |||
| /* > \verbatim */ | |||
| /* > B is COMPLEX array, dimension (LDB,NRHS) */ | |||
| /* > On entry, the m-by-nrhs right hand side matrix B. */ | |||
| /* > On exit, the n-by-nrhs solution matrix X. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[in] LDB */ | |||
| /* > \verbatim */ | |||
| /* > LDB is INTEGER */ | |||
| /* > The leading dimension of the array B. LDB >= N. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[out] WORK */ | |||
| /* > \verbatim */ | |||
| /* > WORK is COMPLEX array, dimension (LWORK) */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[in] LWORK */ | |||
| /* > \verbatim */ | |||
| /* > LWORK is INTEGER */ | |||
| /* > The length of the array WORK. LWORK must be at least NRHS, */ | |||
| /* > and should be at least NRHS*NB, where NB is the block size */ | |||
| /* > for this environment. */ | |||
| /* > \endverbatim */ | |||
| /* > */ | |||
| /* > \param[out] INFO */ | |||
| /* > \verbatim */ | |||
| /* > INFO is INTEGER */ | |||
| /* > = 0: successful exit */ | |||
| /* > < 0: if INFO = -i, the i-th argument had an illegal value */ | |||
| /* > \endverbatim */ | |||
| /* Authors: */ | |||
| /* ======== */ | |||
| /* > \author Univ. of Tennessee */ | |||
| /* > \author Univ. of California Berkeley */ | |||
| /* > \author Univ. of Colorado Denver */ | |||
| /* > \author NAG Ltd. */ | |||
| /* > \ingroup complex_lin */ | |||
| /* ===================================================================== */ | |||
| /* Subroutine */ int cgelqs_(integer *m, integer *n, integer *nrhs, complex * | |||
| a, integer *lda, complex *tau, complex *b, integer *ldb, complex * | |||
| work, integer *lwork, integer *info) | |||
| { | |||
| /* System generated locals */ | |||
| integer a_dim1, a_offset, b_dim1, b_offset, i__1; | |||
| /* Local variables */ | |||
| extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, | |||
| integer *, integer *, complex *, complex *, integer *, complex *, | |||
| integer *), claset_(char *, | |||
| integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *), cunmlq_(char *, char | |||
| *, integer *, integer *, integer *, complex *, integer *, complex | |||
| *, complex *, integer *, complex *, integer *, integer *); | |||
| /* -- LAPACK test routine -- */ | |||
| /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ | |||
| /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ | |||
| /* ===================================================================== */ | |||
| /* Test the input parameters. */ | |||
| /* Parameter adjustments */ | |||
| a_dim1 = *lda; | |||
| a_offset = 1 + a_dim1 * 1; | |||
| a -= a_offset; | |||
| --tau; | |||
| b_dim1 = *ldb; | |||
| b_offset = 1 + b_dim1 * 1; | |||
| b -= b_offset; | |||
| --work; | |||
| /* Function Body */ | |||
| *info = 0; | |||
| if (*m < 0) { | |||
| *info = -1; | |||
| } else if (*n < 0 || *m > *n) { | |||
| *info = -2; | |||
| } else if (*nrhs < 0) { | |||
| *info = -3; | |||
| } else if (*lda < f2cmax(1,*m)) { | |||
| *info = -5; | |||
| } else if (*ldb < f2cmax(1,*n)) { | |||
| *info = -8; | |||
| } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) { | |||
| *info = -10; | |||
| } | |||
| if (*info != 0) { | |||
| i__1 = -(*info); | |||
| xerbla_("CGELQS", &i__1); | |||
| return 0; | |||
| } | |||
| /* Quick return if possible */ | |||
| if (*n == 0 || *nrhs == 0 || *m == 0) { | |||
| return 0; | |||
| } | |||
| /* Solve L*X = B(1:m,:) */ | |||
| ctrsm_("Left", "Lower", "No transpose", "Non-unit", m, nrhs, &c_b2, &a[ | |||
| a_offset], lda, &b[b_offset], ldb); | |||
| /* Set B(m+1:n,:) to zero */ | |||
| if (*m < *n) { | |||
| i__1 = *n - *m; | |||
| claset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[*m + 1 + b_dim1], ldb); | |||
| } | |||
| /* B := Q' * B */ | |||
| cunmlq_("Left", "Conjugate transpose", n, nrhs, m, &a[a_offset], lda, & | |||
| tau[1], &b[b_offset], ldb, &work[1], lwork, info); | |||
| return 0; | |||
| /* End of CGELQS */ | |||
| } /* cgelqs_ */ | |||