| @@ -94,16 +94,8 @@ task: | |||||
| name: AppleM1/LLVM armv7-androidndk xbuild | name: AppleM1/LLVM armv7-androidndk xbuild | ||||
| compile_script: | compile_script: | ||||
| - brew install --cask android-ndk | - brew install --cask android-ndk | ||||
| - export #PATH=/opt/homebrew/opt/llvm/bin:$PATH | |||||
| - export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib" | |||||
| - export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | |||||
| - export ANDROID_NDK_HOME="/opt/homebrew/share/android-ndk" | - export ANDROID_NDK_HOME="/opt/homebrew/share/android-ndk" | ||||
| - ls /opt/homebrew | |||||
| - ls -l /System/Volumes/Data/opt/homebrew/Caskroom/android-ndk | |||||
| - find /opt/homebrew -name "armv7a-linux-androideabi*-ranlib" | |||||
| - #export CC=/Applications/Xcode-13.4.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang | |||||
| - #export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode-13.4.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.0.sdk -arch arm64 -miphoneos-version-min=10.0" | |||||
| - export CC=/System/Volumes/Data/opt/homebrew/Caskroom/android-ndk/27/AndroidNDK*.app/Contents/NDK/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang | |||||
| - export CC=/opt/homebrew/share/android-ndk/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang | |||||
| - make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l" | - make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l" | ||||
| always: | always: | ||||
| config_artifacts: | config_artifacts: | ||||
| @@ -95,7 +95,7 @@ if (DYNAMIC_ARCH) | |||||
| endif () | endif () | ||||
| if (LOONGARCH64) | if (LOONGARCH64) | ||||
| set(DYNAMIC_CORE LOONGSONGENERIC LOONGSON2K1000 LOONGSON3R5) | |||||
| set(DYNAMIC_CORE LA64_GENERIC LA264 LA464) | |||||
| endif () | endif () | ||||
| if (EXISTS ${PROJECT_SOURCE_DIR}/config_kernel.h) | if (EXISTS ${PROJECT_SOURCE_DIR}/config_kernel.h) | ||||
| @@ -1349,7 +1349,7 @@ endif () | |||||
| "#define DTB_DEFAULT_ENTRIES 128\n" | "#define DTB_DEFAULT_ENTRIES 128\n" | ||||
| "#define DTB_SIZE 4096\n" | "#define DTB_SIZE 4096\n" | ||||
| "#define L2_ASSOCIATIVE 4\n") | "#define L2_ASSOCIATIVE 4\n") | ||||
| elseif ("${TCORE}" STREQUAL "LOONGSONGENERIC") | |||||
| elseif ("${TCORE}" STREQUAL "LA64_GENERIC") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | file(APPEND ${TARGET_CONF_TEMP} | ||||
| "#define DTB_DEFAULT_ENTRIES 64\n") | "#define DTB_DEFAULT_ENTRIES 64\n") | ||||
| set(SGEMM_UNROLL_M 2) | set(SGEMM_UNROLL_M 2) | ||||
| @@ -1364,7 +1364,7 @@ endif () | |||||
| set(CGEMM3M_UNROLL_N 8) | set(CGEMM3M_UNROLL_N 8) | ||||
| set(ZGEMM3M_UNROLL_M 2) | set(ZGEMM3M_UNROLL_M 2) | ||||
| set(ZGEMM3M_UNROLL_N 8) | set(ZGEMM3M_UNROLL_N 8) | ||||
| elseif ("${TCORE}" STREQUAL "LOONGSON2K1000") | |||||
| elseif ("${TCORE}" STREQUAL "LA264") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | file(APPEND ${TARGET_CONF_TEMP} | ||||
| "#define DTB_DEFAULT_ENTRIES 64\n") | "#define DTB_DEFAULT_ENTRIES 64\n") | ||||
| set(HAVE_LSX 1) | set(HAVE_LSX 1) | ||||
| @@ -1380,7 +1380,7 @@ endif () | |||||
| set(CGEMM3M_UNROLL_N 8) | set(CGEMM3M_UNROLL_N 8) | ||||
| set(ZGEMM3M_UNROLL_M 8) | set(ZGEMM3M_UNROLL_M 8) | ||||
| set(ZGEMM3M_UNROLL_N 4) | set(ZGEMM3M_UNROLL_N 4) | ||||
| elseif ("${TCORE}" STREQUAL "LOONGSON3R5") | |||||
| elseif ("${TCORE}" STREQUAL "LA464") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | file(APPEND ${TARGET_CONF_TEMP} | ||||
| "#define DTB_DEFAULT_ENTRIES 64\n") | "#define DTB_DEFAULT_ENTRIES 64\n") | ||||
| set(HAVE_LASX 1) | set(HAVE_LASX 1) | ||||
| @@ -55,6 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #ifndef NO_AFFINITY | |||||
| static __inline int WhereAmI(void){ | static __inline int WhereAmI(void){ | ||||
| uint64_t ret; | uint64_t ret; | ||||
| __asm__ volatile ( | __asm__ volatile ( | ||||
| @@ -67,6 +68,7 @@ static __inline int WhereAmI(void){ | |||||
| if ((int)ret <0) ret = 0; | if ((int)ret <0) ret = 0; | ||||
| return (int)ret; | return (int)ret; | ||||
| } | } | ||||
| #endif | |||||
| static __inline void blas_lock(volatile BLASULONG *address){ | static __inline void blas_lock(volatile BLASULONG *address){ | ||||
| @@ -1689,6 +1689,7 @@ int get_cpuname(void){ | |||||
| return CPUTYPE_BARCELONA; | return CPUTYPE_BARCELONA; | ||||
| } | } | ||||
| case 10: // Zen3/4 | case 10: // Zen3/4 | ||||
| case 11: // Zen5 | |||||
| #ifndef NO_AVX512 | #ifndef NO_AVX512 | ||||
| if(support_avx512_bf16()) | if(support_avx512_bf16()) | ||||
| return CPUTYPE_COOPERLAKE; | return CPUTYPE_COOPERLAKE; | ||||
| @@ -2479,7 +2480,7 @@ int get_coretype(void){ | |||||
| } | } | ||||
| break; | break; | ||||
| } | } | ||||
| } else if (exfamily == 8 || exfamily == 10) { | |||||
| } else if (exfamily == 8 || exfamily == 10 || exfamily == 11) { | |||||
| switch (model) { | switch (model) { | ||||
| case 1: | case 1: | ||||
| // AMD Ryzen | // AMD Ryzen | ||||
| @@ -38,9 +38,12 @@ | |||||
| CALL CHECK1(SFAC) | CALL CHECK1(SFAC) | ||||
| END IF | END IF | ||||
| IF (PASS) WRITE (NOUT,99998) | |||||
| IF (PASS) THEN | |||||
| WRITE (NOUT,99998) | |||||
| ELSE | |||||
| CALL ABORT | |||||
| END IF | |||||
| 20 CONTINUE | 20 CONTINUE | ||||
| STOP | |||||
| * | * | ||||
| 99999 FORMAT (' Complex CBLAS Test Program Results',/1X) | 99999 FORMAT (' Complex CBLAS Test Program Results',/1X) | ||||
| 99998 FORMAT (' ----- PASS -----') | 99998 FORMAT (' ----- PASS -----') | ||||
| @@ -228,7 +231,7 @@ | |||||
| CALL ITEST1(ICAMAXTEST(N,CX,INCX),ITRUE3(NP1)) | CALL ITEST1(ICAMAXTEST(N,CX,INCX),ITRUE3(NP1)) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| @@ -512,7 +515,7 @@ | |||||
| CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0E0) | CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0E0) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -243,7 +243,7 @@ | |||||
| $ GO TO 70 | $ GO TO 70 | ||||
| 60 CONTINUE | 60 CONTINUE | ||||
| WRITE( NOUT, FMT = 9986 )SNAMET | WRITE( NOUT, FMT = 9986 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 70 LTEST( I ) = LTESTT | 70 LTEST( I ) = LTESTT | ||||
| GO TO 50 | GO TO 50 | ||||
| * | * | ||||
| @@ -283,7 +283,7 @@ | |||||
| SAME = LCE( YY, YT, N ) | SAME = LCE( YY, YT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANS = 'T' | TRANS = 'T' | ||||
| CALL CMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, | CALL CMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, | ||||
| @@ -291,7 +291,7 @@ | |||||
| SAME = LCE( YY, YT, N ) | SAME = LCE( YY, YT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -418,7 +418,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -194,7 +194,7 @@ | |||||
| $ GO TO 50 | $ GO TO 50 | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| WRITE( NOUT, FMT = 9990 )SNAMET | WRITE( NOUT, FMT = 9990 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 50 LTEST( I ) = LTESTT | 50 LTEST( I ) = LTESTT | ||||
| GO TO 30 | GO TO 30 | ||||
| * | * | ||||
| @@ -237,7 +237,7 @@ | |||||
| SAME = LCE( CC, CT, N ) | SAME = LCE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'C' | TRANSB = 'C' | ||||
| CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -246,7 +246,7 @@ | |||||
| SAME = LCE( CC, CT, N ) | SAME = LCE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| DO 120 J = 1, N | DO 120 J = 1, N | ||||
| AB( J, NMAX + 1 ) = N - J + 1 | AB( J, NMAX + 1 ) = N - J + 1 | ||||
| @@ -264,7 +264,7 @@ | |||||
| SAME = LCE( CC, CT, N ) | SAME = LCE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'C' | TRANSB = 'C' | ||||
| CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -273,7 +273,7 @@ | |||||
| SAME = LCE( CC, CT, N ) | SAME = LCE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -385,7 +385,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT(' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT(' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -194,7 +194,7 @@ | |||||
| $ GO TO 50 | $ GO TO 50 | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| WRITE( NOUT, FMT = 9990 )SNAMET | WRITE( NOUT, FMT = 9990 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 50 LTEST( I ) = LTESTT | 50 LTEST( I ) = LTESTT | ||||
| GO TO 30 | GO TO 30 | ||||
| * | * | ||||
| @@ -237,7 +237,7 @@ | |||||
| SAME = LCE( CC, CT, N ) | SAME = LCE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'C' | TRANSB = 'C' | ||||
| CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -246,7 +246,7 @@ | |||||
| SAME = LCE( CC, CT, N ) | SAME = LCE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| DO 120 J = 1, N | DO 120 J = 1, N | ||||
| AB( J, NMAX + 1 ) = N - J + 1 | AB( J, NMAX + 1 ) = N - J + 1 | ||||
| @@ -264,7 +264,7 @@ | |||||
| SAME = LCE( CC, CT, N ) | SAME = LCE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'C' | TRANSB = 'C' | ||||
| CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -273,7 +273,7 @@ | |||||
| SAME = LCE( CC, CT, N ) | SAME = LCE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -385,7 +385,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT(' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT(' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -44,9 +44,12 @@ | |||||
| CALL CHECK3(SFAC) | CALL CHECK3(SFAC) | ||||
| END IF | END IF | ||||
| IF (PASS) WRITE (NOUT,99998) | |||||
| IF (PASS) THEN | |||||
| WRITE (NOUT,99998) | |||||
| ELSE | |||||
| CALL ABORT | |||||
| END IF | |||||
| 20 CONTINUE | 20 CONTINUE | ||||
| STOP | |||||
| * | * | ||||
| 99999 FORMAT (' Real CBLAS Test Program Results',/1X) | 99999 FORMAT (' Real CBLAS Test Program Results',/1X) | ||||
| 99998 FORMAT (' ----- PASS -----') | 99998 FORMAT (' ----- PASS -----') | ||||
| @@ -136,7 +139,7 @@ | |||||
| CALL STEST1(SS,DS1(K),DS1(K),SFAC) | CALL STEST1(SS,DS1(K),DS1(K),SFAC) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| 20 CONTINUE | 20 CONTINUE | ||||
| 40 RETURN | 40 RETURN | ||||
| @@ -229,7 +232,7 @@ | |||||
| CALL ITEST1(IDAMAXTEST(N,SX,INCX),ITRUE2(NP1)) | CALL ITEST1(IDAMAXTEST(N,SX,INCX),ITRUE2(NP1)) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| 60 CONTINUE | 60 CONTINUE | ||||
| 80 CONTINUE | 80 CONTINUE | ||||
| @@ -384,7 +387,7 @@ | |||||
| CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0D0) | CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0D0) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| 100 CONTINUE | 100 CONTINUE | ||||
| 120 CONTINUE | 120 CONTINUE | ||||
| @@ -472,7 +475,7 @@ | |||||
| 70 CONTINUE | 70 CONTINUE | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK3' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK3' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| 60 CONTINUE | 60 CONTINUE | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -239,7 +239,7 @@ | |||||
| $ GO TO 70 | $ GO TO 70 | ||||
| 60 CONTINUE | 60 CONTINUE | ||||
| WRITE( NOUT, FMT = 9986 )SNAMET | WRITE( NOUT, FMT = 9986 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 70 LTEST( I ) = LTESTT | 70 LTEST( I ) = LTESTT | ||||
| GO TO 50 | GO TO 50 | ||||
| * | * | ||||
| @@ -279,7 +279,7 @@ | |||||
| SAME = LDE( YY, YT, N ) | SAME = LDE( YY, YT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANS = 'T' | TRANS = 'T' | ||||
| CALL DMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, | CALL DMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, | ||||
| @@ -287,7 +287,7 @@ | |||||
| SAME = LDE( YY, YT, N ) | SAME = LDE( YY, YT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -414,7 +414,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -189,7 +189,7 @@ | |||||
| $ GO TO 50 | $ GO TO 50 | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| WRITE( NOUT, FMT = 9990 )SNAMET | WRITE( NOUT, FMT = 9990 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 50 LTEST( I ) = LTESTT | 50 LTEST( I ) = LTESTT | ||||
| GO TO 30 | GO TO 30 | ||||
| * | * | ||||
| @@ -232,7 +232,7 @@ | |||||
| SAME = LDE( CC, CT, N ) | SAME = LDE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'T' | TRANSB = 'T' | ||||
| CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -241,7 +241,7 @@ | |||||
| SAME = LDE( CC, CT, N ) | SAME = LDE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| DO 120 J = 1, N | DO 120 J = 1, N | ||||
| AB( J, NMAX + 1 ) = N - J + 1 | AB( J, NMAX + 1 ) = N - J + 1 | ||||
| @@ -259,7 +259,7 @@ | |||||
| SAME = LDE( CC, CT, N ) | SAME = LDE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'T' | TRANSB = 'T' | ||||
| CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -268,7 +268,7 @@ | |||||
| SAME = LDE( CC, CT, N ) | SAME = LDE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -379,7 +379,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -44,9 +44,12 @@ | |||||
| CALL CHECK3(SFAC) | CALL CHECK3(SFAC) | ||||
| END IF | END IF | ||||
| IF (PASS) WRITE (NOUT,99998) | |||||
| IF (PASS) THEN | |||||
| WRITE (NOUT,99998) | |||||
| ELSE | |||||
| CALL ABORT | |||||
| END IF | |||||
| 20 CONTINUE | 20 CONTINUE | ||||
| STOP | |||||
| * | * | ||||
| 99999 FORMAT (' Real CBLAS Test Program Results',/1X) | 99999 FORMAT (' Real CBLAS Test Program Results',/1X) | ||||
| 99998 FORMAT (' ----- PASS -----') | 99998 FORMAT (' ----- PASS -----') | ||||
| @@ -136,7 +139,7 @@ | |||||
| CALL STEST1(SS,DS1(K),DS1(K),SFAC) | CALL STEST1(SS,DS1(K),DS1(K),SFAC) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| 20 CONTINUE | 20 CONTINUE | ||||
| 40 RETURN | 40 RETURN | ||||
| @@ -229,7 +232,7 @@ | |||||
| CALL ITEST1(ISAMAXTEST(N,SX,INCX),ITRUE2(NP1)) | CALL ITEST1(ISAMAXTEST(N,SX,INCX),ITRUE2(NP1)) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| 60 CONTINUE | 60 CONTINUE | ||||
| 80 CONTINUE | 80 CONTINUE | ||||
| @@ -384,7 +387,7 @@ | |||||
| CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0E0) | CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0E0) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| 100 CONTINUE | 100 CONTINUE | ||||
| 120 CONTINUE | 120 CONTINUE | ||||
| @@ -479,7 +482,7 @@ | |||||
| 70 CONTINUE | 70 CONTINUE | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK3' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK3' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| 60 CONTINUE | 60 CONTINUE | ||||
| @@ -759,4 +762,4 @@ | |||||
| END IF | END IF | ||||
| END IF | END IF | ||||
| RETURN | RETURN | ||||
| END | |||||
| END | |||||
| @@ -10,7 +10,7 @@ | |||||
| * 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -239,7 +239,7 @@ | |||||
| $ GO TO 70 | $ GO TO 70 | ||||
| 60 CONTINUE | 60 CONTINUE | ||||
| WRITE( NOUT, FMT = 9986 )SNAMET | WRITE( NOUT, FMT = 9986 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 70 LTEST( I ) = LTESTT | 70 LTEST( I ) = LTESTT | ||||
| GO TO 50 | GO TO 50 | ||||
| * | * | ||||
| @@ -279,7 +279,7 @@ | |||||
| SAME = LSE( YY, YT, N ) | SAME = LSE( YY, YT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANS = 'T' | TRANS = 'T' | ||||
| CALL SMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, | CALL SMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, | ||||
| @@ -287,7 +287,7 @@ | |||||
| SAME = LSE( YY, YT, N ) | SAME = LSE( YY, YT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -414,7 +414,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -188,7 +188,7 @@ | |||||
| $ GO TO 50 | $ GO TO 50 | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| WRITE( NOUT, FMT = 9990 )SNAMET | WRITE( NOUT, FMT = 9990 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 50 LTEST( I ) = LTESTT | 50 LTEST( I ) = LTESTT | ||||
| GO TO 30 | GO TO 30 | ||||
| * | * | ||||
| @@ -231,7 +231,7 @@ | |||||
| SAME = LSE( CC, CT, N ) | SAME = LSE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'T' | TRANSB = 'T' | ||||
| CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -240,7 +240,7 @@ | |||||
| SAME = LSE( CC, CT, N ) | SAME = LSE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| DO 120 J = 1, N | DO 120 J = 1, N | ||||
| AB( J, NMAX + 1 ) = N - J + 1 | AB( J, NMAX + 1 ) = N - J + 1 | ||||
| @@ -258,7 +258,7 @@ | |||||
| SAME = LSE( CC, CT, N ) | SAME = LSE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'T' | TRANSB = 'T' | ||||
| CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -267,7 +267,7 @@ | |||||
| SAME = LSE( CC, CT, N ) | SAME = LSE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -378,7 +378,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -38,9 +38,12 @@ | |||||
| CALL CHECK1(SFAC) | CALL CHECK1(SFAC) | ||||
| END IF | END IF | ||||
| IF (PASS) WRITE (NOUT,99998) | |||||
| IF (PASS) THEN | |||||
| WRITE (NOUT,99998) | |||||
| ELSE | |||||
| CALL ABORT | |||||
| END IF | |||||
| 20 CONTINUE | 20 CONTINUE | ||||
| STOP | |||||
| * | * | ||||
| 99999 FORMAT (' Complex CBLAS Test Program Results',/1X) | 99999 FORMAT (' Complex CBLAS Test Program Results',/1X) | ||||
| 99998 FORMAT (' ----- PASS -----') | 99998 FORMAT (' ----- PASS -----') | ||||
| @@ -228,7 +231,7 @@ | |||||
| CALL ITEST1(IZAMAXTEST(N,CX,INCX),ITRUE3(NP1)) | CALL ITEST1(IZAMAXTEST(N,CX,INCX),ITRUE3(NP1)) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| @@ -512,7 +515,7 @@ | |||||
| CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0D0) | CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0D0) | ||||
| ELSE | ELSE | ||||
| WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' | WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -243,7 +243,7 @@ | |||||
| $ GO TO 70 | $ GO TO 70 | ||||
| 60 CONTINUE | 60 CONTINUE | ||||
| WRITE( NOUT, FMT = 9986 )SNAMET | WRITE( NOUT, FMT = 9986 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 70 LTEST( I ) = LTESTT | 70 LTEST( I ) = LTESTT | ||||
| GO TO 50 | GO TO 50 | ||||
| * | * | ||||
| @@ -283,7 +283,7 @@ | |||||
| SAME = LZE( YY, YT, N ) | SAME = LZE( YY, YT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANS = 'T' | TRANS = 'T' | ||||
| CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, | CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, | ||||
| @@ -291,7 +291,7 @@ | |||||
| SAME = LZE( YY, YT, N ) | SAME = LZE( YY, YT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -418,7 +418,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT( ' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -195,7 +195,7 @@ | |||||
| $ GO TO 50 | $ GO TO 50 | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| WRITE( NOUT, FMT = 9990 )SNAMET | WRITE( NOUT, FMT = 9990 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 50 LTEST( I ) = LTESTT | 50 LTEST( I ) = LTESTT | ||||
| GO TO 30 | GO TO 30 | ||||
| * | * | ||||
| @@ -238,7 +238,7 @@ | |||||
| SAME = LZE( CC, CT, N ) | SAME = LZE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'C' | TRANSB = 'C' | ||||
| CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -247,7 +247,7 @@ | |||||
| SAME = LZE( CC, CT, N ) | SAME = LZE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| DO 120 J = 1, N | DO 120 J = 1, N | ||||
| AB( J, NMAX + 1 ) = N - J + 1 | AB( J, NMAX + 1 ) = N - J + 1 | ||||
| @@ -265,7 +265,7 @@ | |||||
| SAME = LZE( CC, CT, N ) | SAME = LZE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'C' | TRANSB = 'C' | ||||
| CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -274,7 +274,7 @@ | |||||
| SAME = LZE( CC, CT, N ) | SAME = LZE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -386,7 +386,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT(' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT(' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -10,7 +10,7 @@ | |||||
| * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| * F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| * F LOGICAL FLAG, T TO CALL ABORT ON FAILURES. | |||||
| * T LOGICAL FLAG, T TO TEST ERROR EXITS. | * T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | * 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| * 16.0 THRESHOLD VALUE OF TEST RATIO | * 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -195,7 +195,7 @@ | |||||
| $ GO TO 50 | $ GO TO 50 | ||||
| 40 CONTINUE | 40 CONTINUE | ||||
| WRITE( NOUT, FMT = 9990 )SNAMET | WRITE( NOUT, FMT = 9990 )SNAMET | ||||
| STOP | |||||
| CALL ABORT | |||||
| 50 LTEST( I ) = LTESTT | 50 LTEST( I ) = LTESTT | ||||
| GO TO 30 | GO TO 30 | ||||
| * | * | ||||
| @@ -238,7 +238,7 @@ | |||||
| SAME = LZE( CC, CT, N ) | SAME = LZE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'C' | TRANSB = 'C' | ||||
| CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -247,7 +247,7 @@ | |||||
| SAME = LZE( CC, CT, N ) | SAME = LZE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| DO 120 J = 1, N | DO 120 J = 1, N | ||||
| AB( J, NMAX + 1 ) = N - J + 1 | AB( J, NMAX + 1 ) = N - J + 1 | ||||
| @@ -265,7 +265,7 @@ | |||||
| SAME = LZE( CC, CT, N ) | SAME = LZE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| TRANSB = 'C' | TRANSB = 'C' | ||||
| CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, | ||||
| @@ -274,7 +274,7 @@ | |||||
| SAME = LZE( CC, CT, N ) | SAME = LZE( CC, CT, N ) | ||||
| IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN | ||||
| WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR | ||||
| STOP | |||||
| CALL ABORT | |||||
| END IF | END IF | ||||
| * | * | ||||
| * Test each subroutine in turn. | * Test each subroutine in turn. | ||||
| @@ -386,7 +386,9 @@ | |||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ CLOSE ( NTRA ) | $ CLOSE ( NTRA ) | ||||
| CLOSE ( NOUT ) | CLOSE ( NOUT ) | ||||
| STOP | |||||
| IF( FATAL ) THEN | |||||
| CALL ABORT | |||||
| END IF | |||||
| * | * | ||||
| 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | 10002 FORMAT( ' COLUMN-MAJOR AND ROW-MAJOR DATA LAYOUTS ARE TESTED' ) | ||||
| 10001 FORMAT(' ROW-MAJOR DATA LAYOUT IS TESTED' ) | 10001 FORMAT(' ROW-MAJOR DATA LAYOUT IS TESTED' ) | ||||
| @@ -742,7 +742,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | |||||
| num_parts = 0; | num_parts = 0; | ||||
| while (n > 0){ | while (n > 0){ | ||||
| width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts); | width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts); | ||||
| if (width < switch_ratio) { | |||||
| if (width < switch_ratio && width > 1) { | |||||
| width = switch_ratio; | width = switch_ratio; | ||||
| } | } | ||||
| width = round_up(n, width, GEMM_PREFERED_SIZE); | width = round_up(n, width, GEMM_PREFERED_SIZE); | ||||
| @@ -319,8 +319,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| lda = LDB; | lda = LDB; | ||||
| ldb = LDA; | ldb = LDA; | ||||
| if (Uplo == CblasUpper) uplo = 0; | |||||
| if (Uplo == CblasLower) uplo = 1; | |||||
| if (Uplo == CblasUpper) uplo = 1; | |||||
| if (Uplo == CblasLower) uplo = 0; | |||||
| if (TransB == CblasNoTrans) | if (TransB == CblasNoTrans) | ||||
| transa = 0; | transa = 0; | ||||
| @@ -17,11 +17,15 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRMMKERNEL = sgemm_kernel_power10.c | STRMMKERNEL = sgemm_kernel_power10.c | ||||
| DTRMMKERNEL = dgemm_kernel_power10.c | DTRMMKERNEL = dgemm_kernel_power10.c | ||||
| ifeq ($(OSNAME), AIX) | ifeq ($(OSNAME), AIX) | ||||
| CTRMMKERNEL = ctrmm_kernel_8x4_power8.S | |||||
| ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S | |||||
| #CTRMMKERNEL = ctrmm_kernel_8x4_power8.S | |||||
| #ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S | |||||
| CTRMMKERNEL = cgemm_kernel_power10.c | |||||
| ZTRMMKERNEL = zgemm_kernel_power10.c | |||||
| else | else | ||||
| CTRMMKERNEL = cgemm_kernel_power10.S | |||||
| ZTRMMKERNEL = zgemm_kernel_power10.S | |||||
| #CTRMMKERNEL = cgemm_kernel_power10.S | |||||
| #ZTRMMKERNEL = zgemm_kernel_power10.S | |||||
| CTRMMKERNEL = cgemm_kernel_power10.c | |||||
| ZTRMMKERNEL = zgemm_kernel_power10.c | |||||
| endif | endif | ||||
| SGEMMKERNEL = sgemm_kernel_power10.c | SGEMMKERNEL = sgemm_kernel_power10.c | ||||
| @@ -65,9 +69,11 @@ DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c | |||||
| DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c | DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c | ||||
| ifeq ($(OSNAME), AIX) | ifeq ($(OSNAME), AIX) | ||||
| CGEMMKERNEL = cgemm_kernel_8x4_power8.S | |||||
| #CGEMMKERNEL = cgemm_kernel_8x4_power8.S | |||||
| CGEMMKERNEL = cgemm_kernel_power10.c | |||||
| else | else | ||||
| CGEMMKERNEL = cgemm_kernel_power10.S | |||||
| #CGEMMKERNEL = cgemm_kernel_power10.S | |||||
| CGEMMKERNEL = cgemm_kernel_power10.c | |||||
| endif | endif | ||||
| #CGEMMKERNEL = cgemm_kernel_8x4_power8.S | #CGEMMKERNEL = cgemm_kernel_8x4_power8.S | ||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | ||||
| @@ -84,9 +90,11 @@ CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | ||||
| ifeq ($(OSNAME), AIX) | ifeq ($(OSNAME), AIX) | ||||
| ZGEMMKERNEL = zgemm_kernel_8x2_power8.S | |||||
| #ZGEMMKERNEL = zgemm_kernel_8x2_power8.S | |||||
| ZGEMMKERNEL = zgemm_kernel_power10.c | |||||
| else | else | ||||
| ZGEMMKERNEL = zgemm_kernel_power10.S | |||||
| #ZGEMMKERNEL = zgemm_kernel_power10.S | |||||
| ZGEMMKERNEL = zgemm_kernel_power10.c | |||||
| endif | endif | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| @@ -63,6 +63,8 @@ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #define FLAG r11 | |||||
| #define FZERO f0 | #define FZERO f0 | ||||
| #define ALPHA f1 | #define ALPHA f1 | ||||
| @@ -88,6 +90,10 @@ | |||||
| fcmpu cr0, FZERO, ALPHA | fcmpu cr0, FZERO, ALPHA | ||||
| bne- cr0, LL(A1I1) | bne- cr0, LL(A1I1) | ||||
| lwz FLAG, FRAMESLOT(0)(SP) | |||||
| cmpwi cr0, FLAG, 1 | |||||
| beq- cr0, LL(A1I1) | |||||
| srawi. r0, N, 4 | srawi. r0, N, 4 | ||||
| mtspr CTR, r0 | mtspr CTR, r0 | ||||
| beq- cr0, LL(A0I1_Remain) | beq- cr0, LL(A0I1_Remain) | ||||
| @@ -0,0 +1,761 @@ | |||||
| /********************************************************************************* | |||||
| Copyright (c) 2020, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| **********************************************************************************/ | |||||
| #include "common.h" | |||||
| #include <altivec.h> | |||||
| typedef __vector unsigned char vec_t; | |||||
| typedef FLOAT v4sf_t __attribute__ ((vector_size (16))); | |||||
| #define SET_ACC_ZERO() \ | |||||
| __builtin_mma_xxsetaccz (&acc0); \ | |||||
| __builtin_mma_xxsetaccz (&acc1); \ | |||||
| __builtin_mma_xxsetaccz (&acc2); \ | |||||
| __builtin_mma_xxsetaccz (&acc3); \ | |||||
| __builtin_mma_xxsetaccz (&acc4); \ | |||||
| __builtin_mma_xxsetaccz (&acc5); \ | |||||
| __builtin_mma_xxsetaccz (&acc6); \ | |||||
| __builtin_mma_xxsetaccz (&acc7); | |||||
| #if (defined(NN) || defined(NT) || defined(TN) || defined(TT)) | |||||
| #define COMP_MUL(_real, _arbr, _aibi, _imag, _arbi, _aibr) { _real = _arbr - _aibi; _imag = _arbi + _aibr; } | |||||
| #define COMP_MAC(_real, _arbr, _aibi, _imag, _arbi, _aibr) { _real += _arbr - _aibi; _imag += _arbi + _aibr; } | |||||
| #endif | |||||
| #if (defined(NR) || defined(NC) || defined(TR) || defined(TC)) | |||||
| #define COMP_MUL(_real, _arbr, _aibi, _imag, _arbi, _aibr) { _real = _arbr + _aibi; _imag = -_arbi + _aibr; } | |||||
| #define COMP_MAC(_real, _arbr, _aibi, _imag, _arbi, _aibr) { _real += _arbr + _aibi; _imag += -_arbi + _aibr; } | |||||
| #endif | |||||
| #if (defined(RN) || defined(RT) || defined(CN) || defined(CT)) | |||||
| #define COMP_MUL(_real, _arbr, _aibi, _imag, _arbi, _aibr) { _real = _arbr + _aibi; _imag = _arbi - _aibr; } | |||||
| #define COMP_MAC(_real, _arbr, _aibi, _imag, _arbi, _aibr) { _real += _arbr + _aibi; _imag += _arbi - _aibr; } | |||||
| #endif | |||||
| #if (defined(RR) || defined(RC) || defined(CR) || defined(CC)) | |||||
| #define COMP_MUL(_real, _arbr, _aibi, _imag, _arbi, _aibr) { _real = _arbr - _aibi; _imag = -_arbi - _aibr; } | |||||
| #define COMP_MAC(_real, _arbr, _aibi, _imag, _arbi, _aibr) { _real += _arbr - _aibi; _imag += -_arbi - _aibr; } | |||||
| #endif | |||||
| #if defined(TRMMKERNEL) | |||||
| #define A_OP = | |||||
| #else | |||||
| #define A_OP += | |||||
| #endif | |||||
| #define BUILTIN_MMA_DISASSEMBLE_ACC_8 \ | |||||
| __builtin_mma_disassemble_acc ((void *)result, &acc0); \ | |||||
| __builtin_mma_disassemble_acc ((void *)&result[4], &acc1); \ | |||||
| __builtin_mma_disassemble_acc ((void *)&result[8], &acc2); \ | |||||
| __builtin_mma_disassemble_acc ((void *)&result[12], &acc3); \ | |||||
| __builtin_mma_disassemble_acc ((void *)&result[16], &acc4); \ | |||||
| __builtin_mma_disassemble_acc ((void *)&result[20], &acc5); \ | |||||
| __builtin_mma_disassemble_acc ((void *)&result[24], &acc6); \ | |||||
| __builtin_mma_disassemble_acc ((void *)&result[28], &acc7); | |||||
| #define SAVE_ACC_COMPLEX_11 \ | |||||
| BUILTIN_MMA_DISASSEMBLE_ACC_8 \ | |||||
| COMP_MUL(tr[0], res[ 0], res[ 3], ti[0], res[ 1], res[ 2]) \ | |||||
| COMP_MAC(tr[0], res[ 8], res[11], ti[0], res[ 9], res[10]) \ | |||||
| COMP_MAC(tr[0], res[16], res[19], ti[0], res[17], res[18]) \ | |||||
| COMP_MAC(tr[0], res[24], res[27], ti[0], res[25], res[26]) \ | |||||
| COMP_MAC(tr[0], res[32], res[35], ti[0], res[33], res[34]) \ | |||||
| COMP_MAC(tr[0], res[40], res[43], ti[0], res[41], res[42]) \ | |||||
| COMP_MAC(tr[0], res[48], res[51], ti[0], res[49], res[50]) \ | |||||
| COMP_MAC(tr[0], res[56], res[59], ti[0], res[57], res[58]) \ | |||||
| CO[0] A_OP tr[0] * alpha_r - ti[0] * alpha_i; \ | |||||
| CO[1] A_OP ti[0] * alpha_r + tr[0] * alpha_i; | |||||
| #define SAVE_ACC_COMPLEX_12 \ | |||||
| BUILTIN_MMA_DISASSEMBLE_ACC_8 \ | |||||
| COMP_MUL(tr[0], res[ 0], res[ 3], ti[0], res[ 1], res[ 2]) \ | |||||
| COMP_MUL(tr[1], res[ 8], res[11], ti[1], res[ 9], res[10]) \ | |||||
| COMP_MAC(tr[0], res[16], res[19], ti[0], res[17], res[18]) \ | |||||
| COMP_MAC(tr[1], res[24], res[27], ti[1], res[25], res[26]) \ | |||||
| COMP_MAC(tr[0], res[32], res[35], ti[0], res[33], res[34]) \ | |||||
| COMP_MAC(tr[1], res[40], res[43], ti[1], res[41], res[42]) \ | |||||
| COMP_MAC(tr[0], res[48], res[51], ti[0], res[49], res[50]) \ | |||||
| COMP_MAC(tr[1], res[56], res[59], ti[1], res[57], res[58]) \ | |||||
| CO[0] A_OP tr[0] * alpha_r - ti[0] * alpha_i; \ | |||||
| CO[1] A_OP ti[0] * alpha_r + tr[0] * alpha_i; \ | |||||
| CO[2*ldc+0] A_OP tr[1] * alpha_r - ti[1] * alpha_i; \ | |||||
| CO[2*ldc+1] A_OP ti[1] * alpha_r + tr[1] * alpha_i; | |||||
| #define SAVE_ACC_COMPLEX_21_1 \ | |||||
| BUILTIN_MMA_DISASSEMBLE_ACC_8 \ | |||||
| COMP_MUL(tr[0], res[ 0], res[ 3], ti[0], res[ 1], res[ 2]) \ | |||||
| COMP_MUL(tr[1], res[ 4], res[ 7], ti[1], res[ 5], res[ 6]) \ | |||||
| COMP_MAC(tr[0], res[ 8], res[11], ti[0], res[ 9], res[10]) \ | |||||
| COMP_MAC(tr[1], res[12], res[15], ti[1], res[13], res[14]) \ | |||||
| COMP_MAC(tr[0], res[16], res[19], ti[0], res[17], res[18]) \ | |||||
| COMP_MAC(tr[1], res[20], res[23], ti[1], res[21], res[22]) \ | |||||
| COMP_MAC(tr[0], res[24], res[27], ti[0], res[25], res[26]) \ | |||||
| COMP_MAC(tr[1], res[28], res[31], ti[1], res[29], res[30]) \ | |||||
| COMP_MAC(tr[0], res[32], res[35], ti[0], res[33], res[34]) \ | |||||
| COMP_MAC(tr[1], res[36], res[39], ti[1], res[37], res[38]) \ | |||||
| COMP_MAC(tr[0], res[40], res[43], ti[0], res[41], res[42]) \ | |||||
| COMP_MAC(tr[1], res[44], res[47], ti[1], res[45], res[46]) \ | |||||
| COMP_MAC(tr[0], res[48], res[51], ti[0], res[49], res[50]) \ | |||||
| COMP_MAC(tr[1], res[52], res[55], ti[1], res[53], res[54]) \ | |||||
| COMP_MAC(tr[0], res[56], res[59], ti[0], res[57], res[58]) \ | |||||
| COMP_MAC(tr[1], res[60], res[63], ti[1], res[61], res[62]) \ | |||||
| CO[0] A_OP tr[0] * alpha_r - ti[0] * alpha_i; \ | |||||
| CO[1] A_OP ti[0] * alpha_r + tr[0] * alpha_i; \ | |||||
| CO[2] A_OP tr[1] * alpha_r - ti[1] * alpha_i; \ | |||||
| CO[3] A_OP ti[1] * alpha_r + tr[1] * alpha_i; | |||||
| #define SAVE_ACC_COMPLEX_21_2 \ | |||||
| BUILTIN_MMA_DISASSEMBLE_ACC_8 \ | |||||
| COMP_MUL(tr[0], res[ 0], res[ 3], ti[0], res[ 1], res[ 2]) \ | |||||
| COMP_MUL(tr[1], res[ 4], res[ 7], ti[1], res[ 5], res[ 6]) \ | |||||
| COMP_MUL(tr[2], res[ 8], res[11], ti[2], res[ 9], res[10]) \ | |||||
| COMP_MUL(tr[3], res[12], res[15], ti[3], res[13], res[14]) \ | |||||
| COMP_MAC(tr[0], res[16], res[19], ti[0], res[17], res[18]) \ | |||||
| COMP_MAC(tr[1], res[20], res[23], ti[1], res[21], res[22]) \ | |||||
| COMP_MAC(tr[2], res[24], res[27], ti[2], res[25], res[26]) \ | |||||
| COMP_MAC(tr[3], res[28], res[31], ti[3], res[29], res[30]) \ | |||||
| COMP_MAC(tr[0], res[32], res[35], ti[0], res[33], res[34]) \ | |||||
| COMP_MAC(tr[1], res[36], res[39], ti[1], res[37], res[38]) \ | |||||
| COMP_MAC(tr[2], res[40], res[43], ti[2], res[41], res[42]) \ | |||||
| COMP_MAC(tr[3], res[44], res[47], ti[3], res[45], res[46]) \ | |||||
| COMP_MAC(tr[0], res[48], res[51], ti[0], res[49], res[50]) \ | |||||
| COMP_MAC(tr[1], res[52], res[55], ti[1], res[53], res[54]) \ | |||||
| COMP_MAC(tr[2], res[56], res[59], ti[2], res[57], res[58]) \ | |||||
| COMP_MAC(tr[3], res[60], res[63], ti[3], res[61], res[62]) \ | |||||
| CO[0] A_OP tr[0] * alpha_r - ti[0] * alpha_i; \ | |||||
| CO[1] A_OP ti[0] * alpha_r + tr[0] * alpha_i; \ | |||||
| CO[2] A_OP tr[1] * alpha_r - ti[1] * alpha_i; \ | |||||
| CO[3] A_OP ti[1] * alpha_r + tr[1] * alpha_i; \ | |||||
| CO[4] A_OP tr[2] * alpha_r - ti[2] * alpha_i; \ | |||||
| CO[5] A_OP ti[2] * alpha_r + tr[2] * alpha_i; \ | |||||
| CO[6] A_OP tr[3] * alpha_r - ti[3] * alpha_i; \ | |||||
| CO[7] A_OP ti[3] * alpha_r + tr[3] * alpha_i; | |||||
| #define SAVE_ACC_COMPLEX_21_4 \ | |||||
| BUILTIN_MMA_DISASSEMBLE_ACC_8 \ | |||||
| COMP_MUL(tr[0], res[ 0], res[ 3], ti[0], res[ 1], res[ 2]) \ | |||||
| COMP_MUL(tr[1], res[ 4], res[ 7], ti[1], res[ 5], res[ 6]) \ | |||||
| COMP_MUL(tr[2], res[ 8], res[11], ti[2], res[ 9], res[10]) \ | |||||
| COMP_MUL(tr[3], res[12], res[15], ti[3], res[13], res[14]) \ | |||||
| COMP_MUL(tr[4], res[16], res[19], ti[4], res[17], res[18]) \ | |||||
| COMP_MUL(tr[5], res[20], res[23], ti[5], res[21], res[22]) \ | |||||
| COMP_MUL(tr[6], res[24], res[27], ti[6], res[25], res[26]) \ | |||||
| COMP_MUL(tr[7], res[28], res[31], ti[7], res[29], res[30]) \ | |||||
| COMP_MAC(tr[0], res[32], res[35], ti[0], res[33], res[34]) \ | |||||
| COMP_MAC(tr[1], res[36], res[39], ti[1], res[37], res[38]) \ | |||||
| COMP_MAC(tr[2], res[40], res[43], ti[2], res[41], res[42]) \ | |||||
| COMP_MAC(tr[3], res[44], res[47], ti[3], res[45], res[46]) \ | |||||
| COMP_MAC(tr[4], res[48], res[51], ti[4], res[49], res[50]) \ | |||||
| COMP_MAC(tr[5], res[52], res[55], ti[5], res[53], res[54]) \ | |||||
| COMP_MAC(tr[6], res[56], res[59], ti[6], res[57], res[58]) \ | |||||
| COMP_MAC(tr[7], res[60], res[63], ti[7], res[61], res[62]) \ | |||||
| CO[ 0] A_OP tr[0] * alpha_r - ti[0] * alpha_i; \ | |||||
| CO[ 1] A_OP ti[0] * alpha_r + tr[0] * alpha_i; \ | |||||
| CO[ 2] A_OP tr[1] * alpha_r - ti[1] * alpha_i; \ | |||||
| CO[ 3] A_OP ti[1] * alpha_r + tr[1] * alpha_i; \ | |||||
| CO[ 4] A_OP tr[2] * alpha_r - ti[2] * alpha_i; \ | |||||
| CO[ 5] A_OP ti[2] * alpha_r + tr[2] * alpha_i; \ | |||||
| CO[ 6] A_OP tr[3] * alpha_r - ti[3] * alpha_i; \ | |||||
| CO[ 7] A_OP ti[3] * alpha_r + tr[3] * alpha_i; \ | |||||
| CO[ 8] A_OP tr[4] * alpha_r - ti[4] * alpha_i; \ | |||||
| CO[ 9] A_OP ti[4] * alpha_r + tr[4] * alpha_i; \ | |||||
| CO[10] A_OP tr[5] * alpha_r - ti[5] * alpha_i; \ | |||||
| CO[11] A_OP ti[5] * alpha_r + tr[5] * alpha_i; \ | |||||
| CO[12] A_OP tr[6] * alpha_r - ti[6] * alpha_i; \ | |||||
| CO[13] A_OP ti[6] * alpha_r + tr[6] * alpha_i; \ | |||||
| CO[14] A_OP tr[7] * alpha_r - ti[7] * alpha_i; \ | |||||
| CO[15] A_OP ti[7] * alpha_r + tr[7] * alpha_i; | |||||
| #define SAVE_ACC_COMPLEX_22_1 \ | |||||
| __builtin_mma_disassemble_acc ((void *)result, &acc0); \ | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[4]), &acc1); \ | |||||
| COMP_MUL(tr[0], res[0], res[3], ti[0], res[1], res[2]) \ | |||||
| COMP_MUL(tr[1], res[4], res[7], ti[1], res[5], res[6]) \ | |||||
| COMP_MUL(tr[2], res[8], res[11], ti[2], res[9], res[10]) \ | |||||
| COMP_MUL(tr[3], res[12], res[15], ti[3], res[13], res[14] ) \ | |||||
| CO[0] A_OP tr[0] * alpha_r - ti[0] * alpha_i; \ | |||||
| CO[1] A_OP ti[0] * alpha_r + tr[0] * alpha_i; \ | |||||
| CO[2] A_OP tr[1] * alpha_r - ti[1] * alpha_i; \ | |||||
| CO[3] A_OP ti[1] * alpha_r + tr[1] * alpha_i; \ | |||||
| CO[2*ldc+0] A_OP tr[2] * alpha_r - ti[2] * alpha_i; \ | |||||
| CO[2*ldc+1] A_OP ti[2] * alpha_r + tr[2] * alpha_i; \ | |||||
| CO[2*ldc+2] A_OP tr[3] * alpha_r - ti[3] * alpha_i; \ | |||||
| CO[2*ldc+3] A_OP ti[3] * alpha_r + tr[3] * alpha_i; | |||||
| #define SAVE_ACC_COMPLEX_22_2(ACC1, ACC2, CI) \ | |||||
| __builtin_mma_disassemble_acc ((void *)result, ACC1); \ | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[4]), ACC2); \ | |||||
| COMP_MUL(tr[0], res[0], res[3], ti[0], res[1], res[2]) \ | |||||
| COMP_MUL(tr[1], res[4], res[7], ti[1], res[5], res[6]) \ | |||||
| COMP_MUL(tr[2], res[8], res[11], ti[2], res[9], res[10]) \ | |||||
| COMP_MUL(tr[3], res[12], res[15], ti[3], res[13], res[14]) \ | |||||
| CO[CI+0] A_OP tr[0] * alpha_r - ti[0] * alpha_i; \ | |||||
| CO[CI+1] A_OP ti[0] * alpha_r + tr[0] * alpha_i; \ | |||||
| CO[CI+2] A_OP tr[1] * alpha_r - ti[1] * alpha_i; \ | |||||
| CO[CI+3] A_OP ti[1] * alpha_r + tr[1] * alpha_i; \ | |||||
| CO[2*ldc+CI+0] A_OP tr[2] * alpha_r - ti[2] * alpha_i; \ | |||||
| CO[2*ldc+CI+1] A_OP ti[2] * alpha_r + tr[2] * alpha_i; \ | |||||
| CO[2*ldc+CI+2] A_OP tr[3] * alpha_r - ti[3] * alpha_i; \ | |||||
| CO[2*ldc+CI+3] A_OP ti[3] * alpha_r + tr[3] * alpha_i; | |||||
| #define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory"); | |||||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) | |||||
| #define REFRESH_TEMP_BK(x, y) \ | |||||
| temp = k - off; | |||||
| #elif defined(LEFT) | |||||
| #define REFRESH_TEMP_BK(x, y) \ | |||||
| temp = off + x; | |||||
| #else | |||||
| #define REFRESH_TEMP_BK(x, y) \ | |||||
| temp = off + y; | |||||
| #endif | |||||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||||
| #define REFRESH_POINTERS(x, y) \ | |||||
| BO = B; \ | |||||
| REFRESH_TEMP_BK(x, y) | |||||
| #else | |||||
| #define REFRESH_POINTERS(x, y) \ | |||||
| AO += off * (2*x); \ | |||||
| BO = B + off * (2*y); \ | |||||
| REFRESH_TEMP_BK(x, y) | |||||
| #endif | |||||
| #ifdef LEFT | |||||
| #define REFRESH_OFF(x) \ | |||||
| off += x; | |||||
| #else | |||||
| #define REFRESH_OFF(x) | |||||
| #endif | |||||
| #ifdef LEFT | |||||
| #define UPDATE_TEMP(x, y) \ | |||||
| temp -= x; | |||||
| #else | |||||
| #define UPDATE_TEMP(x, y) \ | |||||
| temp -= y; | |||||
| #endif | |||||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||||
| #define REFRESH_TMP_AFTER_SAVE(x, y) \ | |||||
| temp = k - off; \ | |||||
| UPDATE_TEMP(x, y) \ | |||||
| AO += temp * (2*x); \ | |||||
| BO += temp * (2*y); | |||||
| #else | |||||
| #define REFRESH_TMP_AFTER_SAVE(x, y) | |||||
| #endif | |||||
| #define REFRESH_AFTER_SAVE(x,y) \ | |||||
| REFRESH_TMP_AFTER_SAVE(x, y) \ | |||||
| REFRESH_OFF(x) | |||||
| /************************************************************************************* | |||||
| * GEMM Kernel | |||||
| * | |||||
| * CNAME accumulates products of the A and B panels with the MMA builtin | |||||
| * __builtin_mma_xvf64gerpp, scales them by the complex scalar (alpha_r, alpha_i) | |||||
| * and applies them to C through A_OP (an assignment operator defined outside this | |||||
| * block). Every matrix element occupies two consecutive FLOATs, real part first; | |||||
| * this follows from the CO[even] / CO[odd] stores and the pointer strides below. | |||||
| * | |||||
| * Traversal order: | |||||
| *   - columns of C are handled two at a time (n >> 1 iterations), then one | |||||
| *     leftover column when n is odd; | |||||
| *   - inside each column group, rows are handled in blocks of 8 (m >> 3 | |||||
| *     iterations), then at most one block each of 4, 2 and 1 rows. | |||||
| * | |||||
| * Parameters: | |||||
| *   m, n      row and column counts of the tile of C that is updated. | |||||
| *   k         inner dimension; used as the loop trip count (temp) when | |||||
| *             TRMMKERNEL is not defined, and always as the per-column stride of B. | |||||
| *   alpha_r,  real and imaginary part of the scalar applied to every result | |||||
| *   alpha_i   before it is stored. | |||||
| *   A         read sequentially through AO; a row block of 8/4/2/1 rows | |||||
| *             consumes 16/8/4/2 FLOATs per inner-dimension step. | |||||
| *   B         read through BO; a column pair consumes 4 FLOATs per step, a | |||||
| *             single column 2 FLOATs per step. | |||||
| *   C         output; the second column of a pair lives at CO[2*ldc], so ldc is | |||||
| *             counted in elements of two FLOATs each. | |||||
| *   ldc       column stride of C (see above). | |||||
| *   offset    only present with TRMMKERNEL; seeds the local `off`. | |||||
| * | |||||
| * Returns 0 unconditionally. | |||||
| * | |||||
| * Helper macros used here but defined outside this block: SET_ACC_ZERO, | |||||
| * COMP_MUL, SAVE_ACC_COMPLEX_*, REFRESH_POINTERS, REFRESH_AFTER_SAVE, A_OP. | |||||
| * With TRMMKERNEL, BO and temp are not assigned in this function body, so | |||||
| * REFRESH_POINTERS presumably sets them from `off` - TODO confirm. | |||||
| * | |||||
| * NOTE(review): in the two `m & 1` paths A advances only 2 FLOATs per step, yet | |||||
| * each operand is still loaded as a __vector_pair. If __vector_pair spans four | |||||
| * FLOATs (256 bits per the GCC documentation, with 8-byte FLOAT), consecutive | |||||
| * loads overlap and the last one reads beyond the final step of the block. | |||||
| * Presumably the save macros only use the half of the accumulator that came | |||||
| * from the intended element - confirm, and confirm the A buffer tolerates the | |||||
| * over-read. | |||||
| *************************************************************************************/ | |||||
| int | |||||
| CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * A, FLOAT * B, | |||||
| FLOAT * C, BLASLONG ldc | |||||
| #ifdef TRMMKERNEL | |||||
| , BLASLONG offset | |||||
| #endif | |||||
| ) | |||||
| { | |||||
| BLASLONG i1, i, l, temp; | |||||
| FLOAT *AO, *BO, *CO; | |||||
| #if defined(TRMMKERNEL) | |||||
| BLASLONG off; | |||||
| #endif | |||||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||||
| off = -offset; /* non-LEFT TRMM: set once here, then increased after every column group */ | |||||
| #endif | |||||
| __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; | |||||
| v4sf_t result[32]; /* landing area for the eight disassembled accumulators of the 8x2 tile, four entries each */ | |||||
| FLOAT *res, tr[16], ti[16]; | |||||
| res = (FLOAT *) result; /* FLOAT view of result; indexed 0..63 in the 8x2 tile */ | |||||
| for (i1 = 0; i1 < (n >> 1); i1++) /* column pairs */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) && defined(LEFT) | |||||
| off = offset; /* LEFT TRMM: restart for every column group */ | |||||
| #endif | |||||
| AO = A; | |||||
| CO = C; | |||||
| C += ldc<<2; /* skip two columns: 2 * ldc elements * 2 FLOATs */ | |||||
| for (i = 0; i < (m >> 3); i++) /* 8-row blocks */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_POINTERS (8, 2) | |||||
| #else | |||||
| BO = B; | |||||
| temp = k; | |||||
| #endif | |||||
| SET_ACC_ZERO() | |||||
| for (l = 0; l < temp; ++l) /* one inner-dimension step per iteration */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<4])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<4)+4])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<4)+8])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<4)+12])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<2]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); /* acc0..acc3: first B operand */ | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA3, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA4, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc4, rowA1, rowB2); /* acc4..acc7: second B operand */ | |||||
| __builtin_mma_xvf64gerpp(&acc5, rowA2, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc6, rowA3, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc7, rowA4, rowB2); | |||||
| } | |||||
| __builtin_mma_disassemble_acc ((void *)result, &acc0); /* copy accumulators to memory so individual products can be combined */ | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[ 4]), &acc1); | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[ 8]), &acc2); | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[12]), &acc3); | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[16]), &acc4); | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[20]), &acc5); | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[24]), &acc6); | |||||
| __builtin_mma_disassemble_acc ((void *)(&result[28]), &acc7); | |||||
| COMP_MUL(tr[ 0], res[ 0], res[ 3], ti[ 0], res[ 1], res[ 2]) /* each call consumes four consecutive partial products and fills one tr/ti pair */ | |||||
| COMP_MUL(tr[ 1], res[ 4], res[ 7], ti[ 1], res[ 5], res[ 6]) | |||||
| COMP_MUL(tr[ 2], res[ 8], res[11], ti[ 2], res[ 9], res[10]) | |||||
| COMP_MUL(tr[ 3], res[12], res[15], ti[ 3], res[13], res[14]) | |||||
| COMP_MUL(tr[ 4], res[16], res[19], ti[ 4], res[17], res[18]) | |||||
| COMP_MUL(tr[ 5], res[20], res[23], ti[ 5], res[21], res[22]) | |||||
| COMP_MUL(tr[ 6], res[24], res[27], ti[ 6], res[25], res[26]) | |||||
| COMP_MUL(tr[ 7], res[28], res[31], ti[ 7], res[29], res[30]) | |||||
| COMP_MUL(tr[ 8], res[32], res[35], ti[ 8], res[33], res[34]) | |||||
| COMP_MUL(tr[ 9], res[36], res[39], ti[ 9], res[37], res[38]) | |||||
| COMP_MUL(tr[10], res[40], res[43], ti[10], res[41], res[42]) | |||||
| COMP_MUL(tr[11], res[44], res[47], ti[11], res[45], res[46]) | |||||
| COMP_MUL(tr[12], res[48], res[51], ti[12], res[49], res[50]) | |||||
| COMP_MUL(tr[13], res[52], res[55], ti[13], res[53], res[54]) | |||||
| COMP_MUL(tr[14], res[56], res[59], ti[14], res[57], res[58]) | |||||
| COMP_MUL(tr[15], res[60], res[63], ti[15], res[61], res[62]) | |||||
| CO[ 0] A_OP tr[0] * alpha_r - ti[0] * alpha_i; /* first column: (tr + i*ti) * (alpha_r + i*alpha_i), real then imaginary */ | |||||
| CO[ 1] A_OP ti[0] * alpha_r + tr[0] * alpha_i; | |||||
| CO[ 2] A_OP tr[1] * alpha_r - ti[1] * alpha_i; | |||||
| CO[ 3] A_OP ti[1] * alpha_r + tr[1] * alpha_i; | |||||
| CO[ 4] A_OP tr[2] * alpha_r - ti[2] * alpha_i; | |||||
| CO[ 5] A_OP ti[2] * alpha_r + tr[2] * alpha_i; | |||||
| CO[ 6] A_OP tr[3] * alpha_r - ti[3] * alpha_i; | |||||
| CO[ 7] A_OP ti[3] * alpha_r + tr[3] * alpha_i; | |||||
| CO[ 8] A_OP tr[4] * alpha_r - ti[4] * alpha_i; | |||||
| CO[ 9] A_OP ti[4] * alpha_r + tr[4] * alpha_i; | |||||
| CO[10] A_OP tr[5] * alpha_r - ti[5] * alpha_i; | |||||
| CO[11] A_OP ti[5] * alpha_r + tr[5] * alpha_i; | |||||
| CO[12] A_OP tr[6] * alpha_r - ti[6] * alpha_i; | |||||
| CO[13] A_OP ti[6] * alpha_r + tr[6] * alpha_i; | |||||
| CO[14] A_OP tr[7] * alpha_r - ti[7] * alpha_i; | |||||
| CO[15] A_OP ti[7] * alpha_r + tr[7] * alpha_i; | |||||
| CO[2*ldc+ 0] A_OP tr[ 8] * alpha_r - ti[ 8] * alpha_i; /* second column of the pair, 2*ldc FLOATs further on */ | |||||
| CO[2*ldc+ 1] A_OP ti[ 8] * alpha_r + tr[ 8] * alpha_i; | |||||
| CO[2*ldc+ 2] A_OP tr[ 9] * alpha_r - ti[ 9] * alpha_i; | |||||
| CO[2*ldc+ 3] A_OP ti[ 9] * alpha_r + tr[ 9] * alpha_i; | |||||
| CO[2*ldc+ 4] A_OP tr[10] * alpha_r - ti[10] * alpha_i; | |||||
| CO[2*ldc+ 5] A_OP ti[10] * alpha_r + tr[10] * alpha_i; | |||||
| CO[2*ldc+ 6] A_OP tr[11] * alpha_r - ti[11] * alpha_i; | |||||
| CO[2*ldc+ 7] A_OP ti[11] * alpha_r + tr[11] * alpha_i; | |||||
| CO[2*ldc+ 8] A_OP tr[12] * alpha_r - ti[12] * alpha_i; | |||||
| CO[2*ldc+ 9] A_OP ti[12] * alpha_r + tr[12] * alpha_i; | |||||
| CO[2*ldc+10] A_OP tr[13] * alpha_r - ti[13] * alpha_i; | |||||
| CO[2*ldc+11] A_OP ti[13] * alpha_r + tr[13] * alpha_i; | |||||
| CO[2*ldc+12] A_OP tr[14] * alpha_r - ti[14] * alpha_i; | |||||
| CO[2*ldc+13] A_OP ti[14] * alpha_r + tr[14] * alpha_i; | |||||
| CO[2*ldc+14] A_OP tr[15] * alpha_r - ti[15] * alpha_i; | |||||
| CO[2*ldc+15] A_OP ti[15] * alpha_r + tr[15] * alpha_i; | |||||
| AO += temp << 4; /* 16 FLOATs of A were consumed per step */ | |||||
| BO += temp << 2; | |||||
| CO += 16; /* eight elements down the column */ | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_AFTER_SAVE (8, 2) | |||||
| #endif | |||||
| } | |||||
| if (m & 4) /* 4-row block */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_POINTERS (4, 2) | |||||
| #else | |||||
| BO = B; | |||||
| temp = k; | |||||
| #endif | |||||
| SET_ACC_ZERO() | |||||
| for (l = 0; l < (temp & (~1)); l+=2) /* two inner-dimension steps per iteration, both added into acc0..acc3 */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<3])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<3)+4])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<3)+8])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<3)+12])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<2]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; | |||||
| vec_t rowB3 = *(vec_t *) & BO[(l<<2)+4]; | |||||
| vec_t rowB4 = *(vec_t *) & BO[(l<<2)+6]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA1, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA2, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA3, rowB3); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA4, rowB3); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA3, rowB4); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA4, rowB4); | |||||
| } | |||||
| for (l = (temp & (~1)); l < temp; ++l) /* leftover step when temp is odd */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<3])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<3)+4])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<2]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA1, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA2, rowB2); | |||||
| } | |||||
| SAVE_ACC_COMPLEX_22_2(&acc0, &acc2, 0) | |||||
| SAVE_ACC_COMPLEX_22_2(&acc1, &acc3, 4) | |||||
| AO += temp << 3; | |||||
| BO += temp << 2; | |||||
| CO += 8; | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_AFTER_SAVE (4, 2) | |||||
| #endif | |||||
| } | |||||
| if (m & 2) /* 2-row block */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_POINTERS (2, 2) | |||||
| #else | |||||
| BO = B; | |||||
| temp = k; | |||||
| #endif | |||||
| SET_ACC_ZERO() | |||||
| for (l = 0; l < (temp & (~3)); l+=4) /* four inner-dimension steps per iteration, all added into acc0/acc1 */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<2])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<2)+4])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<2)+8])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<2)+12])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<2]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; | |||||
| vec_t rowB3 = *(vec_t *) & BO[(l<<2)+4]; | |||||
| vec_t rowB4 = *(vec_t *) & BO[(l<<2)+6]; | |||||
| vec_t rowB5 = *(vec_t *) & BO[(l<<2)+8]; | |||||
| vec_t rowB6 = *(vec_t *) & BO[(l<<2)+10]; | |||||
| vec_t rowB7 = *(vec_t *) & BO[(l<<2)+12]; | |||||
| vec_t rowB8 = *(vec_t *) & BO[(l<<2)+14]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA1, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA2, rowB3); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB4); | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA3, rowB5); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA3, rowB6); | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA4, rowB7); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA4, rowB8); | |||||
| } | |||||
| for (l = (temp & (~3)); l < temp; ++l) /* up to three leftover steps */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<2])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<2]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA1, rowB2); | |||||
| } | |||||
| SAVE_ACC_COMPLEX_22_1 | |||||
| AO += temp << 2; | |||||
| BO += temp << 2; | |||||
| CO += 4; | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_AFTER_SAVE (2, 2) | |||||
| #endif | |||||
| } | |||||
| if (m & 1) /* single leftover row; see the NOTE(review) in the header about load width */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_POINTERS (1, 2) | |||||
| #else | |||||
| BO = B; | |||||
| temp = k; | |||||
| #endif | |||||
| // RIP OUT MMA STUFF! | |||||
| SET_ACC_ZERO() | |||||
| for (l = 0; l < (temp & (~3)); l+=4) /* four inner-dimension steps per iteration */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<1])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<1)+2])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<1)+4])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<1)+6])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<2]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; | |||||
| vec_t rowB3 = *(vec_t *) & BO[(l<<2)+4]; | |||||
| vec_t rowB4 = *(vec_t *) & BO[(l<<2)+6]; | |||||
| vec_t rowB5 = *(vec_t *) & BO[(l<<2)+8]; | |||||
| vec_t rowB6 = *(vec_t *) & BO[(l<<2)+10]; | |||||
| vec_t rowB7 = *(vec_t *) & BO[(l<<2)+12]; | |||||
| vec_t rowB8 = *(vec_t *) & BO[(l<<2)+14]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA1, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA2, rowB3); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB4); | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA3, rowB5); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA3, rowB6); | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA4, rowB7); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA4, rowB8); | |||||
| } | |||||
| for (l = (temp & (~3)); l < temp; ++l) | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<1])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<2]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA1, rowB2); | |||||
| } | |||||
| SAVE_ACC_COMPLEX_12 | |||||
| AO += temp << 1; | |||||
| BO += temp << 2; | |||||
| CO += 2; | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_AFTER_SAVE (1, 2) | |||||
| #endif | |||||
| } | |||||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||||
| off += 2; // number of values in A | |||||
| #endif | |||||
| B += k << 2; /* next column pair of B: 2 columns * 2 FLOATs per step, k steps */ | |||||
| } | |||||
| if (n & 1) /* leftover single column */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) && defined(LEFT) | |||||
| off = offset; | |||||
| #endif | |||||
| AO = A; | |||||
| CO = C; | |||||
| C += ldc<<1; /* skip one column: ldc elements * 2 FLOATs */ | |||||
| for (i = 0; i < (m >> 3); i++) /* 8-row blocks */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_POINTERS (8, 1) | |||||
| #else | |||||
| BO = B; | |||||
| temp = k; | |||||
| #endif | |||||
| SET_ACC_ZERO() | |||||
| for (l = 0; l < (temp & (~1)); l+=2) /* two inner-dimension steps per iteration, both added into acc0..acc3 */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<4])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<4)+4])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<4)+8])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<4)+12])); | |||||
| __vector_pair rowA5 = *((__vector_pair *)((void *)&AO[(l<<4)+16])); | |||||
| __vector_pair rowA6 = *((__vector_pair *)((void *)&AO[(l<<4)+20])); | |||||
| __vector_pair rowA7 = *((__vector_pair *)((void *)&AO[(l<<4)+24])); | |||||
| __vector_pair rowA8 = *((__vector_pair *)((void *)&AO[(l<<4)+28])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<1]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<1)+2]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA3, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA4, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA5, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA6, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA7, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA8, rowB2); | |||||
| } | |||||
| for (l = (temp & (~1)); l < temp; ++l) /* leftover step when temp is odd */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<4])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<4)+4])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<4)+8])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<4)+12])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<1]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA3, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA4, rowB1); | |||||
| } | |||||
| SAVE_ACC_COMPLEX_21_4 | |||||
| AO += temp << 4; | |||||
| BO += temp << 1; | |||||
| CO += 16; | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_AFTER_SAVE (8, 1) | |||||
| #endif | |||||
| } | |||||
| if (m & 4) /* 4-row block */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_POINTERS (4, 1) | |||||
| #else | |||||
| BO = B; | |||||
| temp = k; | |||||
| #endif | |||||
| SET_ACC_ZERO() | |||||
| for (l = 0; l < (temp & (~3)); l+=4) /* four steps per iteration, each step feeding its own accumulator pair; SAVE_ACC_COMPLEX_21_2 presumably sums them - TODO confirm */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<3])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<3)+4])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<3)+8])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<3)+12])); | |||||
| __vector_pair rowA5 = *((__vector_pair *)((void *)&AO[(l<<3)+16])); | |||||
| __vector_pair rowA6 = *((__vector_pair *)((void *)&AO[(l<<3)+20])); | |||||
| __vector_pair rowA7 = *((__vector_pair *)((void *)&AO[(l<<3)+24])); | |||||
| __vector_pair rowA8 = *((__vector_pair *)((void *)&AO[(l<<3)+28])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<1]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<1)+2]; | |||||
| vec_t rowB3 = *(vec_t *) & BO[(l<<1)+4]; | |||||
| vec_t rowB4 = *(vec_t *) & BO[(l<<1)+6]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA3, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA4, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc4, rowA5, rowB3); | |||||
| __builtin_mma_xvf64gerpp(&acc5, rowA6, rowB3); | |||||
| __builtin_mma_xvf64gerpp(&acc6, rowA7, rowB4); | |||||
| __builtin_mma_xvf64gerpp(&acc7, rowA8, rowB4); | |||||
| } | |||||
| for (l = (temp & (~3)); l < temp; ++l) /* up to three leftover steps, added into acc0/acc1 */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<3])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<3)+4])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<1]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); | |||||
| } | |||||
| SAVE_ACC_COMPLEX_21_2 | |||||
| AO += temp << 3; | |||||
| BO += temp << 1; | |||||
| CO += 8; | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_AFTER_SAVE (4, 1) | |||||
| #endif | |||||
| } if (m & 2) | |||||
| { | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_POINTERS (2, 1) | |||||
| #else | |||||
| BO = B; | |||||
| temp = k; | |||||
| #endif | |||||
| SET_ACC_ZERO() | |||||
| for (l = 0; l < (temp & (~7)); l+=8) /* eight steps per iteration, one accumulator per step; SAVE_ACC_COMPLEX_21_1 presumably sums them - TODO confirm */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<2])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<2)+4])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<2)+8])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<2)+12])); | |||||
| __vector_pair rowA5 = *((__vector_pair *)((void *)&AO[(l<<2)+16])); | |||||
| __vector_pair rowA6 = *((__vector_pair *)((void *)&AO[(l<<2)+20])); | |||||
| __vector_pair rowA7 = *((__vector_pair *)((void *)&AO[(l<<2)+24])); | |||||
| __vector_pair rowA8 = *((__vector_pair *)((void *)&AO[(l<<2)+28])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<1]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<1)+2]; | |||||
| vec_t rowB3 = *(vec_t *) & BO[(l<<1)+4]; | |||||
| vec_t rowB4 = *(vec_t *) & BO[(l<<1)+6]; | |||||
| vec_t rowB5 = *(vec_t *) & BO[(l<<1)+8]; | |||||
| vec_t rowB6 = *(vec_t *) & BO[(l<<1)+10]; | |||||
| vec_t rowB7 = *(vec_t *) & BO[(l<<1)+12]; | |||||
| vec_t rowB8 = *(vec_t *) & BO[(l<<1)+14]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA3, rowB3); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA4, rowB4); | |||||
| __builtin_mma_xvf64gerpp(&acc4, rowA5, rowB5); | |||||
| __builtin_mma_xvf64gerpp(&acc5, rowA6, rowB6); | |||||
| __builtin_mma_xvf64gerpp(&acc6, rowA7, rowB7); | |||||
| __builtin_mma_xvf64gerpp(&acc7, rowA8, rowB8); | |||||
| } | |||||
| for (l = (temp & (~7)); l < temp; ++l) /* up to seven leftover steps, added into acc0 */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<2])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<1]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| } | |||||
| SAVE_ACC_COMPLEX_21_1 | |||||
| AO += temp << 2; | |||||
| BO += temp << 1; | |||||
| CO += 4; | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_AFTER_SAVE (2, 1) | |||||
| #endif | |||||
| } | |||||
| if (m & 1) /* single leftover row; see the NOTE(review) in the header about load width */ | |||||
| { | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_POINTERS (1, 1) | |||||
| #else | |||||
| BO = B; | |||||
| temp = k; | |||||
| #endif | |||||
| // RIP OUT MMA STUFF! | |||||
| SET_ACC_ZERO() | |||||
| for (l = 0; l < (temp & (~7)); l+=8) /* eight steps per iteration, one accumulator per step */ | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<1])); | |||||
| __vector_pair rowA2 = *((__vector_pair *)((void *)&AO[(l<<1)+2])); | |||||
| __vector_pair rowA3 = *((__vector_pair *)((void *)&AO[(l<<1)+4])); | |||||
| __vector_pair rowA4 = *((__vector_pair *)((void *)&AO[(l<<1)+6])); | |||||
| __vector_pair rowA5 = *((__vector_pair *)((void *)&AO[(l<<1)+8])); | |||||
| __vector_pair rowA6 = *((__vector_pair *)((void *)&AO[(l<<1)+10])); | |||||
| __vector_pair rowA7 = *((__vector_pair *)((void *)&AO[(l<<1)+12])); | |||||
| __vector_pair rowA8 = *((__vector_pair *)((void *)&AO[(l<<1)+14])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<1]; | |||||
| vec_t rowB2 = *(vec_t *) & BO[(l<<1)+2]; | |||||
| vec_t rowB3 = *(vec_t *) & BO[(l<<1)+4]; | |||||
| vec_t rowB4 = *(vec_t *) & BO[(l<<1)+6]; | |||||
| vec_t rowB5 = *(vec_t *) & BO[(l<<1)+8]; | |||||
| vec_t rowB6 = *(vec_t *) & BO[(l<<1)+10]; | |||||
| vec_t rowB7 = *(vec_t *) & BO[(l<<1)+12]; | |||||
| vec_t rowB8 = *(vec_t *) & BO[(l<<1)+14]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB2); | |||||
| __builtin_mma_xvf64gerpp(&acc2, rowA3, rowB3); | |||||
| __builtin_mma_xvf64gerpp(&acc3, rowA4, rowB4); | |||||
| __builtin_mma_xvf64gerpp(&acc4, rowA5, rowB5); | |||||
| __builtin_mma_xvf64gerpp(&acc5, rowA6, rowB6); | |||||
| __builtin_mma_xvf64gerpp(&acc6, rowA7, rowB7); | |||||
| __builtin_mma_xvf64gerpp(&acc7, rowA8, rowB8); | |||||
| } | |||||
| for (l = (temp & (~7)); l < temp; ++l) | |||||
| { | |||||
| __vector_pair rowA1 = *((__vector_pair *)((void *)&AO[l<<1])); | |||||
| vec_t rowB1 = *(vec_t *) & BO[l<<1]; | |||||
| __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); | |||||
| } | |||||
| SAVE_ACC_COMPLEX_11 | |||||
| AO += temp << 1; | |||||
| BO += temp << 1; | |||||
| CO += 2; | |||||
| #if defined(TRMMKERNEL) | |||||
| REFRESH_AFTER_SAVE (1, 1) | |||||
| #endif | |||||
| } | |||||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||||
| off += 1; // number of values in A | |||||
| #endif | |||||
| B += k << 1; /* past the single column of B: 2 FLOATs per step, k steps */ | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| @@ -104,7 +104,7 @@ | |||||
| * | * | ||||
| READ( NIN, FMT = * )SUMMRY | READ( NIN, FMT = * )SUMMRY | ||||
| READ( NIN, FMT = * )NOUT | READ( NIN, FMT = * )NOUT | ||||
| OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) | |||||
| OPEN( NOUT, FILE = SUMMRY, STATUS = 'REPLACE' ) | |||||
| NOUTC = NOUT | NOUTC = NOUT | ||||
| * | * | ||||
| * Read name and unit number for snapshot output file and open file. | * Read name and unit number for snapshot output file and open file. | ||||
| @@ -113,7 +113,7 @@ | |||||
| READ( NIN, FMT = * )NTRA | READ( NIN, FMT = * )NTRA | ||||
| TRACE = NTRA.GE.0 | TRACE = NTRA.GE.0 | ||||
| IF( TRACE )THEN | IF( TRACE )THEN | ||||
| OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) | |||||
| OPEN( NTRA, FILE = SNAPS, STATUS = 'REPLACE' ) | |||||
| END IF | END IF | ||||
| * Read the flag that directs rewinding of the snapshot file. | * Read the flag that directs rewinding of the snapshot file. | ||||
| READ( NIN, FMT = * )REWI | READ( NIN, FMT = * )REWI | ||||
| @@ -3439,4 +3439,3 @@ | |||||
| * End of XERBLA | * End of XERBLA | ||||
| * | * | ||||
| END | END | ||||
| @@ -105,7 +105,7 @@ | |||||
| * | * | ||||
| READ( NIN, FMT = * )SUMMRY | READ( NIN, FMT = * )SUMMRY | ||||
| READ( NIN, FMT = * )NOUT | READ( NIN, FMT = * )NOUT | ||||
| OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) | |||||
| OPEN( NOUT, FILE = SUMMRY, STATUS = 'REPLACE' ) | |||||
| NOUTC = NOUT | NOUTC = NOUT | ||||
| * | * | ||||
| * Read name and unit number for snapshot output file and open file. | * Read name and unit number for snapshot output file and open file. | ||||
| @@ -114,7 +114,7 @@ | |||||
| READ( NIN, FMT = * )NTRA | READ( NIN, FMT = * )NTRA | ||||
| TRACE = NTRA.GE.0 | TRACE = NTRA.GE.0 | ||||
| IF( TRACE )THEN | IF( TRACE )THEN | ||||
| OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) | |||||
| OPEN( NTRA, FILE = SNAPS, STATUS = 'REPLACE' ) | |||||
| END IF | END IF | ||||
| * Read the flag that directs rewinding of the snapshot file. | * Read the flag that directs rewinding of the snapshot file. | ||||
| READ( NIN, FMT = * )REWI | READ( NIN, FMT = * )REWI | ||||
| @@ -81,6 +81,28 @@ static void cgemmt_trusted(char api, enum CBLAS_ORDER order, char uplo, char tra | |||||
| ldc *= 2; | ldc *= 2; | ||||
| #ifndef NO_CBLAS | |||||
| if (order == CblasRowMajor) { | |||||
| if (uplo == 'U' || uplo == CblasUpper) | |||||
| { | |||||
| for (i = 0; i < m; i++) | |||||
| for (j = i * 2; j < m * 2; j+=2){ | |||||
| data_cgemmt.c_verify[i * ldc + j] = | |||||
| data_cgemmt.c_gemm[i * ldc + j]; | |||||
| data_cgemmt.c_verify[i * ldc + j + 1] = | |||||
| data_cgemmt.c_gemm[i * ldc + j + 1]; | |||||
| } | |||||
| } else { | |||||
| for (i = 0; i < m; i++) | |||||
| for (j = 0; j <= i * 2; j+=2){ | |||||
| data_cgemmt.c_verify[i * ldc + j] = | |||||
| data_cgemmt.c_gemm[i * ldc + j]; | |||||
| data_cgemmt.c_verify[i * ldc + j + 1] = | |||||
| data_cgemmt.c_gemm[i * ldc + j + 1]; | |||||
| } | |||||
| } | |||||
| } else | |||||
| #endif | |||||
| if (uplo == 'L' || uplo == CblasLower) | if (uplo == 'L' || uplo == CblasLower) | ||||
| { | { | ||||
| for (i = 0; i < m; i++) | for (i = 0; i < m; i++) | ||||
| @@ -77,6 +77,21 @@ static void dgemmt_trusted(char api, enum CBLAS_ORDER order, char uplo, char tra | |||||
| else | else | ||||
| cblas_dgemm(order, transa, transb, m, m, k, alpha, data_dgemmt.a_test, lda, | cblas_dgemm(order, transa, transb, m, m, k, alpha, data_dgemmt.a_test, lda, | ||||
| data_dgemmt.b_test, ldb, beta, data_dgemmt.c_gemm, ldc); | data_dgemmt.b_test, ldb, beta, data_dgemmt.c_gemm, ldc); | ||||
| if (order == CblasRowMajor) { | |||||
| if (uplo == 'U' || uplo == CblasUpper) | |||||
| { | |||||
| for (i = 0; i < m; i++) | |||||
| for (j = i; j < m; j++) | |||||
| data_dgemmt.c_verify[i * ldc + j] = | |||||
| data_dgemmt.c_gemm[i * ldc + j]; | |||||
| } else { | |||||
| for (i = 0; i < m; i++) | |||||
| for (j = 0; j <= i; j++) | |||||
| data_dgemmt.c_verify[i * ldc + j] = | |||||
| data_dgemmt.c_gemm[i * ldc + j]; | |||||
| } | |||||
| }else | |||||
| #endif | #endif | ||||
| if (uplo == 'L' || uplo == CblasLower) | if (uplo == 'L' || uplo == CblasLower) | ||||
| @@ -77,6 +77,21 @@ static void sgemmt_trusted(char api, enum CBLAS_ORDER order, char uplo, char tra | |||||
| else | else | ||||
| cblas_sgemm(order, transa, transb, m, m, k, alpha, data_sgemmt.a_test, lda, | cblas_sgemm(order, transa, transb, m, m, k, alpha, data_sgemmt.a_test, lda, | ||||
| data_sgemmt.b_test, ldb, beta, data_sgemmt.c_gemm, ldc); | data_sgemmt.b_test, ldb, beta, data_sgemmt.c_gemm, ldc); | ||||
| if (order == CblasRowMajor) { | |||||
| if (uplo == 'U' || uplo == CblasUpper) | |||||
| { | |||||
| for (i = 0; i < m; i++) | |||||
| for (j = i; j < m; j++) | |||||
| data_sgemmt.c_verify[i * ldc + j] = | |||||
| data_sgemmt.c_gemm[i * ldc + j]; | |||||
| } else { | |||||
| for (i = 0; i < m; i++) | |||||
| for (j = 0; j <= i; j++) | |||||
| data_sgemmt.c_verify[i * ldc + j] = | |||||
| data_sgemmt.c_gemm[i * ldc + j]; | |||||
| } | |||||
| } else | |||||
| #endif | #endif | ||||
| if (uplo == 'L' || uplo == CblasLower) | if (uplo == 'L' || uplo == CblasLower) | ||||
| @@ -80,7 +80,28 @@ static void zgemmt_trusted(char api, enum CBLAS_ORDER order, char uplo, char tra | |||||
| #endif | #endif | ||||
| ldc *= 2; | ldc *= 2; | ||||
| #ifndef NO_CBLAS | |||||
| if (order == CblasRowMajor) { | |||||
| if (uplo == 'U' || uplo == CblasUpper) | |||||
| { | |||||
| for (i = 0; i < m; i++) | |||||
| for (j = i * 2; j < m * 2; j+=2){ | |||||
| data_zgemmt.c_verify[i * ldc + j] = | |||||
| data_zgemmt.c_gemm[i * ldc + j]; | |||||
| data_zgemmt.c_verify[i * ldc + j + 1] = | |||||
| data_zgemmt.c_gemm[i * ldc + j + 1]; | |||||
| } | |||||
| } else { | |||||
| for (i = 0; i < m; i++) | |||||
| for (j = 0; j <= i * 2; j+=2){ | |||||
| data_zgemmt.c_verify[i * ldc + j] = | |||||
| data_zgemmt.c_gemm[i * ldc + j]; | |||||
| data_zgemmt.c_verify[i * ldc + j + 1] = | |||||
| data_zgemmt.c_gemm[i * ldc + j + 1]; | |||||
| } | |||||
| } | |||||
| }else | |||||
| #endif | |||||
| if (uplo == 'L' || uplo == CblasLower) | if (uplo == 'L' || uplo == CblasLower) | ||||
| { | { | ||||
| for (i = 0; i < m; i++) | for (i = 0; i < m; i++) | ||||