| @@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/18 Werner Saar (wernsaar@googlemail.com) | |||
| * 2016/04/03 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| /*********************************************************************/ | |||
| @@ -130,10 +130,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #endif | |||
| #define o0 0 | |||
| #define alpha_r vs30 | |||
| #define alpha_i vs31 | |||
| #define TBUFFER r14 | |||
| #define alpha_dr vs28 | |||
| #define alpha_di vs29 | |||
| #define alpha_sr vs30 | |||
| #define alpha_si vs31 | |||
| #define NOTUSED r14 | |||
| #define L r15 | |||
| #define o12 r16 | |||
| #define o4 r17 | |||
| @@ -271,21 +275,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include "cgemm_macros_8x4_power8.S" | |||
| cmpwi cr0, M, 0 | |||
| ble .L999_H1 | |||
| ble L999_H1 | |||
| cmpwi cr0, N, 0 | |||
| ble .L999_H1 | |||
| ble L999_H1 | |||
| cmpwi cr0, K, 0 | |||
| ble .L999_H1 | |||
| ble L999_H1 | |||
| slwi LDC, LDC, ZBASE_SHIFT | |||
| li PRE, 256 | |||
| li PRE, 384 | |||
| li o4 , 4 | |||
| li o8 , 8 | |||
| li o12 , 12 | |||
| li o16 , 16 | |||
| li o32 , 32 | |||
| li o48 , 48 | |||
| addi TBUFFER, SP, 360 | |||
| #ifdef __64BIT__ | |||
| @@ -294,14 +297,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| addi T1 , SP, 224 | |||
| #endif | |||
| lxsspx alpha_r, 0, T1 | |||
| lxsspx alpha_i, o8, T1 | |||
| stxsspx vs1, 0, T1 | |||
| lxsspx alpha_dr, 0, T1 | |||
| stxsspx vs2, o8 , T1 | |||
| lxsspx alpha_di, o8, T1 | |||
| addi T1, SP, 360 | |||
| li T2, 0 | |||
| stw T2, 0(T1) | |||
| stw T2, 4(T1) | |||
| stw T2, 8(T1) | |||
| stxsspx alpha_dr, o12, T1 | |||
| lxvw4x alpha_sr, o0 , T1 | |||
| addi T1, T1, 16 | |||
| stw T2, 0(T1) | |||
| stw T2, 4(T1) | |||
| stw T2, 8(T1) | |||
| stxsspx alpha_di, o12, T1 | |||
| lxvw4x alpha_si, o0 , T1 | |||
| .align 5 | |||
| #include "cgemm_logic_8x4_power8.S" | |||
| .L999: | |||
| L999: | |||
| addi r3, 0, 0 | |||
| lfd f14, 0(SP) | |||
| @@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/18 Werner Saar (wernsaar@googlemail.com) | |||
| * 2016/04/03 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| /*********************************************************************/ | |||
| @@ -129,18 +129,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #endif | |||
| #define o0 0 | |||
| #define alpha_r vs30 | |||
| #define alpha_i vs31 | |||
| #define alpha_vr vs28 | |||
| #define alpha_vi vs29 | |||
| #define alpha_dr vs28 | |||
| #define alpha_di vs29 | |||
| #define alpha_sr vs30 | |||
| #define alpha_si vs31 | |||
| #define o12 r12 | |||
| #define KKK r13 | |||
| #define K1 r14 | |||
| #define L r15 | |||
| #define o16 r16 | |||
| #define TBUFFER r17 | |||
| #define NOTUSED r17 | |||
| #define T2 r19 | |||
| #define KK r20 | |||
| #define o8 r21 | |||
| @@ -278,21 +278,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include "cgemm_macros_8x4_power8.S" | |||
| cmpwi cr0, M, 0 | |||
| ble .L999_H1 | |||
| ble L999_H1 | |||
| cmpwi cr0, N, 0 | |||
| ble .L999_H1 | |||
| ble L999_H1 | |||
| cmpwi cr0, K, 0 | |||
| ble .L999_H1 | |||
| ble L999_H1 | |||
| slwi LDC, LDC, ZBASE_SHIFT | |||
| li PRE, 256 | |||
| li PRE, 384 | |||
| li o4 , 4 | |||
| li o8 , 8 | |||
| li o12 , 12 | |||
| li o16 , 16 | |||
| li o32 , 32 | |||
| li o48 , 48 | |||
| addi TBUFFER, SP, 360 | |||
| #ifdef __64BIT__ | |||
| @@ -301,14 +300,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| addi T1, SP, 224 | |||
| #endif | |||
| lxsspx alpha_r, 0, T1 | |||
| lxsspx alpha_i, o8, T1 | |||
| lxsspx alpha_dr, 0, T1 | |||
| lxsspx alpha_di, o8, T1 | |||
| addi T1, SP, 360 | |||
| li T2, 0 | |||
| stw T2, 0(T1) | |||
| stw T2, 4(T1) | |||
| stw T2, 8(T1) | |||
| stxsspx alpha_dr, o12, T1 | |||
| lxvw4x alpha_sr, o0 , T1 | |||
| addi T1, T1, 16 | |||
| stw T2, 0(T1) | |||
| stw T2, 4(T1) | |||
| stw T2, 8(T1) | |||
| stxsspx alpha_di, o12, T1 | |||
| lxvw4x alpha_si, o0 , T1 | |||
| .align 5 | |||
| #include "ctrmm_logic_8x4_power8.S" | |||
| .L999: | |||
| L999: | |||
| addi r3, 0, 0 | |||
| lfd f14, 0(SP) | |||