| @@ -26,10 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| /************************************************************************************** | /************************************************************************************** | ||||
| * 2016/03/14 Werner Saar (wernsaar@googlemail.com) | |||||
| * 2016/03/18 Werner Saar (wernsaar@googlemail.com) | |||||
| * BLASTEST : OK | * BLASTEST : OK | ||||
| * CTEST : OK | * CTEST : OK | ||||
| * TEST : OK | * TEST : OK | ||||
| * LAPACK-TEST : OK | |||||
| **************************************************************************************/ | **************************************************************************************/ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| @@ -81,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 320 | |||||
| #define STACKSIZE 340 | |||||
| #define ALPHA_SP 296(SP) | #define ALPHA_SP 296(SP) | ||||
| #define FZERO 304(SP) | #define FZERO 304(SP) | ||||
| #else | #else | ||||
| @@ -127,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #define alpha_r vs30 | #define alpha_r vs30 | ||||
| #define alpha_vr vs31 | |||||
| #define o0 0 | #define o0 0 | ||||
| #define TBUFFER r14 | |||||
| #define o4 r15 | #define o4 r15 | ||||
| #define o12 r16 | #define o12 r16 | ||||
| #define o8 r17 | #define o8 r17 | ||||
| @@ -202,6 +203,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r17, 256(SP) | std r17, 256(SP) | ||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -220,6 +222,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r17, 200(SP) | stw r17, 200(SP) | ||||
| stw r16, 204(SP) | stw r16, 204(SP) | ||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | |||||
| #endif | #endif | ||||
| // stfd f1, ALPHA_SP | // stfd f1, ALPHA_SP | ||||
| @@ -259,24 +262,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| cmpwi cr0, K, 0 | cmpwi cr0, K, 0 | ||||
| ble .L999_H1 | ble .L999_H1 | ||||
| li PRE, 384 | |||||
| li PRE, 256 | |||||
| li o4 , 4 | li o4 , 4 | ||||
| li o8 , 8 | li o8 , 8 | ||||
| li o12, 12 | li o12, 12 | ||||
| li o16, 16 | li o16, 16 | ||||
| li o32, 32 | li o32, 32 | ||||
| li o48, 48 | li o48, 48 | ||||
| addi TBUFFER, SP, 320 | |||||
| addi T1, SP, 300 | addi T1, SP, 300 | ||||
| stfs f1, 0(T1) | stfs f1, 0(T1) | ||||
| stfs f1, 4(T1) | |||||
| stfs f1, 8(T1) | |||||
| stfs f1,12(T1) | |||||
| lxsspx vs28, 0, T1 | |||||
| xxspltw alpha_r, vs28 , 0 | |||||
| lxvw4x alpha_vr, 0, T1 | |||||
| lxsspx alpha_r, 0, T1 | |||||
| @@ -326,6 +324,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ld r17, 256(SP) | ld r17, 256(SP) | ||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -344,6 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| lwz r17, 200(SP) | lwz r17, 200(SP) | ||||
| lwz r16, 204(SP) | lwz r16, 204(SP) | ||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | |||||
| #endif | #endif | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| @@ -26,13 +26,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| /************************************************************************************** | /************************************************************************************** | ||||
| * 2016/03/14 Werner Saar (wernsaar@googlemail.com) | |||||
| * 2016/03/18 Werner Saar (wernsaar@googlemail.com) | |||||
| * BLASTEST : OK | * BLASTEST : OK | ||||
| * CTEST : OK | * CTEST : OK | ||||
| * TEST : OK | * TEST : OK | ||||
| * LAPACK-TEST : OK | |||||
| **************************************************************************************/ | **************************************************************************************/ | ||||
| srawi. J, N, 3 | srawi. J, N, 3 | ||||
| ble .LSGEMM_L8_END | ble .LSGEMM_L8_END | ||||
| @@ -26,10 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| /************************************************************************************** | /************************************************************************************** | ||||
| * 2016/03/14 Werner Saar (wernsaar@googlemail.com) | |||||
| * 2016/03/18 Werner Saar (wernsaar@googlemail.com) | |||||
| * BLASTEST : OK | * BLASTEST : OK | ||||
| * CTEST : OK | * CTEST : OK | ||||
| * TEST : OK | * TEST : OK | ||||
| * LAPACK-TEST : OK | |||||
| **************************************************************************************/ | **************************************************************************************/ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| @@ -81,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 320 | |||||
| #define STACKSIZE 340 | |||||
| #define ALPHA_SP 296(SP) | #define ALPHA_SP 296(SP) | ||||
| #define FZERO 304(SP) | #define FZERO 304(SP) | ||||
| #else | #else | ||||
| @@ -127,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #define alpha_r vs30 | #define alpha_r vs30 | ||||
| #define alpha_vr vs31 | |||||
| #define o0 0 | #define o0 0 | ||||
| #define TBUFFER r13 | |||||
| #define o12 r14 | #define o12 r14 | ||||
| #define o4 r15 | #define o4 r15 | ||||
| #define K1 r16 | #define K1 r16 | ||||
| @@ -138,7 +139,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define L r18 | #define L r18 | ||||
| #define T1 r19 | #define T1 r19 | ||||
| #define KK r20 | #define KK r20 | ||||
| #define KKK 21 | |||||
| #define KKK r21 | |||||
| #define I r22 | #define I r22 | ||||
| #define J r23 | #define J r23 | ||||
| #define AO r24 | #define AO r24 | ||||
| @@ -204,6 +205,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| std r13, 288(SP) | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -223,6 +225,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r16, 204(SP) | stw r16, 204(SP) | ||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | stw r14, 212(SP) | ||||
| stw r13, 216(SP) | |||||
| #endif | #endif | ||||
| // stfd f1, ALPHA_SP | // stfd f1, ALPHA_SP | ||||
| @@ -274,17 +277,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| li o16, 16 | li o16, 16 | ||||
| li o32, 32 | li o32, 32 | ||||
| li o48, 48 | li o48, 48 | ||||
| addi TBUFFER, SP, 320 | |||||
| addi T1, SP, 300 | addi T1, SP, 300 | ||||
| stfs f1, 0(T1) | stfs f1, 0(T1) | ||||
| stfs f1, 4(T1) | |||||
| stfs f1, 8(T1) | |||||
| stfs f1,12(T1) | |||||
| lxsspx vs28, 0, T1 | |||||
| lxsspx alpha_r, 0, T1 | |||||
| xxspltw alpha_r, vs28 , 0 | |||||
| lxvw4x alpha_vr, 0, T1 | |||||
| @@ -335,6 +334,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| ld r13, 288(SP) | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -354,6 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| lwz r16, 204(SP) | lwz r16, 204(SP) | ||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | lwz r14, 212(SP) | ||||
| lwz r13, 216(SP) | |||||
| #endif | #endif | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| @@ -26,14 +26,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| /************************************************************************************** | /************************************************************************************** | ||||
| * 2016/03/14 Werner Saar (wernsaar@googlemail.com) | |||||
| * 2016/03/18 Werner Saar (wernsaar@googlemail.com) | |||||
| * BLASTEST : OK | * BLASTEST : OK | ||||
| * CTEST : OK | * CTEST : OK | ||||
| * TEST : OK | * TEST : OK | ||||
| * LAPACK-TEST : OK | |||||
| **************************************************************************************/ | **************************************************************************************/ | ||||
| srawi. J, N, 3 | srawi. J, N, 3 | ||||
| ble .LSTRMM_L8_END | ble .LSTRMM_L8_END | ||||
| @@ -1977,12 +1977,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 8 | #define ZGEMM_DEFAULT_UNROLL_M 8 | ||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | #define ZGEMM_DEFAULT_UNROLL_N 2 | ||||
| #define SGEMM_DEFAULT_P 960 | |||||
| #define SGEMM_DEFAULT_P 480 | |||||
| #define DGEMM_DEFAULT_P 480 | #define DGEMM_DEFAULT_P 480 | ||||
| #define CGEMM_DEFAULT_P 480 | #define CGEMM_DEFAULT_P 480 | ||||
| #define ZGEMM_DEFAULT_P 240 | #define ZGEMM_DEFAULT_P 240 | ||||
| #define SGEMM_DEFAULT_Q 720 | |||||
| #define SGEMM_DEFAULT_Q 1440 | |||||
| #define DGEMM_DEFAULT_Q 720 | #define DGEMM_DEFAULT_Q 720 | ||||
| #define CGEMM_DEFAULT_Q 720 | #define CGEMM_DEFAULT_Q 720 | ||||
| #define ZGEMM_DEFAULT_Q 360 | #define ZGEMM_DEFAULT_Q 360 | ||||