| @@ -798,7 +798,7 @@ Lmcount$lazy_ptr: | |||
| #elif defined(PPC440FP2) | |||
| #define BUFFER_SIZE ( 16 << 20) | |||
| #elif defined(POWER8) | |||
| #define BUFFER_SIZE ( 64 << 20) | |||
| #define BUFFER_SIZE ( 32 << 20) | |||
| #else | |||
| #define BUFFER_SIZE ( 16 << 20) | |||
| #endif | |||
| @@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #endif | |||
| #ifdef __64BIT__ | |||
| #define STACKSIZE 512 | |||
| #define STACKSIZE 32000 | |||
| #define ALPHA_R_SP 296(SP) | |||
| #define ALPHA_I_SP 304(SP) | |||
| #define FZERO 312(SP) | |||
| @@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define alpha_sr vs30 | |||
| #define alpha_si vs31 | |||
| #define FRAMEPOINTER r12 | |||
| #define BBUFFER r14 | |||
| #define L r15 | |||
| @@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| PROLOGUE | |||
| PROFCODE | |||
| mr FRAMEPOINTER, SP | |||
| addi SP, SP, -STACKSIZE | |||
| addi SP, SP, -STACKSIZE | |||
| addi SP, SP, -STACKSIZE | |||
| addi SP, SP, -STACKSIZE | |||
| li r0, 0 | |||
| @@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #ifdef linux | |||
| #ifdef __64BIT__ | |||
| ld LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
| ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
| #endif | |||
| #endif | |||
| #if defined(_AIX) || defined(__APPLE__) | |||
| #ifdef __64BIT__ | |||
| ld LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
| ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
| #else | |||
| #ifdef DOUBLE | |||
| lwz B, FRAMESLOT(0) + STACKSIZE(SP) | |||
| lwz C, FRAMESLOT(1) + STACKSIZE(SP) | |||
| lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) | |||
| lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
| lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
| lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER) | |||
| #else | |||
| lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
| lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
| #endif | |||
| #endif | |||
| #endif | |||
| #ifdef TRMMKERNEL | |||
| #if defined(linux) && defined(__64BIT__) | |||
| ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
| ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
| #endif | |||
| #if defined(_AIX) || defined(__APPLE__) | |||
| #ifdef __64BIT__ | |||
| ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
| ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
| #else | |||
| #ifdef DOUBLE | |||
| lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) | |||
| lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER) | |||
| #else | |||
| lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
| lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
| #endif | |||
| #endif | |||
| #endif | |||
| @@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| li o32 , 32 | |||
| li o48 , 48 | |||
| li T1, 512 | |||
| slwi T1, T1, 16 | |||
| add BBUFFER, A, T1 | |||
| addi BBUFFER, SP, 512+4096 | |||
| li T1, -4096 | |||
| and BBUFFER, BBUFFER, T1 | |||
| #ifdef __64BIT__ | |||
| @@ -392,6 +397,9 @@ L999: | |||
| #endif | |||
| addi SP, SP, STACKSIZE | |||
| addi SP, SP, STACKSIZE | |||
| addi SP, SP, STACKSIZE | |||
| addi SP, SP, STACKSIZE | |||
| blr | |||
| @@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #endif | |||
| #ifdef __64BIT__ | |||
| #define STACKSIZE 512 | |||
| #define STACKSIZE 32752 | |||
| #define ALPHA_SP 296(SP) | |||
| #define FZERO 304(SP) | |||
| #else | |||
| @@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define o0 0 | |||
| #define FRAMEPOINTER r12 | |||
| #define BBUFFER r14 | |||
| #define o4 r15 | |||
| #define o12 r16 | |||
| @@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| PROLOGUE | |||
| PROFCODE | |||
| mr FRAMEPOINTER, SP | |||
| addi SP, SP, -STACKSIZE | |||
| addi SP, SP, -STACKSIZE | |||
| addi SP, SP, -STACKSIZE | |||
| addi SP, SP, -STACKSIZE | |||
| li r0, 0 | |||
| @@ -231,7 +237,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if defined(_AIX) || defined(__APPLE__) | |||
| #if !defined(__64BIT__) && defined(DOUBLE) | |||
| lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
| lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
| #endif | |||
| #endif | |||
| @@ -239,17 +245,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if defined(TRMMKERNEL) | |||
| #if defined(linux) && defined(__64BIT__) | |||
| ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
| ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
| #endif | |||
| #if defined(_AIX) || defined(__APPLE__) | |||
| #ifdef __64BIT__ | |||
| ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
| ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
| #else | |||
| #ifdef DOUBLE | |||
| lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
| lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
| #else | |||
| lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
| lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
| #endif | |||
| #endif | |||
| #endif | |||
| @@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| li o32, 32 | |||
| li o48, 48 | |||
| li T1, 512 | |||
| slwi T1, T1, 16 | |||
| add BBUFFER, A, T1 | |||
| addi BBUFFER, SP, 512+4096 | |||
| li T1, -4096 | |||
| and BBUFFER, BBUFFER, T1 | |||
| addi T1, SP, 300 | |||
| stxsspx f1, o0 , T1 | |||
| @@ -355,6 +361,9 @@ L999: | |||
| #endif | |||
| addi SP, SP, STACKSIZE | |||
| addi SP, SP, STACKSIZE | |||
| addi SP, SP, STACKSIZE | |||
| addi SP, SP, STACKSIZE | |||
| blr | |||
| @@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SNUMOPT 16 | |||
| #define DNUMOPT 8 | |||
| #define GEMM_DEFAULT_OFFSET_A 131072 | |||
| #define GEMM_DEFAULT_OFFSET_B 131072 | |||
| #define GEMM_DEFAULT_OFFSET_A 4096 | |||
| #define GEMM_DEFAULT_OFFSET_B 4096 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| @@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CGEMM_DEFAULT_Q 720 | |||
| #define ZGEMM_DEFAULT_Q 720 | |||
| #define SGEMM_DEFAULT_R 14400 | |||
| #define SGEMM_DEFAULT_R 21600 | |||
| #define DGEMM_DEFAULT_R 14400 | |||
| #define CGEMM_DEFAULT_R 14400 | |||
| #define CGEMM_DEFAULT_R 16200 | |||
| #define ZGEMM_DEFAULT_R 14400 | |||
| #define SYMV_P 8 | |||