| @@ -798,7 +798,7 @@ Lmcount$lazy_ptr: | |||||
| #elif defined(PPC440FP2) | #elif defined(PPC440FP2) | ||||
| #define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
| #elif defined(POWER8) | #elif defined(POWER8) | ||||
| #define BUFFER_SIZE ( 64 << 20) | |||||
| #define BUFFER_SIZE ( 32 << 20) | |||||
| #else | #else | ||||
| #define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
| #endif | #endif | ||||
| @@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 512 | |||||
| #define STACKSIZE 32000 | |||||
| #define ALPHA_R_SP 296(SP) | #define ALPHA_R_SP 296(SP) | ||||
| #define ALPHA_I_SP 304(SP) | #define ALPHA_I_SP 304(SP) | ||||
| #define FZERO 312(SP) | #define FZERO 312(SP) | ||||
| @@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define alpha_sr vs30 | #define alpha_sr vs30 | ||||
| #define alpha_si vs31 | #define alpha_si vs31 | ||||
| #define FRAMEPOINTER r12 | |||||
| #define BBUFFER r14 | #define BBUFFER r14 | ||||
| #define L r15 | #define L r15 | ||||
| @@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| PROLOGUE | PROLOGUE | ||||
| PROFCODE | PROFCODE | ||||
| mr FRAMEPOINTER, SP | |||||
| addi SP, SP, -STACKSIZE | |||||
| addi SP, SP, -STACKSIZE | |||||
| addi SP, SP, -STACKSIZE | |||||
| addi SP, SP, -STACKSIZE | addi SP, SP, -STACKSIZE | ||||
| li r0, 0 | li r0, 0 | ||||
| @@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef linux | #ifdef linux | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| ld LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||||
| ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(_AIX) || defined(__APPLE__) | #if defined(_AIX) || defined(__APPLE__) | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| ld LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||||
| ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||||
| #else | #else | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| lwz B, FRAMESLOT(0) + STACKSIZE(SP) | |||||
| lwz C, FRAMESLOT(1) + STACKSIZE(SP) | |||||
| lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) | |||||
| lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||||
| lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||||
| lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER) | |||||
| #else | #else | ||||
| lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||||
| lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #ifdef TRMMKERNEL | #ifdef TRMMKERNEL | ||||
| #if defined(linux) && defined(__64BIT__) | #if defined(linux) && defined(__64BIT__) | ||||
| ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||||
| ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||||
| #endif | #endif | ||||
| #if defined(_AIX) || defined(__APPLE__) | #if defined(_AIX) || defined(__APPLE__) | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||||
| ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||||
| #else | #else | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) | |||||
| lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER) | |||||
| #else | #else | ||||
| lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||||
| lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| li o32 , 32 | li o32 , 32 | ||||
| li o48 , 48 | li o48 , 48 | ||||
| li T1, 512 | |||||
| slwi T1, T1, 16 | |||||
| add BBUFFER, A, T1 | |||||
| addi BBUFFER, SP, 512+4096 | |||||
| li T1, -4096 | |||||
| and BBUFFER, BBUFFER, T1 | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| @@ -392,6 +397,9 @@ L999: | |||||
| #endif | #endif | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | |||||
| addi SP, SP, STACKSIZE | |||||
| addi SP, SP, STACKSIZE | |||||
| blr | blr | ||||
| @@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 512 | |||||
| #define STACKSIZE 32752 | |||||
| #define ALPHA_SP 296(SP) | #define ALPHA_SP 296(SP) | ||||
| #define FZERO 304(SP) | #define FZERO 304(SP) | ||||
| #else | #else | ||||
| @@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define o0 0 | #define o0 0 | ||||
| #define FRAMEPOINTER r12 | |||||
| #define BBUFFER r14 | #define BBUFFER r14 | ||||
| #define o4 r15 | #define o4 r15 | ||||
| #define o12 r16 | #define o12 r16 | ||||
| @@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| PROLOGUE | PROLOGUE | ||||
| PROFCODE | PROFCODE | ||||
| mr FRAMEPOINTER, SP | |||||
| addi SP, SP, -STACKSIZE | |||||
| addi SP, SP, -STACKSIZE | |||||
| addi SP, SP, -STACKSIZE | |||||
| addi SP, SP, -STACKSIZE | addi SP, SP, -STACKSIZE | ||||
| li r0, 0 | li r0, 0 | ||||
| @@ -231,7 +237,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(_AIX) || defined(__APPLE__) | #if defined(_AIX) || defined(__APPLE__) | ||||
| #if !defined(__64BIT__) && defined(DOUBLE) | #if !defined(__64BIT__) && defined(DOUBLE) | ||||
| lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||||
| lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -239,17 +245,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(TRMMKERNEL) | #if defined(TRMMKERNEL) | ||||
| #if defined(linux) && defined(__64BIT__) | #if defined(linux) && defined(__64BIT__) | ||||
| ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||||
| ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||||
| #endif | #endif | ||||
| #if defined(_AIX) || defined(__APPLE__) | #if defined(_AIX) || defined(__APPLE__) | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||||
| ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||||
| #else | #else | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||||
| lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||||
| #else | #else | ||||
| lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||||
| lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| li o32, 32 | li o32, 32 | ||||
| li o48, 48 | li o48, 48 | ||||
| li T1, 512 | |||||
| slwi T1, T1, 16 | |||||
| add BBUFFER, A, T1 | |||||
| addi BBUFFER, SP, 512+4096 | |||||
| li T1, -4096 | |||||
| and BBUFFER, BBUFFER, T1 | |||||
| addi T1, SP, 300 | addi T1, SP, 300 | ||||
| stxsspx f1, o0 , T1 | stxsspx f1, o0 , T1 | ||||
| @@ -355,6 +361,9 @@ L999: | |||||
| #endif | #endif | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | |||||
| addi SP, SP, STACKSIZE | |||||
| addi SP, SP, STACKSIZE | |||||
| blr | blr | ||||
| @@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SNUMOPT 16 | #define SNUMOPT 16 | ||||
| #define DNUMOPT 8 | #define DNUMOPT 8 | ||||
| #define GEMM_DEFAULT_OFFSET_A 131072 | |||||
| #define GEMM_DEFAULT_OFFSET_B 131072 | |||||
| #define GEMM_DEFAULT_OFFSET_A 4096 | |||||
| #define GEMM_DEFAULT_OFFSET_B 4096 | |||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | #define GEMM_DEFAULT_ALIGN 0x03fffUL | ||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | #define SGEMM_DEFAULT_UNROLL_M 16 | ||||
| @@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CGEMM_DEFAULT_Q 720 | #define CGEMM_DEFAULT_Q 720 | ||||
| #define ZGEMM_DEFAULT_Q 720 | #define ZGEMM_DEFAULT_Q 720 | ||||
| #define SGEMM_DEFAULT_R 14400 | |||||
| #define SGEMM_DEFAULT_R 21600 | |||||
| #define DGEMM_DEFAULT_R 14400 | #define DGEMM_DEFAULT_R 14400 | ||||
| #define CGEMM_DEFAULT_R 14400 | |||||
| #define CGEMM_DEFAULT_R 16200 | |||||
| #define ZGEMM_DEFAULT_R 14400 | #define ZGEMM_DEFAULT_R 14400 | ||||
| #define SYMV_P 8 | #define SYMV_P 8 | ||||