Browse Source

Ref #65. Fixed 64-bit Windows calling convention bug in cdot and zdot.

According to the 64-bit Windows calling convention, the cdot kernel must return its result in %rax instead of %xmm0.
In zdot, the caller allocates memory for the return value and passes its address as a hidden first parameter. Thus, the callee (zdot) must store the result in that memory and return its address in %rax.
tags/v0.1alpha2.5
traits 14 years ago
parent
commit
c852ce3981
2 changed files with 19 additions and 3 deletions
  1. +4
    -0
      kernel/x86_64/zdot_sse.S
  2. +15
    -3
      kernel/x86_64/zdot_sse2.S

+ 4
- 0
kernel/x86_64/zdot_sse.S View File

@@ -3483,6 +3483,10 @@
subss %xmm3, %xmm1
#endif
unpcklps %xmm1, %xmm0
#ifdef WINDOWS_ABI
movq %xmm0, %rax
#endif

RESTOREREGISTERS



+ 15
- 3
kernel/x86_64/zdot_sse2.S View File

@@ -39,14 +39,19 @@
#define ASSEMBLER
#include "common.h"

#ifndef WINDOWS_ABI
#define N ARG1 /* rdi */
#define X ARG2 /* rsi */
#define INCX ARG3 /* rdx */
#define Y ARG4 /* rcx */
#ifndef WINDOWS_ABI
#define INCY ARG5 /* r8 */
#else
#define INCY %r10
#define RESULT_ADDRESS ARG1 /*rcx*/
#define N ARG2 /* rdx */
#define X ARG3 /* r8 */
#define INCX ARG4 /* r9*/
#define Y %r10
#define INCY %r11
#endif

#include "l1param.h"
@@ -64,7 +69,8 @@
PROFCODE

#ifdef WINDOWS_ABI
movq 40(%rsp), INCY
movq 40(%rsp), Y
movq 48(%rsp), INCY
#endif

SAVEREGISTERS
@@ -1544,6 +1550,12 @@
subsd %xmm3, %xmm1
#endif

#ifdef WINDOWS_ABI
movq RESULT_ADDRESS, %rax
movsd %xmm0, (%rax)
movsd %xmm1, 8(%rax)
#endif

RESTOREREGISTERS
ret



Loading…
Cancel
Save