Browse Source

dtrsm_kernel_LT_8x2_bulldozer.S performance optimization

tags/v0.2.9.rc1
wernsaar 12 years ago
parent
commit
44d23881b5
1 changed file with 8 additions and 0 deletions
  1. +8
    -0
      kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S

+ 8
- 0
kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S View File

@@ -84,6 +84,9 @@

#endif

/*
 * Prefetch displacements (byte offsets in the address expression) used by
 * the prefetcht0 instructions in the .L52 loop:
 *   A_PR1 is added to the AO-based address, B_PR1 to the BO-based address.
 * NOTE(review): the values look empirically tuned for this kernel/CPU;
 * confirm with benchmarks before changing them.
 */
#define A_PR1 384
#define B_PR1 192


.macro KERNEL8x2_SUB
vmovddup -16*SIZE(BO,%rax,2), %xmm1
@@ -708,9 +711,14 @@
ALIGN_4

/*
 * .L52: loop body unrolled four times (four KERNEL8x2_SUB expansions per
 * pass), with software prefetch hints interleaved between the expansions.
 *
 * No flag-setting instruction is visible in this loop apart from whatever
 * KERNEL8x2_SUB expands to, and prefetcht0 does not modify flags, so the
 * condition tested by the closing jl presumably comes from the last macro
 * expansion -- TODO confirm against the full macro body.
 */
.L52:
/* Hint the line at AO + %rax*8 + A_PR1 into the cache hierarchy (T0 hint). */
prefetcht0 A_PR1(AO,%rax,8)
/* Hint the line at BO + %rax*2 + B_PR1; issued only once per pass. */
prefetcht0 B_PR1(BO,%rax,2)
KERNEL8x2_SUB
/*
 * Same address expression as above; it only targets a new line if
 * KERNEL8x2_SUB advances %rax -- presumably it does, verify in the macro.
 */
prefetcht0 A_PR1(AO,%rax,8)
KERNEL8x2_SUB
prefetcht0 A_PR1(AO,%rax,8)
KERNEL8x2_SUB
prefetcht0 A_PR1(AO,%rax,8)
KERNEL8x2_SUB

/* Repeat while the signed "less than" condition holds. */
jl .L52


Loading…
Cancel
Save