This website works better with JavaScript.
Home
Issues
Pull Requests
Milestones
AI流水线
Repositories
Datasets
Forum
实训
竞赛
大数据
AI开发
Register
Sign In
OSchip
/
OpenBLAS
Not watched
Unwatch
Watch all
Watch but not notify
1
Star
0
Fork
0
Code
Releases
66
Wiki
evaluate
Activity
Issues
0
Pull Requests
0
Datasets
Model
Cloudbrain
HPC
Browse Source
optimized dtrsm_kernel_LT for POWER8
tags/v0.2.19^2
Werner Saar
10 years ago
parent
318cad9c37
commit
8b140220c8
2 changed files
with
45 additions
and
2 deletions
Split View
Diff Options
Show Stats
Download Patch File
Download Diff File
+1
-0
kernel/power/dtrsm_kernel_LT_16x4_power8.S
+44
-2
kernel/power/dtrsm_logic_LT_16x4_power8.S
+ 1
- 0
kernel/power/dtrsm_kernel_LT_16x4_power8.S
View File
@@ -219,6 +219,7 @@
li o24, 24
li o32, 32
li o48, 48
li PRE, 384
mr KK, OFFSET
+ 44
- 2
kernel/power/dtrsm_logic_LT_16x4_power8.S
View File
@@ -18,6 +18,33 @@ DSTRM_LT_L4x16_BEGIN:
mr BO, B
li L, -128
mr T1, CO
add T2, T1, LDC
add T3, T2, LDC
add T4, T3, LDC
and T1, T1, L
and T2, T2, L
and T3, T3, L
and T4, T4, L
dcbt T1, r0
dcbt T2, r0
dcbt T3, r0
dcbt T4, r0
addi T1, T1, 128
addi T2, T2, 128
addi T3, T3, 128
addi T4, T4, 128
dcbt T1, r0
dcbt T2, r0
dcbt T3, r0
dcbt T4, r0
DSTRM_LT_L4x16_LOOP_START:
@@ -26,15 +53,30 @@ DSTRM_LT_L4x16_LOOP_START:
addic. L, KK, 0
ble DSTRM_LT_L4x16_SAVE
ble- DSTRM_LT_L4x16_SAVE
DSTRM_LT_L4x16_LOOP:
dcbt AO, PRE
dcbt BO, PRE
KERNEL_16x4
addic. L, L, -1
ble- DSTRM_LT_L4x16_SAVE
dcbt AO, PRE
KERNEL_16x4
addic. L, L, -1
ble- DSTRM_LT_L4x16_SAVE
dcbt AO, PRE
KERNEL_16x4
addic. L, L, -1
ble- DSTRM_LT_L4x16_SAVE
dcbt AO, PRE
KERNEL_16x4
addic. L, L, -1
bgt DSTRM_LT_L4x16_LOOP
bgt+ DSTRM_LT_L4x16_LOOP
DSTRM_LT_L4x16_SAVE:
Write
Preview
Loading…
Cancel
Save