This website works better with JavaScript.
Home
Issues
Pull Requests
Milestones
AI流水线
Repositories
Datasets
Forum
实训
竞赛
大数据
AI开发
Register
Sign In
OSchip
/
OpenBLAS
Not watched
Unwatch
Watch all
Watch but not notify
1
Star
0
Fork
0
Code
Releases
66
Wiki
evaluate
Activity
Issues
0
Pull Requests
0
Datasets
Model
Cloudbrain
HPC
Browse Source
updated cdot and zdot on arm
tags/v0.2.16.rc1
Werner Saar
10 years ago
parent
d2f84c9c8a
commit
aafd3ab60e
2 changed files
with
13 additions
and
9 deletions
Split View
Diff Options
Show Stats
Download Patch File
Download Diff File
+6
-5
kernel/arm/cdot_vfp.S
+7
-4
kernel/arm/zdot_vfp.S
+ 6
- 5
kernel/arm/cdot_vfp.S
View File
@@ -185,14 +185,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r4, fp, #128
vstm r4, { s8 - s15} // store floating point registers
movs r4, #0 // clear floating point register
vmov s0, r4
vmov s1, s0
vmov s2, s0
vmov s3, s0
mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y
vsub.f32 s0 , s0 , s0
vsub.f32 s1 , s1 , s1
vsub.f32 s2 , s2 , s2
vsub.f32 s3 , s3 , s3
cmp N, #0
ble cdot_kernel_L999
+ 7
- 4
kernel/arm/zdot_vfp.S
View File
@@ -187,13 +187,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r4, fp, #128
vstm r4, { d8 - d15} // store floating point registers
movs r4, #0 // clear floating point register
vmov s0, r4
vcvt.f64.f32 d0, s0
vcvt.f64.f32 d1, s0
vcvt.f64.f32 d2, s0
vcvt.f64.f32 d3, s0
mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y
vsub.f64 d0 , d0 , d0
vsub.f64 d1 , d1 , d1
vsub.f64 d2 , d2 , d2
vsub.f64 d3 , d3 , d3
cmp N, #0
ble zdot_kernel_L999
Write
Preview
Loading…
Cancel
Save