You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

zasum_ppc440.S 6.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* ASSEMBLER is defined before including common.h, presumably so the   */
  /* header exposes its assembler-side definitions -- TODO confirm.      */
  /* Names used below but not defined in this file (PROLOGUE, PROFCODE,  */
  /* EPILOGUE, LL, LDINT, LFDX, LFDUX, FADD, SIZE, ZBASE_SHIFT, SP, CTR) */
  /* are expected to come from that header.                              */
  38. #define ASSEMBLER
  39. #include "common.h"
  /* N (r3): element count on entry. With F_INTERFACE it is first an     */
  /* address that the kernel dereferences to obtain the count.           */
  40. #define N r3
  /* X (r4): address of the input data; advanced by the update-form      */
  /* loads as the kernel walks the vector.                               */
  41. #define X r4
  /* INCX (r5): stride on entry (an address under F_INTERFACE); shifted  */
  /* left by ZBASE_SHIFT before use as a load offset.                    */
  42. #define INCX r5
  /* INCXM1 (r9): scaled INCX minus SIZE; offset used to reach the first */
  /* value of the next pair from the current X.                          */
  43. #define INCXM1 r9
  /* PREX (r8): offset (3 * 16 * SIZE) added to X by the dcbt touches.   */
  44. #define PREX r8
  /* FZERO (f0): loaded with 0.0; f0 also serves as the first of the     */
  /* eight partial-sum accumulators f0-f7.                               */
  45. #define FZERO f0
  /* STACKSIZE: bytes reserved on the stack. Offsets 0..143 hold the     */
  /* saved f14-f31 (18 doubles); offset 144 is a scratch word.           */
  46. #define STACKSIZE 160
  /* Kernel: sums the absolute values of all values in a strided vector  */
  /* whose elements are pairs of adjacent floating-point values (the     */
  /* file name zasum suggests real/imaginary parts of complex numbers -- */
  /* TODO confirm against the caller).                                   */
  /*   In : N = element count, X = data address, INCX = stride           */
  /*        (N and INCX are addresses when F_INTERFACE is defined).      */
  /*   Out: f1 = sum of |value| over 2*N values; 0.0 when N <= 0 or the  */
  /*        scaled INCX <= 0. f1 is presumably the floating-point return */
  /*        register of the target ABI -- TODO confirm.                  */
  /*   Uses: r0, r8, r9, CTR, cr0, f0-f13; f14-f31 are saved/restored.   */
  /* PROLOGUE / PROFCODE / EPILOGUE are macros not defined in this file. */
  47. PROLOGUE
  48. PROFCODE
  /* Reserve STACKSIZE bytes. This happens before the early-exit checks  */
  /* so the restore sequence at LL(999) is valid on every path.          */
  49. addi SP, SP, -STACKSIZE
  /* r0 = 0; stored to the scratch slot below to build a float zero.     */
  50. li r0, 0
  /* Save f14-f31: all eighteen are overwritten by the unrolled loop.    */
  51. stfd f14, 0(SP)
  52. stfd f15, 8(SP)
  53. stfd f16, 16(SP)
  54. stfd f17, 24(SP)
  55. stfd f18, 32(SP)
  56. stfd f19, 40(SP)
  57. stfd f20, 48(SP)
  58. stfd f21, 56(SP)
  59. stfd f22, 64(SP)
  60. stfd f23, 72(SP)
  61. stfd f24, 80(SP)
  62. stfd f25, 88(SP)
  63. stfd f26, 96(SP)
  64. stfd f27, 104(SP)
  65. stfd f28, 112(SP)
  66. stfd f29, 120(SP)
  67. stfd f30, 128(SP)
  68. stfd f31, 136(SP)
  /* Store integer 0 to the scratch word and reload it as a single-      */
  /* precision float: an all-zero bit pattern is +0.0, so FZERO = 0.0.   */
  69. stw r0, 144(SP)
  70. lfs FZERO,144(SP)
  /* With F_INTERFACE, N and INCX hold addresses; replace each with the  */
  /* integer loaded from that address.                                   */
  71. #ifdef F_INTERFACE
  72. LDINT N, 0(N)
  73. LDINT INCX, 0(INCX)
  74. #endif
  /* Zero accumulators f1-f7 (f0 is already FZERO), interleaved with the */
  /* address setup:                                                      */
  /*   INCX  <<= ZBASE_SHIFT  (presumably element stride -> byte stride) */
  /*   INCXM1 = INCX - SIZE                                              */
  /*   X     -= INCXM1        so X + INCXM1 is the original X            */
  /*   PREX   = 3 * 16 * SIZE                                            */
  /* After this, "LFDX f, X, INCXM1" reads the first value of the next   */
  /* pair and "LFDUX f, X, INCX" reads the second and moves X onto it    */
  /* (assuming LFDX/LFDUX expand to the indexed and indexed-with-update  */
  /* floating-point loads -- TODO confirm in common.h).                  */
  75. fmr f1, FZERO
  76. slwi INCX, INCX, ZBASE_SHIFT
  77. fmr f2, FZERO
  78. fmr f3, FZERO
  79. subi INCXM1, INCX, SIZE
  80. fmr f4, FZERO
  81. sub X, X, INCXM1
  82. fmr f5, FZERO
  83. li PREX, 3 * 16 * SIZE
  84. fmr f6, FZERO
  /* Early exits: N <= 0 or scaled INCX <= 0 jump to LL(999) with every  */
  /* accumulator still zero, so the result is 0.0.                       */
  85. cmpwi cr0, N, 0
  86. fmr f7, FZERO
  87. ble- LL(999)
  88. cmpwi cr0, INCX, 0
  89. ble- LL(999)
  /* CTR = N >> 3: number of groups of 8 pairs (16 values). If there is  */
  /* no full group, go straight to the remainder loop at LL(150).        */
  90. srawi. r0, N, 3
  91. mtspr CTR, r0
  92. beq- LL(150)
  /* Pipeline fill: load the first group. Pairs 0-3 go to f8-f15 and     */
  /* pairs 4-7 to f24-f31, while f16-f23 receive |f8|..|f15|.            */
  93. LFDX f8, X, INCXM1
  94. LFDUX f9, X, INCX
  95. LFDX f10, X, INCXM1
  96. LFDUX f11, X, INCX
  97. LFDX f12, X, INCXM1
  98. LFDUX f13, X, INCX
  99. LFDX f14, X, INCXM1
  100. LFDUX f15, X, INCX
  101. fabs f16, f8
  102. LFDX f24, X, INCXM1
  103. fabs f17, f9
  104. LFDUX f25, X, INCX
  105. fabs f18, f10
  106. LFDX f26, X, INCXM1
  107. fabs f19, f11
  108. LFDUX f27, X, INCX
  109. fabs f20, f12
  110. LFDX f28, X, INCXM1
  111. fabs f21, f13
  112. LFDUX f29, X, INCX
  113. fabs f22, f14
  114. LFDX f30, X, INCXM1
  115. fabs f23, f15
  116. LFDUX f31, X, INCX
  /* Decrement CTR; if the group just loaded was the last one, skip the  */
  /* steady-state loop and drain the pipeline at LL(120).                */
  117. bdz LL(120)
  118. .align 4
  /* Steady state: each trip loads the next 16 values while accumulating */
  /* the 16 loaded previously. On entry f16-f23 = |f8..f15| of the       */
  /* previous group and f24-f31 hold its raw second half.                */
  119. LL(110):
  /* First half: add the pending |f8..f15| into f0-f7, take fabs of      */
  /* f24-f31 into f16-f23, and reload f8-f15 with the next four pairs.   */
  120. LFDX f8, X, INCXM1
  121. FADD f0, f0, f16
  /* dcbt touches the cache block at X + PREX; builds without PPCG4 emit */
  /* a nop in the same position instead.                                 */
  122. #ifdef PPCG4
  123. dcbt X, PREX
  124. #else
  125. nop
  126. #endif
  127. fabs f16, f24
  128. LFDUX f9, X, INCX
  129. FADD f1, f1, f17
  /* The plain nops here and below are presumably scheduling/issue-slot  */
  /* padding for the target core -- TODO confirm.                        */
  130. nop
  131. fabs f17, f25
  132. LFDX f10, X, INCXM1
  133. FADD f2, f2, f18
  134. nop
  135. fabs f18, f26
  136. LFDUX f11, X, INCX
  137. FADD f3, f3, f19
  138. nop
  139. fabs f19, f27
  140. LFDX f12, X, INCXM1
  141. FADD f4, f4, f20
  /* A second touch per half is issued only when both PPCG4 and DOUBLE   */
  /* are defined; otherwise a nop takes its place.                       */
  142. #if defined(PPCG4) && defined(DOUBLE)
  143. dcbt X, PREX
  144. #else
  145. nop
  146. #endif
  147. fabs f20, f28
  148. LFDUX f13, X, INCX
  149. FADD f5, f5, f21
  150. nop
  151. fabs f21, f29
  152. LFDX f14, X, INCXM1
  153. FADD f6, f6, f22
  154. nop
  155. fabs f22, f30
  156. LFDUX f15, X, INCX
  157. FADD f7, f7, f23
  158. nop
  159. fabs f23, f31
  /* Second half: add the pending |f24..f31| into f0-f7, take fabs of    */
  /* the freshly loaded f8-f15 into f16-f23, and reload f24-f31 with the */
  /* following four pairs.                                               */
  160. LFDX f24, X, INCXM1
  161. FADD f0, f0, f16
  162. #ifdef PPCG4
  163. dcbt X, PREX
  164. #else
  165. nop
  166. #endif
  167. fabs f16, f8
  168. LFDUX f25, X, INCX
  169. FADD f1, f1, f17
  170. nop
  171. fabs f17, f9
  172. LFDX f26, X, INCXM1
  173. FADD f2, f2, f18
  174. nop
  175. fabs f18, f10
  176. LFDUX f27, X, INCX
  177. FADD f3, f3, f19
  178. nop
  179. fabs f19, f11
  180. LFDX f28, X, INCXM1
  181. FADD f4, f4, f20
  182. #if defined(PPCG4) && defined(DOUBLE)
  183. dcbt X, PREX
  184. #else
  185. nop
  186. #endif
  187. fabs f20, f12
  188. LFDUX f29, X, INCX
  189. FADD f5, f5, f21
  190. nop
  191. fabs f21, f13
  192. LFDX f30, X, INCXM1
  193. FADD f6, f6, f22
  194. nop
  195. fabs f22, f14
  196. LFDUX f31, X, INCX
  197. FADD f7, f7, f23
  198. fabs f23, f15
  /* Decrement CTR and repeat while groups remain.                       */
  199. bdnz LL(110)
  200. .align 4
  /* Pipeline drain: no more loads. Add the pending |f8..f15| (held in   */
  /* f16-f23), then take fabs of f24-f31 and add those as well.          */
  201. LL(120):
  202. FADD f0, f0, f16
  203. fabs f16, f24
  204. FADD f1, f1, f17
  205. fabs f17, f25
  206. FADD f2, f2, f18
  207. fabs f18, f26
  208. FADD f3, f3, f19
  209. fabs f19, f27
  210. FADD f4, f4, f20
  211. fabs f20, f28
  212. FADD f5, f5, f21
  213. fabs f21, f29
  214. FADD f6, f6, f22
  215. fabs f22, f30
  216. FADD f7, f7, f23
  217. fabs f23, f31
  218. FADD f0, f0, f16
  219. FADD f1, f1, f17
  220. FADD f2, f2, f18
  221. FADD f3, f3, f19
  222. FADD f4, f4, f20
  223. FADD f5, f5, f21
  224. FADD f6, f6, f22
  225. FADD f7, f7, f23
  226. .align 4
  /* Remainder: CTR = N & 7 leftover pairs; none left -> final reduce.   */
  227. LL(150):
  228. andi. r0, N, 7
  229. mtspr CTR, r0
  230. beq LL(999)
  231. .align 4
  /* One pair per trip: load both values, take fabs of each, and add     */
  /* them into f0 and f1 respectively.                                   */
  232. LL(160):
  233. LFDX f8, X, INCXM1
  234. LFDUX f9, X, INCX
  235. fabs f8, f8
  236. fabs f9, f9
  237. FADD f0, f0, f8
  238. FADD f1, f1, f9
  239. bdnz LL(160)
  240. .align 4
  /* Final reduction: fold the eight partial sums pairwise,              */
  /* ((f0+f1)+(f2+f3)) + ((f4+f5)+(f6+f7)), leaving the total in f1.     */
  241. LL(999):
  242. FADD f0, f0, f1
  243. FADD f2, f2, f3
  244. FADD f4, f4, f5
  245. FADD f6, f6, f7
  246. FADD f0, f0, f2
  247. FADD f4, f4, f6
  248. FADD f1, f0, f4
  /* Restore f14-f31 from the slots written at entry.                    */
  249. lfd f14, 0(SP)
  250. lfd f15, 8(SP)
  251. lfd f16, 16(SP)
  252. lfd f17, 24(SP)
  253. lfd f18, 32(SP)
  254. lfd f19, 40(SP)
  255. lfd f20, 48(SP)
  256. lfd f21, 56(SP)
  257. lfd f22, 64(SP)
  258. lfd f23, 72(SP)
  259. lfd f24, 80(SP)
  260. lfd f25, 88(SP)
  261. lfd f26, 96(SP)
  262. lfd f27, 104(SP)
  263. lfd f28, 112(SP)
  264. lfd f29, 120(SP)
  265. lfd f30, 128(SP)
  266. lfd f31, 136(SP)
  /* Release the stack area and return to the caller.                    */
  267. addi SP, SP, STACKSIZE
  268. blr
  269. EPILOGUE