You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

ztrsm_kernel_2x2_RT.S 33 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build-time configuration for this Alpha kernel. */
  /* ASSEMBLER is defined before common.h is included; presumably the header */
  /* keys its assembler-side definitions off it (confirm in common.h). */
  38. #define ASSEMBLER
  39. #include "common.h"
  /* The build must select a CPU generation: compilation stops with an */
  /* error unless at least one of EV4, EV5 or EV6 is defined. */
  40. #if !defined(EV4) && !defined(EV5) && !defined(EV6)
  41. #error "Architecture is not specified."
  42. #endif
  /* Per-CPU tuning knobs: */
  /* PREFETCHSIZE - look-ahead distance, multiplied by SIZE where it is used */
  /* as the offset of the loads into $31 in the main loop */
  /* (presumably prefetch hints; confirm against the Alpha */
  /* architecture manual). */
  /* UNOP - padding slot used in the main loop: expands to a real */
  /* "unop" instruction on EV6 and to nothing on EV5/EV4. */
  43. #ifdef EV6
  44. #define PREFETCHSIZE 56
  45. #define UNOP unop
  46. #endif
  47. #ifdef EV5
  48. #define PREFETCHSIZE 48
  49. #define UNOP
  50. #endif
  /* EV4 defines no PREFETCHSIZE; the loads that use it in the main loop */
  /* are wrapped in "#ifndef EV4". */
  51. #ifdef EV4
  52. #define UNOP
  53. #endif
  /* Assembler set-up for the kernel entry point. */
  /* noat: this file names $at itself (the AO alias and the PROFILE */
  /* prologue both use it), so the assembler is told not to treat it as */
  /* its own temporary. */
  54. .set noat
  /* noreorder: asks the assembler to keep instructions in written order */
  /* (the loops below are scheduled by hand). */
  55. .set noreorder
  /* NOTE(review): the target is set to ev6 unconditionally, even when the */
  /* build selected EV4 or EV5 above - confirm this is intended. */
  56. .arch ev6
  57. .text
  /* Presumably a power-of-two alignment request (2^5 = 32 bytes) - confirm */
  /* against the assembler in use. */
  58. .align 5
  /* CNAME is not defined in the visible part of this file; it is expected */
  /* to come from the build or common.h. It is exported and opened here as */
  /* the procedure entry. */
  59. .globl CNAME
  60. .ent CNAME
  /* Stack frame size in bytes. The prologue lowers $sp by this amount and */
  /* stores $f2-$f9 at offsets 0..56 of the new frame. */
  61. #define STACKSIZE 80
  /* Problem dimensions. The prologue tests each against zero and jumps to */
  /* $L999 when any is <= 0; they are not loaded from memory in the */
  /* visible code. */
  62. #define M $16
  63. #define N $17
  64. #define K $18
  /* Matrix pointers. B, C and LDC are loaded by the prologue from the */
  /* caller's stack area (offsets 0, 8 and 16 above the new frame); LDC is */
  /* then shifted left by ZBASE_SHIFT. A is not loaded in the visible code; */
  /* presumably it arrives in $21 as a register argument - TODO confirm */
  /* against the calling convention. */
  65. #define A $21
  66. #define B $22
  67. #define C $20
  68. #define LDC $23
  /* C1/C2: pointers to the one or two columns of C currently being written. */
  69. #define C1 $19
  70. #define C2 $24
  /* AO/BO: running read pointers into the A and B panels. AO lives in $at, */
  /* which is why the file is assembled with ".set noat". */
  71. #define AO $at
  72. #define BO $5
  /* Loop counters: I is derived from M (row blocks), J from N (column */
  /* blocks), L is the inner-loop trip counter. */
  73. #define I $6
  74. #define J $7
  75. #define L $8
  /* a1-a4 / b1-b4: values loaded through AO / BO (b* through B in the */
  /* LT/RN prologues). */
  76. #define a1 $f16
  77. #define a2 $f17
  78. #define a3 $f18
  79. #define a4 $f19
  80. #define b1 $f20
  81. #define b2 $f21
  82. #define b3 $f22
  83. #define b4 $f23
  /* t1-t4: product temporaries folded into the c* accumulators by ADDn. */
  84. #define t1 $f24
  85. #define t2 $f25
  86. #define t3 $f26
  87. #define t4 $f27
  /* a5, a6, b5: extra operand registers used by the unrolled loops. */
  /* Note that alpha_i/alpha_r alias the same registers as b5/a6; the */
  /* alpha_* names are not referenced in the visible part of the kernel. */
  88. #define a5 $f28
  89. #define a6 $f30
  90. #define b5 $f29
  91. #define alpha_i $f29
  92. #define alpha_r $f30
  /* c01-c16: floating-point accumulators, mapped onto $f0-$f15. $f2-$f9 */
  /* (c03-c10) are the registers the prologue saves on the stack. */
  93. #define c01 $f0
  94. #define c02 $f1
  95. #define c03 $f2
  96. #define c04 $f3
  97. #define c05 $f4
  98. #define c06 $f5
  99. #define c07 $f6
  100. #define c08 $f7
  101. #define c09 $f8
  102. #define c10 $f9
  103. #define c11 $f10
  104. #define c12 $f11
  105. #define c13 $f12
  106. #define c14 $f13
  107. #define c15 $f14
  108. #define c16 $f15
  /* Integer scratch and bookkeeping registers. */
  /* TMP1/TMP2 - address and count temporaries. */
  /* KK - running offset count, seeded from OFFSET, M or N depending */
  /* on the LN/LT/RN/RT variant. */
  /* AORIG - base of A kept for the LN/RT variants, from which AO is */
  /* recomputed. */
  /* OFFSET - loaded by the prologue from the caller's stack area */
  /* (offset 24 above the new frame). */
  /* The prologue's argument checks use $0, $1 and $2 by number before */
  /* these aliases take on their roles. */
  109. #define TMP1 $0
  110. #define TMP2 $1
  111. #define KK $2
  112. #define AORIG $3
  113. #define OFFSET $4
  /* ADD1..ADD6 each expand to ADD or SUB (both defined outside this view), */
  /* chosen by the solve variant (LN/LT versus the rest) and by CONJ. */
  /* ADD1-ADD4 are used in the accumulation loops; ADD5/ADD6 are used in the */
  /* solve step that follows them. Presumably the table encodes the sign */
  /* pattern of a complex multiply with or without conjugation - confirm */
  /* against the reference kernel before changing any entry. */
  114. #if defined(LN) || defined(LT)
  115. #ifndef CONJ
  /* Non-conjugated table (the same in both variant branches). */
  116. #define ADD1 ADD
  117. #define ADD2 SUB
  118. #define ADD3 ADD
  119. #define ADD4 ADD
  120. #define ADD5 SUB
  121. #define ADD6 ADD
  122. #else
  /* CONJ with LN/LT: ADD3 and ADD6 subtract. */
  123. #define ADD1 ADD
  124. #define ADD2 ADD
  125. #define ADD3 SUB
  126. #define ADD4 ADD
  127. #define ADD5 ADD
  128. #define ADD6 SUB
  129. #endif
  130. #else
  131. #ifndef CONJ
  /* Non-conjugated table for the remaining variants - identical to the */
  /* LN/LT one above. */
  132. #define ADD1 ADD
  133. #define ADD2 SUB
  134. #define ADD3 ADD
  135. #define ADD4 ADD
  136. #define ADD5 SUB
  137. #define ADD6 ADD
  138. #else
  /* CONJ for the remaining variants: ADD4 and ADD6 subtract. This differs */
  /* from the LN/LT CONJ table only in ADD3/ADD4. */
  139. #define ADD1 ADD
  140. #define ADD2 ADD
  141. #define ADD3 ADD
  142. #define ADD4 SUB
  143. #define ADD5 ADD
  144. #define ADD6 SUB
  145. #endif
  146. #endif
  147. CNAME:
  148. .frame $sp, STACKSIZE, $26, 0
  149. #ifdef PROFILE
  150. ldgp $gp, 0($27)
  151. lda $at, _mcount
  152. jsr $at, ($at), _mcount
  153. #endif
  154. #ifndef PROFILE
  155. .prologue 0
  156. #else
  157. .prologue 1
  158. #endif
  159. lda $sp, -STACKSIZE($sp)
  160. ldq B, 0 + STACKSIZE($sp)
  161. ldq C, 8 + STACKSIZE($sp)
  162. ldq LDC, 16 + STACKSIZE($sp)
  163. ldq OFFSET, 24 + STACKSIZE($sp)
  164. sll LDC, ZBASE_SHIFT, LDC
  165. stt $f2, 0($sp)
  166. stt $f3, 8($sp)
  167. stt $f4, 16($sp)
  168. stt $f5, 24($sp)
  169. stt $f6, 32($sp)
  170. stt $f7, 40($sp)
  171. stt $f8, 48($sp)
  172. stt $f9, 56($sp)
  173. cmple M, 0, $0
  174. cmple N, 0, $1
  175. cmple K, 0, $2
  176. or $0, $1, $0
  177. or $0, $2, $0
  178. bne $0, $L999
  179. #ifdef LN
  180. addq M, M, TMP2
  181. mulq TMP2, K, TMP1
  182. SXADDQ TMP1, A, A
  183. SXADDQ TMP2, C, C
  184. #endif
  185. #ifdef RN
  186. negq OFFSET, KK
  187. #endif
  188. #ifdef RT
  189. mulq N, K, TMP1
  190. addq TMP1, TMP1, TMP1
  191. SXADDQ TMP1, B, B
  192. mulq N, LDC, TMP1
  193. addq TMP1, C, C
  194. subq N, OFFSET, KK
  195. #endif
  196. and N, 1, J
  197. ble J, $L30
  198. #ifdef RT
  199. sll K, ZBASE_SHIFT, TMP1
  200. subq B, TMP1, B
  201. subq C, LDC, C1
  202. subq C, LDC, C
  203. #else
  204. mov C, C1
  205. addq C, LDC, C
  206. #endif
  207. #ifdef LN
  208. addq M, OFFSET, KK
  209. #endif
  210. #ifdef LT
  211. mov OFFSET, KK
  212. #endif
  213. #if defined(LN) || defined(RT)
  214. mov A, AORIG
  215. #else
  216. mov A, AO
  217. #endif
  218. sra M, 1, I
  219. ble I, $L50
  220. .align 4
  221. $L41:
  222. #if defined(LT) || defined(RN)
  223. LD a1, 0 * SIZE(AO)
  224. fclr t1
  225. LD a2, 1 * SIZE(AO)
  226. fclr t2
  227. LD a3, 2 * SIZE(AO)
  228. fclr t3
  229. LD a4, 3 * SIZE(AO)
  230. fclr t4
  231. LD b1, 0 * SIZE(B)
  232. fclr c01
  233. LD b2, 1 * SIZE(B)
  234. fclr c05
  235. LD b3, 2 * SIZE(B)
  236. fclr c02
  237. LD b4, 3 * SIZE(B)
  238. fclr c06
  239. lda BO, 2 * SIZE(B)
  240. fclr c03
  241. lda AO, 4 * SIZE(AO)
  242. fclr c07
  243. lda L, -2(KK)
  244. fclr c04
  245. fclr c08
  246. ble KK, $L48
  247. ble L, $L45
  248. #else
  249. #ifdef LN
  250. sll K, ZBASE_SHIFT + 1, TMP1
  251. subq AORIG, TMP1, AORIG
  252. #endif
  253. sll KK, ZBASE_SHIFT + 1, TMP1
  254. addq AORIG, TMP1, AO
  255. sll KK, ZBASE_SHIFT, TMP1
  256. addq B, TMP1, BO
  257. subq K, KK, TMP1
  258. LD a1, 0 * SIZE(AO)
  259. fclr t1
  260. LD a2, 1 * SIZE(AO)
  261. fclr t2
  262. LD a3, 2 * SIZE(AO)
  263. fclr t3
  264. LD a4, 3 * SIZE(AO)
  265. fclr t4
  266. LD b1, 0 * SIZE(BO)
  267. fclr c01
  268. LD b2, 1 * SIZE(BO)
  269. fclr c05
  270. LD b3, 2 * SIZE(BO)
  271. fclr c02
  272. LD b4, 3 * SIZE(BO)
  273. fclr c06
  274. lda BO, 2 * SIZE(BO)
  275. fclr c03
  276. lda AO, 4 * SIZE(AO)
  277. fclr c07
  278. lda L, -2(TMP1)
  279. fclr c04
  280. fclr c08
  281. ble TMP1, $L48
  282. ble L, $L45
  283. #endif
  284. .align 5
  285. $L42:
  286. ADD4 c05, t1, c05
  287. unop
  288. MUL a1, b1, t1
  289. unop
  290. ADD2 c06, t2, c06
  291. lda L, -2(L)
  292. MUL a2, b1, t2
  293. unop
  294. ADD4 c07, t3, c07
  295. unop
  296. MUL a3, b1, t3
  297. unop
  298. ADD2 c08, t4, c08
  299. unop
  300. MUL a4, b1, t4
  301. LD b1, 2 * SIZE(BO)
  302. ADD1 c01, t1, c01
  303. unop
  304. MUL a1, b2, t1
  305. LD a1, 0 * SIZE(AO)
  306. ADD3 c02, t2, c02
  307. lda BO, 4 * SIZE(BO)
  308. MUL a2, b2, t2
  309. LD a2, 1 * SIZE(AO)
  310. ADD1 c03, t3, c03
  311. unop
  312. MUL a3, b2, t3
  313. LD a3, 2 * SIZE(AO)
  314. ADD3 c04, t4, c04
  315. unop
  316. MUL a4, b2, t4
  317. LD a5, 3 * SIZE(AO)
  318. ADD4 c05, t1, c05
  319. unop
  320. MUL a1, b3, t1
  321. LD b2, -1 * SIZE(BO)
  322. ADD2 c06, t2, c06
  323. unop
  324. MUL a2, b3, t2
  325. unop
  326. ADD4 c07, t3, c07
  327. unop
  328. MUL a3, b3, t3
  329. lda AO, 8 * SIZE(AO)
  330. ADD2 c08, t4, c08
  331. unop
  332. MUL a5, b3, t4
  333. LD b3, 0 * SIZE(BO)
  334. ADD1 c01, t1, c01
  335. unop
  336. MUL a1, b4, t1
  337. LD a1, -4 * SIZE(AO)
  338. ADD3 c02, t2, c02
  339. unop
  340. MUL a2, b4, t2
  341. LD a2, -3 * SIZE(AO)
  342. ADD1 c03, t3, c03
  343. LD a4, -1 * SIZE(AO)
  344. MUL a3, b4, t3
  345. LD a3, -2 * SIZE(AO)
  346. ADD3 c04, t4, c04
  347. MUL a5, b4, t4
  348. LD b4, 1 * SIZE(BO)
  349. bgt L, $L42
  350. .align 4
  351. $L45:
  352. ADD4 c05, t1, c05
  353. MUL b1, a1, t1
  354. #if defined(LT) || defined(RN)
  355. blbs KK, $L47
  356. #else
  357. blbs TMP1, $L47
  358. #endif
  359. .align 4
  360. ADD2 c06, t2, c06
  361. MUL a2, b1, t2
  362. ADD4 c07, t3, c07
  363. MUL a3, b1, t3
  364. ADD2 c08, t4, c08
  365. unop
  366. MUL a4, b1, t4
  367. LD b1, 0 * SIZE(BO)
  368. ADD1 c01, t1, c01
  369. unop
  370. MUL a1, b2, t1
  371. LD a1, 0 * SIZE(AO)
  372. ADD3 c02, t2, c02
  373. unop
  374. MUL a2, b2, t2
  375. LD a2, 1 * SIZE(AO)
  376. ADD1 c03, t3, c03
  377. unop
  378. MUL a3, b2, t3
  379. LD a3, 2 * SIZE(AO)
  380. ADD3 c04, t4, c04
  381. MUL a4, b2, t4
  382. LD a4, 3 * SIZE(AO)
  383. lda AO, 4 * SIZE(AO)
  384. ADD4 c05, t1, c05
  385. LD b2, 1 * SIZE(BO)
  386. MUL a1, b1, t1
  387. lda BO, 2 * SIZE(BO)
  388. .align 4
  389. $L47:
  390. ADD2 c06, t2, c06
  391. MUL a2, b1, t2
  392. ADD4 c07, t3, c07
  393. MUL a3, b1, t3
  394. ADD2 c08, t4, c08
  395. MUL a4, b1, t4
  396. ADD1 c01, t1, c01
  397. MUL a1, b2, t1
  398. ADD3 c02, t2, c02
  399. MUL a2, b2, t2
  400. ADD1 c03, t3, c03
  401. MUL a3, b2, t3
  402. ADD3 c04, t4, c04
  403. lda AO, 4 * SIZE(AO)
  404. MUL a4, b2, t4
  405. lda BO, 2 * SIZE(BO)
  406. ADD4 c05, t1, c05
  407. ADD2 c06, t2, c06
  408. ADD4 c07, t3, c07
  409. ADD2 c08, t4, c08
  410. ADD c01, c06, c01
  411. ADD c02, c05, c02
  412. ADD c03, c08, c03
  413. ADD c04, c07, c04
  414. $L48:
  415. #if defined(LN) || defined(RT)
  416. #ifdef LN
  417. subq KK, 2, TMP1
  418. #else
  419. subq KK, 1, TMP1
  420. #endif
  421. sll TMP1, ZBASE_SHIFT + 1, TMP2
  422. addq AORIG, TMP2, AO
  423. sll TMP1, ZBASE_SHIFT, TMP2
  424. addq B, TMP2, BO
  425. #else
  426. lda AO, -4 * SIZE(AO)
  427. lda BO, -2 * SIZE(BO)
  428. #endif
  429. #if defined(LN) || defined(LT)
  430. LD a1, 0 * SIZE(BO)
  431. LD a2, 1 * SIZE(BO)
  432. LD a3, 2 * SIZE(BO)
  433. LD a4, 3 * SIZE(BO)
  434. SUB a1, c01, c01
  435. SUB a2, c02, c02
  436. SUB a3, c03, c03
  437. SUB a4, c04, c04
  438. #else
  439. LD a1, 0 * SIZE(AO)
  440. LD a2, 1 * SIZE(AO)
  441. LD a3, 2 * SIZE(AO)
  442. LD a4, 3 * SIZE(AO)
  443. SUB a1, c01, c01
  444. SUB a2, c02, c02
  445. SUB a3, c03, c03
  446. SUB a4, c04, c04
  447. #endif
  448. #ifdef LN
  449. LD a1, 6 * SIZE(AO)
  450. LD a2, 7 * SIZE(AO)
  451. LD a3, 4 * SIZE(AO)
  452. LD a4, 5 * SIZE(AO)
  453. MUL a2, c04, t1
  454. MUL a2, c03, t2
  455. MUL a1, c03, c03
  456. MUL a1, c04, c04
  457. ADD5 c03, t1, c03
  458. ADD6 c04, t2, c04
  459. MUL a3, c03, t1
  460. MUL a3, c04, t2
  461. SUB c01, t1, c01
  462. SUB c02, t2, c02
  463. MUL a4, c04, t1
  464. MUL a4, c03, t2
  465. ADD6 c01, t1, c01
  466. ADD5 c02, t2, c02
  467. LD a1, 0 * SIZE(AO)
  468. LD a2, 1 * SIZE(AO)
  469. MUL a2, c02, t1
  470. MUL a2, c01, t2
  471. MUL a1, c01, c01
  472. MUL a1, c02, c02
  473. ADD5 c01, t1, c01
  474. ADD6 c02, t2, c02
  475. #endif
  476. #ifdef LT
  477. LD a1, 0 * SIZE(AO)
  478. LD a2, 1 * SIZE(AO)
  479. LD a3, 2 * SIZE(AO)
  480. LD a4, 3 * SIZE(AO)
  481. MUL a2, c02, t1
  482. MUL a2, c01, t2
  483. MUL a1, c01, c01
  484. MUL a1, c02, c02
  485. ADD5 c01, t1, c01
  486. ADD6 c02, t2, c02
  487. MUL a3, c01, t1
  488. MUL a3, c02, t2
  489. SUB c03, t1, c03
  490. SUB c04, t2, c04
  491. MUL a4, c02, t1
  492. MUL a4, c01, t2
  493. ADD6 c03, t1, c03
  494. ADD5 c04, t2, c04
  495. LD a1, 6 * SIZE(AO)
  496. LD a2, 7 * SIZE(AO)
  497. MUL a2, c04, t1
  498. MUL a2, c03, t2
  499. MUL a1, c03, c03
  500. MUL a1, c04, c04
  501. ADD5 c03, t1, c03
  502. ADD6 c04, t2, c04
  503. #endif
  504. #if defined(RN) || defined(RT)
  505. LD a1, 0 * SIZE(BO)
  506. LD a2, 1 * SIZE(BO)
  507. MUL a2, c02, t1
  508. MUL a2, c01, t2
  509. MUL a2, c04, t3
  510. MUL a2, c03, t4
  511. MUL a1, c01, c01
  512. MUL a1, c02, c02
  513. MUL a1, c03, c03
  514. MUL a1, c04, c04
  515. ADD5 c01, t1, c01
  516. ADD6 c02, t2, c02
  517. ADD5 c03, t3, c03
  518. ADD6 c04, t4, c04
  519. #endif
  520. #if defined(LN) || defined(LT)
  521. ST c01, 0 * SIZE(BO)
  522. ST c02, 1 * SIZE(BO)
  523. ST c03, 2 * SIZE(BO)
  524. ST c04, 3 * SIZE(BO)
  525. #else
  526. ST c01, 0 * SIZE(AO)
  527. ST c02, 1 * SIZE(AO)
  528. ST c03, 2 * SIZE(AO)
  529. ST c04, 3 * SIZE(AO)
  530. #endif
  531. #ifdef LN
  532. lda C1, -4 * SIZE(C1)
  533. #endif
  534. ST c01, 0 * SIZE(C1)
  535. ST c02, 1 * SIZE(C1)
  536. ST c03, 2 * SIZE(C1)
  537. ST c04, 3 * SIZE(C1)
  538. #ifndef LN
  539. lda C1, 4 * SIZE(C1)
  540. #endif
  541. #ifdef RT
  542. sll K, ZBASE_SHIFT + 1, TMP1
  543. addq AORIG, TMP1, AORIG
  544. #endif
  545. #if defined(LT) || defined(RN)
  546. subq K, KK, TMP1
  547. sll TMP1, ZBASE_SHIFT + 1, TMP2
  548. addq AO, TMP2, AO
  549. sll TMP1, ZBASE_SHIFT, TMP2
  550. addq BO, TMP2, BO
  551. #endif
  552. #ifdef LT
  553. addq KK, 2, KK
  554. #endif
  555. #ifdef LN
  556. subq KK, 2, KK
  557. #endif
  558. lda I, -1(I)
  559. bgt I, $L41
  560. .align 4
  561. $L50:
  562. and M, 1, I
  563. ble I, $L59
  564. #if defined(LT) || defined(RN)
  565. LD a1, 0 * SIZE(AO)
  566. fclr t1
  567. LD a2, 1 * SIZE(AO)
  568. fclr t2
  569. LD a3, 2 * SIZE(AO)
  570. fclr t3
  571. LD a4, 3 * SIZE(AO)
  572. fclr t4
  573. LD b1, 0 * SIZE(B)
  574. fclr c01
  575. LD b2, 1 * SIZE(B)
  576. fclr c05
  577. LD b3, 2 * SIZE(B)
  578. fclr c02
  579. LD b4, 3 * SIZE(B)
  580. fclr c06
  581. lda AO, 2 * SIZE(AO)
  582. lda BO, 2 * SIZE(B)
  583. lda L, -2(KK)
  584. ble KK, $L58
  585. ble L, $L55
  586. #else
  587. #ifdef LN
  588. sll K, ZBASE_SHIFT, TMP1
  589. subq AORIG, TMP1, AORIG
  590. #endif
  591. sll KK, ZBASE_SHIFT, TMP1
  592. addq AORIG, TMP1, AO
  593. sll KK, ZBASE_SHIFT, TMP1
  594. addq B, TMP1, BO
  595. subq K, KK, TMP1
  596. LD a1, 0 * SIZE(AO)
  597. fclr t1
  598. LD a2, 1 * SIZE(AO)
  599. fclr t2
  600. LD a3, 2 * SIZE(AO)
  601. fclr t3
  602. LD a4, 3 * SIZE(AO)
  603. fclr t4
  604. LD b1, 0 * SIZE(BO)
  605. fclr c01
  606. LD b2, 1 * SIZE(BO)
  607. fclr c05
  608. LD b3, 2 * SIZE(BO)
  609. fclr c02
  610. LD b4, 3 * SIZE(BO)
  611. fclr c06
  612. lda AO, 2 * SIZE(AO)
  613. lda BO, 2 * SIZE(BO)
  614. lda L, -2(TMP1)
  615. ble TMP1, $L58
  616. ble L, $L55
  617. #endif
  618. .align 5
  619. $L52:
  620. ADD1 c01, t1, c01
  621. unop
  622. MUL a1, b1, t1
  623. unop
  624. ADD3 c02, t2, c02
  625. lda AO, 4 * SIZE(AO)
  626. MUL a2, b1, t2
  627. LD b1, 2 * SIZE(BO)
  628. ADD4 c05, t3, c05
  629. lda L, -2(L)
  630. MUL a1, b2, t3
  631. LD a1, -2 * SIZE(AO)
  632. ADD2 c06, t4, c06
  633. unop
  634. MUL a2, b2, t4
  635. LD a2, -1 * SIZE(AO)
  636. ADD1 c01, t1, c01
  637. LD b2, 3 * SIZE(BO)
  638. MUL a3, b3, t1
  639. lda BO, 4 * SIZE(BO)
  640. ADD3 c02, t2, c02
  641. unop
  642. MUL a4, b3, t2
  643. LD b3, 0 * SIZE(BO)
  644. ADD4 c05, t3, c05
  645. unop
  646. MUL a3, b4, t3
  647. LD a3, 0 * SIZE(AO)
  648. ADD2 c06, t4, c06
  649. MUL a4, b4, t4
  650. LD b4, 1 * SIZE(BO)
  651. unop
  652. LD a4, 1 * SIZE(AO)
  653. unop
  654. unop
  655. bgt L, $L52
  656. .align 4
  657. $L55:
  658. ADD1 c01, t1, c01
  659. MUL a1, b1, t1
  660. #if defined(LT) || defined(RN)
  661. blbs KK, $L57
  662. #else
  663. blbs TMP1, $L57
  664. #endif
  665. .align 4
  666. ADD3 c02, t2, c02
  667. unop
  668. MUL a2, b1, t2
  669. LD b1, 0 * SIZE(BO)
  670. ADD4 c05, t3, c05
  671. lda BO, 2 * SIZE(BO)
  672. MUL a1, b2, t3
  673. LD a1, 0 * SIZE(AO)
  674. ADD2 c06, t4, c06
  675. unop
  676. MUL a2, b2, t4
  677. LD a2, 1 * SIZE(AO)
  678. ADD1 c01, t1, c01
  679. LD b2, -1 * SIZE(BO)
  680. MUL a1, b1, t1
  681. lda AO, 2 * SIZE(AO)
  682. .align 4
  683. $L57:
  684. ADD3 c02, t2, c02
  685. MUL a2, b1, t2
  686. ADD4 c05, t3, c05
  687. MUL a1, b2, t3
  688. ADD2 c06, t4, c06
  689. lda AO, 2 * SIZE(AO)
  690. MUL a2, b2, t4
  691. lda BO, 2 * SIZE(BO)
  692. ADD1 c01, t1, c01
  693. ADD3 c02, t2, c02
  694. ADD4 c05, t3, c05
  695. ADD2 c06, t4, c06
  696. ADD c01, c06, c01
  697. ADD c02, c05, c02
  698. $L58:
  699. #if defined(LN) || defined(RT)
  700. subq KK, 1, TMP1
  701. sll TMP1, ZBASE_SHIFT, TMP2
  702. addq AORIG, TMP2, AO
  703. sll TMP1, ZBASE_SHIFT, TMP2
  704. addq B, TMP2, BO
  705. #else
  706. lda AO, -2 * SIZE(AO)
  707. lda BO, -2 * SIZE(BO)
  708. #endif
  709. #if defined(LN) || defined(LT)
  710. LD a1, 0 * SIZE(BO)
  711. LD a2, 1 * SIZE(BO)
  712. SUB a1, c01, c01
  713. SUB a2, c02, c02
  714. #else
  715. LD a1, 0 * SIZE(AO)
  716. LD a2, 1 * SIZE(AO)
  717. SUB a1, c01, c01
  718. SUB a2, c02, c02
  719. #endif
  720. #if defined(LN) || defined(LT)
  721. LD a1, 0 * SIZE(AO)
  722. LD a2, 1 * SIZE(AO)
  723. MUL a2, c02, t1
  724. MUL a2, c01, t2
  725. MUL a1, c01, c01
  726. MUL a1, c02, c02
  727. ADD5 c01, t1, c01
  728. ADD6 c02, t2, c02
  729. #endif
  730. #if defined(RN) || defined(RT)
  731. LD a1, 0 * SIZE(BO)
  732. LD a2, 1 * SIZE(BO)
  733. MUL a2, c02, t1
  734. MUL a2, c01, t2
  735. MUL a1, c01, c01
  736. MUL a1, c02, c02
  737. ADD5 c01, t1, c01
  738. ADD6 c02, t2, c02
  739. #endif
  740. #if defined(LN) || defined(LT)
  741. ST c01, 0 * SIZE(BO)
  742. ST c02, 1 * SIZE(BO)
  743. #else
  744. ST c01, 0 * SIZE(AO)
  745. ST c02, 1 * SIZE(AO)
  746. #endif
  747. #ifdef LN
  748. lda C1, -2 * SIZE(C1)
  749. #endif
  750. ST c01, 0 * SIZE(C1)
  751. ST c02, 1 * SIZE(C1)
  752. #ifndef LN
  753. lda C1, 2 * SIZE(C1)
  754. #endif
  755. #ifdef RT
  756. sll K, ZBASE_SHIFT, TMP1
  757. addq AORIG, TMP1, AORIG
  758. #endif
  759. #if defined(LT) || defined(RN)
  760. subq K, KK, TMP1
  761. sll TMP1, ZBASE_SHIFT, TMP2
  762. addq AO, TMP2, AO
  763. sll TMP1, ZBASE_SHIFT, TMP2
  764. addq BO, TMP2, BO
  765. #endif
  766. #ifdef LT
  767. addq KK, 1, KK
  768. #endif
  769. #ifdef LN
  770. subq KK, 1, KK
  771. #endif
  772. .align 4
  773. $L59:
  774. #ifdef LN
  775. sll K, ZBASE_SHIFT, TMP1
  776. addq B, TMP1, B
  777. #endif
  778. #if defined(LT) || defined(RN)
  779. mov BO, B
  780. #endif
  781. #ifdef RN
  782. addq KK, 1, KK
  783. #endif
  784. #ifdef RT
  785. subq KK, 1, KK
  786. #endif
  787. .align 4
  788. $L30:
  789. sra N, 1, J
  790. ble J, $L999
  791. .align 4
  792. $L01:
  793. #ifdef RT
  794. sll K, ZBASE_SHIFT + 1, TMP1
  795. subq B, TMP1, B
  796. subq C, LDC, C2
  797. subq C2, LDC, C1
  798. subq C2, LDC, C
  799. #else
  800. mov C, C1
  801. addq C, LDC, C2
  802. addq C2, LDC, C
  803. #endif
  804. #ifdef LN
  805. addq M, OFFSET, KK
  806. #endif
  807. #ifdef LT
  808. mov OFFSET, KK
  809. #endif
  810. #if defined(LN) || defined(RT)
  811. mov A, AORIG
  812. #else
  813. mov A, AO
  814. #endif
  815. sra M, 1, I
  816. fclr t1
  817. fclr t2
  818. fclr t3
  819. fclr t4
  820. fclr c01
  821. fclr c05
  822. ble I, $L20
  823. .align 4
  824. $L11:
  825. #if defined(LT) || defined(RN)
  826. LD a1, 0 * SIZE(AO)
  827. fclr c09
  828. LD a2, 1 * SIZE(AO)
  829. fclr c13
  830. LD a3, 2 * SIZE(AO)
  831. fclr c02
  832. LD a4, 3 * SIZE(AO)
  833. fclr c06
  834. LD b1, 0 * SIZE(B)
  835. fclr c10
  836. LD b2, 1 * SIZE(B)
  837. fclr c14
  838. LD b3, 2 * SIZE(B)
  839. fclr c03
  840. LD b4, 3 * SIZE(B)
  841. fclr c07
  842. lda BO, 4 * SIZE(B)
  843. fclr c11
  844. lda AO, 4 * SIZE(AO)
  845. fclr c15
  846. lds $f31, 4 * SIZE(C1)
  847. fclr c04
  848. lda L, -2(KK)
  849. fclr c08
  850. lds $f31, 4 * SIZE(C2)
  851. fclr c12
  852. fclr c16
  853. ble KK, $L18
  854. ble L, $L15
  855. #else
  856. #ifdef LN
  857. sll K, ZBASE_SHIFT + 1, TMP1
  858. subq AORIG, TMP1, AORIG
  859. #endif
  860. sll KK, ZBASE_SHIFT + 1, TMP1
  861. addq AORIG, TMP1, AO
  862. addq B, TMP1, BO
  863. subq K, KK, TMP1
  864. LD a1, 0 * SIZE(AO)
  865. fclr c09
  866. LD a2, 1 * SIZE(AO)
  867. fclr c13
  868. LD a3, 2 * SIZE(AO)
  869. fclr c02
  870. LD a4, 3 * SIZE(AO)
  871. fclr c06
  872. LD b1, 0 * SIZE(BO)
  873. fclr c10
  874. LD b2, 1 * SIZE(BO)
  875. fclr c14
  876. LD b3, 2 * SIZE(BO)
  877. fclr c03
  878. LD b4, 3 * SIZE(BO)
  879. fclr c07
  880. lda BO, 4 * SIZE(BO)
  881. fclr c11
  882. lda AO, 4 * SIZE(AO)
  883. fclr c15
  884. lds $f31, 4 * SIZE(C1)
  885. fclr c04
  886. lda L, -2(TMP1)
  887. fclr c08
  888. lds $f31, 4 * SIZE(C2)
  889. fclr c12
  890. fclr c16
  891. ble TMP1, $L18
  892. ble L, $L15
  893. #endif
  894. .align 5
  895. $L12:
  896. /* 1 */
  897. ADD1 c11, t1, c11
  898. #ifndef EV4
  899. ldq $31, PREFETCHSIZE * SIZE(AO)
  900. #else
  901. unop
  902. #endif
  903. MUL b1, a1, t1
  904. #ifndef EV4
  905. ldl $31, PREFETCHSIZE * SIZE(BO)
  906. #else
  907. unop
  908. #endif
  909. ADD3 c12, t2, c12
  910. unop
  911. MUL b1, a2, t2
  912. unop
  913. ADD2 c16, t3, c16
  914. unop
  915. MUL b2, a2, t3
  916. LD a5, 0 * SIZE(AO)
  917. ADD4 c15, t4, c15
  918. unop
  919. MUL b2, a1, t4
  920. LD b5, 0 * SIZE(BO)
  921. /* 2 */
  922. ADD1 c01, t1, c01
  923. UNOP
  924. MUL b1, a3, t1
  925. UNOP
  926. ADD3 c02, t2, c02
  927. UNOP
  928. MUL b1, a4, t2
  929. UNOP
  930. ADD2 c06, t3, c06
  931. unop
  932. MUL b2, a4, t3
  933. unop
  934. ADD4 c05, t4, c05
  935. unop
  936. MUL b4, a1, t4
  937. unop
  938. /* 3 */
  939. ADD1 c03, t1, c03
  940. unop
  941. MUL b3, a1, t1
  942. unop
  943. ADD3 c04, t2, c04
  944. unop
  945. MUL b3, a2, t2
  946. unop
  947. ADD2 c08, t3, c08
  948. unop
  949. MUL b4, a2, t3
  950. LD a2, 1 * SIZE(AO)
  951. ADD4 c13, t4, c13
  952. unop
  953. MUL b2, a3, t4
  954. LD b2, 1 * SIZE(BO)
  955. /* 4 */
  956. ADD1 c09, t1, c09
  957. unop
  958. MUL b3, a3, t1
  959. LD a6, 2 * SIZE(AO)
  960. ADD3 c10, t2, c10
  961. unop
  962. MUL b3, a4, t2
  963. LD b3, 2 * SIZE(BO)
  964. ADD2 c14, t3, c14
  965. unop
  966. MUL b4, a4, t3
  967. LD a4, 3 * SIZE(AO)
  968. ADD4 c07, t4, c07
  969. unop
  970. MUL b4, a3, t4
  971. LD b4, 3 * SIZE(BO)
  972. /* 5 */
  973. ADD1 c11, t1, c11
  974. unop
  975. MUL b5, a5, t1
  976. LD a1, 4 * SIZE(AO)
  977. ADD3 c12, t2, c12
  978. lda L, -2(L)
  979. MUL b5, a2, t2
  980. LD b1, 4 * SIZE(BO)
  981. ADD2 c16, t3, c16
  982. unop
  983. MUL b2, a2, t3
  984. unop
  985. ADD4 c15, t4, c15
  986. unop
  987. MUL b2, a5, t4
  988. unop
  989. /* 6 */
  990. ADD1 c01, t1, c01
  991. unop
  992. MUL b5, a6, t1
  993. unop
  994. ADD3 c02, t2, c02
  995. unop
  996. MUL b5, a4, t2
  997. unop
  998. ADD2 c06, t3, c06
  999. unop
  1000. MUL b2, a4, t3
  1001. unop
  1002. ADD4 c05, t4, c05
  1003. unop
  1004. MUL b4, a5, t4
  1005. unop
  1006. /* 7 */
  1007. ADD1 c03, t1, c03
  1008. lda AO, 8 * SIZE(AO)
  1009. MUL b3, a5, t1
  1010. unop
  1011. ADD3 c04, t2, c04
  1012. lda BO, 8 * SIZE(BO)
  1013. MUL b3, a2, t2
  1014. unop
  1015. ADD2 c08, t3, c08
  1016. unop
  1017. MUL b4, a2, t3
  1018. LD a2, -3 * SIZE(AO)
  1019. ADD4 c13, t4, c13
  1020. unop
  1021. MUL b2, a6, t4
  1022. LD b2, -3 * SIZE(BO)
  1023. /* 8 */
  1024. ADD1 c09, t1, c09
  1025. unop
  1026. MUL b3, a6, t1
  1027. LD a3, -2 * SIZE(AO)
  1028. ADD3 c10, t2, c10
  1029. unop
  1030. MUL b3, a4, t2
  1031. LD b3, -2 * SIZE(BO)
  1032. ADD2 c14, t3, c14
  1033. unop
  1034. MUL b4, a4, t3
  1035. LD a4, -1 * SIZE(AO)
  1036. ADD4 c07, t4, c07
  1037. MUL b4, a6, t4
  1038. LD b4, -1 * SIZE(BO)
  1039. bgt L, $L12
  1040. .align 4
  /* $L15: entered when the unrolled loop above falls through (other entries,
     if any, are outside this chunk). Retires the pending t1 into c11, issues
     the next b1*a1 product, then tests the low bit of the count register
     (KK for LT/RN, TMP1 for the other variants). Low bit set: branch to
     $L17. Low bit clear: fall through and run one more complete step first.
     ADD1..ADD4, MUL and LD are macros defined outside this chunk. */
  1041. $L15:
  1042. ADD1 c11, t1, c11
  1043. unop
  1044. MUL b1, a1, t1
  1045. #if defined(LT) || defined(RN)
  1046. blbs KK, $L17
  1047. #else
  1048. blbs TMP1, $L17
  1049. #endif
  1050. .align 4
  /* Extra step (low bit clear). Products are interleaved with the loads of
     the operands for the following step: b1..b4 are refilled from
     0..3 * SIZE(BO) and a1..a4 from the next four slots of AO. */
  1051. ADD3 c12, t2, c12
  1052. MUL b1, a2, t2
  1053. ADD2 c16, t3, c16
  1054. MUL b2, a2, t3
  1055. ADD4 c15, t4, c15
  1056. MUL b2, a1, t4
  1057. ADD1 c01, t1, c01
  1058. MUL b1, a3, t1
  1059. ADD3 c02, t2, c02
  1060. unop
  1061. MUL b1, a4, t2
  1062. LD b1, 0 * SIZE(BO)
  1063. ADD2 c06, t3, c06
  1064. MUL b2, a4, t3
  1065. ADD4 c05, t4, c05
  1066. MUL b4, a1, t4
  1067. ADD1 c03, t1, c03
  1068. unop
  1069. MUL b3, a1, t1
  1070. LD a1, 0 * SIZE(AO)
  1071. ADD3 c04, t2, c04
  1072. unop
  1073. MUL b3, a2, t2
  1074. unop
  1075. ADD2 c08, t3, c08
  1076. unop
  1077. MUL b4, a2, t3
  1078. LD a2, 1 * SIZE(AO)
  1079. ADD4 c13, t4, c13
  1080. unop
  1081. MUL b2, a3, t4
  1082. LD b2, 1 * SIZE(BO)
  1083. ADD1 c09, t1, c09
  1084. unop
  1085. MUL b3, a3, t1
  /* AO moves forward here, so the a4/a3 loads below use negative offsets
     (-1, -2) to reach slots 3 and 2 of the block just consumed from. */
  1086. lda AO, 4 * SIZE(AO)
  1087. ADD3 c10, t2, c10
  1088. unop
  1089. MUL b3, a4, t2
  1090. LD b3, 2 * SIZE(BO)
  1091. ADD2 c14, t3, c14
  1092. unop
  1093. MUL b4, a4, t3
  1094. LD a4, -1 * SIZE(AO)
  1095. ADD4 c07, t4, c07
  1096. unop
  1097. MUL b4, a3, t4
  1098. LD a3, -2 * SIZE(AO)
  /* Same two-instruction lead-in as at the top of $L15, so $L17 sees the
     same pending state whichever way it is reached. BO advances last, after
     b4 has been read at 3 * SIZE(BO). */
  1099. ADD1 c11, t1, c11
  1100. LD b4, 3 * SIZE(BO)
  1101. MUL b1, a1, t1
  1102. lda BO, 4 * SIZE(BO)
  1103. .align 4
  /* $L17: last multiply-accumulate step for the two-wide tile. No operand
     loads are issued here; the values already in a1..a4 / b1..b4 are
     consumed, AO and BO are each advanced by 4 * SIZE, the four pending
     products t1..t4 are drained, and the sixteen partial sums are folded
     pairwise into eight results. */
  1104. $L17:
  1105. ADD3 c12, t2, c12
  1106. MUL b1, a2, t2
  1107. ADD2 c16, t3, c16
  1108. MUL b2, a2, t3
  1109. ADD4 c15, t4, c15
  1110. MUL b2, a1, t4
  1111. ADD1 c01, t1, c01
  1112. MUL b1, a3, t1
  1113. ADD3 c02, t2, c02
  1114. MUL b1, a4, t2
  1115. ADD2 c06, t3, c06
  1116. MUL b2, a4, t3
  1117. ADD4 c05, t4, c05
  1118. MUL b4, a1, t4
  1119. ADD1 c03, t1, c03
  1120. MUL b3, a1, t1
  1121. ADD3 c04, t2, c04
  1122. MUL b3, a2, t2
  1123. ADD2 c08, t3, c08
  1124. MUL b4, a2, t3
  1125. ADD4 c13, t4, c13
  1126. MUL b2, a3, t4
  1127. ADD1 c09, t1, c09
  1128. MUL b3, a3, t1
  1129. ADD3 c10, t2, c10
  1130. MUL b3, a4, t2
  1131. ADD2 c14, t3, c14
  1132. MUL b4, a4, t3
  1133. ADD4 c07, t4, c07
  1134. lda AO, 4 * SIZE(AO)
  1135. MUL b4, a3, t4
  1136. lda BO, 4 * SIZE(BO)
  /* Drain the last four products. */
  1137. ADD1 c11, t1, c11
  1138. ADD3 c12, t2, c12
  1139. ADD2 c16, t3, c16
  1140. ADD4 c15, t4, c15
  /* Fold partner accumulators: c06->c01, c05->c02, c08->c03, c07->c04,
     c14->c09, c13->c10, c16->c11, c15->c12. After this only c01..c04 and
     c09..c12 are read by $L18. */
  1141. ADD c01, c06, c01
  1142. ADD c02, c05, c02
  1143. ADD c03, c08, c03
  1144. ADD c04, c07, c04
  1145. ADD c09, c14, c09
  1146. ADD c10, c13, c10
  1147. ADD c11, c16, c11
  1148. ADD c12, c15, c12
  1149. .align 4
  /* $L18: solve and write-back for the two-wide tile.
     Steps: (1) point AO/BO at the packed data for this tile, (2) replace each
     accumulator by (packed value - accumulator), (3) run the variant-specific
     substitution (exactly one of LN / LT / RN / RT is expected to be defined),
     (4) store the results to the packed buffer and to C1/C2, (5) update
     pointers and KK, then loop back to $L11 while I > 0.
     SUB / MUL / ADD5 / ADD6 / LD / ST are macros defined outside this chunk;
     notes on operand order below assume the usual Alpha dest-last form. */
  1150. $L18:
  1151. #if defined(LN) || defined(RT)
  /* Both arms of the inner #ifdef are identical: TMP1 = KK - 2. */
  1152. #ifdef LN
  1153. subq KK, 2, TMP1
  1154. #else
  1155. subq KK, 2, TMP1
  1156. #endif
  /* AO = AORIG + (TMP1 << (ZBASE_SHIFT + 1)); BO = B + the same byte offset. */
  1157. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1158. addq AORIG, TMP2, AO
  1159. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1160. addq B, TMP2, BO
  1161. #else
  /* LT / RN: step both pointers back by 4 * SIZE. */
  1162. lda AO, -4 * SIZE(AO)
  1163. lda BO, -4 * SIZE(BO)
  1164. #endif
  1165. #if defined(LN) || defined(LT)
  /* Left-side variants read eight values from BO. Slot order in BO is
     c01, c02, c09, c10, c03, c04, c11, c12. */
  1166. LD a1, 0 * SIZE(BO)
  1167. LD a2, 1 * SIZE(BO)
  1168. LD a3, 2 * SIZE(BO)
  1169. LD a4, 3 * SIZE(BO)
  1170. LD b1, 4 * SIZE(BO)
  1171. LD b2, 5 * SIZE(BO)
  1172. LD b3, 6 * SIZE(BO)
  1173. LD b4, 7 * SIZE(BO)
  1174. SUB a1, c01, c01
  1175. SUB a2, c02, c02
  1176. SUB a3, c09, c09
  1177. SUB a4, c10, c10
  1178. SUB b1, c03, c03
  1179. SUB b2, c04, c04
  1180. SUB b3, c11, c11
  1181. SUB b4, c12, c12
  1182. #else
  /* Right-side variants read eight values from AO. Slot order in AO is
     c01, c02, c03, c04, c09, c10, c11, c12. */
  1183. LD a1, 0 * SIZE(AO)
  1184. LD a2, 1 * SIZE(AO)
  1185. LD a3, 2 * SIZE(AO)
  1186. LD a4, 3 * SIZE(AO)
  1187. LD b1, 4 * SIZE(AO)
  1188. LD b2, 5 * SIZE(AO)
  1189. LD b3, 6 * SIZE(AO)
  1190. LD b4, 7 * SIZE(AO)
  1191. SUB a1, c01, c01
  1192. SUB a2, c02, c02
  1193. SUB a3, c03, c03
  1194. SUB a4, c04, c04
  1195. SUB b1, c09, c09
  1196. SUB b2, c10, c10
  1197. SUB b3, c11, c11
  1198. SUB b4, c12, c12
  1199. #endif
  1200. #ifdef LN
  /* LN: coefficients come from AO slots 6,7 (a1,a2), 4,5 (a3,a4), then 0,1.
     Order of work: scale (c03,c04) and (c11,c12) by (a1,a2); subtract their
     (a3,a4) multiples from (c01,c02) and (c09,c10); scale those by AO[0..1].
     Each "scale" has the shape of a complex multiply (cross products into
     t1..t4, then ADD5/ADD6) - NOTE(review): the sign convention lives in
     ADD5/ADD6, which are not visible here. */
  1201. LD a1, 6 * SIZE(AO)
  1202. LD a2, 7 * SIZE(AO)
  1203. LD a3, 4 * SIZE(AO)
  1204. LD a4, 5 * SIZE(AO)
  1205. MUL a2, c04, t1
  1206. MUL a2, c03, t2
  1207. MUL a2, c12, t3
  1208. MUL a2, c11, t4
  1209. MUL a1, c03, c03
  1210. MUL a1, c04, c04
  1211. MUL a1, c11, c11
  1212. MUL a1, c12, c12
  1213. ADD5 c03, t1, c03
  1214. ADD6 c04, t2, c04
  1215. ADD5 c11, t3, c11
  1216. ADD6 c12, t4, c12
  1217. MUL a3, c03, t1
  1218. MUL a3, c04, t2
  1219. MUL a3, c11, t3
  1220. MUL a3, c12, t4
  1221. SUB c01, t1, c01
  1222. SUB c02, t2, c02
  1223. SUB c09, t3, c09
  1224. SUB c10, t4, c10
  1225. MUL a4, c04, t1
  1226. MUL a4, c03, t2
  1227. MUL a4, c12, t3
  1228. MUL a4, c11, t4
  1229. ADD6 c01, t1, c01
  1230. ADD5 c02, t2, c02
  1231. ADD6 c09, t3, c09
  1232. ADD5 c10, t4, c10
  1233. LD a1, 0 * SIZE(AO)
  1234. LD a2, 1 * SIZE(AO)
  1235. MUL a2, c02, t1
  1236. MUL a2, c01, t2
  1237. MUL a2, c10, t3
  1238. MUL a2, c09, t4
  1239. MUL a1, c01, c01
  1240. MUL a1, c02, c02
  1241. MUL a1, c09, c09
  1242. MUL a1, c10, c10
  1243. ADD5 c01, t1, c01
  1244. ADD6 c02, t2, c02
  1245. ADD5 c09, t3, c09
  1246. ADD6 c10, t4, c10
  1247. #endif
  1248. #ifdef LT
  /* LT: mirror image of LN. Coefficients from AO slots 0,1 (a1,a2) and
     2,3 (a3,a4), then 6,7. (c01,c02)/(c09,c10) are scaled first, their
     (a3,a4) multiples are removed from (c03,c04)/(c11,c12), and those are
     scaled last. */
  1249. LD a1, 0 * SIZE(AO)
  1250. LD a2, 1 * SIZE(AO)
  1251. LD a3, 2 * SIZE(AO)
  1252. LD a4, 3 * SIZE(AO)
  1253. MUL a2, c02, t1
  1254. MUL a2, c01, t2
  1255. MUL a2, c10, t3
  1256. MUL a2, c09, t4
  1257. MUL a1, c01, c01
  1258. MUL a1, c02, c02
  1259. MUL a1, c09, c09
  1260. MUL a1, c10, c10
  1261. ADD5 c01, t1, c01
  1262. ADD6 c02, t2, c02
  1263. ADD5 c09, t3, c09
  1264. ADD6 c10, t4, c10
  1265. MUL a3, c01, t1
  1266. MUL a3, c02, t2
  1267. MUL a3, c09, t3
  1268. MUL a3, c10, t4
  1269. SUB c03, t1, c03
  1270. SUB c04, t2, c04
  1271. SUB c11, t3, c11
  1272. SUB c12, t4, c12
  1273. MUL a4, c02, t1
  1274. MUL a4, c01, t2
  1275. MUL a4, c10, t3
  1276. MUL a4, c09, t4
  1277. ADD6 c03, t1, c03
  1278. ADD5 c04, t2, c04
  1279. ADD6 c11, t3, c11
  1280. ADD5 c12, t4, c12
  1281. LD a1, 6 * SIZE(AO)
  1282. LD a2, 7 * SIZE(AO)
  1283. MUL a2, c04, t1
  1284. MUL a2, c03, t2
  1285. MUL a2, c12, t3
  1286. MUL a2, c11, t4
  1287. MUL a1, c03, c03
  1288. MUL a1, c04, c04
  1289. MUL a1, c11, c11
  1290. MUL a1, c12, c12
  1291. ADD5 c03, t1, c03
  1292. ADD6 c04, t2, c04
  1293. ADD5 c11, t3, c11
  1294. ADD6 c12, t4, c12
  1295. #endif
  1296. #ifdef RN
  /* RN: coefficients from BO slots 0,1 (a1,a2) and 2,3 (a3,a4), then 6,7.
     c01..c04 are scaled first, their (a3,a4) multiples are removed from
     c09..c12, and c09..c12 are scaled last. */
  1297. LD a1, 0 * SIZE(BO)
  1298. LD a2, 1 * SIZE(BO)
  1299. LD a3, 2 * SIZE(BO)
  1300. LD a4, 3 * SIZE(BO)
  1301. MUL a2, c02, t1
  1302. MUL a2, c01, t2
  1303. MUL a2, c04, t3
  1304. MUL a2, c03, t4
  1305. MUL a1, c01, c01
  1306. MUL a1, c02, c02
  1307. MUL a1, c03, c03
  1308. MUL a1, c04, c04
  1309. ADD5 c01, t1, c01
  1310. ADD6 c02, t2, c02
  1311. ADD5 c03, t3, c03
  1312. ADD6 c04, t4, c04
  1313. MUL a3, c01, t1
  1314. MUL a3, c02, t2
  1315. MUL a3, c03, t3
  1316. MUL a3, c04, t4
  1317. SUB c09, t1, c09
  1318. SUB c10, t2, c10
  1319. SUB c11, t3, c11
  1320. SUB c12, t4, c12
  1321. MUL a4, c02, t1
  1322. MUL a4, c01, t2
  1323. MUL a4, c04, t3
  1324. MUL a4, c03, t4
  1325. ADD6 c09, t1, c09
  1326. ADD5 c10, t2, c10
  1327. ADD6 c11, t3, c11
  1328. ADD5 c12, t4, c12
  1329. LD a1, 6 * SIZE(BO)
  1330. LD a2, 7 * SIZE(BO)
  1331. MUL a2, c10, t1
  1332. MUL a2, c09, t2
  1333. MUL a2, c12, t3
  1334. MUL a2, c11, t4
  1335. MUL a1, c09, c09
  1336. MUL a1, c10, c10
  1337. MUL a1, c11, c11
  1338. MUL a1, c12, c12
  1339. ADD5 c09, t1, c09
  1340. ADD6 c10, t2, c10
  1341. ADD5 c11, t3, c11
  1342. ADD6 c12, t4, c12
  1343. #endif
  1344. #ifdef RT
  /* RT: mirror image of RN. Coefficients from BO slots 6,7 (a1,a2) and
     4,5 (a3,a4), then 0,1. c09..c12 are scaled first, their (a3,a4)
     multiples are removed from c01..c04, and c01..c04 are scaled last. */
  1345. LD a1, 6 * SIZE(BO)
  1346. LD a2, 7 * SIZE(BO)
  1347. LD a3, 4 * SIZE(BO)
  1348. LD a4, 5 * SIZE(BO)
  1349. MUL a2, c10, t1
  1350. MUL a2, c09, t2
  1351. MUL a2, c12, t3
  1352. MUL a2, c11, t4
  1353. MUL a1, c09, c09
  1354. MUL a1, c10, c10
  1355. MUL a1, c11, c11
  1356. MUL a1, c12, c12
  1357. ADD5 c09, t1, c09
  1358. ADD6 c10, t2, c10
  1359. ADD5 c11, t3, c11
  1360. ADD6 c12, t4, c12
  1361. MUL a3, c09, t1
  1362. MUL a3, c10, t2
  1363. MUL a3, c11, t3
  1364. MUL a3, c12, t4
  1365. SUB c01, t1, c01
  1366. SUB c02, t2, c02
  1367. SUB c03, t3, c03
  1368. SUB c04, t4, c04
  1369. MUL a4, c10, t1
  1370. MUL a4, c09, t2
  1371. MUL a4, c12, t3
  1372. MUL a4, c11, t4
  1373. ADD6 c01, t1, c01
  1374. ADD5 c02, t2, c02
  1375. ADD6 c03, t3, c03
  1376. ADD5 c04, t4, c04
  1377. LD a1, 0 * SIZE(BO)
  1378. LD a2, 1 * SIZE(BO)
  1379. MUL a2, c02, t1
  1380. MUL a2, c01, t2
  1381. MUL a2, c04, t3
  1382. MUL a2, c03, t4
  1383. MUL a1, c01, c01
  1384. MUL a1, c02, c02
  1385. MUL a1, c03, c03
  1386. MUL a1, c04, c04
  1387. ADD5 c01, t1, c01
  1388. ADD6 c02, t2, c02
  1389. ADD5 c03, t3, c03
  1390. ADD6 c04, t4, c04
  1391. #endif
  /* Write the results back into the packed buffer they were read from, using
     the same slot order as the loads at the top of $L18. */
  1392. #if defined(LN) || defined(LT)
  1393. ST c01, 0 * SIZE(BO)
  1394. ST c02, 1 * SIZE(BO)
  1395. ST c09, 2 * SIZE(BO)
  1396. ST c10, 3 * SIZE(BO)
  1397. ST c03, 4 * SIZE(BO)
  1398. ST c04, 5 * SIZE(BO)
  1399. ST c11, 6 * SIZE(BO)
  1400. ST c12, 7 * SIZE(BO)
  1401. #else
  1402. ST c01, 0 * SIZE(AO)
  1403. ST c02, 1 * SIZE(AO)
  1404. ST c03, 2 * SIZE(AO)
  1405. ST c04, 3 * SIZE(AO)
  1406. ST c09, 4 * SIZE(AO)
  1407. ST c10, 5 * SIZE(AO)
  1408. ST c11, 6 * SIZE(AO)
  1409. ST c12, 7 * SIZE(AO)
  1410. #endif
  /* Store to the output columns: c01..c04 go to C1, c09..c12 go to C2.
     LN moves C1/C2 back by 4 * SIZE before storing; every other variant
     moves them forward by 4 * SIZE afterwards. */
  1411. #ifdef LN
  1412. lda C1, -4 * SIZE(C1)
  1413. lda C2, -4 * SIZE(C2)
  1414. #endif
  1415. ST c01, 0 * SIZE(C1)
  1416. ST c02, 1 * SIZE(C1)
  1417. ST c03, 2 * SIZE(C1)
  1418. ST c04, 3 * SIZE(C1)
  1419. ST c09, 0 * SIZE(C2)
  1420. ST c10, 1 * SIZE(C2)
  1421. ST c11, 2 * SIZE(C2)
  1422. ST c12, 3 * SIZE(C2)
  1423. #ifndef LN
  1424. lda C1, 4 * SIZE(C1)
  1425. lda C2, 4 * SIZE(C2)
  1426. #endif
  /* Clear the product temporaries. */
  1427. fclr t1
  1428. fclr t2
  1429. fclr t3
  1430. fclr t4
  /* RT: AORIG += K << (ZBASE_SHIFT + 1). */
  1431. #ifdef RT
  1432. sll K, ZBASE_SHIFT + 1, TMP1
  1433. addq AORIG, TMP1, AORIG
  1434. #endif
  /* LT / RN: advance both AO and BO by (K - KK) << (ZBASE_SHIFT + 1) bytes. */
  1435. #if defined(LT) || defined(RN)
  1436. subq K, KK, TMP1
  1437. sll TMP1, ZBASE_SHIFT + 1, TMP1
  1438. addq AO, TMP1, AO
  1439. addq BO, TMP1, BO
  1440. #endif
  /* KK moves by 2 per tile for the left-side variants: up for LT, down for LN. */
  1441. #ifdef LT
  1442. addq KK, 2, KK
  1443. #endif
  1444. #ifdef LN
  1445. subq KK, 2, KK
  1446. #endif
  /* Clear c01 and c05, decrement I, and go back to $L11 while I > 0. */
  1447. fclr c01
  1448. fclr c05
  1449. lda I, -1(I)
  1450. bgt I, $L11
  1451. .align 4
  /* $L20: handles the odd remainder of M. I = M & 1; if it is zero, skip to
     $L29. Otherwise set up pointers, preload a1..a4 / b1..b4, clear
     c09, c13, c02, c06, c10, c14, and pick the entry point:
       count <= 0      -> $L28 (no multiply-accumulate work)
       count - 2 <= 0  -> $L25 (skip the unrolled loop)
       otherwise       -> fall into $L22
     where count is KK for LT/RN and TMP1 = K - KK for the other variants. */
  1452. $L20:
  1453. and M, 1, I
  1454. ble I, $L29
  1455. #if defined(LT) || defined(RN)
  /* LT / RN: A operands come from AO as it stands, B operands from B. */
  1456. LD a1, 0 * SIZE(AO)
  1457. fclr c09
  1458. LD a2, 1 * SIZE(AO)
  1459. fclr c13
  1460. LD a3, 2 * SIZE(AO)
  1461. fclr c02
  1462. LD a4, 3 * SIZE(AO)
  1463. fclr c06
  1464. LD b1, 0 * SIZE(B)
  1465. fclr c10
  1466. LD b2, 1 * SIZE(B)
  1467. fclr c14
  1468. LD b3, 2 * SIZE(B)
  /* AO advances by 2 * SIZE although four A values were loaded; BO is set
     to B + 4 * SIZE. */
  1469. lda AO, 2 * SIZE(AO)
  1470. LD b4, 3 * SIZE(B)
  1471. lda BO, 4 * SIZE(B)
  1472. lda L, -2(KK)
  1473. ble KK, $L28
  1474. ble L, $L25
  1475. #else
  /* LN only: AORIG -= K << ZBASE_SHIFT. */
  1476. #ifdef LN
  1477. sll K, ZBASE_SHIFT + 0, TMP1
  1478. subq AORIG, TMP1, AORIG
  1479. #endif
  /* AO = AORIG + (KK << ZBASE_SHIFT); BO = B + (KK << (ZBASE_SHIFT + 1));
     TMP1 = K - KK is the count used below and in $L25. */
  1480. sll KK, ZBASE_SHIFT + 0, TMP1
  1481. addq AORIG, TMP1, AO
  1482. sll KK, ZBASE_SHIFT + 1, TMP1
  1483. addq B, TMP1, BO
  1484. subq K, KK, TMP1
  1485. LD a1, 0 * SIZE(AO)
  1486. fclr c09
  1487. LD a2, 1 * SIZE(AO)
  1488. fclr c13
  1489. LD a3, 2 * SIZE(AO)
  1490. fclr c02
  1491. LD a4, 3 * SIZE(AO)
  1492. fclr c06
  1493. LD b1, 0 * SIZE(BO)
  1494. fclr c10
  1495. LD b2, 1 * SIZE(BO)
  1496. fclr c14
  1497. LD b3, 2 * SIZE(BO)
  1498. lda AO, 2 * SIZE(AO)
  1499. LD b4, 3 * SIZE(BO)
  1500. lda BO, 4 * SIZE(BO)
  1501. lda L, -2(TMP1)
  1502. ble TMP1, $L28
  1503. ble L, $L25
  1504. #endif
  1505. .align 5
  /* $L22: unrolled multiply-accumulate loop for the M & 1 remainder. Each
     pass performs two steps: the first uses (a1,a2), the second (a3,a4),
     each against b1..b4. Per pass BO advances by 8 * SIZE, AO by 4 * SIZE
     and L drops by 2; the loop repeats while L > 0. Loads for later steps
     are interleaved with the arithmetic, which is why B offsets turn
     negative once BO has been advanced. */
  1506. $L22:
  1507. ADD1 c09, t1, c09
  1508. unop
  1509. MUL a1, b1, t1
  1510. unop
  1511. ADD3 c10, t2, c10
  1512. unop
  1513. MUL a2, b1, t2
  1514. LD b1, 0 * SIZE(BO)
  1515. ADD4 c13, t3, c13
  1516. unop
  1517. MUL a1, b2, t3
  1518. lda BO, 8 * SIZE(BO)
  1519. ADD2 c14, t4, c14
  1520. unop
  1521. MUL a2, b2, t4
  1522. LD b2, -7 * SIZE(BO)
  1523. ADD1 c01, t1, c01
  1524. unop
  1525. MUL a1, b3, t1
  1526. unop
  1527. ADD3 c02, t2, c02
  1528. unop
  1529. MUL a2, b3, t2
  1530. LD b3, -6 * SIZE(BO)
  1531. ADD4 c05, t3, c05
  1532. unop
  1533. MUL a1, b4, t3
  1534. LD a1, 2 * SIZE(AO)
  1535. ADD2 c06, t4, c06
  1536. MUL a2, b4, t4
  /* The fourth B value for the second step goes into b5, not b4. */
  1537. LD b5, -5 * SIZE(BO)
  /* Second step: (a3,a4) against the freshly loaded b1, b2, b3, b5. */
  1538. ADD1 c09, t1, c09
  1539. unop
  1540. MUL a3, b1, t1
  1541. LD a2, 3 * SIZE(AO)
  1542. ADD3 c10, t2, c10
  1543. unop
  1544. MUL a4, b1, t2
  1545. LD b1, -4 * SIZE(BO)
  1546. ADD4 c13, t3, c13
  1547. unop
  1548. MUL a3, b2, t3
  1549. lda AO, 4 * SIZE(AO)
  1550. ADD2 c14, t4, c14
  1551. MUL a4, b2, t4
  1552. LD b2, -3 * SIZE(BO)
  1553. ADD1 c01, t1, c01
  1554. lda L, -2(L)
  1555. MUL a3, b3, t1
  /* b4 is reloaded here for the following pass (or for $L25). */
  1556. LD b4, -1 * SIZE(BO)
  1557. ADD3 c02, t2, c02
  1558. unop
  1559. MUL a4, b3, t2
  1560. LD b3, -2 * SIZE(BO)
  1561. ADD4 c05, t3, c05
  1562. unop
  1563. MUL a3, b5, t3
  1564. LD a3, 0 * SIZE(AO)
  1565. ADD2 c06, t4, c06
  1566. MUL a4, b5, t4
  1567. LD a4, 1 * SIZE(AO)
  1568. bgt L, $L22
  1569. .align 4
  /* $L25: reached after the $L22 loop or directly from $L20 when L <= 0.
     Retires t1 into c09, issues a1*b1, then tests the low bit of the count
     (KK for LT/RN, TMP1 otherwise). Low bit set: branch to $L27. Low bit
     clear: fall through and run one more step with operand reloads first. */
  1570. $L25:
  1571. ADD1 c09, t1, c09
  1572. MUL a1, b1, t1
  1573. #if defined(LT) || defined(RN)
  1574. blbs KK, $L27
  1575. #else
  1576. blbs TMP1, $L27
  1577. #endif
  1578. .align 4
  /* Extra step (low bit clear): consume a1,a2 / b1..b4 and reload all six
     for $L27. AO advances by 2 * SIZE, BO by 4 * SIZE. */
  1579. ADD3 c10, t2, c10
  1580. unop
  1581. MUL a2, b1, t2
  1582. LD b1, 0 * SIZE(BO)
  1583. ADD4 c13, t3, c13
  1584. unop
  1585. MUL a1, b2, t3
  1586. unop
  1587. ADD2 c14, t4, c14
  1588. unop
  1589. MUL a2, b2, t4
  1590. LD b2, 1 * SIZE(BO)
  1591. ADD1 c01, t1, c01
  1592. unop
  1593. MUL a1, b3, t1
  /* AO moves forward here, so the a1/a2 reloads below use -2 and -1. */
  1594. lda AO, 2 * SIZE(AO)
  1595. ADD3 c02, t2, c02
  1596. unop
  1597. MUL a2, b3, t2
  1598. LD b3, 2 * SIZE(BO)
  1599. ADD4 c05, t3, c05
  1600. unop
  1601. MUL a1, b4, t3
  1602. LD a1, -2 * SIZE(AO)
  1603. ADD2 c06, t4, c06
  1604. unop
  1605. MUL a2, b4, t4
  1606. LD a2, -1 * SIZE(AO)
  /* Same lead-in as at the top of $L25, so $L27 sees the same pending state
     on both paths. */
  1607. ADD1 c09, t1, c09
  1608. LD b4, 3 * SIZE(BO)
  1609. MUL a1, b1, t1
  1610. lda BO, 4 * SIZE(BO)
  1611. .align 4
  /* $L27: last multiply-accumulate step for the M & 1 remainder. Consumes
     the operands already in a1,a2 / b1..b4 with no further loads, advances
     AO by 2 * SIZE and BO by 4 * SIZE, drains t1..t4, and folds the eight
     partial sums into c01, c02, c09, c10. */
  1612. $L27:
  1613. ADD3 c10, t2, c10
  1614. MUL a2, b1, t2
  1615. ADD4 c13, t3, c13
  1616. MUL a1, b2, t3
  1617. ADD2 c14, t4, c14
  1618. MUL a2, b2, t4
  1619. ADD1 c01, t1, c01
  1620. MUL a1, b3, t1
  1621. ADD3 c02, t2, c02
  1622. MUL a2, b3, t2
  1623. ADD4 c05, t3, c05
  1624. MUL a1, b4, t3
  1625. ADD2 c06, t4, c06
  1626. lda AO, 2 * SIZE(AO)
  1627. MUL a2, b4, t4
  1628. lda BO, 4 * SIZE(BO)
  /* Drain the last four products. */
  1629. ADD1 c09, t1, c09
  1630. ADD3 c10, t2, c10
  1631. ADD4 c13, t3, c13
  1632. ADD2 c14, t4, c14
  /* Fold partner accumulators: c06->c01, c05->c02, c14->c09, c13->c10. */
  1633. ADD c01, c06, c01
  1634. ADD c02, c05, c02
  1635. ADD c09, c14, c09
  1636. ADD c10, c13, c10
  1637. .align 4
  /* $L28: solve and write-back for the M & 1 remainder. Only c01, c02, c09
     and c10 are live here. Steps match $L18: locate the packed data, form
     (packed value - accumulator), run the variant-specific substitution,
     store to the packed buffer and to C1/C2, then update pointers and KK.
     SUB / MUL / ADD5 / ADD6 / LD / ST are macros defined outside this chunk. */
  1638. $L28:
  1639. #if defined(LN) || defined(RT)
  /* TMP1 = KK - 1 for LN, KK - 2 for RT. */
  1640. #ifdef LN
  1641. subq KK, 1, TMP1
  1642. #else
  1643. subq KK, 2, TMP1
  1644. #endif
  /* AO = AORIG + (TMP1 << ZBASE_SHIFT); BO = B + (TMP1 << (ZBASE_SHIFT + 1)). */
  1645. sll TMP1, ZBASE_SHIFT + 0, TMP2
  1646. addq AORIG, TMP2, AO
  1647. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1648. addq B, TMP2, BO
  1649. #else
  /* LT / RN: step AO back by 2 * SIZE and BO back by 4 * SIZE. */
  1650. lda AO, -2 * SIZE(AO)
  1651. lda BO, -4 * SIZE(BO)
  1652. #endif
  /* Both arms use the same slot order (c01, c02, c09, c10); only the source
     buffer differs: BO for LN/LT, AO for RN/RT. */
  1653. #if defined(LN) || defined(LT)
  1654. LD a1, 0 * SIZE(BO)
  1655. LD a2, 1 * SIZE(BO)
  1656. LD a3, 2 * SIZE(BO)
  1657. LD a4, 3 * SIZE(BO)
  1658. SUB a1, c01, c01
  1659. SUB a2, c02, c02
  1660. SUB a3, c09, c09
  1661. SUB a4, c10, c10
  1662. #else
  1663. LD a1, 0 * SIZE(AO)
  1664. LD a2, 1 * SIZE(AO)
  1665. LD a3, 2 * SIZE(AO)
  1666. LD a4, 3 * SIZE(AO)
  1667. SUB a1, c01, c01
  1668. SUB a2, c02, c02
  1669. SUB a3, c09, c09
  1670. SUB a4, c10, c10
  1671. #endif
  1672. #if defined(LN) || defined(LT)
  /* LN and LT share one path here: scale (c01,c02) and (c09,c10) by the
     pair at AO[0..1]. The cross-product plus ADD5/ADD6 pattern has the
     shape of a complex multiply - NOTE(review): sign convention is in
     ADD5/ADD6, not visible here. */
  1673. LD a1, 0 * SIZE(AO)
  1674. LD a2, 1 * SIZE(AO)
  1675. MUL a2, c02, t1
  1676. MUL a2, c01, t2
  1677. MUL a2, c10, t3
  1678. MUL a2, c09, t4
  1679. MUL a1, c01, c01
  1680. MUL a1, c02, c02
  1681. MUL a1, c09, c09
  1682. MUL a1, c10, c10
  1683. ADD5 c01, t1, c01
  1684. ADD6 c02, t2, c02
  1685. ADD5 c09, t3, c09
  1686. ADD6 c10, t4, c10
  1687. #endif
  1688. #ifdef RN
  /* RN: scale (c01,c02) by BO[0..1], remove its BO[2..3] multiple from
     (c09,c10), then scale (c09,c10) by BO[6..7]. */
  1689. LD a1, 0 * SIZE(BO)
  1690. LD a2, 1 * SIZE(BO)
  1691. LD a3, 2 * SIZE(BO)
  1692. LD a4, 3 * SIZE(BO)
  1693. MUL a2, c02, t1
  1694. MUL a2, c01, t2
  1695. MUL a1, c01, c01
  1696. MUL a1, c02, c02
  1697. ADD5 c01, t1, c01
  1698. ADD6 c02, t2, c02
  1699. MUL a3, c01, t1
  1700. MUL a3, c02, t2
  1701. SUB c09, t1, c09
  1702. SUB c10, t2, c10
  1703. MUL a4, c02, t1
  1704. MUL a4, c01, t2
  1705. ADD6 c09, t1, c09
  1706. ADD5 c10, t2, c10
  1707. LD a1, 6 * SIZE(BO)
  1708. LD a2, 7 * SIZE(BO)
  1709. MUL a2, c10, t1
  1710. MUL a2, c09, t2
  1711. MUL a1, c09, c09
  1712. MUL a1, c10, c10
  1713. ADD5 c09, t1, c09
  1714. ADD6 c10, t2, c10
  1715. #endif
  1716. #ifdef RT
  /* RT: mirror image of RN. Scale (c09,c10) by BO[6..7], remove its
     BO[4..5] multiple from (c01,c02), then scale (c01,c02) by BO[0..1]. */
  1717. LD a1, 6 * SIZE(BO)
  1718. LD a2, 7 * SIZE(BO)
  1719. LD a3, 4 * SIZE(BO)
  1720. LD a4, 5 * SIZE(BO)
  1721. MUL a2, c10, t1
  1722. MUL a2, c09, t2
  1723. MUL a1, c09, c09
  1724. MUL a1, c10, c10
  1725. ADD5 c09, t1, c09
  1726. ADD6 c10, t2, c10
  1727. MUL a3, c09, t1
  1728. MUL a3, c10, t2
  1729. SUB c01, t1, c01
  1730. SUB c02, t2, c02
  1731. MUL a4, c10, t1
  1732. MUL a4, c09, t2
  1733. ADD6 c01, t1, c01
  1734. ADD5 c02, t2, c02
  1735. LD a1, 0 * SIZE(BO)
  1736. LD a2, 1 * SIZE(BO)
  1737. MUL a2, c02, t1
  1738. MUL a2, c01, t2
  1739. MUL a1, c01, c01
  1740. MUL a1, c02, c02
  1741. ADD5 c01, t1, c01
  1742. ADD6 c02, t2, c02
  1743. #endif
  /* Write the results back into the packed buffer they were read from. */
  1744. #if defined(LN) || defined(LT)
  1745. ST c01, 0 * SIZE(BO)
  1746. ST c02, 1 * SIZE(BO)
  1747. ST c09, 2 * SIZE(BO)
  1748. ST c10, 3 * SIZE(BO)
  1749. #else
  1750. ST c01, 0 * SIZE(AO)
  1751. ST c02, 1 * SIZE(AO)
  1752. ST c09, 2 * SIZE(AO)
  1753. ST c10, 3 * SIZE(AO)
  1754. #endif
  /* Store to the output columns: (c01,c02) to C1, (c09,c10) to C2. LN moves
     C1/C2 back by 2 * SIZE first; every other variant moves them forward by
     2 * SIZE afterwards. */
  1755. #ifdef LN
  1756. lda C1, -2 * SIZE(C1)
  1757. lda C2, -2 * SIZE(C2)
  1758. #endif
  1759. ST c01, 0 * SIZE(C1)
  1760. ST c02, 1 * SIZE(C1)
  1761. ST c09, 0 * SIZE(C2)
  1762. ST c10, 1 * SIZE(C2)
  1763. #ifndef LN
  1764. lda C1, 2 * SIZE(C1)
  1765. lda C2, 2 * SIZE(C2)
  1766. #endif
  /* RT: AORIG += K << ZBASE_SHIFT. */
  1767. #ifdef RT
  1768. sll K, ZBASE_SHIFT, TMP1
  1769. addq AORIG, TMP1, AORIG
  1770. #endif
  /* LT / RN: with TMP1 = K - KK, advance AO by TMP1 << ZBASE_SHIFT and BO by
     TMP1 << (ZBASE_SHIFT + 1). */
  1771. #if defined(LT) || defined(RN)
  1772. subq K, KK, TMP1
  1773. sll TMP1, ZBASE_SHIFT + 0, TMP2
  1774. addq AO, TMP2, AO
  1775. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1776. addq BO, TMP2, BO
  1777. #endif
  /* KK moves by 1 for the left-side variants: up for LT, down for LN. */
  1778. #ifdef LT
  1779. addq KK, 1, KK
  1780. #endif
  1781. #ifdef LN
  1782. subq KK, 1, KK
  1783. #endif
  1784. .align 4
  /* $L29: bookkeeping at the end of one pass of the J loop.
       LN      : B += K << (ZBASE_SHIFT + 1)
       LT / RN : B = BO (continue from where the inner code left BO)
       RN      : KK += 2
       RT      : KK -= 2
     Then J is decremented and control returns to $L01 while J > 0. */
  1785. $L29:
  1786. #ifdef LN
  1787. sll K, ZBASE_SHIFT + 1, TMP1
  1788. addq B, TMP1, B
  1789. #endif
  1790. #if defined(LT) || defined(RN)
  1791. mov BO, B
  1792. #endif
  1793. #ifdef RN
  1794. addq KK, 2, KK
  1795. #endif
  1796. #ifdef RT
  1797. subq KK, 2, KK
  1798. #endif
  1799. lda J, -1(J)
  1800. bgt J, $L01
  1801. .align 4
  /* $L999: common exit. Reloads floating-point registers $f2..$f9 from the
     eight 8-byte slots at 0..56($sp) (presumably saved by the prologue,
     which is outside this chunk), zeroes integer register $0, releases
     STACKSIZE bytes of stack, and returns. */
  1802. $L999:
  1803. ldt $f2, 0($sp)
  1804. ldt $f3, 8($sp)
  1805. ldt $f4, 16($sp)
  1806. ldt $f5, 24($sp)
  1807. ldt $f6, 32($sp)
  1808. ldt $f7, 40($sp)
  1809. ldt $f8, 48($sp)
  1810. ldt $f9, 56($sp)
  /* $0 = 0; NOTE(review): $0 is the integer return-value register under the
     Alpha calling standard, so this looks like "return 0" - confirm. */
  1811. clr $0
  1812. lda $sp, STACKSIZE($sp)
  1813. ret
  1814. .ident VERSION
  1815. .end CNAME