You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ztrsm_kernel_2x2_RT.S 33 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build-time configuration: shared headers plus per-CPU tuning macros. */
  /* NOTE(review): ASSEMBLER is presumably read by common.h to select its */
  /* assembly-side definitions; confirm against common.h. */
  38. #define ASSEMBLER
  39. #include "common.h"
  40. #include "version.h"
  /* Refuse to build unless at least one of EV4, EV5 or EV6 is defined. */
  41. #if !defined(EV4) && !defined(EV5) && !defined(EV6)
  42. #error "Architecture is not specified."
  43. #endif
  /* EV6: PREFETCHSIZE is an element offset (it is multiplied by SIZE) used */
  /* by the loads into $31 from AO and BO in the $L12 loop; going by the */
  /* name, that is the prefetch distance. UNOP expands to a real unop. */
  44. #ifdef EV6
  45. #define PREFETCHSIZE 56
  46. #define UNOP unop
  47. #endif
  /* EV5: smaller PREFETCHSIZE, and UNOP expands to nothing. */
  48. #ifdef EV5
  49. #define PREFETCHSIZE 48
  50. #define UNOP
  51. #endif
  /* EV4: UNOP expands to nothing. PREFETCHSIZE is left undefined; the */
  /* visible uses of PREFETCHSIZE are all wrapped in #ifndef EV4. */
  52. #ifdef EV4
  53. #define UNOP
  54. #endif
  /* Assembler setup and symbol declaration for the kernel entry point. */
  /* noat: this file aliases AO to $at, so the assembler must leave $at to */
  /* the programmer. NOTE(review): confirm against the GNU as Alpha docs. */
  55. .set noat
  56. .set noreorder
  /* NOTE(review): .arch is ev6 unconditionally, even though the file also */
  /* accepts EV4 and EV5 builds; confirm this is intended. */
  57. .arch ev6
  58. .text
  59. .align 5
  /* CNAME is not defined in the visible part of this file; presumably it */
  /* comes from common.h and names the exported kernel symbol (TODO confirm). */
  60. .globl CNAME
  61. .ent CNAME
  /* Register map for the kernel. */
  /* STACKSIZE: bytes of stack frame allocated by the prologue */
  /* (lda $sp, -STACKSIZE($sp)). The visible prologue stores $f2-$f9 at */
  /* offsets 0..56 of that frame. */
  62. #define STACKSIZE 80
  /* Integer arguments. M, N, K and A are used straight from their */
  /* registers; B, C, LDC and OFFSET are loaded by the prologue from the */
  /* caller's stack at STACKSIZE + 0, 8, 16 and 24. */
  63. #define M $16
  64. #define N $17
  65. #define K $18
  66. #define A $21
  67. #define B $22
  68. #define C $20
  69. #define LDC $23
  /* C1, C2: working pointers derived from C and LDC at the top of each */
  /* column pass. */
  70. #define C1 $19
  71. #define C2 $24
  /* AO, BO: working pointers that walk the A and B data. AO lives in $at, */
  /* which is why the file uses .set noat. */
  72. #define AO $at
  73. #define BO $5
  /* Loop counters: I is derived from M, J from N, and L counts down the */
  /* inner loop in steps of 2. */
  74. #define I $6
  75. #define J $7
  76. #define L $8
  /* Floating-point operands loaded through AO (a*) and BO (b*). */
  77. #define a1 $f16
  78. #define a2 $f17
  79. #define a3 $f18
  80. #define a4 $f19
  81. #define b1 $f20
  82. #define b2 $f21
  83. #define b3 $f22
  84. #define b4 $f23
  /* t1-t4 receive the MUL results before they are folded into c*. */
  85. #define t1 $f24
  86. #define t2 $f25
  87. #define t3 $f26
  88. #define t4 $f27
  /* Extra operand registers for the unrolled loops. Note the sharing: */
  /* a6 and alpha_r are both $f30, b5 and alpha_i are both $f29. */
  /* alpha_r and alpha_i are not referenced in the visible part of the file. */
  89. #define a5 $f28
  90. #define a6 $f30
  91. #define b5 $f29
  92. #define alpha_i $f29
  93. #define alpha_r $f30
  /* Accumulators c01-c16 occupy $f0-$f15. The prologue saves $f2-$f9 */
  /* (c03-c10) to the stack before the kernel uses them. */
  94. #define c01 $f0
  95. #define c02 $f1
  96. #define c03 $f2
  97. #define c04 $f3
  98. #define c05 $f4
  99. #define c06 $f5
  100. #define c07 $f6
  101. #define c08 $f7
  102. #define c09 $f8
  103. #define c10 $f9
  104. #define c11 $f10
  105. #define c12 $f11
  106. #define c13 $f12
  107. #define c14 $f13
  108. #define c15 $f14
  109. #define c16 $f15
  /* Integer scratch and bookkeeping. $0-$2 are also used by number in the */
  /* prologue's M/N/K <= 0 check before they take on these roles. */
  110. #define TMP1 $0
  111. #define TMP2 $1
  /* KK is initialised from OFFSET (per LN/LT/RN/RT variant) and adjusted */
  /* as blocks are processed; AORIG keeps a copy of A for the LN/RT paths. */
  112. #define KK $2
  113. #define AORIG $3
  114. #define OFFSET $4
  /* ADD1..ADD6 select ADD or SUB per build variant. ADD1-ADD4 are used by */
  /* the accumulation loops and ADD5/ADD6 by the solve steps. The choice */
  /* depends on LN/LT versus the remaining variants, and on CONJ. */
  /* Left-side variants (LN or LT). */
  115. #if defined(LN) || defined(LT)
  /* Without CONJ. */
  116. #ifndef CONJ
  117. #define ADD1 ADD
  118. #define ADD2 SUB
  119. #define ADD3 ADD
  120. #define ADD4 ADD
  121. #define ADD5 SUB
  122. #define ADD6 ADD
  /* With CONJ: ADD2, ADD3, ADD5 and ADD6 flip relative to the set above. */
  123. #else
  124. #define ADD1 ADD
  125. #define ADD2 ADD
  126. #define ADD3 SUB
  127. #define ADD4 ADD
  128. #define ADD5 ADD
  129. #define ADD6 SUB
  130. #endif
  /* All other variants (neither LN nor LT defined). */
  131. #else
  /* Without CONJ: identical to the LN/LT non-CONJ set. */
  132. #ifndef CONJ
  133. #define ADD1 ADD
  134. #define ADD2 SUB
  135. #define ADD3 ADD
  136. #define ADD4 ADD
  137. #define ADD5 SUB
  138. #define ADD6 ADD
  /* With CONJ: differs from the LN/LT CONJ set only in ADD3 and ADD4, */
  /* which are swapped (ADD3 is ADD and ADD4 is SUB here). */
  139. #else
  140. #define ADD1 ADD
  141. #define ADD2 ADD
  142. #define ADD3 ADD
  143. #define ADD4 SUB
  144. #define ADD5 ADD
  145. #define ADD6 SUB
  146. #endif
  147. #endif
  148. CNAME:
  149. .frame $sp, STACKSIZE, $26, 0
  150. #ifdef PROFILE
  151. ldgp $gp, 0($27)
  152. lda $at, _mcount
  153. jsr $at, ($at), _mcount
  154. #endif
  155. #ifndef PROFILE
  156. .prologue 0
  157. #else
  158. .prologue 1
  159. #endif
  160. lda $sp, -STACKSIZE($sp)
  161. ldq B, 0 + STACKSIZE($sp)
  162. ldq C, 8 + STACKSIZE($sp)
  163. ldq LDC, 16 + STACKSIZE($sp)
  164. ldq OFFSET, 24 + STACKSIZE($sp)
  165. sll LDC, ZBASE_SHIFT, LDC
  166. stt $f2, 0($sp)
  167. stt $f3, 8($sp)
  168. stt $f4, 16($sp)
  169. stt $f5, 24($sp)
  170. stt $f6, 32($sp)
  171. stt $f7, 40($sp)
  172. stt $f8, 48($sp)
  173. stt $f9, 56($sp)
  174. cmple M, 0, $0
  175. cmple N, 0, $1
  176. cmple K, 0, $2
  177. or $0, $1, $0
  178. or $0, $2, $0
  179. bne $0, $L999
  180. #ifdef LN
  181. addq M, M, TMP2
  182. mulq TMP2, K, TMP1
  183. SXADDQ TMP1, A, A
  184. SXADDQ TMP2, C, C
  185. #endif
  186. #ifdef RN
  187. negq OFFSET, KK
  188. #endif
  189. #ifdef RT
  190. mulq N, K, TMP1
  191. addq TMP1, TMP1, TMP1
  192. SXADDQ TMP1, B, B
  193. mulq N, LDC, TMP1
  194. addq TMP1, C, C
  195. subq N, OFFSET, KK
  196. #endif
  197. and N, 1, J
  198. ble J, $L30
  199. #ifdef RT
  200. sll K, ZBASE_SHIFT, TMP1
  201. subq B, TMP1, B
  202. subq C, LDC, C1
  203. subq C, LDC, C
  204. #else
  205. mov C, C1
  206. addq C, LDC, C
  207. #endif
  208. #ifdef LN
  209. addq M, OFFSET, KK
  210. #endif
  211. #ifdef LT
  212. mov OFFSET, KK
  213. #endif
  214. #if defined(LN) || defined(RT)
  215. mov A, AORIG
  216. #else
  217. mov A, AO
  218. #endif
  219. sra M, 1, I
  220. ble I, $L50
  221. .align 4
  222. $L41:
  223. #if defined(LT) || defined(RN)
  224. LD a1, 0 * SIZE(AO)
  225. fclr t1
  226. LD a2, 1 * SIZE(AO)
  227. fclr t2
  228. LD a3, 2 * SIZE(AO)
  229. fclr t3
  230. LD a4, 3 * SIZE(AO)
  231. fclr t4
  232. LD b1, 0 * SIZE(B)
  233. fclr c01
  234. LD b2, 1 * SIZE(B)
  235. fclr c05
  236. LD b3, 2 * SIZE(B)
  237. fclr c02
  238. LD b4, 3 * SIZE(B)
  239. fclr c06
  240. lda BO, 2 * SIZE(B)
  241. fclr c03
  242. lda AO, 4 * SIZE(AO)
  243. fclr c07
  244. lda L, -2(KK)
  245. fclr c04
  246. fclr c08
  247. ble KK, $L48
  248. ble L, $L45
  249. #else
  250. #ifdef LN
  251. sll K, ZBASE_SHIFT + 1, TMP1
  252. subq AORIG, TMP1, AORIG
  253. #endif
  254. sll KK, ZBASE_SHIFT + 1, TMP1
  255. addq AORIG, TMP1, AO
  256. sll KK, ZBASE_SHIFT, TMP1
  257. addq B, TMP1, BO
  258. subq K, KK, TMP1
  259. LD a1, 0 * SIZE(AO)
  260. fclr t1
  261. LD a2, 1 * SIZE(AO)
  262. fclr t2
  263. LD a3, 2 * SIZE(AO)
  264. fclr t3
  265. LD a4, 3 * SIZE(AO)
  266. fclr t4
  267. LD b1, 0 * SIZE(BO)
  268. fclr c01
  269. LD b2, 1 * SIZE(BO)
  270. fclr c05
  271. LD b3, 2 * SIZE(BO)
  272. fclr c02
  273. LD b4, 3 * SIZE(BO)
  274. fclr c06
  275. lda BO, 2 * SIZE(BO)
  276. fclr c03
  277. lda AO, 4 * SIZE(AO)
  278. fclr c07
  279. lda L, -2(TMP1)
  280. fclr c04
  281. fclr c08
  282. ble TMP1, $L48
  283. ble L, $L45
  284. #endif
  285. .align 5
  286. $L42:
  287. ADD4 c05, t1, c05
  288. unop
  289. MUL a1, b1, t1
  290. unop
  291. ADD2 c06, t2, c06
  292. lda L, -2(L)
  293. MUL a2, b1, t2
  294. unop
  295. ADD4 c07, t3, c07
  296. unop
  297. MUL a3, b1, t3
  298. unop
  299. ADD2 c08, t4, c08
  300. unop
  301. MUL a4, b1, t4
  302. LD b1, 2 * SIZE(BO)
  303. ADD1 c01, t1, c01
  304. unop
  305. MUL a1, b2, t1
  306. LD a1, 0 * SIZE(AO)
  307. ADD3 c02, t2, c02
  308. lda BO, 4 * SIZE(BO)
  309. MUL a2, b2, t2
  310. LD a2, 1 * SIZE(AO)
  311. ADD1 c03, t3, c03
  312. unop
  313. MUL a3, b2, t3
  314. LD a3, 2 * SIZE(AO)
  315. ADD3 c04, t4, c04
  316. unop
  317. MUL a4, b2, t4
  318. LD a5, 3 * SIZE(AO)
  319. ADD4 c05, t1, c05
  320. unop
  321. MUL a1, b3, t1
  322. LD b2, -1 * SIZE(BO)
  323. ADD2 c06, t2, c06
  324. unop
  325. MUL a2, b3, t2
  326. unop
  327. ADD4 c07, t3, c07
  328. unop
  329. MUL a3, b3, t3
  330. lda AO, 8 * SIZE(AO)
  331. ADD2 c08, t4, c08
  332. unop
  333. MUL a5, b3, t4
  334. LD b3, 0 * SIZE(BO)
  335. ADD1 c01, t1, c01
  336. unop
  337. MUL a1, b4, t1
  338. LD a1, -4 * SIZE(AO)
  339. ADD3 c02, t2, c02
  340. unop
  341. MUL a2, b4, t2
  342. LD a2, -3 * SIZE(AO)
  343. ADD1 c03, t3, c03
  344. LD a4, -1 * SIZE(AO)
  345. MUL a3, b4, t3
  346. LD a3, -2 * SIZE(AO)
  347. ADD3 c04, t4, c04
  348. MUL a5, b4, t4
  349. LD b4, 1 * SIZE(BO)
  350. bgt L, $L42
  351. .align 4
  352. $L45:
  353. ADD4 c05, t1, c05
  354. MUL b1, a1, t1
  355. #if defined(LT) || defined(RN)
  356. blbs KK, $L47
  357. #else
  358. blbs TMP1, $L47
  359. #endif
  360. .align 4
  361. ADD2 c06, t2, c06
  362. MUL a2, b1, t2
  363. ADD4 c07, t3, c07
  364. MUL a3, b1, t3
  365. ADD2 c08, t4, c08
  366. unop
  367. MUL a4, b1, t4
  368. LD b1, 0 * SIZE(BO)
  369. ADD1 c01, t1, c01
  370. unop
  371. MUL a1, b2, t1
  372. LD a1, 0 * SIZE(AO)
  373. ADD3 c02, t2, c02
  374. unop
  375. MUL a2, b2, t2
  376. LD a2, 1 * SIZE(AO)
  377. ADD1 c03, t3, c03
  378. unop
  379. MUL a3, b2, t3
  380. LD a3, 2 * SIZE(AO)
  381. ADD3 c04, t4, c04
  382. MUL a4, b2, t4
  383. LD a4, 3 * SIZE(AO)
  384. lda AO, 4 * SIZE(AO)
  385. ADD4 c05, t1, c05
  386. LD b2, 1 * SIZE(BO)
  387. MUL a1, b1, t1
  388. lda BO, 2 * SIZE(BO)
  389. .align 4
  390. $L47:
  391. ADD2 c06, t2, c06
  392. MUL a2, b1, t2
  393. ADD4 c07, t3, c07
  394. MUL a3, b1, t3
  395. ADD2 c08, t4, c08
  396. MUL a4, b1, t4
  397. ADD1 c01, t1, c01
  398. MUL a1, b2, t1
  399. ADD3 c02, t2, c02
  400. MUL a2, b2, t2
  401. ADD1 c03, t3, c03
  402. MUL a3, b2, t3
  403. ADD3 c04, t4, c04
  404. lda AO, 4 * SIZE(AO)
  405. MUL a4, b2, t4
  406. lda BO, 2 * SIZE(BO)
  407. ADD4 c05, t1, c05
  408. ADD2 c06, t2, c06
  409. ADD4 c07, t3, c07
  410. ADD2 c08, t4, c08
  411. ADD c01, c06, c01
  412. ADD c02, c05, c02
  413. ADD c03, c08, c03
  414. ADD c04, c07, c04
  415. $L48:
  416. #if defined(LN) || defined(RT)
  417. #ifdef LN
  418. subq KK, 2, TMP1
  419. #else
  420. subq KK, 1, TMP1
  421. #endif
  422. sll TMP1, ZBASE_SHIFT + 1, TMP2
  423. addq AORIG, TMP2, AO
  424. sll TMP1, ZBASE_SHIFT, TMP2
  425. addq B, TMP2, BO
  426. #else
  427. lda AO, -4 * SIZE(AO)
  428. lda BO, -2 * SIZE(BO)
  429. #endif
  430. #if defined(LN) || defined(LT)
  431. LD a1, 0 * SIZE(BO)
  432. LD a2, 1 * SIZE(BO)
  433. LD a3, 2 * SIZE(BO)
  434. LD a4, 3 * SIZE(BO)
  435. SUB a1, c01, c01
  436. SUB a2, c02, c02
  437. SUB a3, c03, c03
  438. SUB a4, c04, c04
  439. #else
  440. LD a1, 0 * SIZE(AO)
  441. LD a2, 1 * SIZE(AO)
  442. LD a3, 2 * SIZE(AO)
  443. LD a4, 3 * SIZE(AO)
  444. SUB a1, c01, c01
  445. SUB a2, c02, c02
  446. SUB a3, c03, c03
  447. SUB a4, c04, c04
  448. #endif
  449. #ifdef LN
  450. LD a1, 6 * SIZE(AO)
  451. LD a2, 7 * SIZE(AO)
  452. LD a3, 4 * SIZE(AO)
  453. LD a4, 5 * SIZE(AO)
  454. MUL a2, c04, t1
  455. MUL a2, c03, t2
  456. MUL a1, c03, c03
  457. MUL a1, c04, c04
  458. ADD5 c03, t1, c03
  459. ADD6 c04, t2, c04
  460. MUL a3, c03, t1
  461. MUL a3, c04, t2
  462. SUB c01, t1, c01
  463. SUB c02, t2, c02
  464. MUL a4, c04, t1
  465. MUL a4, c03, t2
  466. ADD6 c01, t1, c01
  467. ADD5 c02, t2, c02
  468. LD a1, 0 * SIZE(AO)
  469. LD a2, 1 * SIZE(AO)
  470. MUL a2, c02, t1
  471. MUL a2, c01, t2
  472. MUL a1, c01, c01
  473. MUL a1, c02, c02
  474. ADD5 c01, t1, c01
  475. ADD6 c02, t2, c02
  476. #endif
  477. #ifdef LT
  478. LD a1, 0 * SIZE(AO)
  479. LD a2, 1 * SIZE(AO)
  480. LD a3, 2 * SIZE(AO)
  481. LD a4, 3 * SIZE(AO)
  482. MUL a2, c02, t1
  483. MUL a2, c01, t2
  484. MUL a1, c01, c01
  485. MUL a1, c02, c02
  486. ADD5 c01, t1, c01
  487. ADD6 c02, t2, c02
  488. MUL a3, c01, t1
  489. MUL a3, c02, t2
  490. SUB c03, t1, c03
  491. SUB c04, t2, c04
  492. MUL a4, c02, t1
  493. MUL a4, c01, t2
  494. ADD6 c03, t1, c03
  495. ADD5 c04, t2, c04
  496. LD a1, 6 * SIZE(AO)
  497. LD a2, 7 * SIZE(AO)
  498. MUL a2, c04, t1
  499. MUL a2, c03, t2
  500. MUL a1, c03, c03
  501. MUL a1, c04, c04
  502. ADD5 c03, t1, c03
  503. ADD6 c04, t2, c04
  504. #endif
  505. #if defined(RN) || defined(RT)
  506. LD a1, 0 * SIZE(BO)
  507. LD a2, 1 * SIZE(BO)
  508. MUL a2, c02, t1
  509. MUL a2, c01, t2
  510. MUL a2, c04, t3
  511. MUL a2, c03, t4
  512. MUL a1, c01, c01
  513. MUL a1, c02, c02
  514. MUL a1, c03, c03
  515. MUL a1, c04, c04
  516. ADD5 c01, t1, c01
  517. ADD6 c02, t2, c02
  518. ADD5 c03, t3, c03
  519. ADD6 c04, t4, c04
  520. #endif
  521. #if defined(LN) || defined(LT)
  522. ST c01, 0 * SIZE(BO)
  523. ST c02, 1 * SIZE(BO)
  524. ST c03, 2 * SIZE(BO)
  525. ST c04, 3 * SIZE(BO)
  526. #else
  527. ST c01, 0 * SIZE(AO)
  528. ST c02, 1 * SIZE(AO)
  529. ST c03, 2 * SIZE(AO)
  530. ST c04, 3 * SIZE(AO)
  531. #endif
  532. #ifdef LN
  533. lda C1, -4 * SIZE(C1)
  534. #endif
  535. ST c01, 0 * SIZE(C1)
  536. ST c02, 1 * SIZE(C1)
  537. ST c03, 2 * SIZE(C1)
  538. ST c04, 3 * SIZE(C1)
  539. #ifndef LN
  540. lda C1, 4 * SIZE(C1)
  541. #endif
  542. #ifdef RT
  543. sll K, ZBASE_SHIFT + 1, TMP1
  544. addq AORIG, TMP1, AORIG
  545. #endif
  546. #if defined(LT) || defined(RN)
  547. subq K, KK, TMP1
  548. sll TMP1, ZBASE_SHIFT + 1, TMP2
  549. addq AO, TMP2, AO
  550. sll TMP1, ZBASE_SHIFT, TMP2
  551. addq BO, TMP2, BO
  552. #endif
  553. #ifdef LT
  554. addq KK, 2, KK
  555. #endif
  556. #ifdef LN
  557. subq KK, 2, KK
  558. #endif
  559. lda I, -1(I)
  560. bgt I, $L41
  561. .align 4
  562. $L50:
  563. and M, 1, I
  564. ble I, $L59
  565. #if defined(LT) || defined(RN)
  566. LD a1, 0 * SIZE(AO)
  567. fclr t1
  568. LD a2, 1 * SIZE(AO)
  569. fclr t2
  570. LD a3, 2 * SIZE(AO)
  571. fclr t3
  572. LD a4, 3 * SIZE(AO)
  573. fclr t4
  574. LD b1, 0 * SIZE(B)
  575. fclr c01
  576. LD b2, 1 * SIZE(B)
  577. fclr c05
  578. LD b3, 2 * SIZE(B)
  579. fclr c02
  580. LD b4, 3 * SIZE(B)
  581. fclr c06
  582. lda AO, 2 * SIZE(AO)
  583. lda BO, 2 * SIZE(B)
  584. lda L, -2(KK)
  585. ble KK, $L58
  586. ble L, $L55
  587. #else
  588. #ifdef LN
  589. sll K, ZBASE_SHIFT, TMP1
  590. subq AORIG, TMP1, AORIG
  591. #endif
  592. sll KK, ZBASE_SHIFT, TMP1
  593. addq AORIG, TMP1, AO
  594. sll KK, ZBASE_SHIFT, TMP1
  595. addq B, TMP1, BO
  596. subq K, KK, TMP1
  597. LD a1, 0 * SIZE(AO)
  598. fclr t1
  599. LD a2, 1 * SIZE(AO)
  600. fclr t2
  601. LD a3, 2 * SIZE(AO)
  602. fclr t3
  603. LD a4, 3 * SIZE(AO)
  604. fclr t4
  605. LD b1, 0 * SIZE(BO)
  606. fclr c01
  607. LD b2, 1 * SIZE(BO)
  608. fclr c05
  609. LD b3, 2 * SIZE(BO)
  610. fclr c02
  611. LD b4, 3 * SIZE(BO)
  612. fclr c06
  613. lda AO, 2 * SIZE(AO)
  614. lda BO, 2 * SIZE(BO)
  615. lda L, -2(TMP1)
  616. ble TMP1, $L58
  617. ble L, $L55
  618. #endif
  619. .align 5
  620. $L52:
  621. ADD1 c01, t1, c01
  622. unop
  623. MUL a1, b1, t1
  624. unop
  625. ADD3 c02, t2, c02
  626. lda AO, 4 * SIZE(AO)
  627. MUL a2, b1, t2
  628. LD b1, 2 * SIZE(BO)
  629. ADD4 c05, t3, c05
  630. lda L, -2(L)
  631. MUL a1, b2, t3
  632. LD a1, -2 * SIZE(AO)
  633. ADD2 c06, t4, c06
  634. unop
  635. MUL a2, b2, t4
  636. LD a2, -1 * SIZE(AO)
  637. ADD1 c01, t1, c01
  638. LD b2, 3 * SIZE(BO)
  639. MUL a3, b3, t1
  640. lda BO, 4 * SIZE(BO)
  641. ADD3 c02, t2, c02
  642. unop
  643. MUL a4, b3, t2
  644. LD b3, 0 * SIZE(BO)
  645. ADD4 c05, t3, c05
  646. unop
  647. MUL a3, b4, t3
  648. LD a3, 0 * SIZE(AO)
  649. ADD2 c06, t4, c06
  650. MUL a4, b4, t4
  651. LD b4, 1 * SIZE(BO)
  652. unop
  653. LD a4, 1 * SIZE(AO)
  654. unop
  655. unop
  656. bgt L, $L52
  657. .align 4
  658. $L55:
  659. ADD1 c01, t1, c01
  660. MUL a1, b1, t1
  661. #if defined(LT) || defined(RN)
  662. blbs KK, $L57
  663. #else
  664. blbs TMP1, $L57
  665. #endif
  666. .align 4
  667. ADD3 c02, t2, c02
  668. unop
  669. MUL a2, b1, t2
  670. LD b1, 0 * SIZE(BO)
  671. ADD4 c05, t3, c05
  672. lda BO, 2 * SIZE(BO)
  673. MUL a1, b2, t3
  674. LD a1, 0 * SIZE(AO)
  675. ADD2 c06, t4, c06
  676. unop
  677. MUL a2, b2, t4
  678. LD a2, 1 * SIZE(AO)
  679. ADD1 c01, t1, c01
  680. LD b2, -1 * SIZE(BO)
  681. MUL a1, b1, t1
  682. lda AO, 2 * SIZE(AO)
  683. .align 4
  684. $L57:
  685. ADD3 c02, t2, c02
  686. MUL a2, b1, t2
  687. ADD4 c05, t3, c05
  688. MUL a1, b2, t3
  689. ADD2 c06, t4, c06
  690. lda AO, 2 * SIZE(AO)
  691. MUL a2, b2, t4
  692. lda BO, 2 * SIZE(BO)
  693. ADD1 c01, t1, c01
  694. ADD3 c02, t2, c02
  695. ADD4 c05, t3, c05
  696. ADD2 c06, t4, c06
  697. ADD c01, c06, c01
  698. ADD c02, c05, c02
  699. $L58:
  700. #if defined(LN) || defined(RT)
  701. subq KK, 1, TMP1
  702. sll TMP1, ZBASE_SHIFT, TMP2
  703. addq AORIG, TMP2, AO
  704. sll TMP1, ZBASE_SHIFT, TMP2
  705. addq B, TMP2, BO
  706. #else
  707. lda AO, -2 * SIZE(AO)
  708. lda BO, -2 * SIZE(BO)
  709. #endif
  710. #if defined(LN) || defined(LT)
  711. LD a1, 0 * SIZE(BO)
  712. LD a2, 1 * SIZE(BO)
  713. SUB a1, c01, c01
  714. SUB a2, c02, c02
  715. #else
  716. LD a1, 0 * SIZE(AO)
  717. LD a2, 1 * SIZE(AO)
  718. SUB a1, c01, c01
  719. SUB a2, c02, c02
  720. #endif
  721. #if defined(LN) || defined(LT)
  722. LD a1, 0 * SIZE(AO)
  723. LD a2, 1 * SIZE(AO)
  724. MUL a2, c02, t1
  725. MUL a2, c01, t2
  726. MUL a1, c01, c01
  727. MUL a1, c02, c02
  728. ADD5 c01, t1, c01
  729. ADD6 c02, t2, c02
  730. #endif
  731. #if defined(RN) || defined(RT)
  732. LD a1, 0 * SIZE(BO)
  733. LD a2, 1 * SIZE(BO)
  734. MUL a2, c02, t1
  735. MUL a2, c01, t2
  736. MUL a1, c01, c01
  737. MUL a1, c02, c02
  738. ADD5 c01, t1, c01
  739. ADD6 c02, t2, c02
  740. #endif
  741. #if defined(LN) || defined(LT)
  742. ST c01, 0 * SIZE(BO)
  743. ST c02, 1 * SIZE(BO)
  744. #else
  745. ST c01, 0 * SIZE(AO)
  746. ST c02, 1 * SIZE(AO)
  747. #endif
  748. #ifdef LN
  749. lda C1, -2 * SIZE(C1)
  750. #endif
  751. ST c01, 0 * SIZE(C1)
  752. ST c02, 1 * SIZE(C1)
  753. #ifndef LN
  754. lda C1, 2 * SIZE(C1)
  755. #endif
  756. #ifdef RT
  757. sll K, ZBASE_SHIFT, TMP1
  758. addq AORIG, TMP1, AORIG
  759. #endif
  760. #if defined(LT) || defined(RN)
  761. subq K, KK, TMP1
  762. sll TMP1, ZBASE_SHIFT, TMP2
  763. addq AO, TMP2, AO
  764. sll TMP1, ZBASE_SHIFT, TMP2
  765. addq BO, TMP2, BO
  766. #endif
  767. #ifdef LT
  768. addq KK, 1, KK
  769. #endif
  770. #ifdef LN
  771. subq KK, 1, KK
  772. #endif
  773. .align 4
  774. $L59:
  775. #ifdef LN
  776. sll K, ZBASE_SHIFT, TMP1
  777. addq B, TMP1, B
  778. #endif
  779. #if defined(LT) || defined(RN)
  780. mov BO, B
  781. #endif
  782. #ifdef RN
  783. addq KK, 1, KK
  784. #endif
  785. #ifdef RT
  786. subq KK, 1, KK
  787. #endif
  788. .align 4
  789. $L30:
  790. sra N, 1, J
  791. ble J, $L999
  792. .align 4
  793. $L01:
  794. #ifdef RT
  795. sll K, ZBASE_SHIFT + 1, TMP1
  796. subq B, TMP1, B
  797. subq C, LDC, C2
  798. subq C2, LDC, C1
  799. subq C2, LDC, C
  800. #else
  801. mov C, C1
  802. addq C, LDC, C2
  803. addq C2, LDC, C
  804. #endif
  805. #ifdef LN
  806. addq M, OFFSET, KK
  807. #endif
  808. #ifdef LT
  809. mov OFFSET, KK
  810. #endif
  811. #if defined(LN) || defined(RT)
  812. mov A, AORIG
  813. #else
  814. mov A, AO
  815. #endif
  816. sra M, 1, I
  817. fclr t1
  818. fclr t2
  819. fclr t3
  820. fclr t4
  821. fclr c01
  822. fclr c05
  823. ble I, $L20
  824. .align 4
  825. $L11:
  826. #if defined(LT) || defined(RN)
  827. LD a1, 0 * SIZE(AO)
  828. fclr c09
  829. LD a2, 1 * SIZE(AO)
  830. fclr c13
  831. LD a3, 2 * SIZE(AO)
  832. fclr c02
  833. LD a4, 3 * SIZE(AO)
  834. fclr c06
  835. LD b1, 0 * SIZE(B)
  836. fclr c10
  837. LD b2, 1 * SIZE(B)
  838. fclr c14
  839. LD b3, 2 * SIZE(B)
  840. fclr c03
  841. LD b4, 3 * SIZE(B)
  842. fclr c07
  843. lda BO, 4 * SIZE(B)
  844. fclr c11
  845. lda AO, 4 * SIZE(AO)
  846. fclr c15
  847. lds $f31, 4 * SIZE(C1)
  848. fclr c04
  849. lda L, -2(KK)
  850. fclr c08
  851. lds $f31, 4 * SIZE(C2)
  852. fclr c12
  853. fclr c16
  854. ble KK, $L18
  855. ble L, $L15
  856. #else
  857. #ifdef LN
  858. sll K, ZBASE_SHIFT + 1, TMP1
  859. subq AORIG, TMP1, AORIG
  860. #endif
  861. sll KK, ZBASE_SHIFT + 1, TMP1
  862. addq AORIG, TMP1, AO
  863. addq B, TMP1, BO
  864. subq K, KK, TMP1
  865. LD a1, 0 * SIZE(AO)
  866. fclr c09
  867. LD a2, 1 * SIZE(AO)
  868. fclr c13
  869. LD a3, 2 * SIZE(AO)
  870. fclr c02
  871. LD a4, 3 * SIZE(AO)
  872. fclr c06
  873. LD b1, 0 * SIZE(BO)
  874. fclr c10
  875. LD b2, 1 * SIZE(BO)
  876. fclr c14
  877. LD b3, 2 * SIZE(BO)
  878. fclr c03
  879. LD b4, 3 * SIZE(BO)
  880. fclr c07
  881. lda BO, 4 * SIZE(BO)
  882. fclr c11
  883. lda AO, 4 * SIZE(AO)
  884. fclr c15
  885. lds $f31, 4 * SIZE(C1)
  886. fclr c04
  887. lda L, -2(TMP1)
  888. fclr c08
  889. lds $f31, 4 * SIZE(C2)
  890. fclr c12
  891. fclr c16
  892. ble TMP1, $L18
  893. ble L, $L15
  894. #endif
  895. .align 5
  896. $L12:
  897. /* 1 */
  898. ADD1 c11, t1, c11
  899. #ifndef EV4
  900. ldq $31, PREFETCHSIZE * SIZE(AO)
  901. #else
  902. unop
  903. #endif
  904. MUL b1, a1, t1
  905. #ifndef EV4
  906. ldl $31, PREFETCHSIZE * SIZE(BO)
  907. #else
  908. unop
  909. #endif
  910. ADD3 c12, t2, c12
  911. unop
  912. MUL b1, a2, t2
  913. unop
  914. ADD2 c16, t3, c16
  915. unop
  916. MUL b2, a2, t3
  917. LD a5, 0 * SIZE(AO)
  918. ADD4 c15, t4, c15
  919. unop
  920. MUL b2, a1, t4
  921. LD b5, 0 * SIZE(BO)
  922. /* 2 */
  923. ADD1 c01, t1, c01
  924. UNOP
  925. MUL b1, a3, t1
  926. UNOP
  927. ADD3 c02, t2, c02
  928. UNOP
  929. MUL b1, a4, t2
  930. UNOP
  931. ADD2 c06, t3, c06
  932. unop
  933. MUL b2, a4, t3
  934. unop
  935. ADD4 c05, t4, c05
  936. unop
  937. MUL b4, a1, t4
  938. unop
  939. /* 3 */
  940. ADD1 c03, t1, c03
  941. unop
  942. MUL b3, a1, t1
  943. unop
  944. ADD3 c04, t2, c04
  945. unop
  946. MUL b3, a2, t2
  947. unop
  948. ADD2 c08, t3, c08
  949. unop
  950. MUL b4, a2, t3
  951. LD a2, 1 * SIZE(AO)
  952. ADD4 c13, t4, c13
  953. unop
  954. MUL b2, a3, t4
  955. LD b2, 1 * SIZE(BO)
  956. /* 4 */
  957. ADD1 c09, t1, c09
  958. unop
  959. MUL b3, a3, t1
  960. LD a6, 2 * SIZE(AO)
  961. ADD3 c10, t2, c10
  962. unop
  963. MUL b3, a4, t2
  964. LD b3, 2 * SIZE(BO)
  965. ADD2 c14, t3, c14
  966. unop
  967. MUL b4, a4, t3
  968. LD a4, 3 * SIZE(AO)
  969. ADD4 c07, t4, c07
  970. unop
  971. MUL b4, a3, t4
  972. LD b4, 3 * SIZE(BO)
  973. /* 5 */
  974. ADD1 c11, t1, c11
  975. unop
  976. MUL b5, a5, t1
  977. LD a1, 4 * SIZE(AO)
  978. ADD3 c12, t2, c12
  979. lda L, -2(L)
  980. MUL b5, a2, t2
  981. LD b1, 4 * SIZE(BO)
  982. ADD2 c16, t3, c16
  983. unop
  984. MUL b2, a2, t3
  985. unop
  986. ADD4 c15, t4, c15
  987. unop
  988. MUL b2, a5, t4
  989. unop
  990. /* 6 */
  991. ADD1 c01, t1, c01
  992. unop
  993. MUL b5, a6, t1
  994. unop
  995. ADD3 c02, t2, c02
  996. unop
  997. MUL b5, a4, t2
  998. unop
  999. ADD2 c06, t3, c06
  1000. unop
  1001. MUL b2, a4, t3
  1002. unop
  1003. ADD4 c05, t4, c05
  1004. unop
  1005. MUL b4, a5, t4
  1006. unop
  1007. /* 7 */
  1008. ADD1 c03, t1, c03
  1009. lda AO, 8 * SIZE(AO)
  1010. MUL b3, a5, t1
  1011. unop
  1012. ADD3 c04, t2, c04
  1013. lda BO, 8 * SIZE(BO)
  1014. MUL b3, a2, t2
  1015. unop
  1016. ADD2 c08, t3, c08
  1017. unop
  1018. MUL b4, a2, t3
  1019. LD a2, -3 * SIZE(AO)
  1020. ADD4 c13, t4, c13
  1021. unop
  1022. MUL b2, a6, t4
  1023. LD b2, -3 * SIZE(BO)
  1024. /* 8 */
  1025. ADD1 c09, t1, c09
  1026. unop
  1027. MUL b3, a6, t1
  1028. LD a3, -2 * SIZE(AO)
  1029. ADD3 c10, t2, c10
  1030. unop
  1031. MUL b3, a4, t2
  1032. LD b3, -2 * SIZE(BO)
  1033. ADD2 c14, t3, c14
  1034. unop
  1035. MUL b4, a4, t3
  1036. LD a4, -1 * SIZE(AO)
  1037. ADD4 c07, t4, c07
  1038. MUL b4, a6, t4
  1039. LD b4, -1 * SIZE(BO)
  1040. bgt L, $L12
  1041. .align 4
  /*
   * $L15: reached by fall-through when the `bgt L, $L12` loop above exits
   * (other entries may exist outside this view).
   *
   * Issues the pending ADD1/MUL pair, then tests the low bit of the count
   * register (KK when LT or RN is defined, TMP1 otherwise).  If the bit is
   * set, control jumps straight to $L17.  Otherwise the code below runs one
   * more pass of MUL / ADDn updates over c01-c16, reloads a1-a4 and b1-b4,
   * and advances AO and BO by 4 * SIZE each before falling into $L17.
   *
   * NOTE(review): ADD1-ADD4, MUL and LD are macros defined outside this
   * view; presumably ADD1-ADD4 pick add or subtract per conjugation mode --
   * confirm against the macro definitions.
   */
  1042. $L15:
  1043. ADD1 c11, t1, c11
  1044. unop
  1045. MUL b1, a1, t1
  1046. #if defined(LT) || defined(RN)
  /* blbs: branch when the low bit of KK is set. */
  1047. blbs KK, $L17
  1048. #else
  /* Same test, but on TMP1 for the LN / RT variants. */
  1049. blbs TMP1, $L17
  1050. #endif
  1051. .align 4
  /* Low bit clear: perform one additional update pass. */
  1052. ADD3 c12, t2, c12
  1053. MUL b1, a2, t2
  1054. ADD2 c16, t3, c16
  1055. MUL b2, a2, t3
  1056. ADD4 c15, t4, c15
  1057. MUL b2, a1, t4
  1058. ADD1 c01, t1, c01
  1059. MUL b1, a3, t1
  1060. ADD3 c02, t2, c02
  1061. unop
  1062. MUL b1, a4, t2
  /* b1 is no longer needed for this pass; reload it for the next one. */
  1063. LD b1, 0 * SIZE(BO)
  1064. ADD2 c06, t3, c06
  1065. MUL b2, a4, t3
  1066. ADD4 c05, t4, c05
  1067. MUL b4, a1, t4
  1068. ADD1 c03, t1, c03
  1069. unop
  1070. MUL b3, a1, t1
  1071. LD a1, 0 * SIZE(AO)
  1072. ADD3 c04, t2, c04
  1073. unop
  1074. MUL b3, a2, t2
  1075. unop
  1076. ADD2 c08, t3, c08
  1077. unop
  1078. MUL b4, a2, t3
  1079. LD a2, 1 * SIZE(AO)
  1080. ADD4 c13, t4, c13
  1081. unop
  1082. MUL b2, a3, t4
  1083. LD b2, 1 * SIZE(BO)
  1084. ADD1 c09, t1, c09
  1085. unop
  1086. MUL b3, a3, t1
  /* AO += 4 * SIZE; the a3/a4 reloads below use negative offsets from it. */
  1087. lda AO, 4 * SIZE(AO)
  1088. ADD3 c10, t2, c10
  1089. unop
  1090. MUL b3, a4, t2
  1091. LD b3, 2 * SIZE(BO)
  1092. ADD2 c14, t3, c14
  1093. unop
  1094. MUL b4, a4, t3
  1095. LD a4, -1 * SIZE(AO)
  1096. ADD4 c07, t4, c07
  1097. unop
  1098. MUL b4, a3, t4
  1099. LD a3, -2 * SIZE(AO)
  1100. ADD1 c11, t1, c11
  1101. LD b4, 3 * SIZE(BO)
  /* First product of the final pass, consumed in $L17. */
  1102. MUL b1, a1, t1
  /* BO += 4 * SIZE. */
  1103. lda BO, 4 * SIZE(BO)
  1104. .align 4
  /*
   * $L17: final update pass for the current tile.
   *
   * Entered either by the `blbs` branch in $L15 or by fall-through from the
   * extra pass there; in both cases t1 already holds the product b1 * a1.
   * Drains the outstanding t1-t4 products into the accumulators c01-c16,
   * advances AO and BO by 4 * SIZE each, and finally folds the sixteen
   * accumulators pairwise so that the results end up in c01-c04 and
   * c09-c12.
   */
  1105. $L17:
  1106. ADD3 c12, t2, c12
  1107. MUL b1, a2, t2
  1108. ADD2 c16, t3, c16
  1109. MUL b2, a2, t3
  1110. ADD4 c15, t4, c15
  1111. MUL b2, a1, t4
  1112. ADD1 c01, t1, c01
  1113. MUL b1, a3, t1
  1114. ADD3 c02, t2, c02
  1115. MUL b1, a4, t2
  1116. ADD2 c06, t3, c06
  1117. MUL b2, a4, t3
  1118. ADD4 c05, t4, c05
  1119. MUL b4, a1, t4
  1120. ADD1 c03, t1, c03
  1121. MUL b3, a1, t1
  1122. ADD3 c04, t2, c04
  1123. MUL b3, a2, t2
  1124. ADD2 c08, t3, c08
  1125. MUL b4, a2, t3
  1126. ADD4 c13, t4, c13
  1127. MUL b2, a3, t4
  1128. ADD1 c09, t1, c09
  1129. MUL b3, a3, t1
  1130. ADD3 c10, t2, c10
  1131. MUL b3, a4, t2
  1132. ADD2 c14, t3, c14
  1133. MUL b4, a4, t3
  1134. ADD4 c07, t4, c07
  /* AO += 4 * SIZE. */
  1135. lda AO, 4 * SIZE(AO)
  1136. MUL b4, a3, t4
  /* BO += 4 * SIZE. */
  1137. lda BO, 4 * SIZE(BO)
  /* Drain the last four products; no further MULs are issued. */
  1138. ADD1 c11, t1, c11
  1139. ADD3 c12, t2, c12
  1140. ADD2 c16, t3, c16
  1141. ADD4 c15, t4, c15
  /* Fold paired accumulators: c01+=c06, c02+=c05, c03+=c08, c04+=c07. */
  1142. ADD c01, c06, c01
  1143. ADD c02, c05, c02
  1144. ADD c03, c08, c03
  1145. ADD c04, c07, c04
  /* Likewise c09+=c14, c10+=c13, c11+=c16, c12+=c15. */
  1146. ADD c09, c14, c09
  1147. ADD c10, c13, c10
  1148. ADD c11, c16, c11
  1149. ADD c12, c15, c12
  1150. .align 4
  /*
   * $L18: start of the solve phase for the current tile.
   *
   * 1. Re-position AO and BO:
   *      LN / RT : TMP1 = KK - 2, then
   *                AO = AORIG + (TMP1 << (ZBASE_SHIFT + 1)) and
   *                BO = B     + (TMP1 << (ZBASE_SHIFT + 1)).
   *      LT / RN : step AO and BO back by 4 * SIZE.
   * 2. Load eight values (from BO for LN / LT, from AO for RN / RT) and
   *    replace each accumulator c with SUB(loaded, c).
   *
   * NOTE(review): SUB is a macro defined outside this view; presumably
   * `SUB x, y, z` computes z = x - y -- confirm.
   */
  1151. $L18:
  1152. #if defined(LN) || defined(RT)
  /* Both arms of this #ifdef are identical (TMP1 = KK - 2). */
  1153. #ifdef LN
  1154. subq KK, 2, TMP1
  1155. #else
  1156. subq KK, 2, TMP1
  1157. #endif
  /* The same byte offset is computed twice, once for AO and once for BO. */
  1158. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1159. addq AORIG, TMP2, AO
  1160. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1161. addq B, TMP2, BO
  1162. #else
  1163. lda AO, -4 * SIZE(AO)
  1164. lda BO, -4 * SIZE(BO)
  1165. #endif
  1166. #if defined(LN) || defined(LT)
  /* Values come from BO; note the order c01, c02, c09, c10, c03, c04, c11, c12. */
  1167. LD a1, 0 * SIZE(BO)
  1168. LD a2, 1 * SIZE(BO)
  1169. LD a3, 2 * SIZE(BO)
  1170. LD a4, 3 * SIZE(BO)
  1171. LD b1, 4 * SIZE(BO)
  1172. LD b2, 5 * SIZE(BO)
  1173. LD b3, 6 * SIZE(BO)
  1174. LD b4, 7 * SIZE(BO)
  1175. SUB a1, c01, c01
  1176. SUB a2, c02, c02
  1177. SUB a3, c09, c09
  1178. SUB a4, c10, c10
  1179. SUB b1, c03, c03
  1180. SUB b2, c04, c04
  1181. SUB b3, c11, c11
  1182. SUB b4, c12, c12
  1183. #else
  /* Values come from AO; here the order is c01-c04 followed by c09-c12. */
  1184. LD a1, 0 * SIZE(AO)
  1185. LD a2, 1 * SIZE(AO)
  1186. LD a3, 2 * SIZE(AO)
  1187. LD a4, 3 * SIZE(AO)
  1188. LD b1, 4 * SIZE(AO)
  1189. LD b2, 5 * SIZE(AO)
  1190. LD b3, 6 * SIZE(AO)
  1191. LD b4, 7 * SIZE(AO)
  1192. SUB a1, c01, c01
  1193. SUB a2, c02, c02
  1194. SUB a3, c03, c03
  1195. SUB a4, c04, c04
  1196. SUB b1, c09, c09
  1197. SUB b2, c10, c10
  1198. SUB b3, c11, c11
  1199. SUB b4, c12, c12
  1200. #endif
  /*
   * LN variant of the 2x2 solve.  Coefficients are read from AO.
   *
   *   step 1: (a1, a2) = AO[6], AO[7] scale the pairs (c03, c04) and
   *           (c11, c12).
   *   step 2: (a3, a4) = AO[4], AO[5] times the updated pairs is removed
   *           from (c01, c02) and (c09, c10).
   *   step 3: (a1, a2) = AO[0], AO[1] scale (c01, c02) and (c09, c10).
   *
   * NOTE(review): each MUL / ADD5 / ADD6 group has the shape of a complex
   * multiply by (a1 + i*a2); ADD5 and ADD6 are defined outside this view
   * and presumably supply the signs -- confirm.
   */
  1201. #ifdef LN
  1202. LD a1, 6 * SIZE(AO)
  1203. LD a2, 7 * SIZE(AO)
  1204. LD a3, 4 * SIZE(AO)
  1205. LD a4, 5 * SIZE(AO)
  /* step 1: cross terms (a2 * swapped component) go to t1-t4 first. */
  1206. MUL a2, c04, t1
  1207. MUL a2, c03, t2
  1208. MUL a2, c12, t3
  1209. MUL a2, c11, t4
  1210. MUL a1, c03, c03
  1211. MUL a1, c04, c04
  1212. MUL a1, c11, c11
  1213. MUL a1, c12, c12
  1214. ADD5 c03, t1, c03
  1215. ADD6 c04, t2, c04
  1216. ADD5 c11, t3, c11
  1217. ADD6 c12, t4, c12
  /* step 2: eliminate the just-updated values from c01/c02/c09/c10. */
  1218. MUL a3, c03, t1
  1219. MUL a3, c04, t2
  1220. MUL a3, c11, t3
  1221. MUL a3, c12, t4
  1222. SUB c01, t1, c01
  1223. SUB c02, t2, c02
  1224. SUB c09, t3, c09
  1225. SUB c10, t4, c10
  1226. MUL a4, c04, t1
  1227. MUL a4, c03, t2
  1228. MUL a4, c12, t3
  1229. MUL a4, c11, t4
  1230. ADD6 c01, t1, c01
  1231. ADD5 c02, t2, c02
  1232. ADD6 c09, t3, c09
  1233. ADD5 c10, t4, c10
  /* step 3: scale the remaining pairs by the coefficient at AO[0], AO[1]. */
  1234. LD a1, 0 * SIZE(AO)
  1235. LD a2, 1 * SIZE(AO)
  1236. MUL a2, c02, t1
  1237. MUL a2, c01, t2
  1238. MUL a2, c10, t3
  1239. MUL a2, c09, t4
  1240. MUL a1, c01, c01
  1241. MUL a1, c02, c02
  1242. MUL a1, c09, c09
  1243. MUL a1, c10, c10
  1244. ADD5 c01, t1, c01
  1245. ADD6 c02, t2, c02
  1246. ADD5 c09, t3, c09
  1247. ADD6 c10, t4, c10
  1248. #endif
  /*
   * LT variant of the 2x2 solve.  Coefficients are read from AO.
   *
   *   step 1: (a1, a2) = AO[0], AO[1] scale the pairs (c01, c02) and
   *           (c09, c10).
   *   step 2: (a3, a4) = AO[2], AO[3] times the updated pairs is removed
   *           from (c03, c04) and (c11, c12).
   *   step 3: (a1, a2) = AO[6], AO[7] scale (c03, c04) and (c11, c12).
   *
   * NOTE(review): each MUL / ADD5 / ADD6 group has the shape of a complex
   * multiply; ADD5 and ADD6 are defined outside this view and presumably
   * supply the signs -- confirm.
   */
  1249. #ifdef LT
  1250. LD a1, 0 * SIZE(AO)
  1251. LD a2, 1 * SIZE(AO)
  1252. LD a3, 2 * SIZE(AO)
  1253. LD a4, 3 * SIZE(AO)
  /* step 1 */
  1254. MUL a2, c02, t1
  1255. MUL a2, c01, t2
  1256. MUL a2, c10, t3
  1257. MUL a2, c09, t4
  1258. MUL a1, c01, c01
  1259. MUL a1, c02, c02
  1260. MUL a1, c09, c09
  1261. MUL a1, c10, c10
  1262. ADD5 c01, t1, c01
  1263. ADD6 c02, t2, c02
  1264. ADD5 c09, t3, c09
  1265. ADD6 c10, t4, c10
  /* step 2 */
  1266. MUL a3, c01, t1
  1267. MUL a3, c02, t2
  1268. MUL a3, c09, t3
  1269. MUL a3, c10, t4
  1270. SUB c03, t1, c03
  1271. SUB c04, t2, c04
  1272. SUB c11, t3, c11
  1273. SUB c12, t4, c12
  1274. MUL a4, c02, t1
  1275. MUL a4, c01, t2
  1276. MUL a4, c10, t3
  1277. MUL a4, c09, t4
  1278. ADD6 c03, t1, c03
  1279. ADD5 c04, t2, c04
  1280. ADD6 c11, t3, c11
  1281. ADD5 c12, t4, c12
  /* step 3 */
  1282. LD a1, 6 * SIZE(AO)
  1283. LD a2, 7 * SIZE(AO)
  1284. MUL a2, c04, t1
  1285. MUL a2, c03, t2
  1286. MUL a2, c12, t3
  1287. MUL a2, c11, t4
  1288. MUL a1, c03, c03
  1289. MUL a1, c04, c04
  1290. MUL a1, c11, c11
  1291. MUL a1, c12, c12
  1292. ADD5 c03, t1, c03
  1293. ADD6 c04, t2, c04
  1294. ADD5 c11, t3, c11
  1295. ADD6 c12, t4, c12
  1296. #endif
  /*
   * RN variant of the 2x2 solve.  Coefficients are read from BO.
   *
   *   step 1: (a1, a2) = BO[0], BO[1] scale the pairs (c01, c02) and
   *           (c03, c04).
   *   step 2: (a3, a4) = BO[2], BO[3] times the updated pairs is removed
   *           from (c09, c10) and (c11, c12).
   *   step 3: (a1, a2) = BO[6], BO[7] scale (c09, c10) and (c11, c12).
   *
   * NOTE(review): each MUL / ADD5 / ADD6 group has the shape of a complex
   * multiply; ADD5 and ADD6 are defined outside this view and presumably
   * supply the signs -- confirm.
   */
  1297. #ifdef RN
  1298. LD a1, 0 * SIZE(BO)
  1299. LD a2, 1 * SIZE(BO)
  1300. LD a3, 2 * SIZE(BO)
  1301. LD a4, 3 * SIZE(BO)
  /* step 1 */
  1302. MUL a2, c02, t1
  1303. MUL a2, c01, t2
  1304. MUL a2, c04, t3
  1305. MUL a2, c03, t4
  1306. MUL a1, c01, c01
  1307. MUL a1, c02, c02
  1308. MUL a1, c03, c03
  1309. MUL a1, c04, c04
  1310. ADD5 c01, t1, c01
  1311. ADD6 c02, t2, c02
  1312. ADD5 c03, t3, c03
  1313. ADD6 c04, t4, c04
  /* step 2 */
  1314. MUL a3, c01, t1
  1315. MUL a3, c02, t2
  1316. MUL a3, c03, t3
  1317. MUL a3, c04, t4
  1318. SUB c09, t1, c09
  1319. SUB c10, t2, c10
  1320. SUB c11, t3, c11
  1321. SUB c12, t4, c12
  1322. MUL a4, c02, t1
  1323. MUL a4, c01, t2
  1324. MUL a4, c04, t3
  1325. MUL a4, c03, t4
  1326. ADD6 c09, t1, c09
  1327. ADD5 c10, t2, c10
  1328. ADD6 c11, t3, c11
  1329. ADD5 c12, t4, c12
  /* step 3 */
  1330. LD a1, 6 * SIZE(BO)
  1331. LD a2, 7 * SIZE(BO)
  1332. MUL a2, c10, t1
  1333. MUL a2, c09, t2
  1334. MUL a2, c12, t3
  1335. MUL a2, c11, t4
  1336. MUL a1, c09, c09
  1337. MUL a1, c10, c10
  1338. MUL a1, c11, c11
  1339. MUL a1, c12, c12
  1340. ADD5 c09, t1, c09
  1341. ADD6 c10, t2, c10
  1342. ADD5 c11, t3, c11
  1343. ADD6 c12, t4, c12
  1344. #endif
  /*
   * RT variant of the 2x2 solve.  Coefficients are read from BO.
   *
   *   step 1: (a1, a2) = BO[6], BO[7] scale the pairs (c09, c10) and
   *           (c11, c12).
   *   step 2: (a3, a4) = BO[4], BO[5] times the updated pairs is removed
   *           from (c01, c02) and (c03, c04).
   *   step 3: (a1, a2) = BO[0], BO[1] scale (c01, c02) and (c03, c04).
   *
   * NOTE(review): each MUL / ADD5 / ADD6 group has the shape of a complex
   * multiply; ADD5 and ADD6 are defined outside this view and presumably
   * supply the signs -- confirm.
   */
  1345. #ifdef RT
  1346. LD a1, 6 * SIZE(BO)
  1347. LD a2, 7 * SIZE(BO)
  1348. LD a3, 4 * SIZE(BO)
  1349. LD a4, 5 * SIZE(BO)
  /* step 1 */
  1350. MUL a2, c10, t1
  1351. MUL a2, c09, t2
  1352. MUL a2, c12, t3
  1353. MUL a2, c11, t4
  1354. MUL a1, c09, c09
  1355. MUL a1, c10, c10
  1356. MUL a1, c11, c11
  1357. MUL a1, c12, c12
  1358. ADD5 c09, t1, c09
  1359. ADD6 c10, t2, c10
  1360. ADD5 c11, t3, c11
  1361. ADD6 c12, t4, c12
  /* step 2 */
  1362. MUL a3, c09, t1
  1363. MUL a3, c10, t2
  1364. MUL a3, c11, t3
  1365. MUL a3, c12, t4
  1366. SUB c01, t1, c01
  1367. SUB c02, t2, c02
  1368. SUB c03, t3, c03
  1369. SUB c04, t4, c04
  1370. MUL a4, c10, t1
  1371. MUL a4, c09, t2
  1372. MUL a4, c12, t3
  1373. MUL a4, c11, t4
  1374. ADD6 c01, t1, c01
  1375. ADD5 c02, t2, c02
  1376. ADD6 c03, t3, c03
  1377. ADD5 c04, t4, c04
  /* step 3 */
  1378. LD a1, 0 * SIZE(BO)
  1379. LD a2, 1 * SIZE(BO)
  1380. MUL a2, c02, t1
  1381. MUL a2, c01, t2
  1382. MUL a2, c04, t3
  1383. MUL a2, c03, t4
  1384. MUL a1, c01, c01
  1385. MUL a1, c02, c02
  1386. MUL a1, c03, c03
  1387. MUL a1, c04, c04
  1388. ADD5 c01, t1, c01
  1389. ADD6 c02, t2, c02
  1390. ADD5 c03, t3, c03
  1391. ADD6 c04, t4, c04
  1392. #endif
  /*
   * Write-back and loop bookkeeping for the tile solved above.
   *
   *   - Store the eight results back to the buffer they were loaded from
   *     in $L18 (BO for LN / LT, AO for RN / RT), using the same element
   *     order as those loads.
   *   - Store c01-c04 through C1 and c09-c12 through C2.  For LN the
   *     pointers are moved back by 4 * SIZE before the stores; otherwise
   *     they are moved forward by 4 * SIZE after them.
   *   - Clear t1-t4, c01 and c05, update AORIG / AO / BO / KK per variant,
   *     decrement I and loop to $L11 while I > 0.
   */
  1393. #if defined(LN) || defined(LT)
  1394. ST c01, 0 * SIZE(BO)
  1395. ST c02, 1 * SIZE(BO)
  1396. ST c09, 2 * SIZE(BO)
  1397. ST c10, 3 * SIZE(BO)
  1398. ST c03, 4 * SIZE(BO)
  1399. ST c04, 5 * SIZE(BO)
  1400. ST c11, 6 * SIZE(BO)
  1401. ST c12, 7 * SIZE(BO)
  1402. #else
  1403. ST c01, 0 * SIZE(AO)
  1404. ST c02, 1 * SIZE(AO)
  1405. ST c03, 2 * SIZE(AO)
  1406. ST c04, 3 * SIZE(AO)
  1407. ST c09, 4 * SIZE(AO)
  1408. ST c10, 5 * SIZE(AO)
  1409. ST c11, 6 * SIZE(AO)
  1410. ST c12, 7 * SIZE(AO)
  1411. #endif
  1412. #ifdef LN
  /* LN: pre-decrement the output pointers. */
  1413. lda C1, -4 * SIZE(C1)
  1414. lda C2, -4 * SIZE(C2)
  1415. #endif
  1416. ST c01, 0 * SIZE(C1)
  1417. ST c02, 1 * SIZE(C1)
  1418. ST c03, 2 * SIZE(C1)
  1419. ST c04, 3 * SIZE(C1)
  1420. ST c09, 0 * SIZE(C2)
  1421. ST c10, 1 * SIZE(C2)
  1422. ST c11, 2 * SIZE(C2)
  1423. ST c12, 3 * SIZE(C2)
  1424. #ifndef LN
  /* All other variants: post-increment the output pointers. */
  1425. lda C1, 4 * SIZE(C1)
  1426. lda C2, 4 * SIZE(C2)
  1427. #endif
  /* Reset the product temporaries before the next tile. */
  1428. fclr t1
  1429. fclr t2
  1430. fclr t3
  1431. fclr t4
  1432. #ifdef RT
  /* RT: AORIG += K << (ZBASE_SHIFT + 1). */
  1433. sll K, ZBASE_SHIFT + 1, TMP1
  1434. addq AORIG, TMP1, AORIG
  1435. #endif
  1436. #if defined(LT) || defined(RN)
  /* LT / RN: advance AO and BO by (K - KK) << (ZBASE_SHIFT + 1). */
  1437. subq K, KK, TMP1
  1438. sll TMP1, ZBASE_SHIFT + 1, TMP1
  1439. addq AO, TMP1, AO
  1440. addq BO, TMP1, BO
  1441. #endif
  1442. #ifdef LT
  1443. addq KK, 2, KK
  1444. #endif
  1445. #ifdef LN
  1446. subq KK, 2, KK
  1447. #endif
  1448. fclr c01
  1449. fclr c05
  /* I -= 1; continue with the next tile at $L11 while I > 0. */
  1450. lda I, -1(I)
  1451. bgt I, $L11
  1452. .align 4
  /*
   * $L20: set-up for the case where M is odd.
   *
   * I = M & 1; when that is zero the whole section is skipped via $L29.
   * Otherwise AO / BO are positioned, a1-a4 and b1-b4 are preloaded,
   * the accumulators c09, c13, c02, c06, c10 and c14 are cleared, and
   * L is set to (count - 2), where count is KK for LT / RN and
   * TMP1 = K - KK for LN / RT.
   *
   *   count <= 0 : jump to $L28 (no multiply work at all)
   *   L     <= 0 : jump to $L25 (skip the unrolled loop at $L22)
   *
   * After the preloads AO has been advanced by 2 * SIZE and BO by
   * 4 * SIZE relative to their starting positions.
   */
  1453. $L20:
  1454. and M, 1, I
  1455. ble I, $L29
  1456. #if defined(LT) || defined(RN)
  1457. LD a1, 0 * SIZE(AO)
  1458. fclr c09
  1459. LD a2, 1 * SIZE(AO)
  1460. fclr c13
  1461. LD a3, 2 * SIZE(AO)
  1462. fclr c02
  1463. LD a4, 3 * SIZE(AO)
  1464. fclr c06
  /* In this variant the b values are read directly from B. */
  1465. LD b1, 0 * SIZE(B)
  1466. fclr c10
  1467. LD b2, 1 * SIZE(B)
  1468. fclr c14
  1469. LD b3, 2 * SIZE(B)
  1470. lda AO, 2 * SIZE(AO)
  1471. LD b4, 3 * SIZE(B)
  /* BO = B + 4 * SIZE. */
  1472. lda BO, 4 * SIZE(B)
  /* L = KK - 2. */
  1473. lda L, -2(KK)
  1474. ble KK, $L28
  1475. ble L, $L25
  1476. #else
  1477. #ifdef LN
  /* LN: AORIG -= K << ZBASE_SHIFT. */
  1478. sll K, ZBASE_SHIFT + 0, TMP1
  1479. subq AORIG, TMP1, AORIG
  1480. #endif
  /* AO = AORIG + (KK << ZBASE_SHIFT); BO = B + (KK << (ZBASE_SHIFT + 1)). */
  1481. sll KK, ZBASE_SHIFT + 0, TMP1
  1482. addq AORIG, TMP1, AO
  1483. sll KK, ZBASE_SHIFT + 1, TMP1
  1484. addq B, TMP1, BO
  /* TMP1 = K - KK is the count for this variant. */
  1485. subq K, KK, TMP1
  1486. LD a1, 0 * SIZE(AO)
  1487. fclr c09
  1488. LD a2, 1 * SIZE(AO)
  1489. fclr c13
  1490. LD a3, 2 * SIZE(AO)
  1491. fclr c02
  1492. LD a4, 3 * SIZE(AO)
  1493. fclr c06
  1494. LD b1, 0 * SIZE(BO)
  1495. fclr c10
  1496. LD b2, 1 * SIZE(BO)
  1497. fclr c14
  1498. LD b3, 2 * SIZE(BO)
  1499. lda AO, 2 * SIZE(AO)
  1500. LD b4, 3 * SIZE(BO)
  1501. lda BO, 4 * SIZE(BO)
  /* L = TMP1 - 2. */
  1502. lda L, -2(TMP1)
  1503. ble TMP1, $L28
  1504. ble L, $L25
  1505. #endif
  1506. .align 5
  /*
   * $L22: unrolled multiply-accumulate loop for the odd-M case.
   *
   * Each trip issues sixteen MULs -- eight with a1 / a2 and eight with
   * a3 / a4 -- accumulating into c01, c02, c05, c06, c09, c10, c13, c14.
   * Per trip BO advances by 8 * SIZE, AO by 4 * SIZE and L decreases by 2;
   * the loop repeats while L > 0.
   *
   * b5 is used as a spare register: the value at -5 * SIZE(BO) is loaded
   * into b5 instead of b4 because b4 is still needed by the MULs of the
   * first half, and b4 itself is refreshed later from -1 * SIZE(BO).
   */
  1507. $L22:
  1508. ADD1 c09, t1, c09
  1509. unop
  1510. MUL a1, b1, t1
  1511. unop
  1512. ADD3 c10, t2, c10
  1513. unop
  1514. MUL a2, b1, t2
  1515. LD b1, 0 * SIZE(BO)
  1516. ADD4 c13, t3, c13
  1517. unop
  1518. MUL a1, b2, t3
  /* BO += 8 * SIZE; the remaining b loads use negative offsets from it. */
  1519. lda BO, 8 * SIZE(BO)
  1520. ADD2 c14, t4, c14
  1521. unop
  1522. MUL a2, b2, t4
  1523. LD b2, -7 * SIZE(BO)
  1524. ADD1 c01, t1, c01
  1525. unop
  1526. MUL a1, b3, t1
  1527. unop
  1528. ADD3 c02, t2, c02
  1529. unop
  1530. MUL a2, b3, t2
  1531. LD b3, -6 * SIZE(BO)
  1532. ADD4 c05, t3, c05
  1533. unop
  1534. MUL a1, b4, t3
  1535. LD a1, 2 * SIZE(AO)
  1536. ADD2 c06, t4, c06
  1537. MUL a2, b4, t4
  1538. LD b5, -5 * SIZE(BO)
  /* Second half of the trip: same pattern with a3 / a4. */
  1539. ADD1 c09, t1, c09
  1540. unop
  1541. MUL a3, b1, t1
  1542. LD a2, 3 * SIZE(AO)
  1543. ADD3 c10, t2, c10
  1544. unop
  1545. MUL a4, b1, t2
  1546. LD b1, -4 * SIZE(BO)
  1547. ADD4 c13, t3, c13
  1548. unop
  1549. MUL a3, b2, t3
  /* AO += 4 * SIZE. */
  1550. lda AO, 4 * SIZE(AO)
  1551. ADD2 c14, t4, c14
  1552. MUL a4, b2, t4
  1553. LD b2, -3 * SIZE(BO)
  1554. ADD1 c01, t1, c01
  /* L -= 2. */
  1555. lda L, -2(L)
  1556. MUL a3, b3, t1
  1557. LD b4, -1 * SIZE(BO)
  1558. ADD3 c02, t2, c02
  1559. unop
  1560. MUL a4, b3, t2
  1561. LD b3, -2 * SIZE(BO)
  1562. ADD4 c05, t3, c05
  1563. unop
  1564. MUL a3, b5, t3
  1565. LD a3, 0 * SIZE(AO)
  1566. ADD2 c06, t4, c06
  1567. MUL a4, b5, t4
  1568. LD a4, 1 * SIZE(AO)
  1569. bgt L, $L22
  1570. .align 4
  /*
   * $L25: loop epilogue for the odd-M case.
   *
   * Reached by fall-through from the $L22 loop or by `ble L, $L25` in
   * $L20.  Issues the pending ADD1 / MUL pair, then tests the low bit of
   * the count (KK for LT / RN, TMP1 otherwise).  If it is set, control
   * jumps to $L27.  Otherwise one more pass is executed here, which
   * reloads a1, a2 and b1-b4 and advances AO by 2 * SIZE and BO by
   * 4 * SIZE before falling into $L27.
   */
  1571. $L25:
  1572. ADD1 c09, t1, c09
  1573. MUL a1, b1, t1
  1574. #if defined(LT) || defined(RN)
  1575. blbs KK, $L27
  1576. #else
  1577. blbs TMP1, $L27
  1578. #endif
  1579. .align 4
  /* Low bit clear: perform one additional update pass. */
  1580. ADD3 c10, t2, c10
  1581. unop
  1582. MUL a2, b1, t2
  1583. LD b1, 0 * SIZE(BO)
  1584. ADD4 c13, t3, c13
  1585. unop
  1586. MUL a1, b2, t3
  1587. unop
  1588. ADD2 c14, t4, c14
  1589. unop
  1590. MUL a2, b2, t4
  1591. LD b2, 1 * SIZE(BO)
  1592. ADD1 c01, t1, c01
  1593. unop
  1594. MUL a1, b3, t1
  /* AO += 2 * SIZE; the a1 / a2 reloads below use negative offsets. */
  1595. lda AO, 2 * SIZE(AO)
  1596. ADD3 c02, t2, c02
  1597. unop
  1598. MUL a2, b3, t2
  1599. LD b3, 2 * SIZE(BO)
  1600. ADD4 c05, t3, c05
  1601. unop
  1602. MUL a1, b4, t3
  1603. LD a1, -2 * SIZE(AO)
  1604. ADD2 c06, t4, c06
  1605. unop
  1606. MUL a2, b4, t4
  1607. LD a2, -1 * SIZE(AO)
  1608. ADD1 c09, t1, c09
  1609. LD b4, 3 * SIZE(BO)
  /* First product of the final pass, consumed in $L27. */
  1610. MUL a1, b1, t1
  /* BO += 4 * SIZE. */
  1611. lda BO, 4 * SIZE(BO)
  1612. .align 4
  /*
   * $L27: final update pass for the odd-M case.
   *
   * On entry t1 already holds the product a1 * b1.  Drains the outstanding
   * t1-t4 products into the accumulators, advances AO by 2 * SIZE and BO
   * by 4 * SIZE, then folds the eight accumulators pairwise so that the
   * results end up in c01, c02, c09 and c10.
   */
  1613. $L27:
  1614. ADD3 c10, t2, c10
  1615. MUL a2, b1, t2
  1616. ADD4 c13, t3, c13
  1617. MUL a1, b2, t3
  1618. ADD2 c14, t4, c14
  1619. MUL a2, b2, t4
  1620. ADD1 c01, t1, c01
  1621. MUL a1, b3, t1
  1622. ADD3 c02, t2, c02
  1623. MUL a2, b3, t2
  1624. ADD4 c05, t3, c05
  1625. MUL a1, b4, t3
  1626. ADD2 c06, t4, c06
  /* AO += 2 * SIZE. */
  1627. lda AO, 2 * SIZE(AO)
  1628. MUL a2, b4, t4
  /* BO += 4 * SIZE. */
  1629. lda BO, 4 * SIZE(BO)
  /* Drain the last four products; no further MULs are issued. */
  1630. ADD1 c09, t1, c09
  1631. ADD3 c10, t2, c10
  1632. ADD4 c13, t3, c13
  1633. ADD2 c14, t4, c14
  /* Fold paired accumulators: c01+=c06, c02+=c05, c09+=c14, c10+=c13. */
  1634. ADD c01, c06, c01
  1635. ADD c02, c05, c02
  1636. ADD c09, c14, c09
  1637. ADD c10, c13, c10
  1638. .align 4
  /*
   * $L28: start of the solve phase for the odd-M case.
   *
   * 1. Re-position AO and BO:
   *      LN / RT : TMP1 = KK - 1 (LN) or KK - 2 (RT), then
   *                AO = AORIG + (TMP1 << ZBASE_SHIFT) and
   *                BO = B     + (TMP1 << (ZBASE_SHIFT + 1)).
   *      LT / RN : step AO back by 2 * SIZE and BO back by 4 * SIZE,
   *                undoing the advance made in $L20 / $L27.
   * 2. Load four values (from BO for LN / LT, from AO for RN / RT) and
   *    replace c01, c02, c09, c10 with SUB(loaded, c).
   *
   * NOTE(review): SUB is a macro defined outside this view; presumably
   * `SUB x, y, z` computes z = x - y -- confirm.
   */
  1639. $L28:
  1640. #if defined(LN) || defined(RT)
  1641. #ifdef LN
  1642. subq KK, 1, TMP1
  1643. #else
  1644. subq KK, 2, TMP1
  1645. #endif
  /* AO is scaled by ZBASE_SHIFT, BO by ZBASE_SHIFT + 1. */
  1646. sll TMP1, ZBASE_SHIFT + 0, TMP2
  1647. addq AORIG, TMP2, AO
  1648. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1649. addq B, TMP2, BO
  1650. #else
  1651. lda AO, -2 * SIZE(AO)
  1652. lda BO, -4 * SIZE(BO)
  1653. #endif
  1654. #if defined(LN) || defined(LT)
  1655. LD a1, 0 * SIZE(BO)
  1656. LD a2, 1 * SIZE(BO)
  1657. LD a3, 2 * SIZE(BO)
  1658. LD a4, 3 * SIZE(BO)
  1659. SUB a1, c01, c01
  1660. SUB a2, c02, c02
  1661. SUB a3, c09, c09
  1662. SUB a4, c10, c10
  1663. #else
  1664. LD a1, 0 * SIZE(AO)
  1665. LD a2, 1 * SIZE(AO)
  1666. LD a3, 2 * SIZE(AO)
  1667. LD a4, 3 * SIZE(AO)
  1668. SUB a1, c01, c01
  1669. SUB a2, c02, c02
  1670. SUB a3, c09, c09
  1671. SUB a4, c10, c10
  1672. #endif
  /*
   * LN / LT solve for the odd-M case: a single coefficient pair
   * (a1, a2) = AO[0], AO[1] scales both (c01, c02) and (c09, c10).
   *
   * NOTE(review): the MUL / ADD5 / ADD6 group has the shape of a complex
   * multiply by (a1 + i*a2); ADD5 and ADD6 are defined outside this view
   * and presumably supply the signs -- confirm.
   */
  1673. #if defined(LN) || defined(LT)
  1674. LD a1, 0 * SIZE(AO)
  1675. LD a2, 1 * SIZE(AO)
  /* Cross terms first, into t1-t4. */
  1676. MUL a2, c02, t1
  1677. MUL a2, c01, t2
  1678. MUL a2, c10, t3
  1679. MUL a2, c09, t4
  1680. MUL a1, c01, c01
  1681. MUL a1, c02, c02
  1682. MUL a1, c09, c09
  1683. MUL a1, c10, c10
  1684. ADD5 c01, t1, c01
  1685. ADD6 c02, t2, c02
  1686. ADD5 c09, t3, c09
  1687. ADD6 c10, t4, c10
  1688. #endif
  /*
   * RN solve for the odd-M case.  Coefficients are read from BO.
   *
   *   step 1: (a1, a2) = BO[0], BO[1] scale the pair (c01, c02).
   *   step 2: (a3, a4) = BO[2], BO[3] times the updated pair is removed
   *           from (c09, c10).
   *   step 3: (a1, a2) = BO[6], BO[7] scale (c09, c10).
   *
   * NOTE(review): each MUL / ADD5 / ADD6 group has the shape of a complex
   * multiply; ADD5 and ADD6 are defined outside this view and presumably
   * supply the signs -- confirm.
   */
  1689. #ifdef RN
  1690. LD a1, 0 * SIZE(BO)
  1691. LD a2, 1 * SIZE(BO)
  1692. LD a3, 2 * SIZE(BO)
  1693. LD a4, 3 * SIZE(BO)
  /* step 1 */
  1694. MUL a2, c02, t1
  1695. MUL a2, c01, t2
  1696. MUL a1, c01, c01
  1697. MUL a1, c02, c02
  1698. ADD5 c01, t1, c01
  1699. ADD6 c02, t2, c02
  /* step 2 */
  1700. MUL a3, c01, t1
  1701. MUL a3, c02, t2
  1702. SUB c09, t1, c09
  1703. SUB c10, t2, c10
  1704. MUL a4, c02, t1
  1705. MUL a4, c01, t2
  1706. ADD6 c09, t1, c09
  1707. ADD5 c10, t2, c10
  /* step 3 */
  1708. LD a1, 6 * SIZE(BO)
  1709. LD a2, 7 * SIZE(BO)
  1710. MUL a2, c10, t1
  1711. MUL a2, c09, t2
  1712. MUL a1, c09, c09
  1713. MUL a1, c10, c10
  1714. ADD5 c09, t1, c09
  1715. ADD6 c10, t2, c10
  1716. #endif
  /*
   * RT solve for the odd-M case.  Coefficients are read from BO.
   *
   *   step 1: (a1, a2) = BO[6], BO[7] scale the pair (c09, c10).
   *   step 2: (a3, a4) = BO[4], BO[5] times the updated pair is removed
   *           from (c01, c02).
   *   step 3: (a1, a2) = BO[0], BO[1] scale (c01, c02).
   *
   * NOTE(review): each MUL / ADD5 / ADD6 group has the shape of a complex
   * multiply; ADD5 and ADD6 are defined outside this view and presumably
   * supply the signs -- confirm.
   */
  1717. #ifdef RT
  1718. LD a1, 6 * SIZE(BO)
  1719. LD a2, 7 * SIZE(BO)
  1720. LD a3, 4 * SIZE(BO)
  1721. LD a4, 5 * SIZE(BO)
  /* step 1 */
  1722. MUL a2, c10, t1
  1723. MUL a2, c09, t2
  1724. MUL a1, c09, c09
  1725. MUL a1, c10, c10
  1726. ADD5 c09, t1, c09
  1727. ADD6 c10, t2, c10
  /* step 2 */
  1728. MUL a3, c09, t1
  1729. MUL a3, c10, t2
  1730. SUB c01, t1, c01
  1731. SUB c02, t2, c02
  1732. MUL a4, c10, t1
  1733. MUL a4, c09, t2
  1734. ADD6 c01, t1, c01
  1735. ADD5 c02, t2, c02
  /* step 3 */
  1736. LD a1, 0 * SIZE(BO)
  1737. LD a2, 1 * SIZE(BO)
  1738. MUL a2, c02, t1
  1739. MUL a2, c01, t2
  1740. MUL a1, c01, c01
  1741. MUL a1, c02, c02
  1742. ADD5 c01, t1, c01
  1743. ADD6 c02, t2, c02
  1744. #endif
  /*
   * Write-back and pointer bookkeeping for the odd-M case.
   *
   *   - Store c01, c02, c09, c10 back to the buffer they were loaded from
   *     in $L28 (BO for LN / LT, AO for RN / RT).
   *   - Store (c01, c02) through C1 and (c09, c10) through C2.  For LN the
   *     pointers are moved back by 2 * SIZE before the stores; otherwise
   *     they are moved forward by 2 * SIZE after them.
   *   - Update AORIG / AO / BO / KK per variant.  No loop branch follows:
   *     control falls through to $L29.
   */
  1745. #if defined(LN) || defined(LT)
  1746. ST c01, 0 * SIZE(BO)
  1747. ST c02, 1 * SIZE(BO)
  1748. ST c09, 2 * SIZE(BO)
  1749. ST c10, 3 * SIZE(BO)
  1750. #else
  1751. ST c01, 0 * SIZE(AO)
  1752. ST c02, 1 * SIZE(AO)
  1753. ST c09, 2 * SIZE(AO)
  1754. ST c10, 3 * SIZE(AO)
  1755. #endif
  1756. #ifdef LN
  /* LN: pre-decrement the output pointers. */
  1757. lda C1, -2 * SIZE(C1)
  1758. lda C2, -2 * SIZE(C2)
  1759. #endif
  1760. ST c01, 0 * SIZE(C1)
  1761. ST c02, 1 * SIZE(C1)
  1762. ST c09, 0 * SIZE(C2)
  1763. ST c10, 1 * SIZE(C2)
  1764. #ifndef LN
  /* All other variants: post-increment the output pointers. */
  1765. lda C1, 2 * SIZE(C1)
  1766. lda C2, 2 * SIZE(C2)
  1767. #endif
  1768. #ifdef RT
  /* RT: AORIG += K << ZBASE_SHIFT. */
  1769. sll K, ZBASE_SHIFT, TMP1
  1770. addq AORIG, TMP1, AORIG
  1771. #endif
  1772. #if defined(LT) || defined(RN)
  /* LT / RN: AO += (K - KK) << ZBASE_SHIFT; BO += (K - KK) << (ZBASE_SHIFT + 1). */
  1773. subq K, KK, TMP1
  1774. sll TMP1, ZBASE_SHIFT + 0, TMP2
  1775. addq AO, TMP2, AO
  1776. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1777. addq BO, TMP2, BO
  1778. #endif
  1779. #ifdef LT
  1780. addq KK, 1, KK
  1781. #endif
  1782. #ifdef LN
  1783. subq KK, 1, KK
  1784. #endif
  1785. .align 4
  /*
   * $L29: end of one pass of the outer J loop.
   *
   * Reached from `ble I, $L29` in $L20 or by fall-through after the
   * odd-M section.  Updates B and KK for the variant being built:
   *   LN      : B += K << (ZBASE_SHIFT + 1)
   *   LT / RN : B  = BO
   *   RN      : KK += 2
   *   RT      : KK -= 2
   * Then decrements J and branches back to $L01 while J > 0.
   */
  1786. $L29:
  1787. #ifdef LN
  1788. sll K, ZBASE_SHIFT + 1, TMP1
  1789. addq B, TMP1, B
  1790. #endif
  1791. #if defined(LT) || defined(RN)
  1792. mov BO, B
  1793. #endif
  1794. #ifdef RN
  1795. addq KK, 2, KK
  1796. #endif
  1797. #ifdef RT
  1798. subq KK, 2, KK
  1799. #endif
  /* J -= 1; loop while J > 0. */
  1800. lda J, -1(J)
  1801. bgt J, $L01
  1802. .align 4
  /*
   * $L999: common exit path.
   *
   * Reloads floating-point registers $f2-$f9 from the eight quadword
   * slots at 0..56($sp), clears integer register $0, releases the
   * STACKSIZE-byte stack frame and returns.
   *
   * NOTE(review): $f2-$f9 are presumably saved in the prologue (outside
   * this view), and $0 is presumably the integer return-value register
   * under the Alpha calling standard, so the routine returns 0 -- confirm.
   */
  1803. $L999:
  1804. ldt $f2, 0($sp)
  1805. ldt $f3, 8($sp)
  1806. ldt $f4, 16($sp)
  1807. ldt $f5, 24($sp)
  1808. ldt $f6, 32($sp)
  1809. ldt $f7, 40($sp)
  1810. ldt $f8, 48($sp)
  1811. ldt $f9, 56($sp)
  1812. clr $0
  /* Pop the stack frame. */
  1813. lda $sp, STACKSIZE($sp)
  1814. ret
  1815. .ident VERSION
  1816. .end CNAME