You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

meson.build 79 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522
  1. # Ordered As per https://netlib.org/blas/blasqr.pdf
  2. # NOTE: xROTG xROTMG xROTM have no kernels?
  3. # TODO: Actually test and set this
  4. if true  # placeholder condition: always taken until the check described in the TODO above is implemented
  5. fma3_flag = '-mfma'  # extra compiler flag; referenced in the 'addl' lists of the srot.c and drot.c kernel entries below
  6. endif
  7. # TODO: This is currently following x86_64 generic for src and dir, but it needs
  8. # to diversify
  9. # NOTE: The defines and undefines (-D/-U flags) are taken from Makefile.L1
  10. # Construct all ModesymbKERNEL from src and dir via files(dir + src)
  11. # For the 'modes' dictionary, the following mapping is used for c_args:
  12. # addl --> passed AS IS
  13. base_kops = [
  14. # Level 1 BLAS
  15. { 'base': '?rot',
  16. 'modes': {
  17. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'srot.c', 'addl': [fma3_flag]}}},
  18. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'drot.c', 'addl': [fma3_flag]}}},
  19. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot.c'}}},
  20. 'cs': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot_sse.S'}}},
  21. 'zd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot_sse2.S'}}},
  22. # 'xq': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot.S'}}},
  23. },
  24. },
  25. { 'base': '?swap',
  26. 'modes': {
  27. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'swap_sse.S'}}},
  28. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'swap_sse2.S'}}},
  29. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zswap_sse.S'}}},
  30. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zswap_sse2.S'}}},
  31. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'swap.S'}}},
  32. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zswap.S'}}},
  33. },
  34. },
  35. { 'base': '?scal',
  36. 'modes': {
  37. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sscal.c'}}},
  38. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'dscal.c'}}},
  39. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'cscal.c'}}},
  40. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zscal.c'}}},
  41. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'scal.S'}}},
  42. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zscal.S'}}},
  43. },
  44. },
  45. { 'base': '?copy',
  46. 'modes': {
  47. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'copy_sse.S'}}},
  48. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'copy_sse2.S'}}},
  49. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zcopy_sse.S'}}},
  50. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zcopy_sse2.S'}}},
  51. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'copy.S'}}},
  52. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zcopy.S'}}},
  53. },
  54. },
  55. { 'base': '?axpy',
  56. 'modes': {
  57. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'saxpy.c'}}},
  58. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'daxpy.c'}}},
  59. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'caxpy.c', 'addl': ['-UCONJ']}}},
  60. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zaxpy.c', 'addl': ['-UCONJ']}}},
  61. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'axpy.S'}}},
  62. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zaxpy.S'}}},
  63. },
  64. },
  65. { 'base': '?axpyc',
  66. 'modes': {
  67. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'caxpy.c', 'addl': ['-DCONJ']}}},
  68. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zaxpy.c', 'addl': ['-DCONJ']}}},
  69. },
  70. },
  71. { 'base': '?dot',
  72. 'modes': {
  73. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sdot.c'}}},
  74. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'ddot.c'}}},
  75. },
  76. },
  77. { 'base': '?dotc',
  78. 'modes': {
  79. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'cdot.c'}}},
  80. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zdot.c'}}},
  81. },
  82. },
  83. { 'base': '?dotu',
  84. 'modes': {
  85. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'cdot.c'}}},
  86. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zdot.c'}}},
  87. },
  88. },
  89. # TODO(rg): Check?
  90. { 'base': '?dsdot',
  91. 'modes': {
  92. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sdot.c'}}},
  93. '': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sdot.c', 'addl': ['-DDSDOT']}}},
  94. },
  95. },
  96. # TODO(rg): Add dsdotkernel conditionals
  97. # xDOTU xDOTC xxDOT aren't present
  98. { 'base': '?nrm2',
  99. 'modes': {
  100. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'nrm2_sse.S'}}},
  101. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'nrm2.S'}}},
  102. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'znrm2_sse.S'}}},
  103. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'znrm2.S'}}},
  104. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'nrm2.S'}}},
  105. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'znrm2.S'}}},
  106. },
  107. },
  108. { 'base': '?asum',
  109. 'modes': {
  110. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sasum.c'}}},
  111. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'dasum.c'}}},
  112. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zasum_sse.S'}}},
  113. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zasum_sse2.S'}}},
  114. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'asum.S'}}},
  115. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zasum.S'}}},
  116. },
  117. },
  118. { 'base': '?amax',
  119. 'modes': {
  120. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse.S'}}},
  121. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse2.S'}}},
  122. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax_sse.S'}}},
  123. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax_sse2.S'}}},
  124. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax.S'}}},
  125. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax.S'}}},
  126. },
  127. },
  128. { 'base': '?sum',
  129. 'modes': {
  130. 's': {'exts': {'_k': {'dir': 'arm', 'kernel': 'sum.c'}}},
  131. 'd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'sum.c'}}},
  132. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zsum_sse.S'}}},
  133. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zsum_sse2.S'}}},
  134. },
  135. },
  136. { 'base': '?amin',
  137. 'modes': {
  138. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse.S'}}},
  139. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse2.S'}}},
  140. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax_sse.S'}}},
  141. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax_sse2.S'}}},
  142. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax.S'}}},
  143. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'izamax.S'}}},
  144. },
  145. },
  146. { 'base': 'i?amax',
  147. 'modes': {
  148. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse.S'}}},
  149. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse2.S'}}},
  150. 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'izamax_sse.S'}}},
  151. 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'izamax_sse2.S'}}},
  152. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax.S'}}},
  153. # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'izamax.S'}}},
  154. },
  155. },
  156. { 'base': 'i?amin',
  157. 'modes': {
  158. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse.S'}}},
  159. 'd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'iamin.c'}}},
  160. 'c': {'exts': {'_k': {'dir': 'arm', 'kernel': 'izamin.c'}}},
  161. 'z': {'exts': {'_k': {'dir': 'arm', 'kernel': 'izamin.c'}}},
  162. },
  163. },
  164. { 'base': 'i?max',
  165. 'modes': {
  166. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse.S'}}},
  167. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse2.S'}}},
  168. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax.S'}}},
  169. },
  170. },
  171. { 'base': 'i?min',
  172. 'modes': {
  173. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse.S'}}},
  174. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse2.S'}}},
  175. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax.S'}}},
  176. },
  177. },
  178. { 'base': '?max',
  179. 'modes': {
  180. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse.S'}}},
  181. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse2.S'}}},
  182. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax.S'}}},
  183. },
  184. },
  185. { 'base': '?min',
  186. 'modes': {
  187. 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse.S'}}},
  188. 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse2.S'}}},
  189. # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax.S'}}},
  190. },
  191. },
  192. { 'base': '?axpby',
  193. 'modes': {
  194. 's': {'exts': {'_k': {'dir': 'arm', 'kernel': 'axpby.c'}}},
  195. 'd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'axpby.c'}}},
  196. 'c': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zaxpby.c'}}},
  197. 'z': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zaxpby.c'}}},
  198. },
  199. },
  200. # Level 2 BLAS
  201. # There are additional sources so now we have srcs
  202. # Ordered as per KERNEL.generic and Makefile.L2
  203. # exts are used to find the flags for each modality
  204. # ext is attached to base (only useful from Level 2)
  205. # i.e. baseext (e.g., gemv_n, gemv_t, cgeru_k, cgerc_k)
  206. { 'base': '?gemv',
  207. 'modes': {
  208. 's': {
  209. 'exts': {
  210. # TODO(rg): Where are these coming from??
  211. # Most of these have both generic defines and also per-folder defines..
  212. # Makefile lists sgemv_n_4.c as the source, though there is a sgemv_n.c
  213. '_n': {'dir': 'x86_64', 'kernel': 'sgemv_n_4.c'},
  214. '_t': {'dir': 'x86_64', 'kernel': 'sgemv_t_4.c'},
  215. }
  216. },
  217. 'd': {
  218. 'exts': {
  219. '_n': {'dir': 'x86_64', 'kernel': 'dgemv_n_4.c'},
  220. '_t': {'dir': 'x86_64', 'kernel': 'dgemv_t_4.c'},
  221. }
  222. },
  223. # 'q': {
  224. # 'exts': {
  225. # '_n': {'dir': 'x86_64', 'kernel': 'qgemv_n.S'},
  226. # '_t': {'dir': 'x86_64', 'kernel': 'qgemv_t.S'},
  227. # }
  228. # },
  229. 'c': {
  230. 'exts': {
  231. '_n': {'dir': 'x86_64', 'kernel': 'cgemv_n_4.c'},
  232. '_t': {'dir': 'x86_64', 'kernel': 'cgemv_t_4.c'},
  233. '_r': {'dir': 'x86_64', 'kernel': 'cgemv_n_4.c'},
  234. '_c': {'dir': 'x86_64', 'kernel': 'cgemv_t_4.c'},
  235. '_o': {'dir': 'x86_64', 'kernel': 'cgemv_n_4.c'},
  236. '_u': {'dir': 'x86_64', 'kernel': 'cgemv_t_4.c'},
  237. '_s': {'dir': 'x86_64', 'kernel': 'cgemv_n_4.c'},
  238. '_d': {'dir': 'x86_64', 'kernel': 'cgemv_t_4.c'},
  239. }
  240. },
  241. 'z': {
  242. 'exts': {
  243. '_n': {'dir': 'x86_64', 'kernel': 'zgemv_n_4.c'},
  244. '_t': {'dir': 'x86_64', 'kernel': 'zgemv_t_4.c'},
  245. '_r': {'dir': 'x86_64', 'kernel': 'zgemv_n_4.c'},
  246. '_c': {'dir': 'x86_64', 'kernel': 'zgemv_t_4.c'},
  247. '_o': {'dir': 'x86_64', 'kernel': 'zgemv_n_4.c'},
  248. '_u': {'dir': 'x86_64', 'kernel': 'zgemv_t_4.c'},
  249. '_s': {'dir': 'x86_64', 'kernel': 'zgemv_n_4.c'},
  250. '_d': {'dir': 'x86_64', 'kernel': 'zgemv_t_4.c'},
  251. }
  252. },
  253. # 'x': {
  254. # 'exts': {
  255. # '_n': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
  256. # '_t': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
  257. # '_r': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
  258. # '_c': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
  259. # '_o': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
  260. # '_u': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
  261. # '_s': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
  262. # '_d': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
  263. # }
  264. # },
  265. },
  266. },
  267. { 'base': '?symv',
  268. 'modes': {
  269. 's': {
  270. 'exts': {
  271. '_U': {'dir': 'x86_64', 'kernel': 'ssymv_U.c'},
  272. '_L': {'dir': 'x86_64', 'kernel': 'ssymv_L.c'},
  273. }
  274. },
  275. 'd': {
  276. 'exts': {
  277. '_U': {'dir': 'generic', 'kernel': 'symv_k.c'},
  278. '_L': {'dir': 'generic', 'kernel': 'symv_k.c'},
  279. }
  280. },
  281. 'c': {
  282. 'exts': {
  283. '_U': {'dir': 'generic', 'kernel': 'zsymv_k.c'},
  284. '_L': {'dir': 'generic', 'kernel': 'zsymv_k.c'},
  285. }
  286. },
  287. 'z': {
  288. 'exts': {
  289. '_U': {'dir': 'x86_64', 'kernel': 'zsymv_U_sse2.S'},
  290. '_L': {'dir': 'x86_64', 'kernel': 'zsymv_L_sse2.S'},
  291. }
  292. },
  293. # 'q': {
  294. # 'exts': {
  295. # '_U': {'dir': 'generic', 'kernel': 'symv_k.c'},
  296. # '_L': {'dir': 'generic', 'kernel': 'symv_k.c'},
  297. # }
  298. # },
  299. # 'x': {
  300. # 'exts': {
  301. # '_U': {'dir': 'generic', 'kernel': 'zsymv_k.c'},
  302. # '_L': {'dir': 'generic', 'kernel': 'zsymv_k.c'},
  303. # }
  304. # },
  305. },
  306. },
  307. { 'base': '?lsame',
  308. 'modes': {
  309. '': {'exts': {'': {'dir': 'x86_64', 'kernel': 'lsame.S'}}},
  310. },
  311. },
  312. { 'base': '?cabs',
  313. 'modes': {
  314. 's': {'exts': {'1': {'dir': 'x86_64', 'kernel': 'cabs.S'}}},
  315. 'd': {'exts': {'1': {'dir': 'x86_64', 'kernel': 'cabs.S'}}},
  316. # 'q': {'exts': {'': {'dir': 'generic', 'kernel': 'cabs.c'}}},
  317. },
  318. },
  319. { 'base': '?gemm3m',
  320. 'modes': {
  321. 'c': {'exts': {
  322. '_kernel': {'dir': 'x86_64', 'kernel': 'cgemm3m_kernel_8x4_haswell.c',
  323. 'addl': ['-DICOPY', '-UUSE_ALPHA']},
  324. '_oncopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  325. 'addl': ['-DUSE_ALPHA']},
  326. '_otcopyb': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  327. 'addl': ['-DUSE_ALPHA']},
  328. '_itcopyb': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_8.c',
  329. 'addl': ['-DICOPY', '-UUSE_ALPHA']},
  330. '_itcopyr': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_8.c',
  331. 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DREAL_ONLY']},
  332. '_itcopyi': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_8.c',
  333. 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DIMAGE_ONLY']},
  334. '_incopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_8.c',
  335. 'addl': ['-DICOPY', '-UUSE_ALPHA']},
  336. '_incopyr': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_8.c',
  337. 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DREAL_ONLY']},
  338. '_oncopyr': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  339. 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']},
  340. '_otcopyr': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  341. 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']},
  342. '_incopyi': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_8.c',
  343. 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DIMAGE_ONLY']},
  344. '_oncopyi': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  345. 'addl': ['-DUSE_ALPHA', '-DIMAGE_ONLY']},
  346. '_otcopyi': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  347. 'addl': ['-DUSE_ALPHA', '-DIMAGE_ONLY']},
  348. }},
  349. 'z': {'exts': {
  350. '_kernel': {'dir': 'x86_64', 'kernel': 'zgemm3m_kernel_4x4_haswell.c',
  351. 'addl': ['-DNN']},
  352. '_oncopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  353. 'addl': ['-DUSE_ALPHA']},
  354. '_otcopyb': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  355. 'addl': ['-DUSE_ALPHA']},
  356. '_itcopyb': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  357. 'addl': ['-DICOPY', '-UUSE_ALPHA']},
  358. '_itcopyr': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  359. 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DREAL_ONLY']},
  360. '_itcopyi': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  361. 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DIMAGE_ONLY']},
  362. '_incopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  363. 'addl': ['-DICOPY', '-UUSE_ALPHA']},
  364. '_incopyr': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  365. 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DREAL_ONLY']},
  366. '_oncopyr': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  367. 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']},
  368. '_otcopyr': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  369. 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']},
  370. '_incopyi': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  371. 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DIMAGE_ONLY']},
  372. '_oncopyi': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
  373. 'addl': ['-DUSE_ALPHA', '-DIMAGE_ONLY']},
  374. '_otcopyi': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
  375. 'addl': ['-DUSE_ALPHA', '-DIMAGE_ONLY']},
  376. }},
  377. },
  378. },
  379. { 'base': '?ger',
  380. 'modes': {
  381. 's': {'exts': {'_k': {'dir': 'generic', 'kernel': 'ger.c'}}},
  382. 'd': {'exts': {'_k': {'dir': 'generic', 'kernel': 'ger.c'}}},
  383. # 'q': {'exts': {'_k': {'dir': 'generic', 'kernel': 'ger.c'}}},
  384. },
  385. },
  386. { 'base': '?geru',
  387. 'modes': {
  388. 'c': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UCONJ']}}},
  389. 'z': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UCONJ']}}},
  390. # 'x': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c'}}},
  391. },
  392. },
  393. { 'base': '?gerc',
  394. 'modes': {
  395. 'c': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DCONJ']}}},
  396. 'z': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DCONJ']}}},
  397. # 'x': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c'}}},
  398. },
  399. },
  400. { 'base': '?gerv',
  401. 'modes': {
  402. 'c': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UCONJ', '-DXCONJ']}}},
  403. 'z': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UCONJ', '-DXCONJ']}}},
  404. },
  405. },
  406. { 'base': '?hemv',
  407. 'modes': {
  408. 'c': {
  409. 'exts': {
  410. '_U': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV']},
  411. '_L': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV']},
  412. '_V': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV', '-DHEMVREV']},
  413. '_M': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV', '-DHEMVREV']},
  414. }
  415. },
  416. 'z': {
  417. 'exts': {
  418. '_U': {'dir': 'x86_64', 'kernel': 'zsymv_U_sse2.S', 'addl': ['-DHEMV']},
  419. '_L': {'dir': 'x86_64', 'kernel': 'zsymv_L_sse2.S', 'addl': ['-DHEMV']},
  420. '_V': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV', '-DHEMVREV']},
  421. '_M': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV', '-DHEMVREV']},
  422. }
  423. },
  424. # 'x': {
  425. # 'exts': {
  426. # '_U': {'dir': 'generic', 'kernel': 'zhemv_k.c'},
  427. # '_L': {'dir': 'generic', 'kernel': 'zhemv_k.c'},
  428. # '_V': {'dir': 'generic', 'kernel': 'zhemv_k.c'},
  429. # '_M': {'dir': 'generic', 'kernel': 'zhemv_k.c'},
  430. # }
  431. # },
  432. },
  433. },
  434. # { 'base': '?bgemv',
  435. # 'modes': {
  436. # 's': {
  437. # 'exts': {
  438. # '_n': {'dir': 'x86_64', 'kernel': 'sbgemv_n.c'},
  439. # '_t': {'dir': 'x86_64', 'kernel': 'sbgemv_n.c'},
  440. # }
  441. # }
  442. # },
  443. # },
  444. # Level 3 symbols
  445. { 'base': '?gemm_kernel',
  446. 'modes': {
  447. 's': {'exts': {'': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell_2.c'}}},
  448. 'd': {'exts': {'': {'dir': 'x86_64', 'kernel': 'dgemm_kernel_4x8_haswell.S'}}},
  449. 'c': {
  450. 'exts': {
  451. '_n': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.c', 'addl': ['-DNN']},
  452. '_l': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.c', 'addl': ['-DCN']},
  453. # TODO(rg): What about _r conditionals? Makefile.L3:2969
  454. '_r': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.c', 'addl': ['-DNC']},
  455. '_b': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.c', 'addl': ['-DCC']},
  456. }
  457. },
  458. 'z': {
  459. 'exts': {
  460. '_n': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.c', 'addl': ['-DNN']},
  461. '_l': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.c', 'addl': ['-DCN']},
  462. '_r': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.c', 'addl': ['-DNC']},
  463. '_b': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.c', 'addl': ['-DCC']},
  464. }
  465. }
  466. # 'q': {'exts': {'': {'dir': 'generic', 'kernel': 'gemm_beta.c'}}},
  467. # 'x': {'exts': {'': {'dir': 'generic', 'kernel': 'zgemm_beta.c'}}},
  468. },
  469. },
  470. { 'base': '?trmm_kernel',
  471. 'modes': {
  472. 's': {
  473. 'exts': {
  474. '_LN': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell.c'},
  475. '_LT': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell.c', 'addl': ['-DLEFT', '-DTRANSA']},
  476. '_RN': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell.c'},
  477. '_RT': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell.c'},
  478. }
  479. },
  480. 'd': {
  481. 'exts': {
  482. '_LN': {'dir': 'x86_64', 'kernel': 'dtrmm_kernel_4x8_haswell.c'},
  483. '_LT': {'dir': 'x86_64', 'kernel': 'dtrmm_kernel_4x8_haswell.c', 'addl': ['-DLEFT', '-DTRANSA']},
  484. '_RN': {'dir': 'x86_64', 'kernel': 'dtrmm_kernel_4x8_haswell.c'},
  485. '_RT': {'dir': 'x86_64', 'kernel': 'dtrmm_kernel_4x8_haswell.c'},
  486. }
  487. },
  488. 'c': {
  489. 'exts': {
  490. '_LN': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
  491. 'addl': ['-DLEFT', '-UTRANSA', '-UCONJ', '-DNN']},
  492. '_LT': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
  493. 'addl': ['-DLEFT', '-DTRANSA', '-UCONJ', '-DNN']},
  494. '_LR': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
  495. 'addl': ['-DLEFT', '-UTRANSA', '-DCONJ', '-DCN']},
  496. '_LC': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
  497. 'addl': ['-DLEFT', '-DTRANSA', '-DCONJ', '-DCN']},
  498. '_RN': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
  499. 'addl': ['-ULEFT', '-UTRANSA', '-UCONJ', '-DNN']},
  500. '_RT': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
  501. 'addl': ['-ULEFT', '-DTRANSA', '-UCONJ', '-DNN']},
  502. '_RR': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
  503. 'addl': ['-ULEFT', '-UTRANSA', '-DCONJ', '-DNC']},
  504. '_RC': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
  505. 'addl': ['-ULEFT', '-DTRANSA', '-DCONJ', '-DNC']},
  506. }
  507. },
  508. 'z': {
  509. 'exts': {
  510. '_LN': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
  511. 'addl': ['-DLEFT', '-UTRANSA', '-UCONJ', '-DNN']},
  512. '_LT': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
  513. 'addl': ['-DLEFT', '-DTRANSA', '-UCONJ', '-DNN']},
  514. '_LR': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
  515. 'addl': ['-DLEFT', '-UTRANSA', '-DCONJ', '-DCN']},
  516. '_LC': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
  517. 'addl': ['-DLEFT', '-DTRANSA', '-DCONJ', '-DCN']},
  518. '_RN': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
  519. 'addl': ['-ULEFT', '-UTRANSA', '-UCONJ', '-DNN']},
  520. '_RT': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
  521. 'addl': ['-ULEFT', '-DTRANSA', '-UCONJ', '-DNN']},
  522. '_RR': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
  523. 'addl': ['-ULEFT', '-UTRANSA', '-DCONJ', '-DNC']},
  524. '_RC': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
  525. 'addl': ['-ULEFT', '-DTRANSA', '-DCONJ', '-DNC']},
  526. },
  527. },
  528. },
  529. },
  530. { 'base': '?trsm_kernel',
  531. 'modes': {
  532. 's': {
  533. 'exts': {
  534. '_LN': {'dir': 'x86_64', 'kernel': 'strsm_kernel_8x4_haswell_LN.c',
  535. 'addl': ['-DLN', '-DUPPER', '-UCONJ']},
  536. '_LT': {'dir': 'x86_64', 'kernel': 'strsm_kernel_8x4_haswell_LT.c',
  537. 'addl': ['-DLT', '-UUPPER', '-UCONJ']},
  538. '_RN': {'dir': 'x86_64', 'kernel': 'strsm_kernel_8x4_haswell_RN.c',
  539. 'addl': ['-DRN', '-DUPPER', '-UCONJ']},
  540. '_RT': {'dir': 'x86_64', 'kernel': 'strsm_kernel_8x4_haswell_RT.c',
  541. 'addl': ['-DRT', '-UUPPER', '-UCONJ']},
  542. },
  543. },
  544. 'd': {
  545. 'exts': {
  546. '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
  547. 'addl': ['-DLN', '-DUPPER', '-UCONJ']},
  548. '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
  549. 'addl': ['-DLT', '-UUPPER', '-UCONJ']},
  550. '_RN': {'dir': 'x86_64', 'kernel': 'dtrsm_kernel_RN_haswell.c',
  551. 'addl': ['-DRN', '-DUPPER', '-UCONJ']},
  552. '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
  553. 'addl': ['-DRT', '-UUPPER', '-UCONJ']},
  554. },
  555. },
  556. 'c': {
  557. 'exts': {
  558. '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
  559. 'addl': ['-DLN', '-DUPPER', '-UCONJ']},
  560. '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
  561. 'addl': ['-DLT', '-UUPPER', '-UCONJ']},
  562. '_LR': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
  563. 'addl': ['-DLN', '-DUPPER', '-DCONJ']},
  564. '_LC': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
  565. 'addl': ['-DLT', '-UUPPER', '-DCONJ']},
  566. '_RN': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
  567. 'addl': ['-DRN', '-DUPPER', '-UCONJ']},
  568. '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
  569. 'addl': ['-DRT', '-UUPPER', '-UCONJ']},
  570. '_RR': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
  571. 'addl': ['-DRN', '-DUPPER', '-DCONJ']},
  572. '_RC': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
  573. 'addl': ['-DRT', '-UUPPER', '-DCONJ']},
  574. },
  575. },
  576. 'z': {
  577. 'exts': {
  578. '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
  579. 'addl': ['-DLN', '-DUPPER', '-UCONJ']},
  580. '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
  581. 'addl': ['-DLT', '-UUPPER', '-UCONJ']},
  582. '_LR': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
  583. 'addl': ['-DLN', '-DUPPER', '-DCONJ']},
  584. '_LC': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
  585. 'addl': ['-DLT', '-UUPPER', '-DCONJ']},
  586. '_RN': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
  587. 'addl': ['-DRN', '-DUPPER', '-UCONJ']},
  588. '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
  589. 'addl': ['-DRT', '-UUPPER', '-UCONJ']},
  590. '_RR': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
  591. 'addl': ['-DRN', '-DUPPER', '-DCONJ']},
  592. '_RC': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
  593. 'addl': ['-DRT', '-UUPPER', '-DCONJ']},
  594. },
  595. },
  596. },
  597. },
  598. { 'base': '?gemm',
  599. 'modes': {
  600. 's': {'exts': {
  601. '_beta': {'dir': 'x86_64', 'kernel': 'sgemm_beta_skylakex.c'},
  602. '_small_matrix_permit': {'dir': 'generic',
  603. 'kernel': 'gemm_small_matrix_permit.c'},
  604. # TODO(rg): the _NUM suffixes (e.g. _8 in gemm_ncopy_8.c) are arch dependent
  605. '_incopy': {'dir': 'generic', 'kernel': 'gemm_ncopy_8.c'},
  606. '_itcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_8.c'},
  607. '_oncopy': {'dir': 'x86_64', 'kernel': 'sgemm_ncopy_4_skylakex.c'},
  608. '_otcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_4.c'},
  609. # TODO(rg): direct and direct_performant are built only conditionally
  610. '_direct': {'dir': 'x86_64', 'kernel': 'sgemm_direct_skylakex.c'},
  611. '_direct_performant': {'dir': 'x86_64', 'kernel': 'sgemm_direct_performant.c'},
  612. }},
  613. 'd': {'exts': {
  614. '_beta': {'dir': 'x86_64', 'kernel': 'dgemm_beta_skylakex.c'},
  615. '_small_matrix_permit': {'dir': 'generic',
  616. 'kernel': 'gemm_small_matrix_permit.c'},
  617. '_incopy': {'dir': 'generic', 'kernel': 'gemm_ncopy_4.c'},
  618. '_itcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_4.c'},
  619. '_oncopy': {'dir': 'x86_64', 'kernel': 'dgemm_ncopy_8_skylakex.c'},
  620. '_otcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_8.c'},
  621. }},
  622. 'c': {'exts': {
  623. '_beta': {'dir': 'x86_64', 'kernel': 'zgemm_beta.S'},
  624. '_small_matrix_permit': {'dir': 'generic',
  625. 'kernel': 'zgemm_small_matrix_permit.c'},
  626. '_incopy': {'dir': 'generic', 'kernel': 'zgemm_ncopy_8.c'},
  627. '_itcopy': {'dir': 'generic', 'kernel': 'zgemm_tcopy_8.c'},
  628. '_oncopy': {'dir': 'generic', 'kernel': 'zgemm_ncopy_2.c'},
  629. '_otcopy': {'dir': 'generic', 'kernel': 'zgemm_tcopy_2.c'},
  630. }},
  631. 'z': {'exts': {
  632. '_beta': {'dir': 'x86_64', 'kernel': 'zgemm_beta.S'},
  633. '_small_matrix_permit': {'dir': 'generic',
  634. 'kernel': 'zgemm_small_matrix_permit.c'},
  635. '_incopy': {'dir': 'generic', 'kernel': 'zgemm_ncopy_4.c'},
  636. '_itcopy': {'dir': 'generic', 'kernel': 'zgemm_tcopy_4.c'},
  637. '_oncopy': {'dir': 'generic', 'kernel': 'zgemm_ncopy_2.c'},
  638. '_otcopy': {'dir': 'generic', 'kernel': 'zgemm_tcopy_2.c'},
  639. }},
  640. },
  641. },
  642. { 'base': '?trmm',
  643. 'modes': {
  644. 's': {'exts': {
  645. # TODO(rg): These actually use $(SGEMM_UNROLL_M) to choose the size
  646. '_iunucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
  647. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
  648. '_iunncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
  649. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
  650. '_ilnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
  651. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
  652. '_ilnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
  653. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
  654. '_iutucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
  655. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
  656. '_iutncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
  657. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
  658. '_iltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
  659. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
  660. '_iltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
  661. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
  662. '_ounucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
  663. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
  664. '_ounncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
  665. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
  666. '_olnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
  667. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
  668. '_olnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
  669. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
  670. '_outucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
  671. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
  672. '_outncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
  673. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
  674. '_oltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
  675. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
  676. '_oltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
  677. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
  678. }},
  679. 'd': {'exts': {
  680. '_iunucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
  681. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
  682. '_iunncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
  683. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
  684. '_ilnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
  685. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
  686. '_ilnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
  687. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
  688. '_iutucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
  689. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
  690. '_iutncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
  691. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
  692. '_iltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
  693. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
  694. '_iltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
  695. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
  696. '_ounucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
  697. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
  698. '_ounncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
  699. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
  700. '_olnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
  701. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
  702. '_olnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
  703. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
  704. '_outucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
  705. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
  706. '_outncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
  707. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
  708. '_oltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
  709. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
  710. '_oltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
  711. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
  712. }},
  713. 'c': {'exts': {
  714. '_iunucopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_8.c',
  715. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
  716. '_iunncopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_8.c',
  717. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
  718. '_ilnucopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_8.c',
  719. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
  720. '_ilnncopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_8.c',
  721. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
  722. '_iutucopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_8.c',
  723. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
  724. '_iutncopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_8.c',
  725. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
  726. '_iltucopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_8.c',
  727. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
  728. '_iltncopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_8.c',
  729. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
  730. '_ounucopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_2.c',
  731. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
  732. '_ounncopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_2.c',
  733. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
  734. '_olnucopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_2.c',
  735. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
  736. '_olnncopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_2.c',
  737. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
  738. '_outucopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_2.c',
  739. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
  740. '_outncopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_2.c',
  741. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
  742. '_oltucopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_2.c',
  743. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
  744. '_oltncopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_2.c',
  745. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
  746. }},
  747. 'z': {'exts': {
  748. '_iunucopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_4.c',
  749. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
  750. '_iunncopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_4.c',
  751. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
  752. '_ilnucopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_4.c',
  753. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
  754. '_ilnncopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_4.c',
  755. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
  756. '_iutucopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_4.c',
  757. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
  758. '_iutncopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_4.c',
  759. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
  760. '_iltucopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_4.c',
  761. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
  762. '_iltncopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_4.c',
  763. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
  764. '_ounucopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_2.c',
  765. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
  766. '_ounncopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_2.c',
  767. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
  768. '_olnucopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_2.c',
  769. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
  770. '_olnncopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_2.c',
  771. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
  772. '_outucopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_2.c',
  773. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
  774. '_outncopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_2.c',
  775. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
  776. '_oltucopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_2.c',
  777. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
  778. '_oltncopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_2.c',
  779. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
  780. }},
  781. },
  782. },
  783. { 'base': '?hemm',
  784. 'modes': {
  785. 'c': {'exts': {
  786. '_iutcopy': {'dir': 'generic', 'kernel': 'zhemm_utcopy_8.c',
  787. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER']},
  788. '_iltcopy': {'dir': 'generic', 'kernel': 'zhemm_ltcopy_8.c',
  789. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER']},
  790. '_outcopy': {'dir': 'generic', 'kernel': 'zhemm_utcopy_2.c',
  791. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER']},
  792. '_oltcopy': {'dir': 'generic', 'kernel': 'zhemm_ltcopy_2.c',
  793. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER']},
  794. }},
  795. 'z': {'exts': {
  796. '_iutcopy': {'dir': 'generic', 'kernel': 'zhemm_utcopy_4.c',
  797. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER']},
  798. '_iltcopy': {'dir': 'generic', 'kernel': 'zhemm_ltcopy_4.c',
  799. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER']},
  800. '_outcopy': {'dir': 'generic', 'kernel': 'zhemm_utcopy_2.c',
  801. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER']},
  802. '_oltcopy': {'dir': 'generic', 'kernel': 'zhemm_ltcopy_2.c',
  803. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER']},
  804. }},
  805. },
  806. },
  807. { 'base': '?trsm',
  808. 'modes': {
  809. 's': {'exts': {
  810. # TODO(rg): These actually use $(SGEMM_UNROLL_M) to choose the size
  811. '_iunucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
  812. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  813. '_iunncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
  814. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  815. '_ilnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
  816. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  817. '_ilnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
  818. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  819. '_iutucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
  820. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  821. '_iutncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
  822. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  823. '_iltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
  824. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  825. '_iltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
  826. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  827. '_ounucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
  828. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  829. '_ounncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
  830. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  831. '_olnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
  832. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  833. '_olnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
  834. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  835. '_outucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
  836. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  837. '_outncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
  838. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  839. '_oltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
  840. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  841. '_oltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
  842. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  843. }},
  844. 'd': {'exts': {
  845. # TODO(rg): These actually use $(DGEMM_UNROLL_M) to choose the size
  846. '_iunucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
  847. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  848. '_iunncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
  849. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  850. '_ilnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
  851. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  852. '_ilnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
  853. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  854. '_iutucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
  855. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  856. '_iutncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
  857. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  858. '_iltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
  859. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  860. '_iltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
  861. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  862. '_ounucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
  863. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  864. '_ounncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
  865. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  866. '_olnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
  867. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  868. '_olnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
  869. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  870. '_outucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
  871. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  872. '_outncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
  873. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  874. '_oltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
  875. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  876. '_oltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
  877. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  878. }},
  879. 'q': {'exts': {
  880. # TODO(rg): These actually use $(QGEMM_UNROLL_M) to choose the size
  881. '_iunucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
  882. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  883. '_iunncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
  884. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  885. '_ilnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
  886. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  887. '_ilnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
  888. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  889. '_iutucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
  890. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  891. '_iutncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
  892. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  893. '_iltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
  894. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  895. '_iltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
  896. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  897. '_ounucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
  898. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  899. '_ounncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
  900. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  901. '_olnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
  902. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  903. '_olnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
  904. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  905. '_outucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
  906. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  907. '_outncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
  908. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  909. '_oltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
  910. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  911. '_oltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
  912. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  913. }},
  914. 'c': {'exts': {
  915. # TODO(rg): These actually use $(CGEMM_UNROLL_M) to choose the size
  916. '_iunucopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_8.c',
  917. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  918. '_iunncopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_8.c',
  919. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  920. '_ilnucopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_8.c',
  921. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  922. '_ilnncopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_8.c',
  923. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  924. '_iutucopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_8.c',
  925. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  926. '_iutncopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_8.c',
  927. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  928. '_iltucopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_8.c',
  929. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  930. '_iltncopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_8.c',
  931. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  932. '_ounucopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_2.c',
  933. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  934. '_ounncopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_2.c',
  935. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  936. '_olnucopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_2.c',
  937. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  938. '_olnncopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_2.c',
  939. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  940. '_outucopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_2.c',
  941. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  942. '_outncopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_2.c',
  943. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  944. '_oltucopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_2.c',
  945. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  946. '_oltncopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_2.c',
  947. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  948. }},
  949. 'z': {'exts': {
  950. # TODO(rg): These actually use $(ZGEMM_UNROLL_M) to choose the size
  951. '_iunucopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_4.c',
  952. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  953. '_iunncopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_4.c',
  954. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  955. '_ilnucopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_4.c',
  956. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  957. '_ilnncopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_4.c',
  958. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  959. '_iutucopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_4.c',
  960. 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
  961. '_iutncopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_4.c',
  962. 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
  963. '_iltucopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_4.c',
  964. 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
  965. '_iltncopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_4.c',
  966. 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
  967. '_ounucopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_2.c',
  968. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  969. '_ounncopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_2.c',
  970. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  971. '_olnucopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_2.c',
  972. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  973. '_olnncopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_2.c',
  974. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  975. '_outucopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_2.c',
  976. 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
  977. '_outncopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_2.c',
  978. 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
  979. '_oltucopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_2.c',
  980. 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
  981. '_oltncopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_2.c',
  982. 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
  983. }},
  984. },
  985. },
  986. { 'base': '?symm',
  987. 'modes': {
  988. 's': {'exts': {
  989. # TODO(rg): These actually use $(SGEMM_UNROLL_M) to choose the size
  990. '_iutcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_8.c',
  991. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER']},
  992. '_iltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_8.c',
  993. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER']},
  994. '_outcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_4.c',
  995. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER']},
  996. '_oltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_4.c',
  997. 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER']},
  998. }},
  999. 'd': {'exts': {
  1000. '_iutcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_4.c',
  1001. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER']},
  1002. '_iltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_4.c',
  1003. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER']},
  1004. '_outcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_8.c',
  1005. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER']},
  1006. '_oltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_8.c',
  1007. 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER']},
  1008. }},
  1009. 'c': {'exts': {
  1010. '_iutcopy': {'dir': 'generic', 'kernel': 'zsymm_ucopy_8.c',
  1011. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER']},
  1012. '_iltcopy': {'dir': 'generic', 'kernel': 'zsymm_lcopy_8.c',
  1013. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER']},
  1014. '_outcopy': {'dir': 'generic', 'kernel': 'zsymm_ucopy_2.c',
  1015. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER']},
  1016. '_oltcopy': {'dir': 'generic', 'kernel': 'zsymm_lcopy_2.c',
  1017. 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER']},
  1018. }},
  1019. 'z': {'exts': {
  1020. '_iutcopy': {'dir': 'generic', 'kernel': 'zsymm_ucopy_4.c',
  1021. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER']},
  1022. '_iltcopy': {'dir': 'generic', 'kernel': 'zsymm_lcopy_4.c',
  1023. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER']},
  1024. '_outcopy': {'dir': 'generic', 'kernel': 'zsymm_ucopy_2.c',
  1025. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER']},
  1026. '_oltcopy': {'dir': 'generic', 'kernel': 'zsymm_lcopy_2.c',
  1027. 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER']},
  1028. }},
  1029. },
  1030. },
  1031. { 'base': '?omatcopy_k',
  1032. 'modes': {
  1033. 's': {'exts': {
  1034. '_cn': {'dir': 'arm', 'kernel': 'omatcopy_cn.c', 'addl': ['-UROWM']},
  1035. '_rn': {'dir': 'arm', 'kernel': 'omatcopy_rn.c', 'addl': ['-DROWM']},
  1036. '_ct': {'dir': 'arm', 'kernel': 'omatcopy_ct.c', 'addl': ['-UROWM']},
  1037. '_rt': {'dir': 'arm', 'kernel': 'omatcopy_rt.c', 'addl': ['-DROWM']},
  1038. }},
  1039. 'd': {'exts': {
  1040. '_cn': {'dir': 'arm', 'kernel': 'omatcopy_cn.c', 'addl': ['-UROWM']},
  1041. '_rn': {'dir': 'arm', 'kernel': 'omatcopy_rn.c', 'addl': ['-DROWM']},
  1042. '_ct': {'dir': 'arm', 'kernel': 'omatcopy_ct.c', 'addl': ['-UROWM']},
  1043. '_rt': {'dir': 'arm', 'kernel': 'omatcopy_rt.c', 'addl': ['-DROWM']},
  1044. }},
  1045. 'c': {'exts': {
  1046. '_cn': {'dir': 'arm', 'kernel': 'zomatcopy_cn.c', 'addl': ['-UROWM', '-UCONJ']},
  1047. '_rn': {'dir': 'arm', 'kernel': 'zomatcopy_rn.c', 'addl': ['-DROWM', '-UCONJ']},
  1048. '_ct': {'dir': 'arm', 'kernel': 'zomatcopy_ct.c', 'addl': ['-UROWM', '-UCONJ']},
  1049. '_rt': {'dir': 'arm', 'kernel': 'zomatcopy_rt.c', 'addl': ['-DROWM', '-UCONJ']},
  1050. '_cnc': {'dir': 'arm', 'kernel': 'zomatcopy_cnc.c', 'addl': ['-UROWM', '-DCONJ']},
  1051. '_rnc': {'dir': 'arm', 'kernel': 'zomatcopy_rnc.c', 'addl': ['-DROWM', '-DCONJ']},
  1052. '_ctc': {'dir': 'arm', 'kernel': 'zomatcopy_ctc.c', 'addl': ['-UROWM', '-DCONJ']},
  1053. '_rtc': {'dir': 'arm', 'kernel': 'zomatcopy_rtc.c', 'addl': ['-DROWM', '-DCONJ']},
  1054. }},
  1055. 'z': {'exts': {
  1056. '_cn': {'dir': 'arm', 'kernel': 'zomatcopy_cn.c', 'addl': ['-UROWM', '-UCONJ']},
  1057. '_rn': {'dir': 'arm', 'kernel': 'zomatcopy_rn.c', 'addl': ['-DROWM', '-UCONJ']},
  1058. '_ct': {'dir': 'arm', 'kernel': 'zomatcopy_ct.c', 'addl': ['-UROWM', '-UCONJ']},
  1059. '_rt': {'dir': 'arm', 'kernel': 'zomatcopy_rt.c', 'addl': ['-DROWM', '-UCONJ']},
  1060. '_cnc': {'dir': 'arm', 'kernel': 'zomatcopy_cnc.c', 'addl': ['-UROWM', '-DCONJ']},
  1061. '_rnc': {'dir': 'arm', 'kernel': 'zomatcopy_rnc.c', 'addl': ['-DROWM', '-DCONJ']},
  1062. '_ctc': {'dir': 'arm', 'kernel': 'zomatcopy_ctc.c', 'addl': ['-UROWM', '-DCONJ']},
  1063. '_rtc': {'dir': 'arm', 'kernel': 'zomatcopy_rtc.c', 'addl': ['-DROWM', '-DCONJ']},
  1064. }},
  1065. },
  1066. },
  1067. { 'base': '?imatcopy_k',
  1068. 'modes': {
  1069. 's': {'exts': {
  1070. '_cn': {'dir': 'generic', 'kernel': 'imatcopy_cn.c', 'addl': ['-UROWM']},
  1071. '_rn': {'dir': 'generic', 'kernel': 'imatcopy_rn.c', 'addl': ['-DROWM']},
  1072. '_ct': {'dir': 'generic', 'kernel': 'imatcopy_ct.c', 'addl': ['-UROWM']},
  1073. '_rt': {'dir': 'generic', 'kernel': 'imatcopy_rt.c', 'addl': ['-DROWM']},
  1074. }},
  1075. 'd': {'exts': {
  1076. '_cn': {'dir': 'generic', 'kernel': 'imatcopy_cn.c', 'addl': ['-UROWM']},
  1077. '_rn': {'dir': 'generic', 'kernel': 'imatcopy_rn.c', 'addl': ['-DROWM']},
  1078. '_ct': {'dir': 'generic', 'kernel': 'imatcopy_ct.c', 'addl': ['-UROWM']},
  1079. '_rt': {'dir': 'generic', 'kernel': 'imatcopy_rt.c', 'addl': ['-DROWM']},
  1080. }},
  1081. 'c': {'exts': {
  1082. '_cn': {'dir': 'generic', 'kernel': 'zimatcopy_cn.c', 'addl': ['-UROWM', '-UCONJ']},
  1083. '_rn': {'dir': 'generic', 'kernel': 'zimatcopy_rn.c', 'addl': ['-DROWM', '-UCONJ']},
  1084. '_ct': {'dir': 'generic', 'kernel': 'zimatcopy_ct.c', 'addl': ['-UROWM', '-UCONJ']},
  1085. '_rt': {'dir': 'generic', 'kernel': 'zimatcopy_rt.c', 'addl': ['-DROWM', '-UCONJ']},
  1086. '_cnc': {'dir': 'generic', 'kernel': 'zimatcopy_cnc.c', 'addl': ['-UROWM', '-DCONJ']},
  1087. '_rnc': {'dir': 'generic', 'kernel': 'zimatcopy_rnc.c', 'addl': ['-DROWM', '-DCONJ']},
  1088. '_ctc': {'dir': 'generic', 'kernel': 'zimatcopy_ctc.c', 'addl': ['-UROWM', '-DCONJ']},
  1089. '_rtc': {'dir': 'generic', 'kernel': 'zimatcopy_rtc.c', 'addl': ['-DROWM', '-DCONJ']},
  1090. }},
  1091. 'z': {'exts': {
  1092. '_cn': {'dir': 'generic', 'kernel': 'zimatcopy_cn.c', 'addl': ['-UROWM', '-UCONJ']},
  1093. '_rn': {'dir': 'generic', 'kernel': 'zimatcopy_rn.c', 'addl': ['-DROWM', '-UCONJ']},
  1094. '_ct': {'dir': 'generic', 'kernel': 'zimatcopy_ct.c', 'addl': ['-UROWM', '-UCONJ']},
  1095. '_rt': {'dir': 'generic', 'kernel': 'zimatcopy_rt.c', 'addl': ['-DROWM', '-UCONJ']},
  1096. '_cnc': {'dir': 'generic', 'kernel': 'zimatcopy_cnc.c', 'addl': ['-UROWM', '-DCONJ']},
  1097. '_rnc': {'dir': 'generic', 'kernel': 'zimatcopy_rnc.c', 'addl': ['-DROWM', '-DCONJ']},
  1098. '_ctc': {'dir': 'generic', 'kernel': 'zimatcopy_ctc.c', 'addl': ['-UROWM', '-DCONJ']},
  1099. '_rtc': {'dir': 'generic', 'kernel': 'zimatcopy_rtc.c', 'addl': ['-DROWM', '-DCONJ']},
  1100. }},
  1101. },
  1102. },
  1103. {
  1104. 'base': '?geadd',
  1105. 'modes': {
  1106. 's': {'exts': {'_k': {'dir': 'generic', 'kernel': 'geadd.c', 'addl': ['-UROWM']}}},
  1107. 'd': {'exts': {'_k': {'dir': 'generic', 'kernel': 'geadd.c', 'addl': ['-UROWM']}}},
  1108. 'c': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zgeadd.c', 'addl': ['-UROWM']}}},
  1109. 'z': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zgeadd.c', 'addl': ['-UROWM']}}},
  1110. },
  1111. },
  1112. { 'base': '?gemm_small_kernel',
  1113. 'modes': {
  1114. 's': {
  1115. 'exts': {
  1116. '_nn': {'dir': 'generic',
  1117. 'kernel': 'gemm_small_matrix_kernel_nn.c',
  1118. },
  1119. '_nt': {'dir': 'generic',
  1120. 'kernel': 'gemm_small_matrix_kernel_nt.c',
  1121. },
  1122. '_tn': {'dir': 'generic',
  1123. 'kernel': 'gemm_small_matrix_kernel_tn.c',
  1124. },
  1125. '_tt': {'dir': 'generic',
  1126. 'kernel': 'gemm_small_matrix_kernel_tt.c',
  1127. },
  1128. # '_b0_nn': {'dir': 'generic',
  1129. # 'kernel': 'gemm_small_matrix_kernel_nn.c',
  1130. # 'addl': ['-DB0'],
  1131. # },
  1132. # '_b0_nt': {'dir': 'generic',
  1133. # 'kernel': 'gemm_small_matrix_kernel_nt.c',
  1134. # 'addl': ['-DB0'],
  1135. # },
  1136. # '_b0_tn': {'dir': 'generic',
  1137. # 'kernel': 'gemm_small_matrix_kernel_tn.c',
  1138. # 'addl': ['-DB0'],
  1139. # },
  1140. # '_b0_tt': {'dir': 'generic',
  1141. # 'kernel': 'gemm_small_matrix_kernel_tt.c',
  1142. # 'addl': ['-DB0'],
  1143. # },
  1144. }
  1145. },
  1146. 'd': {
  1147. 'exts': {
  1148. '_nn': {'dir': 'generic',
  1149. 'kernel': 'gemm_small_matrix_kernel_nn.c',
  1150. },
  1151. '_nt': {'dir': 'generic',
  1152. 'kernel': 'gemm_small_matrix_kernel_nt.c',
  1153. },
  1154. '_tn': {'dir': 'generic',
  1155. 'kernel': 'gemm_small_matrix_kernel_tn.c',
  1156. },
  1157. '_tt': {'dir': 'generic',
  1158. 'kernel': 'gemm_small_matrix_kernel_tt.c',
  1159. },
  1160. }
  1161. },
  1162. 'c': {
  1163. 'exts': {
  1164. '_nn': {'dir': 'generic',
  1165. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1166. },
  1167. '_nr': {'dir': 'generic',
  1168. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1169. },
  1170. '_rn': {'dir': 'generic',
  1171. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1172. },
  1173. '_rr': {'dir': 'generic',
  1174. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1175. },
  1176. '_nt': {'dir': 'generic',
  1177. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1178. },
  1179. '_nc': {'dir': 'generic',
  1180. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1181. },
  1182. '_rt': {'dir': 'generic',
  1183. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1184. },
  1185. '_rc': {'dir': 'generic',
  1186. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1187. },
  1188. '_tn': {'dir': 'generic',
  1189. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1190. },
  1191. '_tr': {'dir': 'generic',
  1192. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1193. },
  1194. '_cn': {'dir': 'generic',
  1195. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1196. },
  1197. '_cr': {'dir': 'generic',
  1198. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1199. },
  1200. '_tt': {'dir': 'generic',
  1201. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1202. },
  1203. '_tc': {'dir': 'generic',
  1204. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1205. },
  1206. '_ct': {'dir': 'generic',
  1207. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1208. },
  1209. '_cc': {'dir': 'generic',
  1210. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1211. },
  1212. }
  1213. },
  1214. 'z': {
  1215. 'exts': {
  1216. '_nn': {'dir': 'generic',
  1217. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1218. },
  1219. '_nr': {'dir': 'generic',
  1220. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1221. },
  1222. '_rn': {'dir': 'generic',
  1223. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1224. },
  1225. '_rr': {'dir': 'generic',
  1226. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1227. },
  1228. '_nt': {'dir': 'generic',
  1229. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1230. },
  1231. '_nc': {'dir': 'generic',
  1232. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1233. },
  1234. '_rt': {'dir': 'generic',
  1235. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1236. },
  1237. '_rc': {'dir': 'generic',
  1238. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1239. },
  1240. '_tn': {'dir': 'generic',
  1241. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1242. },
  1243. '_tr': {'dir': 'generic',
  1244. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1245. },
  1246. '_cn': {'dir': 'generic',
  1247. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1248. },
  1249. '_cr': {'dir': 'generic',
  1250. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1251. },
  1252. '_tt': {'dir': 'generic',
  1253. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1254. },
  1255. '_tc': {'dir': 'generic',
  1256. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1257. },
  1258. '_ct': {'dir': 'generic',
  1259. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1260. },
  1261. '_cc': {'dir': 'generic',
  1262. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1263. },
  1264. }
  1265. },
  1266. },
  1267. },
  1268. { 'base': '?gemm_small_kernel_b0',
  1269. 'modes': {
  1270. 's': {
  1271. 'exts': {
  1272. '_nn': {'dir': 'generic',
  1273. 'kernel': 'gemm_small_matrix_kernel_nn.c',
  1274. },
  1275. '_nt': {'dir': 'generic',
  1276. 'kernel': 'gemm_small_matrix_kernel_nt.c',
  1277. },
  1278. '_tn': {'dir': 'generic',
  1279. 'kernel': 'gemm_small_matrix_kernel_tn.c',
  1280. },
  1281. '_tt': {'dir': 'generic',
  1282. 'kernel': 'gemm_small_matrix_kernel_tt.c',
  1283. },
  1284. }
  1285. },
  1286. 'd': {
  1287. 'exts': {
  1288. '_nn': {'dir': 'generic',
  1289. 'kernel': 'gemm_small_matrix_kernel_nn.c',
  1290. },
  1291. '_nt': {'dir': 'generic',
  1292. 'kernel': 'gemm_small_matrix_kernel_nt.c',
  1293. },
  1294. '_tn': {'dir': 'generic',
  1295. 'kernel': 'gemm_small_matrix_kernel_tn.c',
  1296. },
  1297. '_tt': {'dir': 'generic',
  1298. 'kernel': 'gemm_small_matrix_kernel_tt.c',
  1299. },
  1300. }
  1301. },
  1302. 'c': {
  1303. 'exts': {
  1304. '_nn': {'dir': 'generic',
  1305. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1306. },
  1307. '_nr': {'dir': 'generic',
  1308. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1309. },
  1310. '_rn': {'dir': 'generic',
  1311. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1312. },
  1313. '_rr': {'dir': 'generic',
  1314. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1315. },
  1316. '_nt': {'dir': 'generic',
  1317. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1318. },
  1319. '_nc': {'dir': 'generic',
  1320. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1321. },
  1322. '_rt': {'dir': 'generic',
  1323. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1324. },
  1325. '_rc': {'dir': 'generic',
  1326. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1327. },
  1328. '_tn': {'dir': 'generic',
  1329. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1330. },
  1331. '_tr': {'dir': 'generic',
  1332. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1333. },
  1334. '_cn': {'dir': 'generic',
  1335. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1336. },
  1337. '_cr': {'dir': 'generic',
  1338. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1339. },
  1340. '_tt': {'dir': 'generic',
  1341. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1342. },
  1343. '_tc': {'dir': 'generic',
  1344. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1345. },
  1346. '_ct': {'dir': 'generic',
  1347. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1348. },
  1349. '_cc': {'dir': 'generic',
  1350. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1351. },
  1352. }
  1353. },
  1354. 'z': {
  1355. 'exts': {
  1356. '_nn': {'dir': 'generic',
  1357. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1358. },
  1359. '_nr': {'dir': 'generic',
  1360. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1361. },
  1362. '_rn': {'dir': 'generic',
  1363. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1364. },
  1365. '_rr': {'dir': 'generic',
  1366. 'kernel': 'zgemm_small_matrix_kernel_nn.c',
  1367. },
  1368. '_nt': {'dir': 'generic',
  1369. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1370. },
  1371. '_nc': {'dir': 'generic',
  1372. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1373. },
  1374. '_rt': {'dir': 'generic',
  1375. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1376. },
  1377. '_rc': {'dir': 'generic',
  1378. 'kernel': 'zgemm_small_matrix_kernel_nt.c',
  1379. },
  1380. '_tn': {'dir': 'generic',
  1381. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1382. },
  1383. '_tr': {'dir': 'generic',
  1384. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1385. },
  1386. '_cn': {'dir': 'generic',
  1387. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1388. },
  1389. '_cr': {'dir': 'generic',
  1390. 'kernel': 'zgemm_small_matrix_kernel_tn.c',
  1391. },
  1392. '_tt': {'dir': 'generic',
  1393. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1394. },
  1395. '_tc': {'dir': 'generic',
  1396. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1397. },
  1398. '_ct': {'dir': 'generic',
  1399. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1400. },
  1401. '_cc': {'dir': 'generic',
  1402. 'kernel': 'zgemm_small_matrix_kernel_tt.c',
  1403. },
  1404. }
  1405. },
  1406. },
  1407. },
  1408. ]
  1409. kernel_confs = []
  1410. foreach _kop : base_kops
  1411. base = _kop['base']
  1412. modes = _kop['modes']
  1413. # Generate the symbol flags
  1414. _ckop_args = []
  1415. if symb_defs.has_key(base)
  1416. symb_base = symb_defs[base]
  1417. if symb_base.has_key('def')
  1418. foreach _d : symb_base['def']
  1419. _ckop_args += ('-D' + _d)
  1420. endforeach
  1421. endif
  1422. if symb_base.has_key('undef')
  1423. foreach _u : symb_base['undef']
  1424. _ckop_args += ('-U' + _u)
  1425. endforeach
  1426. endif
  1427. endif
  1428. foreach mode, details : modes
  1429. if mode == 'x' or mode == 'q'
  1430. continue
  1431. endif
  1432. # Generally, one list is required for each foreach
  1433. __cargs = _cargs + _ckop_args
  1434. prec_mode = precision_mappings[mode]
  1435. # Generate the mapping for the type
  1436. if prec_mode.has_key('def')
  1437. foreach _d : prec_mode['def']
  1438. __cargs += ('-D' + _d)
  1439. endforeach
  1440. endif
  1441. if prec_mode.has_key('undef')
  1442. foreach _u : prec_mode['undef']
  1443. __cargs += ('-U' + _u)
  1444. endforeach
  1445. endif
  1446. # Now the rest, one run for each ext, to get the final symbols
  1447. foreach ext, extdat : details['exts']
  1448. _ext_cargs = [] # Will be wiped for each ext preventing redefinitions
  1449. # Check ext_mappings first
  1450. if ext_mappings.has_key(ext) and (not ext_mappings.has_key('except') or base not in ext_mappings['except'])
  1451. extmap = ext_mappings[ext]
  1452. if extmap.has_key('def')
  1453. foreach _d : extmap['def']
  1454. _ext_cargs += ['-D' + _d]
  1455. endforeach
  1456. endif
  1457. if extmap.has_key('undef')
  1458. foreach _u : extmap['undef']
  1459. _ext_cargs += ['-U' + _u]
  1460. endforeach
  1461. endif
  1462. else
  1463. # Fallback to ext_mappings_l2
  1464. foreach ext_map : ext_mappings_l2 + ext_mappings_l3
  1465. if ext_map['ext'] == ext and mode in ext_map['for'] and (not ext_map.has_key('except') or base not in ext_map['except'])
  1466. if ext_map.has_key('def')
  1467. foreach _d : ext_map['def']
  1468. _ext_cargs += ['-D' + _d]
  1469. endforeach
  1470. endif
  1471. if ext_map.has_key('undef')
  1472. foreach _u : ext_map['undef']
  1473. _ext_cargs += ['-U' + _u]
  1474. endforeach
  1475. endif
  1476. break
  1477. endif
  1478. endforeach
  1479. endif
  1480. src = join_paths(extdat['dir'], extdat['kernel'])
  1481. if extdat.has_key('addl')
  1482. _ext_cargs += extdat['addl']
  1483. endif
  1484. sym_name = base.replace('?', mode) + ext
  1485. sym_underscored = f'@sym_name@_'
  1486. _ext_cargs += [
  1487. f'-DASMNAME=@sym_name@',
  1488. f'-DASMFNAME=@sym_underscored@',
  1489. f'-DNAME=@sym_underscored@',
  1490. f'-DCNAME=@sym_name@',
  1491. f'-DCHAR_NAME="@sym_underscored@"',
  1492. f'-DCHAR_CNAME="@sym_name@"',
  1493. ]
  1494. current_def = {
  1495. 'c_args': __cargs + _ext_cargs,
  1496. 'name': sym_name,
  1497. 'src': src
  1498. }
  1499. kernel_confs += current_def
  1500. endforeach
  1501. endforeach
  1502. endforeach
  1503. _kern_libs = []
  1504. _kern_deps = []
  1505. _is_asm = false
  1506. foreach conf: kernel_confs
  1507. _kern_libs += static_library(
  1508. conf['name'],
  1509. [conf['src'], config_h],
  1510. include_directories: _inc,
  1511. c_args: conf['c_args'],
  1512. # See gh discussion 13374 for why, basically .S are coded as fortran..
  1513. fortran_args: conf['c_args'],
  1514. )
  1515. endforeach
  1516. _kern = static_library('_kern',
  1517. link_whole: _kern_libs,
  1518. dependencies: _kern_deps)