You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

meson.build 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. # Ordered As per https://netlib.org/blas/blasqr.pdf
  2. # NOTE: xROTG xROTMG xROTM have no kernels?
  3. # TODO: Actually test and set this
  4. if true
  5. fma3_flag = '-mfma'
  6. endif
  7. # TODO: This is currently following x86_64 generic for src and dir, but it needs
  8. # to diversify
  9. # NOTE: The def and undefs are from Makefile.L1
  10. # Construct all ModesymbKERNEL from src and dir via files(dir + src)
  11. # For the modes array, the following mapping is used for c_args:
  12. # addl --> passed AS IS
  13. base_kops = [
  14. # Level 1 BLAS
  15. {
  16. 'base': 'rot',
  17. 'modes': {
  18. 's' : {
  19. 'dir': 'arm',
  20. 'kernel': 'rot.c',
  21. 'addl': [fma3_flag],
  22. 'exts': ['_k'],
  23. },
  24. 'd' : {
  25. 'dir': 'arm',
  26. 'kernel': 'rot.c',
  27. 'addl': [fma3_flag],
  28. 'exts': ['_k'],
  29. },
  30. 'q' : {
  31. 'dir': 'arm',
  32. 'kernel': 'zrot.c',
  33. 'exts': ['_k'],
  34. },
  35. 'cs' : {
  36. 'dir': 'arm',
  37. 'kernel': 'zrot.c',
  38. 'exts': ['_k'],
  39. },
  40. 'zd' : {
  41. 'dir': 'arm',
  42. 'kernel': 'zrot.c',
  43. 'exts': ['_k'],
  44. },
  45. 'xq' : {
  46. 'dir': 'arm',
  47. 'kernel': 'zrot.c',
  48. 'exts': ['_k'],
  49. },
  50. },
  51. },
  52. {'base': 'swap',
  53. 'modes': {
  54. 's' : {
  55. 'dir': 'arm',
  56. 'kernel': 'swap.c',
  57. 'exts': ['_k'],
  58. },
  59. 'd' : {
  60. 'dir': 'arm',
  61. 'kernel': 'swap.c',
  62. 'exts': ['_k'],
  63. },
  64. 'q' : {
  65. 'dir': 'x86_64',
  66. 'kernel': 'swap.S',
  67. 'exts': ['_k'],
  68. },
  69. 'c' : {
  70. 'dir': 'arm',
  71. 'kernel': 'zswap.c',
  72. 'exts': ['_k'],
  73. },
  74. 'z' : {
  75. 'dir': 'arm',
  76. 'kernel': 'zswap.c',
  77. 'exts': ['_k'],
  78. },
  79. 'x' : {
  80. 'dir': 'arm',
  81. 'kernel': 'zswap.c',
  82. 'exts': ['_k'],
  83. },
  84. },
  85. },
  86. {'base': 'scal',
  87. 'modes': {
  88. 's' : {
  89. 'dir': 'arm',
  90. 'kernel': 'scal.c',
  91. 'exts': ['_k'],
  92. },
  93. 'd' : {
  94. 'dir': 'arm',
  95. 'kernel': 'scal.c',
  96. 'exts': ['_k'],
  97. },
  98. 'q' : {
  99. 'dir': 'x86_64',
  100. 'kernel': 'scal.S',
  101. 'exts': ['_k'],
  102. },
  103. 'c' : {
  104. 'dir': 'arm',
  105. 'kernel': 'zscal.c',
  106. 'exts': ['_k'],
  107. },
  108. 'z' : {
  109. 'dir': 'arm',
  110. 'kernel': 'zscal.c',
  111. 'exts': ['_k'],
  112. },
  113. 'x' : {
  114. 'dir': 'arm',
  115. 'kernel': 'zscal.c',
  116. 'exts': ['_k'],
  117. },
  118. },
  119. },
  120. {'base': 'copy',
  121. 'modes': {
  122. 's' : {
  123. 'dir': 'arm',
  124. 'kernel': 'copy.c',
  125. 'exts': ['_k'],
  126. },
  127. 'd' : {
  128. 'dir': 'arm',
  129. 'kernel': 'copy.c',
  130. 'exts': ['_k'],
  131. },
  132. 'q' : {
  133. 'dir': 'arm',
  134. 'kernel': 'zcopy.c',
  135. 'exts': ['_k'],
  136. },
  137. 'c' : {
  138. 'dir': 'arm',
  139. 'kernel': 'zcopy.c',
  140. 'exts': ['_k'],
  141. },
  142. 'z' : {
  143. 'dir': 'arm',
  144. 'kernel': 'zcopy.c',
  145. 'exts': ['_k'],
  146. },
  147. 'x' : {
  148. 'dir': 'arm',
  149. 'kernel': 'zcopy.c',
  150. 'exts': ['_k'],
  151. },
  152. },
  153. },
  154. {'base': 'axpy',
  155. 'modes': {
  156. 's' : {
  157. 'dir': 'arm',
  158. 'kernel': 'axpy.c',
  159. 'exts': ['_k'],
  160. },
  161. 'd' : {
  162. 'dir': 'arm',
  163. 'kernel': 'axpy.c',
  164. 'exts': ['_k'],
  165. },
  166. 'q' : {
  167. 'dir': 'x86_64',
  168. 'kernel': 'axpy.S',
  169. 'exts': ['_k'],
  170. },
  171. 'c' : {
  172. 'dir': 'arm',
  173. 'kernel': 'zaxpy.c',
  174. 'exts': ['_k'],
  175. },
  176. 'z' : {
  177. 'dir': 'arm',
  178. 'kernel': 'zaxpy.c',
  179. 'exts': ['_k'],
  180. },
  181. 'x' : {
  182. 'dir': 'arm',
  183. 'kernel': 'zaxpy.c',
  184. 'exts': ['_k'],
  185. },
  186. },
  187. },
  188. {'base': 'dot',
  189. 'modes': {
  190. 's' : {
  191. 'dir': 'arm',
  192. 'kernel': 'dot.c',
  193. 'exts': ['_k'],
  194. },
  195. 'd' : {
  196. 'dir': 'arm',
  197. 'kernel': 'dot.c',
  198. 'exts': ['_k'],
  199. },
  200. 'q' : {
  201. 'dir': 'x86_64',
  202. 'kernel': 'dot.S',
  203. 'exts': ['_k'],
  204. },
  205. 'c' : {
  206. 'dir': 'arm',
  207. 'kernel': 'zdot.c',
  208. 'exts': ['_k'],
  209. },
  210. 'z' : {
  211. 'dir': 'arm',
  212. 'kernel': 'zdot.c',
  213. 'exts': ['_k'],
  214. },
  215. 'x' : {
  216. 'dir': 'arm',
  217. 'kernel': 'zdot.c',
  218. 'exts': ['_k'],
  219. },
  220. },
  221. },
  222. # xDOTU xDOTC xxDOT aren't present
  223. {'base': 'nrm2',
  224. 'modes': {
  225. 's' : {
  226. 'dir': 'arm',
  227. 'kernel': 'nrm2.c',
  228. 'exts': ['_k'],
  229. },
  230. 'd' : {
  231. 'dir': 'arm',
  232. 'kernel': 'nrm2.c',
  233. 'exts': ['_k'],
  234. },
  235. 'q' : {
  236. 'dir': 'arm',
  237. 'kernel': 'znrm2.c',
  238. 'exts': ['_k'],
  239. },
  240. 'c' : {
  241. 'dir': 'arm',
  242. 'kernel': 'znrm2.c',
  243. 'exts': ['_k'],
  244. },
  245. 'z' : {
  246. 'dir': 'arm',
  247. 'kernel': 'znrm2.c',
  248. 'exts': ['_k'],
  249. },
  250. 'x' : {
  251. 'dir': 'arm',
  252. 'kernel': 'znrm2.c',
  253. 'exts': ['_k'],
  254. },
  255. },
  256. },
  257. {'base': 'asum',
  258. 'modes': {
  259. 's' : {
  260. 'dir': 'arm',
  261. 'kernel': 'asum.c',
  262. 'exts': ['_k'],
  263. },
  264. 'd' : {
  265. 'dir': 'arm',
  266. 'kernel': 'asum.c',
  267. 'exts': ['_k'],
  268. },
  269. 'q' : {
  270. 'dir': 'arm',
  271. 'kernel': 'zasum.c',
  272. 'exts': ['_k'],
  273. },
  274. 'c' : {
  275. 'dir': 'arm',
  276. 'kernel': 'zasum.c',
  277. 'exts': ['_k'],
  278. },
  279. 'z' : {
  280. 'dir': 'arm',
  281. 'kernel': 'zasum.c',
  282. 'exts': ['_k'],
  283. },
  284. 'x' : {
  285. 'dir': 'arm',
  286. 'kernel': 'zasum.c',
  287. 'exts': ['_k'],
  288. },
  289. },
  290. },
  291. {'base': 'amax',
  292. 'modes': {
  293. 's' : {
  294. 'dir': 'arm',
  295. 'kernel': 'amax.c',
  296. 'exts': ['_k'],
  297. },
  298. 'd' : {
  299. 'dir': 'arm',
  300. 'kernel': 'amax.c',
  301. 'exts': ['_k'],
  302. },
  303. 'q' : {
  304. 'dir': 'arm',
  305. 'kernel': 'zamax.c',
  306. 'exts': ['_k'],
  307. },
  308. 'c' : {
  309. 'dir': 'arm',
  310. 'kernel': 'zamax.c',
  311. 'exts': ['_k'],
  312. },
  313. 'z' : {
  314. 'dir': 'arm',
  315. 'kernel': 'zamax.c',
  316. 'exts': ['_k'],
  317. },
  318. 'x' : {
  319. 'dir': 'arm',
  320. 'kernel': 'zamax.c',
  321. 'exts': ['_k'],
  322. },
  323. },
  324. },
  325. # # TODO: Handle the i*amax style
  326. # {'base': 'axpby',
  327. # 'modes': {'s': {'dir': 'arm', 'kernel': 'axpby.c', 'exts': ['_k']},
  328. # 'd': {'dir': 'arm', 'kernel': 'axpby.c', 'exts': ['_k']},
  329. # 'c': {'dir': 'arm', 'kernel': 'axpby.c', 'exts': ['_k']},
  330. # 'z': {'dir': 'arm', 'kernel': 'axpby.c', 'exts': ['_k']},
  331. # },
  332. # },
  333. # # Level 2 BLAS
  334. # # There are additional sources so now we have srcs
  335. # # Ordered as per KERNEL.generic and Makefile.L2
  336. # # exts are used to find the flags for each modality
  337. # # ext is attached to base (only useful from Level 2)
  338. # # i.e. baseext (e.g., gemv_n, gemv_t, cgeru_k, cgerc_k)
  339. # {'base': 'gemv',
  340. # 'modes': {'s': {'dir': 'arm', 'kernel': 'gemv_n.c', 'exts': ['_n', '_t']},
  341. # 'd': {'dir': 'arm', 'kernel': 'gemv_n.c',
  342. # # TODO: _t should take fmaflag, but then we need a dictionary..
  343. # # From Makefile.L2
  344. # 'exts': ['_n', '_t']},
  345. # 'q': {'dir': 'arm', 'kernel': 'gemv_n.c', 'exts': ['_n', '_t']},
  346. # 'c': {'dir': 'arm', 'kernel': 'zgemv_n.c',
  347. # 'exts': ['_n', '_t', '_r', '_c', '_o', '_u', '_s', '_d']},
  348. # 'z': {'dir': 'arm', 'kernel': 'zgemv_n.c',
  349. # 'exts': ['_n', '_t', '_r', '_c', '_o', '_u', '_s', '_d']},
  350. # 'x': {'dir': 'arm', 'kernel': 'zgemv_n.c',
  351. # 'exts': ['_n', '_t', '_r', '_c', '_o', '_u', '_s', '_d']},
  352. # },
  353. # },
  354. # {'base': 'symv',
  355. # 'modes': {'s': {'dir': 'generic', 'kernel': 'symv_k.c', 'exts': ['_U', '_L']},
  356. # 'd': {'dir': 'generic', 'kernel': 'symv_k.c', 'exts': ['_U', '_L']},
  357. # 'q': {'dir': 'generic', 'kernel': 'symv_k.c', 'exts': ['_U', '_L']},
  358. # 'c': {'dir': 'generic', 'kernel': 'symv_k.c', 'exts': ['_U', '_L']},
  359. # 'z': {'dir': 'generic', 'kernel': 'symv_k.c', 'exts': ['_U', '_L']},
  360. # 'x': {'dir': 'generic', 'kernel': 'symv_k.c', 'exts': ['_U', '_L']},
  361. # },
  362. # },
  363. # {'base': 'ger',
  364. # 'modes': {'s': {'dir': 'generic', 'kernel': 'ger.c', 'exts': ['_k']},
  365. # 'd': {'dir': 'generic', 'kernel': 'ger.c', 'exts': ['_k']},
  366. # 'q': {'dir': 'generic', 'kernel': 'ger.c', 'exts': ['_k']}
  367. # },
  368. # },
  369. # {'base': 'geru',
  370. # 'modes': {'c': {'dir': 'generic', 'kernel': 'zger.c', 'exts': ['_k']},
  371. # 'z': {'dir': 'generic', 'kernel': 'zger.c', 'exts': ['_k']},
  372. # 'x': {'dir': 'generic', 'kernel': 'zger.c', 'exts': ['_k']}
  373. # }
  374. # },
  375. # {'base': 'gerc',
  376. # 'modes': {'c': {'dir': 'generic', 'kernel': 'zger.c', 'exts': ['_k']},
  377. # 'z': {'dir': 'generic', 'kernel': 'zger.c', 'exts': ['_k']},
  378. # 'x': {'dir': 'generic', 'kernel': 'zger.c', 'exts': ['_k']},
  379. # },
  380. # },
  381. # {'base': 'hemv',
  382. # 'modes': {'c': {'dir': 'generic',
  383. # 'kernel': 'zhemv_k.c',
  384. # 'exts': ['_U', '_L', '_V', '_M']},
  385. # 'z': {'dir': 'generic',
  386. # 'kernel': 'zhemv_k.c',
  387. # 'exts': ['_U', '_L', '_V', '_M']},
  388. # 'x': {'dir': 'generic',
  389. # 'kernel': 'zhemv_k.c',
  390. # 'exts': ['_U', '_L', '_V', '_M']},
  391. # },
  392. # },
  393. # {'base': 'bgemv',
  394. # 'modes': {'s': {'dir': 'x86_64',
  395. # 'kernel': 'sbgemv_n.c',
  396. # 'exts': ['_n', '_t']}
  397. # },
  398. # },
  399. ]
  400. kernel_confs = []
  401. foreach _kop : base_kops
  402. base = _kop['base']
  403. modes = _kop['modes']
  404. foreach mode, details : modes
  405. __cargs = _cargs
  406. prec_mode = precision_mappings[mode]
  407. src = join_paths(details['dir'], details['kernel'])
  408. # Generate the mapping for the type
  409. if prec_mode.has_key('def')
  410. foreach _d : prec_mode['def']
  411. __cargs += ('-D' + _d)
  412. endforeach
  413. endif
  414. if prec_mode.has_key('undef')
  415. foreach _u : prec_mode['undef']
  416. __cargs += ('-U' + _u)
  417. endforeach
  418. endif
  419. # Now the rest, one run for each ext
  420. foreach ext : details['exts']
  421. extmap = ext_mappings[ext]
  422. if extmap.has_key('def')
  423. foreach _d : extmap['def']
  424. __cargs += ('-D' + _d)
  425. endforeach
  426. endif
  427. if extmap.has_key('undef')
  428. foreach _u : extmap['undef']
  429. __cargs += ('-U' + _u)
  430. endforeach
  431. endif
  432. message(mode + base + ext)
  433. sym_name = mode + base + ext
  434. sym_underscored = f'@sym_name@_'
  435. __cargs += [
  436. f'-DASMNAME=@sym_name@',
  437. f'-DASMFNAME=@sym_underscored@',
  438. f'-DNAME=@sym_underscored@',
  439. f'-DCNAME=@sym_name@',
  440. f'-DCHAR_NAME="@sym_underscored@"',
  441. f'-DCHAR_CNAME="@sym_name@"',
  442. ]
  443. current_def = {
  444. 'c_args': __cargs,
  445. 'name': sym_name,
  446. 'src': src
  447. }
  448. message(__cargs)
  449. kernel_confs += current_def
  450. endforeach
  451. endforeach
  452. endforeach
  453. _kern_libs = []
  454. foreach conf: kernel_confs
  455. _kern_libs += static_library(
  456. conf['name'],
  457. conf['src'],
  458. include_directories: _inc,
  459. c_args: conf['c_args'],
  460. )
  461. endforeach
  462. _kern = static_library('_kern',
  463. link_whole: _kern_libs)