|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522 |
- # Ordered as per the BLAS Quick Reference Guide: https://netlib.org/blas/blasqr.pdf
- # NOTE: xROTG xROTMG xROTM have no kernels?
- # TODO: Actually test and set this
- # NOTE(review): the condition below is a hard-coded placeholder, so
- # fma3_flag is always '-mfma'; presumably it should become a real
- # FMA3 support check -- confirm what needs to be probed.
- # fma3_flag is passed through 'addl' to the srot.c and drot.c kernels below.
- if true
- fma3_flag = '-mfma'
- endif
- # TODO: This currently follows the x86_64 generic configuration for src and
- # dir, but it needs to be diversified
- # NOTE: The def and undefs are from Makefile.L1
- # Construct every <Mode><symb>KERNEL from src and dir via files(dir + src)
- # For each entry of the modes dictionary, the following mapping is used for c_args:
- # addl --> passed as is
- base_kops = [
- # Level 1 BLAS
- { 'base': '?rot',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'srot.c', 'addl': [fma3_flag]}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'drot.c', 'addl': [fma3_flag]}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot.c'}}},
- 'cs': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot_sse.S'}}},
- 'zd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot_sse2.S'}}},
- # 'xq': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot.S'}}},
- },
- },
- { 'base': '?swap',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'swap_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'swap_sse2.S'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zswap_sse.S'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zswap_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'swap.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zswap.S'}}},
- },
- },
- { 'base': '?scal',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sscal.c'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'dscal.c'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'cscal.c'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zscal.c'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'scal.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zscal.S'}}},
- },
- },
- { 'base': '?copy',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'copy_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'copy_sse2.S'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zcopy_sse.S'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zcopy_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'copy.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zcopy.S'}}},
- },
- },
- { 'base': '?axpy',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'saxpy.c'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'daxpy.c'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'caxpy.c', 'addl': ['-UCONJ']}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zaxpy.c', 'addl': ['-UCONJ']}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'axpy.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zaxpy.S'}}},
- },
- },
- { 'base': '?axpyc',
- 'modes': {
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'caxpy.c', 'addl': ['-DCONJ']}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zaxpy.c', 'addl': ['-DCONJ']}}},
- },
- },
- { 'base': '?dot',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sdot.c'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'ddot.c'}}},
- },
- },
- { 'base': '?dotc',
- 'modes': {
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'cdot.c'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zdot.c'}}},
- },
- },
- { 'base': '?dotu',
- 'modes': {
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'cdot.c'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zdot.c'}}},
- },
- },
- # TODO(rg): Check?
- { 'base': '?dsdot',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sdot.c'}}},
- '': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sdot.c', 'addl': ['-DDSDOT']}}},
- },
- },
- # TODO(rg): Add dsdotkernel conditionals
- # xDOTU xDOTC xxDOT aren't present
- { 'base': '?nrm2',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'nrm2_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'nrm2.S'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'znrm2_sse.S'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'znrm2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'nrm2.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'znrm2.S'}}},
- },
- },
- { 'base': '?asum',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'sasum.c'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'dasum.c'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zasum_sse.S'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zasum_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'asum.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zasum.S'}}},
- },
- },
- { 'base': '?amax',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse2.S'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax_sse.S'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax.S'}}},
- },
- },
- { 'base': '?sum',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'arm', 'kernel': 'sum.c'}}},
- 'd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'sum.c'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zsum_sse.S'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zsum_sse2.S'}}},
- },
- },
- { 'base': '?amin',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse2.S'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax_sse.S'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zamax_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'izamax.S'}}},
- },
- },
- { 'base': 'i?amax',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse2.S'}}},
- 'c': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'izamax_sse.S'}}},
- 'z': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'izamax_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax.S'}}},
- # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'izamax.S'}}},
- },
- },
- { 'base': 'i?amin',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'iamin.c'}}},
- 'c': {'exts': {'_k': {'dir': 'arm', 'kernel': 'izamin.c'}}},
- 'z': {'exts': {'_k': {'dir': 'arm', 'kernel': 'izamin.c'}}},
- },
- },
- { 'base': 'i?max',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax.S'}}},
- },
- },
- { 'base': 'i?min',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'iamax.S'}}},
- },
- },
- { 'base': '?max',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax.S'}}},
- },
- },
- { 'base': '?min',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse.S'}}},
- 'd': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax_sse2.S'}}},
- # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'amax.S'}}},
- },
- },
- { 'base': '?axpby',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'arm', 'kernel': 'axpby.c'}}},
- 'd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'axpby.c'}}},
- 'c': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zaxpby.c'}}},
- 'z': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zaxpby.c'}}},
- },
- },
- # Level 2 BLAS
- # There are additional sources from here on, so now we have srcs
- # Ordered as per KERNEL.generic and Makefile.L2
- # The exts are used to find the flags for each variant
- # Each ext is appended to base (only useful from Level 2 onwards),
- # i.e. base + ext (e.g., gemv_n, gemv_t, cgeru_k, cgerc_k)
- { 'base': '?gemv',
- 'modes': {
- 's': {
- 'exts': {
- # TODO(rg): Where are these coming from?
- # Most of these have both generic defines and per-folder defines.
- # The Makefile lists sgemv_n_4.c as the source, even though there is also a sgemv_n.c
- '_n': {'dir': 'x86_64', 'kernel': 'sgemv_n_4.c'},
- '_t': {'dir': 'x86_64', 'kernel': 'sgemv_t_4.c'},
- }
- },
- 'd': {
- 'exts': {
- '_n': {'dir': 'x86_64', 'kernel': 'dgemv_n_4.c'},
- '_t': {'dir': 'x86_64', 'kernel': 'dgemv_t_4.c'},
- }
- },
- # 'q': {
- # 'exts': {
- # '_n': {'dir': 'x86_64', 'kernel': 'qgemv_n.S'},
- # '_t': {'dir': 'x86_64', 'kernel': 'qgemv_t.S'},
- # }
- # },
- 'c': {
- 'exts': {
- '_n': {'dir': 'x86_64', 'kernel': 'cgemv_n_4.c'},
- '_t': {'dir': 'x86_64', 'kernel': 'cgemv_t_4.c'},
- '_r': {'dir': 'x86_64', 'kernel': 'cgemv_n_4.c'},
- '_c': {'dir': 'x86_64', 'kernel': 'cgemv_t_4.c'},
- '_o': {'dir': 'x86_64', 'kernel': 'cgemv_n_4.c'},
- '_u': {'dir': 'x86_64', 'kernel': 'cgemv_t_4.c'},
- '_s': {'dir': 'x86_64', 'kernel': 'cgemv_n_4.c'},
- '_d': {'dir': 'x86_64', 'kernel': 'cgemv_t_4.c'},
- }
- },
- 'z': {
- 'exts': {
- '_n': {'dir': 'x86_64', 'kernel': 'zgemv_n_4.c'},
- '_t': {'dir': 'x86_64', 'kernel': 'zgemv_t_4.c'},
- '_r': {'dir': 'x86_64', 'kernel': 'zgemv_n_4.c'},
- '_c': {'dir': 'x86_64', 'kernel': 'zgemv_t_4.c'},
- '_o': {'dir': 'x86_64', 'kernel': 'zgemv_n_4.c'},
- '_u': {'dir': 'x86_64', 'kernel': 'zgemv_t_4.c'},
- '_s': {'dir': 'x86_64', 'kernel': 'zgemv_n_4.c'},
- '_d': {'dir': 'x86_64', 'kernel': 'zgemv_t_4.c'},
- }
- },
- # 'x': {
- # 'exts': {
- # '_n': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
- # '_t': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
- # '_r': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
- # '_c': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
- # '_o': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
- # '_u': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
- # '_s': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
- # '_d': {'dir': 'arm', 'kernel': 'zgemv_n.c'},
- # }
- # },
- },
- },
- { 'base': '?symv',
- 'modes': {
- 's': {
- 'exts': {
- '_U': {'dir': 'x86_64', 'kernel': 'ssymv_U.c'},
- '_L': {'dir': 'x86_64', 'kernel': 'ssymv_L.c'},
- }
- },
- 'd': {
- 'exts': {
- '_U': {'dir': 'generic', 'kernel': 'symv_k.c'},
- '_L': {'dir': 'generic', 'kernel': 'symv_k.c'},
- }
- },
- 'c': {
- 'exts': {
- '_U': {'dir': 'generic', 'kernel': 'zsymv_k.c'},
- '_L': {'dir': 'generic', 'kernel': 'zsymv_k.c'},
- }
- },
- 'z': {
- 'exts': {
- '_U': {'dir': 'x86_64', 'kernel': 'zsymv_U_sse2.S'},
- '_L': {'dir': 'x86_64', 'kernel': 'zsymv_L_sse2.S'},
- }
- },
- # 'q': {
- # 'exts': {
- # '_U': {'dir': 'generic', 'kernel': 'symv_k.c'},
- # '_L': {'dir': 'generic', 'kernel': 'symv_k.c'},
- # }
- # },
- # 'x': {
- # 'exts': {
- # '_U': {'dir': 'generic', 'kernel': 'zsymv_k.c'},
- # '_L': {'dir': 'generic', 'kernel': 'zsymv_k.c'},
- # }
- # },
- },
- },
- { 'base': '?lsame',
- 'modes': {
- '': {'exts': {'': {'dir': 'x86_64', 'kernel': 'lsame.S'}}},
- },
- },
- { 'base': '?cabs',
- 'modes': {
- 's': {'exts': {'1': {'dir': 'x86_64', 'kernel': 'cabs.S'}}},
- 'd': {'exts': {'1': {'dir': 'x86_64', 'kernel': 'cabs.S'}}},
- # 'q': {'exts': {'': {'dir': 'generic', 'kernel': 'cabs.c'}}},
- },
- },
- { 'base': '?gemm3m',
- 'modes': {
- 'c': {'exts': {
- '_kernel': {'dir': 'x86_64', 'kernel': 'cgemm3m_kernel_8x4_haswell.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA']},
- '_oncopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DUSE_ALPHA']},
- '_otcopyb': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DUSE_ALPHA']},
- '_itcopyb': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_8.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA']},
- '_itcopyr': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_8.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DREAL_ONLY']},
- '_itcopyi': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_8.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DIMAGE_ONLY']},
- '_incopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_8.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA']},
- '_incopyr': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_8.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DREAL_ONLY']},
- '_oncopyr': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']},
- '_otcopyr': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']},
- '_incopyi': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_8.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DIMAGE_ONLY']},
- '_oncopyi': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DUSE_ALPHA', '-DIMAGE_ONLY']},
- '_otcopyi': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DUSE_ALPHA', '-DIMAGE_ONLY']},
- }},
- 'z': {'exts': {
- '_kernel': {'dir': 'x86_64', 'kernel': 'zgemm3m_kernel_4x4_haswell.c',
- 'addl': ['-DNN']},
- '_oncopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DUSE_ALPHA']},
- '_otcopyb': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DUSE_ALPHA']},
- '_itcopyb': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA']},
- '_itcopyr': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DREAL_ONLY']},
- '_itcopyi': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DIMAGE_ONLY']},
- '_incopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA']},
- '_incopyr': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DREAL_ONLY']},
- '_oncopyr': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']},
- '_otcopyr': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']},
- '_incopyi': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DICOPY', '-UUSE_ALPHA', '-DIMAGE_ONLY']},
- '_oncopyi': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c',
- 'addl': ['-DUSE_ALPHA', '-DIMAGE_ONLY']},
- '_otcopyi': {'dir': 'generic', 'kernel': 'zgemm3m_tcopy_4.c',
- 'addl': ['-DUSE_ALPHA', '-DIMAGE_ONLY']},
- }},
- },
- },
- { 'base': '?ger',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'generic', 'kernel': 'ger.c'}}},
- 'd': {'exts': {'_k': {'dir': 'generic', 'kernel': 'ger.c'}}},
- # 'q': {'exts': {'_k': {'dir': 'generic', 'kernel': 'ger.c'}}},
- },
- },
- { 'base': '?geru',
- 'modes': {
- 'c': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UCONJ']}}},
- 'z': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UCONJ']}}},
- # 'x': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c'}}},
- },
- },
- { 'base': '?gerc',
- 'modes': {
- 'c': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DCONJ']}}},
- 'z': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DCONJ']}}},
- # 'x': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c'}}},
- },
- },
- { 'base': '?gerv',
- 'modes': {
- 'c': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UCONJ', '-DXCONJ']}}},
- 'z': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zger.c', 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UCONJ', '-DXCONJ']}}},
- },
- },
- { 'base': '?hemv',
- 'modes': {
- 'c': {
- 'exts': {
- '_U': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV']},
- '_L': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV']},
- '_V': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV', '-DHEMVREV']},
- '_M': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV', '-DHEMVREV']},
- }
- },
- 'z': {
- 'exts': {
- '_U': {'dir': 'x86_64', 'kernel': 'zsymv_U_sse2.S', 'addl': ['-DHEMV']},
- '_L': {'dir': 'x86_64', 'kernel': 'zsymv_L_sse2.S', 'addl': ['-DHEMV']},
- '_V': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV', '-DHEMVREV']},
- '_M': {'dir': 'generic', 'kernel': 'zhemv_k.c', 'addl': ['-DHEMV', '-DHEMVREV']},
- }
- },
- # 'x': {
- # 'exts': {
- # '_U': {'dir': 'generic', 'kernel': 'zhemv_k.c'},
- # '_L': {'dir': 'generic', 'kernel': 'zhemv_k.c'},
- # '_V': {'dir': 'generic', 'kernel': 'zhemv_k.c'},
- # '_M': {'dir': 'generic', 'kernel': 'zhemv_k.c'},
- # }
- # },
- },
- },
- # { 'base': '?bgemv',
- # 'modes': {
- # 's': {
- # 'exts': {
- # '_n': {'dir': 'x86_64', 'kernel': 'sbgemv_n.c'},
- # '_t': {'dir': 'x86_64', 'kernel': 'sbgemv_n.c'},
- # }
- # }
- # },
- # },
- # Level 3 symbols
- { 'base': '?gemm_kernel',
- 'modes': {
- 's': {'exts': {'': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell_2.c'}}},
- 'd': {'exts': {'': {'dir': 'x86_64', 'kernel': 'dgemm_kernel_4x8_haswell.S'}}},
- 'c': {
- 'exts': {
- '_n': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.c', 'addl': ['-DNN']},
- '_l': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.c', 'addl': ['-DCN']},
- # TODO(rg): What about _r conditionals? Makefile.L3:2969
- '_r': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.c', 'addl': ['-DNC']},
- '_b': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.c', 'addl': ['-DCC']},
- }
- },
- 'z': {
- 'exts': {
- '_n': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.c', 'addl': ['-DNN']},
- '_l': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.c', 'addl': ['-DCN']},
- '_r': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.c', 'addl': ['-DNC']},
- '_b': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.c', 'addl': ['-DCC']},
- }
- }
- # 'q': {'exts': {'': {'dir': 'generic', 'kernel': 'gemm_beta.c'}}},
- # 'x': {'exts': {'': {'dir': 'generic', 'kernel': 'zgemm_beta.c'}}},
- },
- },
- { 'base': '?trmm_kernel',
- 'modes': {
- 's': {
- 'exts': {
- '_LN': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell.c'},
- '_LT': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell.c', 'addl': ['-DLEFT', '-DTRANSA']},
- '_RN': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell.c'},
- '_RT': {'dir': 'x86_64', 'kernel': 'sgemm_kernel_8x4_haswell.c'},
- }
- },
- 'd': {
- 'exts': {
- '_LN': {'dir': 'x86_64', 'kernel': 'dtrmm_kernel_4x8_haswell.c'},
- '_LT': {'dir': 'x86_64', 'kernel': 'dtrmm_kernel_4x8_haswell.c', 'addl': ['-DLEFT', '-DTRANSA']},
- '_RN': {'dir': 'x86_64', 'kernel': 'dtrmm_kernel_4x8_haswell.c'},
- '_RT': {'dir': 'x86_64', 'kernel': 'dtrmm_kernel_4x8_haswell.c'},
- }
- },
- 'c': {
- 'exts': {
- '_LN': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
- 'addl': ['-DLEFT', '-UTRANSA', '-UCONJ', '-DNN']},
- '_LT': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
- 'addl': ['-DLEFT', '-DTRANSA', '-UCONJ', '-DNN']},
- '_LR': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
- 'addl': ['-DLEFT', '-UTRANSA', '-DCONJ', '-DCN']},
- '_LC': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
- 'addl': ['-DLEFT', '-DTRANSA', '-DCONJ', '-DCN']},
- '_RN': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
- 'addl': ['-ULEFT', '-UTRANSA', '-UCONJ', '-DNN']},
- '_RT': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
- 'addl': ['-ULEFT', '-DTRANSA', '-UCONJ', '-DNN']},
- '_RR': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
- 'addl': ['-ULEFT', '-UTRANSA', '-DCONJ', '-DNC']},
- '_RC': {'dir': 'x86_64', 'kernel': 'cgemm_kernel_8x2_haswell.S',
- 'addl': ['-ULEFT', '-DTRANSA', '-DCONJ', '-DNC']},
- }
- },
- 'z': {
- 'exts': {
- '_LN': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
- 'addl': ['-DLEFT', '-UTRANSA', '-UCONJ', '-DNN']},
- '_LT': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
- 'addl': ['-DLEFT', '-DTRANSA', '-UCONJ', '-DNN']},
- '_LR': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
- 'addl': ['-DLEFT', '-UTRANSA', '-DCONJ', '-DCN']},
- '_LC': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
- 'addl': ['-DLEFT', '-DTRANSA', '-DCONJ', '-DCN']},
- '_RN': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
- 'addl': ['-ULEFT', '-UTRANSA', '-UCONJ', '-DNN']},
- '_RT': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
- 'addl': ['-ULEFT', '-DTRANSA', '-UCONJ', '-DNN']},
- '_RR': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
- 'addl': ['-ULEFT', '-UTRANSA', '-DCONJ', '-DNC']},
- '_RC': {'dir': 'x86_64', 'kernel': 'zgemm_kernel_4x2_haswell.S',
- 'addl': ['-ULEFT', '-DTRANSA', '-DCONJ', '-DNC']},
- },
- },
- },
- },
- { 'base': '?trsm_kernel',
- 'modes': {
- 's': {
- 'exts': {
- '_LN': {'dir': 'x86_64', 'kernel': 'strsm_kernel_8x4_haswell_LN.c',
- 'addl': ['-DLN', '-DUPPER', '-UCONJ']},
- '_LT': {'dir': 'x86_64', 'kernel': 'strsm_kernel_8x4_haswell_LT.c',
- 'addl': ['-DLT', '-UUPPER', '-UCONJ']},
- '_RN': {'dir': 'x86_64', 'kernel': 'strsm_kernel_8x4_haswell_RN.c',
- 'addl': ['-DRN', '-DUPPER', '-UCONJ']},
- '_RT': {'dir': 'x86_64', 'kernel': 'strsm_kernel_8x4_haswell_RT.c',
- 'addl': ['-DRT', '-UUPPER', '-UCONJ']},
- },
- },
- 'd': {
- 'exts': {
- '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
- 'addl': ['-DLN', '-DUPPER', '-UCONJ']},
- '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
- 'addl': ['-DLT', '-UUPPER', '-UCONJ']},
- '_RN': {'dir': 'x86_64', 'kernel': 'dtrsm_kernel_RN_haswell.c',
- 'addl': ['-DRN', '-DUPPER', '-UCONJ']},
- '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
- 'addl': ['-DRT', '-UUPPER', '-UCONJ']},
- },
- },
- 'c': {
- 'exts': {
- '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
- 'addl': ['-DLN', '-DUPPER', '-UCONJ']},
- '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
- 'addl': ['-DLT', '-UUPPER', '-UCONJ']},
- '_LR': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
- 'addl': ['-DLN', '-DUPPER', '-DCONJ']},
- '_LC': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
- 'addl': ['-DLT', '-UUPPER', '-DCONJ']},
- '_RN': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
- 'addl': ['-DRN', '-DUPPER', '-UCONJ']},
- '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
- 'addl': ['-DRT', '-UUPPER', '-UCONJ']},
- '_RR': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
- 'addl': ['-DRN', '-DUPPER', '-DCONJ']},
- '_RC': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
- 'addl': ['-DRT', '-UUPPER', '-DCONJ']},
- },
- },
- 'z': {
- 'exts': {
- '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
- 'addl': ['-DLN', '-DUPPER', '-UCONJ']},
- '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
- 'addl': ['-DLT', '-UUPPER', '-UCONJ']},
- '_LR': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
- 'addl': ['-DLN', '-DUPPER', '-DCONJ']},
- '_LC': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
- 'addl': ['-DLT', '-UUPPER', '-DCONJ']},
- '_RN': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
- 'addl': ['-DRN', '-DUPPER', '-UCONJ']},
- '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
- 'addl': ['-DRT', '-UUPPER', '-UCONJ']},
- '_RR': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
- 'addl': ['-DRN', '-DUPPER', '-DCONJ']},
- '_RC': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
- 'addl': ['-DRT', '-UUPPER', '-DCONJ']},
- },
- },
- },
- },
- { 'base': '?gemm',
- 'modes': {
- 's': {'exts': {
- '_beta': {'dir': 'x86_64', 'kernel': 'sgemm_beta_skylakex.c'},
- '_small_matrix_permit': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_permit.c'},
- # TODO(rg): the _NUM suffixes of the copy kernels (e.g. _8, _4) are arch dependent
- '_incopy': {'dir': 'generic', 'kernel': 'gemm_ncopy_8.c'},
- '_itcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_8.c'},
- '_oncopy': {'dir': 'x86_64', 'kernel': 'sgemm_ncopy_4_skylakex.c'},
- '_otcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_4.c'},
- # TODO(rg): direct and direct_performant are built only conditionally
- '_direct': {'dir': 'x86_64', 'kernel': 'sgemm_direct_skylakex.c'},
- '_direct_performant': {'dir': 'x86_64', 'kernel': 'sgemm_direct_performant.c'},
- }},
- 'd': {'exts': {
- '_beta': {'dir': 'x86_64', 'kernel': 'dgemm_beta_skylakex.c'},
- '_small_matrix_permit': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_permit.c'},
- '_incopy': {'dir': 'generic', 'kernel': 'gemm_ncopy_4.c'},
- '_itcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_4.c'},
- '_oncopy': {'dir': 'x86_64', 'kernel': 'dgemm_ncopy_8_skylakex.c'},
- '_otcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_8.c'},
- }},
- 'c': {'exts': {
- '_beta': {'dir': 'x86_64', 'kernel': 'zgemm_beta.S'},
- '_small_matrix_permit': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_permit.c'},
- '_incopy': {'dir': 'generic', 'kernel': 'zgemm_ncopy_8.c'},
- '_itcopy': {'dir': 'generic', 'kernel': 'zgemm_tcopy_8.c'},
- '_oncopy': {'dir': 'generic', 'kernel': 'zgemm_ncopy_2.c'},
- '_otcopy': {'dir': 'generic', 'kernel': 'zgemm_tcopy_2.c'},
- }},
- 'z': {'exts': {
- '_beta': {'dir': 'x86_64', 'kernel': 'zgemm_beta.S'},
- '_small_matrix_permit': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_permit.c'},
- '_incopy': {'dir': 'generic', 'kernel': 'zgemm_ncopy_4.c'},
- '_itcopy': {'dir': 'generic', 'kernel': 'zgemm_tcopy_4.c'},
- '_oncopy': {'dir': 'generic', 'kernel': 'zgemm_ncopy_2.c'},
- '_otcopy': {'dir': 'generic', 'kernel': 'zgemm_tcopy_2.c'},
- }},
- },
- },
- { 'base': '?trmm',
- 'modes': {
- 's': {'exts': {
- # TODO(rg): These actually use $(SGEMM_UNROLL_M) to choose the size
- '_iunucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- 'd': {'exts': {
- '_iunucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- 'c': {'exts': {
- '_iunucopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- 'z': {'exts': {
- '_iunucopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'ztrmm_uncopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'ztrmm_lncopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'ztrmm_utcopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'ztrmm_ltcopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- },
- },
- { 'base': '?hemm',
- 'modes': {
- 'c': {'exts': {
- '_iutcopy': {'dir': 'generic', 'kernel': 'zhemm_utcopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER']},
- '_iltcopy': {'dir': 'generic', 'kernel': 'zhemm_ltcopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER']},
- '_outcopy': {'dir': 'generic', 'kernel': 'zhemm_utcopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER']},
- '_oltcopy': {'dir': 'generic', 'kernel': 'zhemm_ltcopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER']},
- }},
- 'z': {'exts': {
- '_iutcopy': {'dir': 'generic', 'kernel': 'zhemm_utcopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER']},
- '_iltcopy': {'dir': 'generic', 'kernel': 'zhemm_ltcopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER']},
- '_outcopy': {'dir': 'generic', 'kernel': 'zhemm_utcopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER']},
- '_oltcopy': {'dir': 'generic', 'kernel': 'zhemm_ltcopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER']},
- }},
- },
- },
- { 'base': '?trsm',
- 'modes': {
- 's': {'exts': {
- # TODO(rg): These actually use $(SGEMM_UNROLL_M) to choose the size
- '_iunucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- 'd': {'exts': {
- # TODO(rg): These actually use $(DGEMM_UNROLL_M) to choose the size
- '_iunucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- 'q': {'exts': {
- # TODO(rg): These actually use $(QGEMM_UNROLL_M) to choose the size
- '_iunucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- 'c': {'exts': {
- # TODO(rg): These actually use $(CGEMM_UNROLL_M) to choose the size
- '_iunucopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_8.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_8.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_2.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_2.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_2.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_2.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_2.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_2.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_2.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_2.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- 'z': {'exts': {
- # TODO(rg): These actually use $(ZGEMM_UNROLL_M) to choose the size
- '_iunucopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_4.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iunncopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_4.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_ilnucopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_4.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_ilnncopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_4.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_iutucopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_4.c',
- 'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
- '_iutncopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_4.c',
- 'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
- '_iltucopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_4.c',
- 'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
- '_iltncopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_4.c',
- 'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
- '_ounucopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_2.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_ounncopy': {'dir': 'generic', 'kernel': 'ztrsm_uncopy_2.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_olnucopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_2.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_olnncopy': {'dir': 'generic', 'kernel': 'ztrsm_lncopy_2.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- '_outucopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_2.c',
- 'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
- '_outncopy': {'dir': 'generic', 'kernel': 'ztrsm_utcopy_2.c',
- 'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
- '_oltucopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_2.c',
- 'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
- '_oltncopy': {'dir': 'generic', 'kernel': 'ztrsm_ltcopy_2.c',
- 'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
- }},
- },
- },
- { 'base': '?symm',
- 'modes': {
- 's': {'exts': {
- # TODO(rg): These actually use $(SGEMM_UNROLL_M) to choose the size
- '_iutcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER']},
- '_iltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_8.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER']},
- '_outcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER']},
- '_oltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_4.c',
- 'addl': ['-UDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER']},
- }},
- 'd': {'exts': {
- '_iutcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-ULOWER']},
- '_iltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_4.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-UOUTER', '-DLOWER']},
- '_outcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-ULOWER']},
- '_oltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_8.c',
- 'addl': ['-DDOUBLE', '-UCOMPLEX', '-DOUTER', '-DLOWER']},
- }},
- 'c': {'exts': {
- '_iutcopy': {'dir': 'generic', 'kernel': 'zsymm_ucopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER']},
- '_iltcopy': {'dir': 'generic', 'kernel': 'zsymm_lcopy_8.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER']},
- '_outcopy': {'dir': 'generic', 'kernel': 'zsymm_ucopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER']},
- '_oltcopy': {'dir': 'generic', 'kernel': 'zsymm_lcopy_2.c',
- 'addl': ['-UDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER']},
- }},
- 'z': {'exts': {
- '_iutcopy': {'dir': 'generic', 'kernel': 'zsymm_ucopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-ULOWER']},
- '_iltcopy': {'dir': 'generic', 'kernel': 'zsymm_lcopy_4.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-UOUTER', '-DLOWER']},
- '_outcopy': {'dir': 'generic', 'kernel': 'zsymm_ucopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-ULOWER']},
- '_oltcopy': {'dir': 'generic', 'kernel': 'zsymm_lcopy_2.c',
- 'addl': ['-DDOUBLE', '-DCOMPLEX', '-DOUTER', '-DLOWER']},
- }},
- },
- },
- { 'base': '?omatcopy_k',
- 'modes': {
- 's': {'exts': {
- '_cn': {'dir': 'arm', 'kernel': 'omatcopy_cn.c', 'addl': ['-UROWM']},
- '_rn': {'dir': 'arm', 'kernel': 'omatcopy_rn.c', 'addl': ['-DROWM']},
- '_ct': {'dir': 'arm', 'kernel': 'omatcopy_ct.c', 'addl': ['-UROWM']},
- '_rt': {'dir': 'arm', 'kernel': 'omatcopy_rt.c', 'addl': ['-DROWM']},
- }},
- 'd': {'exts': {
- '_cn': {'dir': 'arm', 'kernel': 'omatcopy_cn.c', 'addl': ['-UROWM']},
- '_rn': {'dir': 'arm', 'kernel': 'omatcopy_rn.c', 'addl': ['-DROWM']},
- '_ct': {'dir': 'arm', 'kernel': 'omatcopy_ct.c', 'addl': ['-UROWM']},
- '_rt': {'dir': 'arm', 'kernel': 'omatcopy_rt.c', 'addl': ['-DROWM']},
- }},
- 'c': {'exts': {
- '_cn': {'dir': 'arm', 'kernel': 'zomatcopy_cn.c', 'addl': ['-UROWM', '-UCONJ']},
- '_rn': {'dir': 'arm', 'kernel': 'zomatcopy_rn.c', 'addl': ['-DROWM', '-UCONJ']},
- '_ct': {'dir': 'arm', 'kernel': 'zomatcopy_ct.c', 'addl': ['-UROWM', '-UCONJ']},
- '_rt': {'dir': 'arm', 'kernel': 'zomatcopy_rt.c', 'addl': ['-DROWM', '-UCONJ']},
- '_cnc': {'dir': 'arm', 'kernel': 'zomatcopy_cnc.c', 'addl': ['-UROWM', '-DCONJ']},
- '_rnc': {'dir': 'arm', 'kernel': 'zomatcopy_rnc.c', 'addl': ['-DROWM', '-DCONJ']},
- '_ctc': {'dir': 'arm', 'kernel': 'zomatcopy_ctc.c', 'addl': ['-UROWM', '-DCONJ']},
- '_rtc': {'dir': 'arm', 'kernel': 'zomatcopy_rtc.c', 'addl': ['-DROWM', '-DCONJ']},
- }},
- 'z': {'exts': {
- '_cn': {'dir': 'arm', 'kernel': 'zomatcopy_cn.c', 'addl': ['-UROWM', '-UCONJ']},
- '_rn': {'dir': 'arm', 'kernel': 'zomatcopy_rn.c', 'addl': ['-DROWM', '-UCONJ']},
- '_ct': {'dir': 'arm', 'kernel': 'zomatcopy_ct.c', 'addl': ['-UROWM', '-UCONJ']},
- '_rt': {'dir': 'arm', 'kernel': 'zomatcopy_rt.c', 'addl': ['-DROWM', '-UCONJ']},
- '_cnc': {'dir': 'arm', 'kernel': 'zomatcopy_cnc.c', 'addl': ['-UROWM', '-DCONJ']},
- '_rnc': {'dir': 'arm', 'kernel': 'zomatcopy_rnc.c', 'addl': ['-DROWM', '-DCONJ']},
- '_ctc': {'dir': 'arm', 'kernel': 'zomatcopy_ctc.c', 'addl': ['-UROWM', '-DCONJ']},
- '_rtc': {'dir': 'arm', 'kernel': 'zomatcopy_rtc.c', 'addl': ['-DROWM', '-DCONJ']},
- }},
- },
- },
- { 'base': '?imatcopy_k',
- 'modes': {
- 's': {'exts': {
- '_cn': {'dir': 'generic', 'kernel': 'imatcopy_cn.c', 'addl': ['-UROWM']},
- '_rn': {'dir': 'generic', 'kernel': 'imatcopy_rn.c', 'addl': ['-DROWM']},
- '_ct': {'dir': 'generic', 'kernel': 'imatcopy_ct.c', 'addl': ['-UROWM']},
- '_rt': {'dir': 'generic', 'kernel': 'imatcopy_rt.c', 'addl': ['-DROWM']},
- }},
- 'd': {'exts': {
- '_cn': {'dir': 'generic', 'kernel': 'imatcopy_cn.c', 'addl': ['-UROWM']},
- '_rn': {'dir': 'generic', 'kernel': 'imatcopy_rn.c', 'addl': ['-DROWM']},
- '_ct': {'dir': 'generic', 'kernel': 'imatcopy_ct.c', 'addl': ['-UROWM']},
- '_rt': {'dir': 'generic', 'kernel': 'imatcopy_rt.c', 'addl': ['-DROWM']},
- }},
- 'c': {'exts': {
- '_cn': {'dir': 'generic', 'kernel': 'zimatcopy_cn.c', 'addl': ['-UROWM', '-UCONJ']},
- '_rn': {'dir': 'generic', 'kernel': 'zimatcopy_rn.c', 'addl': ['-DROWM', '-UCONJ']},
- '_ct': {'dir': 'generic', 'kernel': 'zimatcopy_ct.c', 'addl': ['-UROWM', '-UCONJ']},
- '_rt': {'dir': 'generic', 'kernel': 'zimatcopy_rt.c', 'addl': ['-DROWM', '-UCONJ']},
- '_cnc': {'dir': 'generic', 'kernel': 'zimatcopy_cnc.c', 'addl': ['-UROWM', '-DCONJ']},
- '_rnc': {'dir': 'generic', 'kernel': 'zimatcopy_rnc.c', 'addl': ['-DROWM', '-DCONJ']},
- '_ctc': {'dir': 'generic', 'kernel': 'zimatcopy_ctc.c', 'addl': ['-UROWM', '-DCONJ']},
- '_rtc': {'dir': 'generic', 'kernel': 'zimatcopy_rtc.c', 'addl': ['-DROWM', '-DCONJ']},
- }},
- 'z': {'exts': {
- '_cn': {'dir': 'generic', 'kernel': 'zimatcopy_cn.c', 'addl': ['-UROWM', '-UCONJ']},
- '_rn': {'dir': 'generic', 'kernel': 'zimatcopy_rn.c', 'addl': ['-DROWM', '-UCONJ']},
- '_ct': {'dir': 'generic', 'kernel': 'zimatcopy_ct.c', 'addl': ['-UROWM', '-UCONJ']},
- '_rt': {'dir': 'generic', 'kernel': 'zimatcopy_rt.c', 'addl': ['-DROWM', '-UCONJ']},
- '_cnc': {'dir': 'generic', 'kernel': 'zimatcopy_cnc.c', 'addl': ['-UROWM', '-DCONJ']},
- '_rnc': {'dir': 'generic', 'kernel': 'zimatcopy_rnc.c', 'addl': ['-DROWM', '-DCONJ']},
- '_ctc': {'dir': 'generic', 'kernel': 'zimatcopy_ctc.c', 'addl': ['-UROWM', '-DCONJ']},
- '_rtc': {'dir': 'generic', 'kernel': 'zimatcopy_rtc.c', 'addl': ['-DROWM', '-DCONJ']},
- }},
- },
- },
- {
- 'base': '?geadd',
- 'modes': {
- 's': {'exts': {'_k': {'dir': 'generic', 'kernel': 'geadd.c', 'addl': ['-UROWM']}}},
- 'd': {'exts': {'_k': {'dir': 'generic', 'kernel': 'geadd.c', 'addl': ['-UROWM']}}},
- 'c': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zgeadd.c', 'addl': ['-UROWM']}}},
- 'z': {'exts': {'_k': {'dir': 'generic', 'kernel': 'zgeadd.c', 'addl': ['-UROWM']}}},
- },
- },
- { 'base': '?gemm_small_kernel',
- 'modes': {
- 's': {
- 'exts': {
- '_nn': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_nn.c',
- },
- '_nt': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_nt.c',
- },
- '_tn': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_tn.c',
- },
- '_tt': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_tt.c',
- },
- # '_b0_nn': {'dir': 'generic',
- # 'kernel': 'gemm_small_matrix_kernel_nn.c',
- # 'addl': ['-DB0'],
- # },
- # '_b0_nt': {'dir': 'generic',
- # 'kernel': 'gemm_small_matrix_kernel_nt.c',
- # 'addl': ['-DB0'],
- # },
- # '_b0_tn': {'dir': 'generic',
- # 'kernel': 'gemm_small_matrix_kernel_tn.c',
- # 'addl': ['-DB0'],
- # },
- # '_b0_tt': {'dir': 'generic',
- # 'kernel': 'gemm_small_matrix_kernel_tt.c',
- # 'addl': ['-DB0'],
- # },
- }
- },
- 'd': {
- 'exts': {
- '_nn': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_nn.c',
- },
- '_nt': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_nt.c',
- },
- '_tn': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_tn.c',
- },
- '_tt': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_tt.c',
- },
- }
- },
- 'c': {
- 'exts': {
- '_nn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_nr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_rn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_rr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_nt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_nc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_rt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_rc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_tn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_tr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_cn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_cr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_tt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_tc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_ct': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_cc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- }
- },
- 'z': {
- 'exts': {
- '_nn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_nr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_rn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_rr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_nt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_nc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_rt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_rc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_tn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_tr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_cn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_cr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_tt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_tc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_ct': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_cc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- }
- },
- },
- },
- { 'base': '?gemm_small_kernel_b0',
- 'modes': {
- 's': {
- 'exts': {
- '_nn': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_nn.c',
- },
- '_nt': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_nt.c',
- },
- '_tn': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_tn.c',
- },
- '_tt': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_tt.c',
- },
- }
- },
- 'd': {
- 'exts': {
- '_nn': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_nn.c',
- },
- '_nt': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_nt.c',
- },
- '_tn': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_tn.c',
- },
- '_tt': {'dir': 'generic',
- 'kernel': 'gemm_small_matrix_kernel_tt.c',
- },
- }
- },
- 'c': {
- 'exts': {
- '_nn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_nr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_rn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_rr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_nt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_nc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_rt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_rc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_tn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_tr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_cn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_cr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_tt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_tc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_ct': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_cc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- }
- },
- 'z': {
- 'exts': {
- '_nn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_nr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_rn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_rr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nn.c',
- },
- '_nt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_nc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_rt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_rc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_nt.c',
- },
- '_tn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_tr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_cn': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_cr': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tn.c',
- },
- '_tt': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_tc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_ct': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- '_cc': {'dir': 'generic',
- 'kernel': 'zgemm_small_matrix_kernel_tt.c',
- },
- }
- },
- },
- },
- ]
-
- # Expand every entry of base_kops into one build configuration per
- # (precision mode, extension) pair. Each configuration is a dict holding the
- # final symbol name ('name'), the kernel source path ('src') and the complete
- # list of compiler flags ('c_args').
- kernel_confs = []
- # Mapping-dict key -> compiler switch prefixed to every name listed under it.
- # 'def' is handled before 'undef', so -D flags always precede -U flags.
- _flag_kinds = [['def', '-D'], ['undef', '-U']]
- foreach _kop : base_kops
-   base = _kop['base']
-   modes = _kop['modes']
-   # Flags attached to the base operation itself, if symb_defs knows about it.
-   _ckop_args = []
-   if symb_defs.has_key(base)
-     symb_base = symb_defs[base]
-     foreach _fk : _flag_kinds
-       if symb_base.has_key(_fk[0])
-         foreach _name : symb_base[_fk[0]]
-           _ckop_args += [_fk[1] + _name]
-         endforeach
-       endif
-     endforeach
-   endif
-   foreach mode, details : modes
-     # The 'x' and 'q' modes are skipped entirely.
-     if mode in ['x', 'q']
-       continue
-     endif
-     # Rebuilt for every mode so flags never leak from one precision to the
-     # next: common flags, then per-operation flags, then precision flags.
-     __cargs = _cargs + _ckop_args
-     prec_mode = precision_mappings[mode]
-     foreach _fk : _flag_kinds
-       if prec_mode.has_key(_fk[0])
-         foreach _name : prec_mode[_fk[0]]
-           __cargs += [_fk[1] + _name]
-         endforeach
-       endif
-     endforeach
-     # One configuration per extension, each with its own final symbol name.
-     foreach ext, extdat : details['exts']
-       # Reset for each ext so definitions from a previous ext are not repeated.
-       _ext_cargs = []
-       # Decide whether the direct ext_mappings table applies to this ext.
-       # NOTE(review): the 'except' key is looked up on ext_mappings itself,
-       # not on ext_mappings[ext]; ext_mappings is defined elsewhere, so
-       # confirm whether a per-extension 'except' list was intended.
-       _use_ext_table = false
-       if ext_mappings.has_key(ext)
-         if not ext_mappings.has_key('except')
-           _use_ext_table = true
-         elif base not in ext_mappings['except']
-           _use_ext_table = true
-         endif
-       endif
-       if _use_ext_table
-         _ext_entry = ext_mappings[ext]
-         foreach _fk : _flag_kinds
-           if _ext_entry.has_key(_fk[0])
-             foreach _name : _ext_entry[_fk[0]]
-               _ext_cargs += [_fk[1] + _name]
-             endforeach
-           endif
-         endforeach
-       else
-         # Otherwise scan ext_mappings_l2 followed by ext_mappings_l3 and use
-         # the first entry that matches this ext and mode and does not list
-         # the base operation under 'except'.
-         foreach _cand : ext_mappings_l2 + ext_mappings_l3
-           if _cand['ext'] != ext or mode not in _cand['for']
-             continue
-           endif
-           if _cand.has_key('except') and base in _cand['except']
-             continue
-           endif
-           foreach _fk : _flag_kinds
-             if _cand.has_key(_fk[0])
-               foreach _name : _cand[_fk[0]]
-                 _ext_cargs += [_fk[1] + _name]
-               endforeach
-             endif
-           endforeach
-           # Only the first matching entry contributes flags.
-           break
-         endforeach
-       endif
-
-       # Extra flags listed directly on the kernel entry come after the mapped ones.
-       if extdat.has_key('addl')
-         _ext_cargs += extdat['addl']
-       endif
-       src = join_paths(extdat['dir'], extdat['kernel'])
-       # Symbol name: the '?' placeholder becomes the mode letter, then the ext
-       # is appended; the second variant carries a trailing underscore.
-       sym_name = base.replace('?', mode) + ext
-       sym_underscored = sym_name + '_'
-       _ext_cargs += [
-         f'-DASMNAME=@sym_name@',
-         f'-DASMFNAME=@sym_underscored@',
-         f'-DNAME=@sym_underscored@',
-         f'-DCNAME=@sym_name@',
-         f'-DCHAR_NAME="@sym_underscored@"',
-         f'-DCHAR_CNAME="@sym_name@"',
-       ]
-       kernel_confs += [{
-         'c_args': __cargs + _ext_cargs,
-         'name': sym_name,
-         'src': src,
-       }]
-     endforeach
-   endforeach
- endforeach
-
- # Build one static library per kernel configuration, so each symbol is
- # compiled with its own flag set, then fold them all into the '_kern' library.
- _is_asm = false
- _kern_deps = []
- _kern_libs = []
- foreach conf : kernel_confs
-   _kern_libs += [
-     static_library(
-       conf['name'],
-       [conf['src'], config_h],
-       include_directories: _inc,
-       c_args: conf['c_args'],
-       # The same flags are passed as fortran_args as well; see gh discussion
-       # 13374 for why, basically .S are coded as fortran..
-       fortran_args: conf['c_args'],
-     ),
-   ]
- endforeach
-
- # link_whole pulls every object of the per-kernel libraries into '_kern'.
- _kern = static_library(
-   '_kern',
-   dependencies: _kern_deps,
-   link_whole: _kern_libs,
- )
|