| @@ -19,7 +19,7 @@ base_kops = [ | |||||
| 'q': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zrot.c'}}}, | 'q': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zrot.c'}}}, | ||||
| 'cs': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zrot.c'}}}, | 'cs': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zrot.c'}}}, | ||||
| 'zd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zrot.c'}}}, | 'zd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zrot.c'}}}, | ||||
| 'xq': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot.S'}}}, | |||||
| # 'xq': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zrot.S'}}}, | |||||
| }, | }, | ||||
| }, | }, | ||||
| { 'base': '?swap', | { 'base': '?swap', | ||||
| @@ -38,8 +38,8 @@ base_kops = [ | |||||
| 'd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'scal.c'}}}, | 'd': {'exts': {'_k': {'dir': 'arm', 'kernel': 'scal.c'}}}, | ||||
| 'c': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zscal.c'}}}, | 'c': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zscal.c'}}}, | ||||
| 'z': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zscal.c'}}}, | 'z': {'exts': {'_k': {'dir': 'arm', 'kernel': 'zscal.c'}}}, | ||||
| 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'scal.S'}}}, | |||||
| 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zscal.S'}}}, | |||||
| # 'q': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'scal.S'}}}, | |||||
| # 'x': {'exts': {'_k': {'dir': 'x86_64', 'kernel': 'zscal.S'}}}, | |||||
| }, | }, | ||||
| }, | }, | ||||
| { 'base': '?copy', | { 'base': '?copy', | ||||
| @@ -310,7 +310,7 @@ base_kops = [ | |||||
| { 'base': '?gemm3m', | { 'base': '?gemm3m', | ||||
| 'modes': { | 'modes': { | ||||
| 'c': {'exts': { | 'c': {'exts': { | ||||
| '_kernel': {'dir': 'x86_64', 'kernel': 'zgemm3m_kernel_4x8_nehalem.S', | |||||
| '_kernel': {'dir': 'x86_64', 'kernel': 'cgemm3m_kernel_8x4_haswell.c', | |||||
| 'addl': ['-DICOPY', '-UUSE_ALPHA']}, | 'addl': ['-DICOPY', '-UUSE_ALPHA']}, | ||||
| '_oncopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c', | '_oncopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c', | ||||
| 'addl': ['-DUSE_ALPHA']}, | 'addl': ['-DUSE_ALPHA']}, | ||||
| @@ -338,7 +338,7 @@ base_kops = [ | |||||
| 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']}, | 'addl': ['-DUSE_ALPHA', '-DREAL_ONLY']}, | ||||
| }}, | }}, | ||||
| 'z': {'exts': { | 'z': {'exts': { | ||||
| '_kernel': {'dir': 'x86_64', 'kernel': 'zgemm3m_kernel_4x8_nehalem.S', | |||||
| '_kernel': {'dir': 'x86_64', 'kernel': 'zgemm3m_kernel_4x4_haswell.c', | |||||
| 'addl': ['-DICOPY', '-UUSE_ALPHA']}, | 'addl': ['-DICOPY', '-UUSE_ALPHA']}, | ||||
| '_oncopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c', | '_oncopyb': {'dir': 'generic', 'kernel': 'zgemm3m_ncopy_4.c', | ||||
| 'addl': ['-DUSE_ALPHA']}, | 'addl': ['-DUSE_ALPHA']}, | ||||
| @@ -592,8 +592,8 @@ base_kops = [ | |||||
| '_oncopy': {'dir': 'generic', 'kernel': 'gemm_ncopy_4.c'}, | '_oncopy': {'dir': 'generic', 'kernel': 'gemm_ncopy_4.c'}, | ||||
| '_otcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_4.c'}, | '_otcopy': {'dir': 'generic', 'kernel': 'gemm_tcopy_4.c'}, | ||||
| # TODO(rg): direct and direct_performant are built only conditionally | # TODO(rg): direct and direct_performant are built only conditionally | ||||
| '_direct': {'dir': 'x86_64', 'kernel': 'sgemm_direct_skylakex.c'}, | |||||
| '_direct_performant': {'dir': 'x86_64', 'kernel': 'sgemm_direct_performant.c'}, | |||||
| # '_direct': {'dir': 'x86_64', 'kernel': 'sgemm_direct_skylakex.c'}, | |||||
| # '_direct_performant': {'dir': 'x86_64', 'kernel': 'sgemm_direct_performant.c'}, | |||||
| }}, | }}, | ||||
| 'd': {'exts': { | 'd': {'exts': { | ||||
| '_beta': {'dir': 'generic', 'kernel': 'gemm_beta.c'}, | '_beta': {'dir': 'generic', 'kernel': 'gemm_beta.c'}, | ||||
| @@ -1159,6 +1159,9 @@ foreach _kop : base_kops | |||||
| endif | endif | ||||
| endif | endif | ||||
| foreach mode, details : modes | foreach mode, details : modes | ||||
| if mode == 'x' or mode == 'q' | |||||
| continue | |||||
| endif | |||||
| # Generally, one list is required for each foreach | # Generally, one list is required for each foreach | ||||
| __cargs = _cargs + _ckop_args | __cargs = _cargs + _ckop_args | ||||
| prec_mode = precision_mappings[mode] | prec_mode = precision_mappings[mode] | ||||
| @@ -1237,13 +1240,16 @@ _kern_deps = [] | |||||
| _is_asm = false | _is_asm = false | ||||
| foreach conf: kernel_confs | foreach conf: kernel_confs | ||||
| if '.s' in conf['src'] or '.S' in conf['src'] | if '.s' in conf['src'] or '.S' in conf['src'] | ||||
| message(conf) | |||||
| _is_asm = true | _is_asm = true | ||||
| endif | endif | ||||
| if _is_asm | |||||
| # TODO(rg): This branch should apply to every assembly kernel (_is_asm), not only to confs whose name contains 'dgemm_kernel' | |||||
| if 'dgemm_kernel' in conf['name'] | |||||
| message(conf) | |||||
| # See https://github.com/mesonbuild/meson/discussions/13374 | # See https://github.com/mesonbuild/meson/discussions/13374 | ||||
| _kern_deps += declare_dependency( | _kern_deps += declare_dependency( | ||||
| sources: conf['src'], | sources: conf['src'], | ||||
| include_directories: _inc, | |||||
| include_directories: _inc, | |||||
| compile_args: conf['c_args'], | compile_args: conf['c_args'], | ||||
| ) | ) | ||||
| else | else | ||||