| @@ -158,6 +158,173 @@ driver_kops = [ | |||
| '_LN', '_LT']}, | |||
| } | |||
| }, | |||
| { 'base': '?hemm', | |||
| 'sources': { | |||
| 'zhemm_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| # TODO(rg): Do we need ../../param.h ? | |||
| # See Makefile:1612 | |||
| 'exts': ['_LU', '_LL', | |||
| '_RU', '_RL']}, | |||
| } | |||
| }, | |||
| { 'base': '?hemm_thread', | |||
| 'sources': { | |||
| 'zhemm_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'addl': ['-DTHREADED_LEVEL3'], | |||
| 'exts': ['_LU', '_LL', | |||
| '_RU', '_RL']}, | |||
| } | |||
| }, | |||
| { 'base': '?herk', | |||
| 'sources': { | |||
| 'zherk_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'addl': ['-DHERK'], | |||
| # TODO(rg): Do we need ../../common.h ? | |||
| # See Makefile:1684 | |||
| 'exts': ['_UN', '_UC', | |||
| '_LN', '_LC']}, | |||
| } | |||
| }, | |||
| { 'base': '?herk_kernel', | |||
| 'sources': { | |||
| 'zherk_kernel.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'addl': ['-DHERK'], | |||
| 'exts': ['_UN', '_UC', | |||
| '_LN', '_LC']}, | |||
| } | |||
| }, | |||
| { 'base': '?herk_thread', | |||
| 'sources': { | |||
| 'zherk_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'addl': ['-DHERK', '-DTHREADED_LEVEL3'], | |||
| 'exts': ['_UN', '_UC', | |||
| '_LN', '_LC']}, | |||
| } | |||
| }, | |||
| { 'base': '?her2k', | |||
| 'sources': { | |||
| 'zher2k_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'addl': ['-DHER2K'], | |||
| # TODO(rg): Do we need ../../common.h ? | |||
| # See Makefile:1793 | |||
| 'exts': ['_UN', '_UC', | |||
| '_LN', '_LC']}, | |||
| } | |||
| }, | |||
| # HER2K kernel objects: zher2k_kernel.c for the 'c' and 'z' modes, with the | |||
| # _UN/_UC/_LN/_LC suffixes. | |||
| { 'base': '?her2k_kernel', | |||
| 'sources': { | |||
| 'zher2k_kernel.c': {'mode': ['c', 'z'], # 'x'], | |||
| # Pass -DHER2K just like the '?her2k' entry does; this mirrors how | |||
| # '?herk_kernel' repeats the -DHERK flag of '?herk'. | |||
| 'addl': ['-DHER2K'], | |||
| # TODO(rg): Do we need ../../common.h ? | |||
| # See Makefile:1793 | |||
| 'exts': ['_UN', '_UC', | |||
| '_LN', '_LC']}, | |||
| } | |||
| }, | |||
| { 'base': '?gemm3m', | |||
| 'sources': { | |||
| 'gemm3m.c': {'mode': ['c', 'z',],# 'x'], | |||
| 'srcs': ['level3.c'], | |||
| 'exts': ['_nn', '_nt', | |||
| '_nr', '_nc', | |||
| '_tn', '_tt', | |||
| '_tr', '_tc', | |||
| '_rn', '_rt', | |||
| '_rr', '_rc', | |||
| '_cn', '_ct', | |||
| '_cr', '_cc']}, | |||
| } | |||
| }, | |||
| # { 'base': '?gemmf', | |||
| # 'sources': { | |||
| # # TODO(rg): This is in Makefile:4401, but the file isn't there. | |||
| # 'zgemmf.c': {'mode': ['c', 'z',],# 'x'], | |||
| # 'srcs': ['level3.c'], | |||
| # 'exts': ['']}, | |||
| # } | |||
| # }, | |||
| { 'base': '?gemm3m_thread', | |||
| 'sources': { | |||
| 'gemm3m.c': {'mode': ['c', 'z',],# 'x'], | |||
| 'addl': ['-DTHREADED_LEVEL3'], | |||
| 'srcs': ['level3.c'], | |||
| 'exts': ['_nn', '_nt', | |||
| '_nr', '_nc', | |||
| '_tn', '_tt', | |||
| '_tr', '_tc', | |||
| '_rn', '_rt', | |||
| '_rr', '_rc', | |||
| '_cn', '_ct', | |||
| '_cr', '_cc']}, | |||
| } | |||
| }, | |||
| { 'base': '?symm3m', | |||
| 'sources': { | |||
| 'symm3m_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'exts': ['_LU', '_LL', | |||
| '_RU', '_RL']}, | |||
| } | |||
| }, | |||
| { 'base': '?symm3m_thread', | |||
| 'sources': { | |||
| 'symm3m_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'addl': ['-DTHREADED_LEVEL3'], | |||
| 'exts': ['_LU', '_LL', | |||
| '_RU', '_RL']}, | |||
| } | |||
| }, | |||
| { 'base': '?hemm3m', | |||
| 'sources': { | |||
| 'hemm3m_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'exts': ['_LU', '_LL', | |||
| '_RU', '_RL']}, | |||
| } | |||
| }, | |||
| { 'base': '?hemm3m_thread', | |||
| 'sources': { | |||
| 'hemm3m_k.c': {'mode': ['c', 'z'], # 'x'], | |||
| 'addl': ['-DTHREADED_LEVEL3'], | |||
| 'exts': ['_LU', '_LL', | |||
| '_RU', '_RL']}, | |||
| } | |||
| }, | |||
| { 'base': '?trsm', | |||
| 'sources': { | |||
| 'trsm_L.c': {'mode': ['s', 'd'],# 'q'], | |||
| 'exts': ['_LNUU', '_LNUN', | |||
| '_LNLU', '_LNLN', | |||
| '_LTUU', '_LTUN', | |||
| '_LTLU', '_LTLN']}, | |||
| 'trsm_R.c': {'mode': ['s', 'd'],# 'q'], | |||
| 'exts': ['_RNUU', '_RNUN', | |||
| '_RNLU', '_RNLN', | |||
| '_RTUU', '_RTUN', | |||
| '_RTLU', '_RTLN']}, | |||
| } | |||
| }, | |||
| { 'base': '?trsm', | |||
| 'sources': { | |||
| 'trsm_L.c': {'mode': ['c', 'z'],# 'x'], | |||
| 'exts': ['_LNUU', '_LNUN', | |||
| '_LNLU', '_LNLN', | |||
| '_LTUU', '_LTUN', | |||
| '_LTLU', '_LTLN', | |||
| '_LRUU', '_LRUN', | |||
| '_LRLU', '_LRLN', | |||
| '_LCUU', '_LCUN', | |||
| '_LCLU', '_LCLN', | |||
| ]}, | |||
| 'trsm_R.c': {'mode': ['c', 'z'],# 'x'], | |||
| 'exts': ['_RNUU', '_RNUN', | |||
| '_RNLU', '_RNLN', | |||
| '_RTUU', '_RTUN', | |||
| '_RTLU', '_RTLN', | |||
| '_RRUU', '_RRUN', | |||
| '_RRLU', '_RRLN', | |||
| '_RCUU', '_RCUN', | |||
| '_RCLU', '_RCLN', | |||
| ]}, | |||
| } | |||
| }, | |||
| ] | |||
| # Initialize kernel configurations list | |||
| @@ -349,6 +349,7 @@ _blas_roots = [ | |||
| }, | |||
| { 'base': '?herk', '_types': ['c', 'z', 'x'], | |||
| 'fname': 'syrk.c', | |||
| 'addl': ['-DHEMM'], | |||
| 'cblas': true, | |||
| }, | |||
| { 'base': '?her2k', '_types': ['c', 'z', 'x'], | |||
| @@ -556,102 +557,106 @@ _blas_roots = [ | |||
| _interface_libs = [] | |||
| foreach conf : _blas_roots | |||
| foreach type : conf['_types'] | |||
| if 'q' in type or 'x' in type | |||
| # TODO: Figure out when to build these | |||
| # These are the XDOUBLE symbols | |||
| continue | |||
| endif | |||
| # Seed with common args | |||
| compiler_args = _cargs + interface_args | |||
| # Generate the symbol flags | |||
| base = conf['base'] | |||
| if symb_defs.has_key(base) | |||
| symb_base = symb_defs[base] | |||
| if symb_base.has_key('def') | |||
| foreach _d : symb_base['def'] | |||
| compiler_args += ('-D' + _d) | |||
| endforeach | |||
| endif | |||
| if symb_base.has_key('undef') | |||
| foreach _u : symb_base['undef'] | |||
| compiler_args += ('-U' + _u) | |||
| endforeach | |||
| endif | |||
| endif | |||
| # Set the type arguments | |||
| if precision_mappings.get(type).has_key('def') | |||
| foreach d : precision_mappings[type]['def'] | |||
| compiler_args += ['-D' + d] | |||
| foreach type : conf['_types'] | |||
| if 'q' in type or 'x' in type | |||
| # TODO: Figure out when to build these | |||
| # These are the XDOUBLE symbols | |||
| continue | |||
| endif | |||
| # Seed with common args | |||
| compiler_args = _cargs + interface_args | |||
| # Generate the symbol flags | |||
| base = conf['base'] | |||
| if symb_defs.has_key(base) | |||
| symb_base = symb_defs[base] | |||
| if symb_base.has_key('def') | |||
| foreach _d : symb_base['def'] | |||
| compiler_args += ('-D' + _d) | |||
| endforeach | |||
| endif | |||
| if precision_mappings.get(type).has_key('undef') | |||
| foreach u : precision_mappings[type]['undef'] | |||
| compiler_args += ['-U' + u] | |||
| if symb_base.has_key('undef') | |||
| foreach _u : symb_base['undef'] | |||
| compiler_args += ('-U' + _u) | |||
| endforeach | |||
| endif | |||
| endif | |||
| # Set the type arguments | |||
| if precision_mappings.get(type).has_key('def') | |||
| foreach d : precision_mappings[type]['def'] | |||
| compiler_args += ['-D' + d] | |||
| endforeach | |||
| endif | |||
| if precision_mappings.get(type).has_key('undef') | |||
| foreach u : precision_mappings[type]['undef'] | |||
| compiler_args += ['-U' + u] | |||
| endforeach | |||
| endif | |||
| # Construct the actual symbol names, and mangled symbols | |||
| # TODO: This might be conditional on other options | |||
| sym_name = conf['base'].replace('?', type) | |||
| sym_underscored = f'@sym_name@_' | |||
| if conf.get('cblas', false) | |||
| cblas_sym_name = 'cblas_' + sym_name | |||
| cblas_sym_underscored = f'@cblas_sym_name@_' | |||
| endif | |||
| if conf.has_key('addl') | |||
| compiler_args += conf['addl'] | |||
| endif | |||
| # Construct conditionals: per-configuration -D / -U flags. | |||
| # 'def' and 'undef' are looked up independently with an empty-list fallback, | |||
| # so an entry that provides only one of the two keys neither aborts on a | |||
| # missing 'undef' key nor has its 'undef' list silently ignored. | |||
| foreach d : conf.get('def', []) | |||
| compiler_args += ['-D' + d] | |||
| endforeach | |||
| foreach u : conf.get('undef', []) | |||
| compiler_args += ['-U' + u] | |||
| endforeach | |||
| # Construct the actual symbol names, and mangled symbols | |||
| # TODO: This might be conditional on other options | |||
| sym_name = conf['base'].replace('?', type) | |||
| sym_underscored = f'@sym_name@_' | |||
| if conf.get('cblas', false) | |||
| cblas_sym_name = 'cblas_' + sym_name | |||
| cblas_sym_underscored = f'@cblas_sym_name@_' | |||
| endif | |||
| # Make mangled symbols | |||
| # TODO: This might be conditional on other options | |||
| # Construct conditionals: per-configuration -D / -U flags. | |||
| # 'def' and 'undef' are looked up independently with an empty-list fallback, | |||
| # so an entry that provides only one of the two keys neither aborts on a | |||
| # missing 'undef' key nor has its 'undef' list silently ignored. | |||
| foreach d : conf.get('def', []) | |||
| compiler_args += ['-D' + d] | |||
| endforeach | |||
| foreach u : conf.get('undef', []) | |||
| compiler_args += ['-U' + u] | |||
| endforeach | |||
| # Create the static library for each symbol | |||
| lib = static_library( | |||
| sym_name, | |||
| # Make mangled symbols | |||
| # TODO: This might be conditional on other options | |||
| # Create the static library for each symbol | |||
| lib = static_library( | |||
| sym_name, | |||
| sources: conf['fname'], | |||
| include_directories: _inc, | |||
| c_args: compiler_args + [ | |||
| f'-DASMNAME=@sym_name@', | |||
| f'-DASMFNAME=@sym_underscored@', | |||
| f'-DNAME=@sym_underscored@', | |||
| f'-DCNAME=@sym_name@', | |||
| f'-DCHAR_NAME="@sym_underscored@"', | |||
| f'-DCHAR_CNAME="@sym_name@"' | |||
| ] | |||
| ) | |||
| _interface_libs += lib | |||
| # If it's a CBLAS symbol, also create that | |||
| if conf.get('cblas', false) | |||
| if 'q' in type or 'x' in type | |||
| # There are no cblas_q symbols | |||
| # TODO: Handle edge cases around dz zd sc | |||
| continue | |||
| endif | |||
| cblas_lib = static_library( | |||
| cblas_sym_name, | |||
| sources: conf['fname'], | |||
| include_directories: _inc, | |||
| c_args: compiler_args + [ | |||
| f'-DASMNAME=@sym_name@', | |||
| f'-DASMFNAME=@sym_underscored@', | |||
| f'-DNAME=@sym_underscored@', | |||
| f'-DCNAME=@sym_name@', | |||
| f'-DCHAR_NAME="@sym_underscored@"', | |||
| f'-DCHAR_CNAME="@sym_name@"' | |||
| '-DCBLAS', | |||
| f'-DASMNAME=@cblas_sym_name@', | |||
| f'-DASMFNAME=@cblas_sym_underscored@', | |||
| f'-DNAME=@cblas_sym_underscored@', | |||
| f'-DCNAME=@cblas_sym_name@', | |||
| f'-DCHAR_NAME="@cblas_sym_underscored@"', | |||
| f'-DCHAR_CNAME="@cblas_sym_name@"' | |||
| ] | |||
| ) | |||
| _interface_libs += lib | |||
| # If it's a CBLAS symbol, also create that | |||
| if conf.get('cblas', false) | |||
| if 'q' in type or 'x' in type | |||
| # There are no cblas_q symbols | |||
| # TODO: Handle edge cases around dz zd sc | |||
| continue | |||
| endif | |||
| cblas_lib = static_library( | |||
| cblas_sym_name, | |||
| sources: conf['fname'], | |||
| include_directories: _inc, | |||
| c_args: compiler_args + [ | |||
| '-DCBLAS', | |||
| f'-DASMNAME=@cblas_sym_name@', | |||
| f'-DASMFNAME=@cblas_sym_underscored@', | |||
| f'-DNAME=@cblas_sym_underscored@', | |||
| f'-DCNAME=@cblas_sym_name@', | |||
| f'-DCHAR_NAME="@cblas_sym_underscored@"', | |||
| f'-DCHAR_CNAME="@cblas_sym_name@"' | |||
| ] | |||
| ) | |||
| _interface_libs += cblas_lib | |||
| endif | |||
| _interface_libs += cblas_lib | |||
| endif | |||
| endforeach | |||
| endforeach | |||
| @@ -268,7 +268,9 @@ ext_mappings = { | |||
| # '_V': {'def': ['XCONJ'], 'undef': ['CONJ']}, | |||
| '_D': {'def': ['CONJ', 'XCONJ']}, | |||
| '_L': {'def': ['LOWER']}, | |||
| '_LN': {'def': ['LEFT'], 'undef': ['TRANSA'], 'except': ['?syrk', '?syrk_thread', '?syr2k']}, | |||
| '_LN': {'def': ['LEFT'], 'undef': ['TRANSA'], | |||
| 'except': ['?syrk', '?syrk_thread', | |||
| '?syr2k', '?herk', '?herk_kernel']}, | |||
| # Handle HEMV and HEMVREV better | |||
| '_V': {'def': ['HEMV', 'HEMVREV', 'XCONJ'], 'undef': ['LOWER', 'CONJ']}, | |||
| '_M': {'def': ['HEMV', 'HEMVREV', 'LOWER']}, | |||
| @@ -299,10 +301,13 @@ ext_mappings = { | |||
| # Level 3 symbols | |||
| '_LU': {'def': ['NN'], 'undef': ['LOWER', 'RSIDE']}, | |||
| '_LL': {'def': ['LOWER', 'NN'], 'undef': ['RSIDE']}, | |||
| '_RU': {'def': ['RSIDE', 'NN'], 'undef': ['LOWER']}, | |||
| '_RL': {'def': ['RSIDE', 'NN', 'LOWER']}, | |||
| '_UN': {'undef': ['TRANS', 'LOWER'], 'except': ['?syrk']}, | |||
| '_RU': {'def': ['RSIDE', 'NN'], 'undef': ['LOWER'], 'except': ['?hemm', '?hemm_thread']}, | |||
| '_RL': {'def': ['RSIDE', 'NN', 'LOWER'], 'except': ['?hemm', '?hemm_thread']}, | |||
| # TODO(rg): is CONJ OK for interface symbols? | |||
| '_UN': {'undef': ['TRANS', 'LOWER', 'CONJ'], 'except': ['?syrk']}, | |||
| '_UT': {'def': ['TRANS'], 'undef': ['LOWER'], 'except': ['?syrk']}, | |||
| '_UC': {'def': ['TRANS', 'CONJ'], 'undef': ['LOWER']}, | |||
| '_LC': {'def': ['LOWER', 'TRANS', 'CONJ']}, | |||
| } | |||
| ext_mappings_l2 = [ | |||
| @@ -388,8 +393,10 @@ ext_mappings_l3 = [ | |||
| # syrk | |||
| {'ext': '_UN', 'def': [], 'undef': ['LOWER', 'TRANS'], 'for': ['s', 'd', 'c', 'z']}, | |||
| {'ext': '_UT', 'def': ['TRANS'], 'undef': ['LOWER'], 'for': ['s', 'd', 'c', 'z']}, | |||
| {'ext': '_LN', 'def': ['LOWER'], 'undef': ['TRANS'], 'for': ['s', 'd', 'c', 'z']}, | |||
| {'ext': '_LN', 'def': ['LOWER'], 'undef': ['TRANS', 'CONJ'], 'for': ['s', 'd', 'c', 'z']}, | |||
| {'ext': '_LT', 'def': ['TRANS', 'LOWER'], 'for': ['s', 'd', 'c', 'z']}, | |||
| {'ext': '_RU', 'def': ['RSIDE', 'NC'], 'undef': ['LOWER'], 'for': ['c', 'z']}, | |||
| {'ext': '_RL', 'def': ['RSIDE', 'NC', 'LOWER'], 'for': ['c', 'z']}, | |||
| ] | |||
| # cc -c -O2 -DSMALL_MATRIX_OPT -DMAX_STACK_ALLOC=2048 -Wall -m64 -DF_INTERFACE_GFORT -fPIC -DSMP_SERVER -DNO_WARMUP -DMAX_CPU_NUMBER=12 -DMAX_PARALLEL_NUMBER=1 -DBUILD_SINGLE=1 -DBUILD_DOUBLE=1 -DBUILD_COMPLEX=1 -DBUILD_COMPLEX16=1 -DVERSION=\"0.3.26.dev\" -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mavx2 -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME -DASMNAME=strmm_RTUU -DASMFNAME=strmm_RTUU_ -DNAME=strmm_RTUU_ -DCNAME=strmm_RTUU -DCHAR_NAME=\"strmm_RTUU_\" -DCHAR_CNAME=\"strmm_RTUU\" -DNO_AFFINITY -I../.. -UDOUBLE -UCOMPLEX -UCOMPLEX -UDOUBLE -DTRANSA -DUPPER -DUNIT trmm_R.c -o strmm_RTUU.o | |||
| @@ -409,7 +416,6 @@ symb_defs = { | |||
| '?geru': {'undef': ['CONJ']}, | |||
| '?gerc': {'def': ['CONJ']}, | |||
| '?hemm': {'def': ['HEMM']}, | |||
| '?herk': {'def': ['HEMM']}, | |||
| '?her2k': {'def': ['HEMM']}, | |||
| '?gemm3m': {'def': ['GEMM3M']}, | |||
| '?symm3m': {'def': ['GEMM3M']}, | |||
| @@ -428,6 +434,7 @@ _inc = [include_directories('.')] | |||
| subdir('interface') | |||
| subdir('driver/level2') | |||
| subdir('driver/level3') | |||
| # subdir('driver/others') | |||
| subdir('kernel') | |||
| _openblas = static_library('openblas', | |||