You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 58 kB

7 years ago
7 years ago
7 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023, 2025 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void);
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16_ONLY
  52. 0, 0, 0,
  53. BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N,
  54. #ifdef BGEMM_DEFAULT_UNROLL_MN
  55. BGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. BGEMM_ALIGN_K,
  60. bgemm_kernelTS, bgemm_betaTS,
  61. bgemm_incopyTS, bgemm_itcopyTS,
  62. bgemm_oncopyTS, bgemm_otcopyTS,
  63. #endif
  64. #ifdef BUILD_BFLOAT16
  65. 0, 0, 0,
  66. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  67. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  68. SBGEMM_DEFAULT_UNROLL_MN,
  69. #else
  70. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  71. #endif
  72. SBGEMM_ALIGN_K,
  73. 0, // need_amxtile_permission
  74. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  75. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  76. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  77. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  78. dsdot_kTS,
  79. srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  80. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  81. ssymv_LTS, ssymv_UTS,
  82. sbgemm_kernelTS, sbgemm_betaTS,
  83. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  84. sbgemm_incopyTS, sbgemm_itcopyTS,
  85. #else
  86. sbgemm_oncopyTS, sbgemm_otcopyTS,
  87. #endif
  88. sbgemm_oncopyTS, sbgemm_otcopyTS,
  89. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  90. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  91. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  92. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  93. #else
  94. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  95. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  96. #endif
  97. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  98. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  99. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  100. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  101. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  102. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  103. #else
  104. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  105. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  106. #endif
  107. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  108. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  109. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  110. ssymm_iutcopyTS, ssymm_iltcopyTS,
  111. #else
  112. ssymm_outcopyTS, ssymm_oltcopyTS,
  113. #endif
  114. ssymm_outcopyTS, ssymm_oltcopyTS,
  115. #ifndef NO_LAPACK
  116. sneg_tcopyTS, slaswp_ncopyTS,
  117. #else
  118. NULL,NULL,
  119. #endif
  120. #ifdef SMALL_MATRIX_OPT
  121. sbgemm_small_matrix_permitTS,
  122. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  123. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  124. #endif
  125. #endif
  126. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  127. 0, 0, 0,
  128. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  129. #ifdef SGEMM_DEFAULT_UNROLL_MN
  130. SGEMM_DEFAULT_UNROLL_MN,
  131. #else
  132. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  133. #endif
  134. #endif
  135. #ifdef HAVE_EXCLUSIVE_CACHE
  136. 1,
  137. #else
  138. 0,
  139. #endif
  140. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  141. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  142. #endif
  143. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  144. isamax_kTS,
  145. #endif
  146. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  147. isamin_kTS, ismax_kTS, ismin_kTS,
  148. snrm2_kTS, sasum_kTS,
  149. #endif
  150. #if BUILD_SINGLE == 1
  151. ssum_kTS,
  152. #endif
  153. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  154. scopy_kTS, sdot_kTS,
  155. // dsdot_kTS,
  156. srot_kTS, srotm_kTS, saxpy_kTS,
  157. #endif
  158. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  159. sscal_kTS,
  160. #endif
  161. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  162. sswap_kTS,
  163. sgemv_nTS, sgemv_tTS,
  164. #endif
  165. #if BUILD_SINGLE == 1
  166. sger_kTS,
  167. #endif
  168. #if BUILD_SINGLE == 1
  169. ssymv_LTS, ssymv_UTS,
  170. #endif
  171. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  172. #ifdef ARCH_X86_64
  173. sgemm_directTS,
  174. sgemm_direct_performantTS,
  175. #endif
  176. #ifdef ARCH_ARM64
  177. sgemm_directTS,
  178. #endif
  179. sgemm_kernelTS, sgemm_betaTS,
  180. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  181. sgemm_incopyTS, sgemm_itcopyTS,
  182. #else
  183. sgemm_oncopyTS, sgemm_otcopyTS,
  184. #endif
  185. sgemm_oncopyTS, sgemm_otcopyTS,
  186. #endif
  187. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  188. #ifdef SMALL_MATRIX_OPT
  189. sgemm_small_matrix_permitTS,
  190. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  191. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  192. #endif
  193. #endif
  194. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  195. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  196. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  197. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  198. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  199. #else
  200. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  201. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  202. #endif
  203. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  204. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  205. #endif
  206. #if (BUILD_SINGLE==1)
  207. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  208. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  209. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  210. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  211. #else
  212. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  213. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  214. #endif
  215. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  216. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  217. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  218. ssymm_iutcopyTS, ssymm_iltcopyTS,
  219. #else
  220. ssymm_outcopyTS, ssymm_oltcopyTS,
  221. #endif
  222. ssymm_outcopyTS, ssymm_oltcopyTS,
  223. #ifndef NO_LAPACK
  224. sneg_tcopyTS, slaswp_ncopyTS,
  225. #else
  226. NULL,NULL,
  227. #endif
  228. #endif
  229. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  230. 0, 0, 0,
  231. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  232. #ifdef DGEMM_DEFAULT_UNROLL_MN
  233. DGEMM_DEFAULT_UNROLL_MN,
  234. #else
  235. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  236. #endif
  237. #endif
  238. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  239. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  240. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  241. dnrm2_kTS, dasum_kTS,
  242. #endif
  243. #if (BUILD_DOUBLE==1)
  244. dsum_kTS,
  245. #endif
  246. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  247. dcopy_kTS, ddot_kTS,
  248. #endif
  249. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  250. dsdot_kTS,
  251. #endif
  252. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  253. drot_kTS,
  254. drotm_kTS,
  255. daxpy_kTS,
  256. dscal_kTS,
  257. dswap_kTS,
  258. dgemv_nTS, dgemv_tTS,
  259. #endif
  260. #if (BUILD_DOUBLE==1)
  261. dger_kTS,
  262. dsymv_LTS, dsymv_UTS,
  263. #endif
  264. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  265. dgemm_kernelTS, dgemm_betaTS,
  266. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  267. dgemm_incopyTS, dgemm_itcopyTS,
  268. #else
  269. dgemm_oncopyTS, dgemm_otcopyTS,
  270. #endif
  271. dgemm_oncopyTS, dgemm_otcopyTS,
  272. #endif
  273. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  274. #ifdef SMALL_MATRIX_OPT
  275. dgemm_small_matrix_permitTS,
  276. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  277. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  278. #endif
  279. #endif
  280. #if (BUILD_DOUBLE==1)
  281. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  282. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  283. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  284. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  285. #else
  286. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  287. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  288. #endif
  289. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  290. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  291. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  292. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  293. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  294. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  295. #else
  296. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  297. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  298. #endif
  299. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  300. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  301. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  302. dsymm_iutcopyTS, dsymm_iltcopyTS,
  303. #else
  304. dsymm_outcopyTS, dsymm_oltcopyTS,
  305. #endif
  306. dsymm_outcopyTS, dsymm_oltcopyTS,
  307. #ifndef NO_LAPACK
  308. dneg_tcopyTS, dlaswp_ncopyTS,
  309. #else
  310. NULL, NULL,
  311. #endif
  312. #endif
  313. #ifdef EXPRECISION
  314. 0, 0, 0,
  315. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  316. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  317. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  318. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  319. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  320. qgemv_nTS, qgemv_tTS, qger_kTS,
  321. qsymv_LTS, qsymv_UTS,
  322. qgemm_kernelTS, qgemm_betaTS,
  323. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  324. qgemm_incopyTS, qgemm_itcopyTS,
  325. #else
  326. qgemm_oncopyTS, qgemm_otcopyTS,
  327. #endif
  328. qgemm_oncopyTS, qgemm_otcopyTS,
  329. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  330. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  331. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  332. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  333. #else
  334. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  335. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  336. #endif
  337. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  338. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  339. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  340. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  341. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  342. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  343. #else
  344. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  345. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  346. #endif
  347. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  348. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  349. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  350. qsymm_iutcopyTS, qsymm_iltcopyTS,
  351. #else
  352. qsymm_outcopyTS, qsymm_oltcopyTS,
  353. #endif
  354. qsymm_outcopyTS, qsymm_oltcopyTS,
  355. #ifndef NO_LAPACK
  356. qneg_tcopyTS, qlaswp_ncopyTS,
  357. #else
  358. NULL, NULL,
  359. #endif
  360. #endif
  361. #if (BUILD_COMPLEX)
  362. 0, 0, 0,
  363. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  364. #ifdef CGEMM_DEFAULT_UNROLL_MN
  365. CGEMM_DEFAULT_UNROLL_MN,
  366. #else
  367. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  368. #endif
  369. #if (BUILD_COMPLEX)
  370. camax_kTS, camin_kTS,
  371. #endif
  372. #if (BUILD_COMPLEX)
  373. icamax_kTS,
  374. #endif
  375. #if (BUILD_COMPLEX)
  376. icamin_kTS,
  377. cnrm2_kTS, casum_kTS, csum_kTS,
  378. #endif
  379. #if (BUILD_COMPLEX)
  380. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  381. #endif
  382. #if (BUILD_COMPLEX)
  383. csrot_kTS,
  384. #endif
  385. #if (BUILD_COMPLEX)
  386. caxpy_kTS,
  387. caxpyc_kTS,
  388. cscal_kTS,
  389. cswap_kTS,
  390. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  391. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  392. #endif
  393. #if (BUILD_COMPLEX)
  394. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  395. csymv_LTS, csymv_UTS,
  396. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  397. #endif
  398. #if (BUILD_COMPLEX)
  399. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  400. cgemm_betaTS,
  401. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  402. cgemm_incopyTS, cgemm_itcopyTS,
  403. #else
  404. cgemm_oncopyTS, cgemm_otcopyTS,
  405. #endif
  406. cgemm_oncopyTS, cgemm_otcopyTS,
  407. #ifdef SMALL_MATRIX_OPT
  408. cgemm_small_matrix_permitTS,
  409. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  410. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  411. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  412. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  413. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  414. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  415. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  416. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  417. #endif
  418. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  419. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  420. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  421. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  422. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  423. #else
  424. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  425. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  426. #endif
  427. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  428. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  429. #endif
  430. #endif
  431. #if (BUILD_COMPLEX)
  432. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  433. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  434. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  435. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  436. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  437. #else
  438. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  439. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  440. #endif
  441. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  442. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  443. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  444. csymm_iutcopyTS, csymm_iltcopyTS,
  445. #else
  446. csymm_outcopyTS, csymm_oltcopyTS,
  447. #endif
  448. csymm_outcopyTS, csymm_oltcopyTS,
  449. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  450. chemm_iutcopyTS, chemm_iltcopyTS,
  451. #else
  452. chemm_outcopyTS, chemm_oltcopyTS,
  453. #endif
  454. chemm_outcopyTS, chemm_oltcopyTS,
  455. 0, 0, 0,
  456. #if (USE_GEMM3M)
  457. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  458. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  459. #else
  460. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  461. #endif
  462. cgemm3m_kernelTS,
  463. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  464. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  465. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  466. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  467. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  468. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  469. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  470. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  471. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  472. csymm3m_oucopybTS, csymm3m_olcopybTS,
  473. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  474. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  475. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  476. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  477. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  478. chemm3m_oucopybTS, chemm3m_olcopybTS,
  479. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  480. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  481. #else
  482. 0, 0, 0,
  483. NULL,
  484. NULL, NULL,
  485. NULL, NULL,
  486. NULL, NULL,
  487. NULL, NULL,
  488. NULL, NULL,
  489. NULL, NULL,
  490. NULL, NULL,
  491. NULL, NULL,
  492. NULL, NULL,
  493. NULL, NULL,
  494. NULL, NULL,
  495. NULL, NULL,
  496. NULL, NULL,
  497. NULL, NULL,
  498. NULL, NULL,
  499. NULL, NULL,
  500. NULL, NULL,
  501. NULL, NULL,
  502. #endif
  503. #endif
  504. #if (BUILD_COMPLEX)
  505. #ifndef NO_LAPACK
  506. cneg_tcopyTS,
  507. claswp_ncopyTS,
  508. #else
  509. NULL, NULL,
  510. #endif
  511. #endif
  512. #if BUILD_COMPLEX16 == 1
  513. 0, 0, 0,
  514. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  515. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  516. ZGEMM_DEFAULT_UNROLL_MN,
  517. #else
  518. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  519. #endif
  520. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  521. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  522. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  523. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  524. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  525. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  526. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  527. zsymv_LTS, zsymv_UTS,
  528. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  529. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  530. zgemm_betaTS,
  531. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  532. zgemm_incopyTS, zgemm_itcopyTS,
  533. #else
  534. zgemm_oncopyTS, zgemm_otcopyTS,
  535. #endif
  536. zgemm_oncopyTS, zgemm_otcopyTS,
  537. #ifdef SMALL_MATRIX_OPT
  538. zgemm_small_matrix_permitTS,
  539. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  540. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  541. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  542. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  543. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  544. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  545. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  546. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  547. #endif
  548. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  549. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  550. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  551. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  552. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  553. #else
  554. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  555. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  556. #endif
  557. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  558. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  559. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  560. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  561. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  562. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  563. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  564. #else
  565. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  566. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  567. #endif
  568. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  569. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  570. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  571. zsymm_iutcopyTS, zsymm_iltcopyTS,
  572. #else
  573. zsymm_outcopyTS, zsymm_oltcopyTS,
  574. #endif
  575. zsymm_outcopyTS, zsymm_oltcopyTS,
  576. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  577. zhemm_iutcopyTS, zhemm_iltcopyTS,
  578. #else
  579. zhemm_outcopyTS, zhemm_oltcopyTS,
  580. #endif
  581. zhemm_outcopyTS, zhemm_oltcopyTS,
  582. 0, 0, 0,
  583. #if (USE_GEMM3M)
  584. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  585. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  586. #else
  587. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  588. #endif
  589. zgemm3m_kernelTS,
  590. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  591. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  592. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  593. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  594. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  595. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  596. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  597. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  598. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  599. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  600. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  601. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  602. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  603. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  604. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  605. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  606. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  607. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  608. #else
  609. 0, 0, 0,
  610. NULL,
  611. NULL, NULL,
  612. NULL, NULL,
  613. NULL, NULL,
  614. NULL, NULL,
  615. NULL, NULL,
  616. NULL, NULL,
  617. NULL, NULL,
  618. NULL, NULL,
  619. NULL, NULL,
  620. NULL, NULL,
  621. NULL, NULL,
  622. NULL, NULL,
  623. NULL, NULL,
  624. NULL, NULL,
  625. NULL, NULL,
  626. NULL, NULL,
  627. NULL, NULL,
  628. NULL, NULL,
  629. #endif
  630. #ifndef NO_LAPACK
  631. zneg_tcopyTS, zlaswp_ncopyTS,
  632. #else
  633. NULL, NULL,
  634. #endif
  635. #endif
  636. #ifdef EXPRECISION
  637. 0, 0, 0,
  638. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  639. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  640. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  641. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  642. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  643. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  644. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  645. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  646. xsymv_LTS, xsymv_UTS,
  647. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  648. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  649. xgemm_betaTS,
  650. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  651. xgemm_incopyTS, xgemm_itcopyTS,
  652. #else
  653. xgemm_oncopyTS, xgemm_otcopyTS,
  654. #endif
  655. xgemm_oncopyTS, xgemm_otcopyTS,
  656. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  657. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  658. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  659. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  660. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  661. #else
  662. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  663. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  664. #endif
  665. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  666. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  667. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  668. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  669. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  670. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  671. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  672. #else
  673. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  674. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  675. #endif
  676. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  677. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  678. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  679. xsymm_iutcopyTS, xsymm_iltcopyTS,
  680. #else
  681. xsymm_outcopyTS, xsymm_oltcopyTS,
  682. #endif
  683. xsymm_outcopyTS, xsymm_oltcopyTS,
  684. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  685. xhemm_iutcopyTS, xhemm_iltcopyTS,
  686. #else
  687. xhemm_outcopyTS, xhemm_oltcopyTS,
  688. #endif
  689. xhemm_outcopyTS, xhemm_oltcopyTS,
  690. 0, 0, 0,
  691. #if (USE_GEMM3M)
  692. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  693. xgemm3m_kernelTS,
  694. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  695. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  696. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  697. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  698. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  699. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  700. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  701. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  702. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  703. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  704. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  705. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  706. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  707. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  708. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  709. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  710. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  711. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  712. #else
  713. 0, 0, 0,
  714. NULL,
  715. NULL, NULL,
  716. NULL, NULL,
  717. NULL, NULL,
  718. NULL, NULL,
  719. NULL, NULL,
  720. NULL, NULL,
  721. NULL, NULL,
  722. NULL, NULL,
  723. NULL, NULL,
  724. NULL, NULL,
  725. NULL, NULL,
  726. NULL, NULL,
  727. NULL, NULL,
  728. NULL, NULL,
  729. NULL, NULL,
  730. NULL, NULL,
  731. NULL, NULL,
  732. NULL, NULL,
  733. #endif
  734. #ifndef NO_LAPACK
  735. xneg_tcopyTS, xlaswp_ncopyTS,
  736. #else
  737. NULL, NULL,
  738. #endif
  739. #endif
  740. init_parameter,
  741. SNUMOPT, DNUMOPT, QNUMOPT,
  742. #if BUILD_SINGLE == 1
  743. saxpby_kTS,
  744. #endif
  745. #if BUILD_DOUBLE == 1
  746. daxpby_kTS,
  747. #endif
  748. #if BUILD_COMPLEX == 1
  749. caxpby_kTS,
  750. #endif
  751. #if BUILD_COMPLEX16== 1
  752. zaxpby_kTS,
  753. #endif
  754. #if BUILD_SINGLE == 1
  755. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  756. #endif
  757. #if BUILD_DOUBLE== 1
  758. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  759. #endif
  760. #if BUILD_COMPLEX == 1
  761. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  762. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  763. #endif
  764. #if BUILD_COMPLEX16 == 1
  765. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  766. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  767. #endif
  768. #if BUILD_SINGLE == 1
  769. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  770. #endif
  771. #if BUILD_DOUBLE== 1
  772. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  773. #endif
  774. #if BUILD_COMPLEX== 1
  775. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  776. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  777. #endif
  778. #if BUILD_COMPLEX16==1
  779. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  780. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  781. #endif
  782. #if BUILD_SINGLE == 1
  783. sgeadd_kTS,
  784. #endif
  785. #if BUILD_DOUBLE==1
  786. dgeadd_kTS,
  787. #endif
  788. #if BUILD_COMPLEX==1
  789. cgeadd_kTS,
  790. #endif
  791. #if BUILD_COMPLEX16==1
  792. zgeadd_kTS,
  793. #endif
  794. };
  795. #if (ARCH_ARM64)
  /*
   * ARM64 variant of init_parameter().
   *
   * Copies the compile-time *_DEFAULT_P / *_DEFAULT_Q / *_DEFAULT_R macros
   * into the matching *gemm_p / *gemm_q / *gemm_r fields of TABLE_NAME.
   * Each precision is only touched when the corresponding BUILD_* switch
   * enables it; the real-valued fields are also filled when only the
   * complex counterpart is built.
   */
  static void init_parameter(void) {

    /* ---- bfloat16 ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* ---- single precision real (also set for complex-only builds) ---- */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* ---- double precision real (also set for complex16-only builds) ---- */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* ---- single precision complex ---- */
  #if (BUILD_COMPLEX == 1)
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* ---- double precision complex ---- */
  #if (BUILD_COMPLEX16 == 1)
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- extended precision (real "q" and complex "x") ---- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /*
     * ---- GEMM3M ----
     * A dedicated ?GEMM3M_DEFAULT_* macro wins when it exists; otherwise the
     * value is copied from the real-valued field of the same width, which
     * is why this section must stay after the assignments above.
     */
  #if (USE_GEMM3M)

  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* Extended-precision 3M simply mirrors the qgemm values. */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif

  #endif /* USE_GEMM3M */
  }
  888. #else // (ARCH_ARM64)
  889. #if defined(ARCH_MIPS64)
  890. static void init_parameter(void) {
  891. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  892. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  893. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  894. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  895. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  896. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  897. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  898. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  899. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  900. TABLE_NAME.dgemm_r = 640;
  901. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  902. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  903. #ifdef EXPRECISION
  904. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  905. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  906. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  907. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  908. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  909. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  910. #endif
  911. #if defined(USE_GEMM3M)
  912. #ifdef CGEMM3M_DEFAULT_P
  913. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  914. #else
  915. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  916. #endif
  917. #ifdef ZGEMM3M_DEFAULT_P
  918. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  919. #else
  920. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  921. #endif
  922. #ifdef CGEMM3M_DEFAULT_Q
  923. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  924. #else
  925. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  926. #endif
  927. #ifdef ZGEMM3M_DEFAULT_Q
  928. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  929. #else
  930. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  931. #endif
  932. #ifdef CGEMM3M_DEFAULT_R
  933. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  934. #else
  935. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  936. #endif
  937. #ifdef ZGEMM3M_DEFAULT_R
  938. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  939. #else
  940. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  941. #endif
  942. #ifdef EXPRECISION
  943. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  944. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  945. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  946. #endif
  947. #endif
  948. }
  949. #else // (ARCH_MIPS64)
  950. #if (ARCH_LOONGARCH64)
  /*
   * Return the size, in whole megabytes, of the cache described by CPUCFG
   * configuration word 0x14 (used by the caller as the L3 size).
   *
   * The word is decoded exactly as the original formula did:
   *   bits 15:0   -> count field, stored minus one
   *   bits 23:16  -> first power-of-two exponent
   *   bits 30:24  -> second power-of-two exponent
   * NOTE(review): these presumably are ways-1, log2(sets) and log2(line
   * size) - confirm against the LoongArch reference manual.
   *
   * size_in_bytes = count * 2^(exp1 + exp2); the result is that value
   * divided by 2^20 and truncated.  The computation is done purely with
   * integer shifts (no pow(), no double -> int conversion).  Encodings that
   * would not fit in an int saturate instead of overflowing.
   */
  static int get_L3_size(void) {
    int cfg = 0, id = 0x14;
    unsigned long long count;
    int log2_scale;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(cfg)
      : [id]"r"(id)
      : "memory"
    );

    count      = (unsigned long long)(cfg & 0xffff) + 1;        /* 1 .. 65536 */
    log2_scale = ((cfg >> 16) & 0xff) + ((cfg >> 24) & 0x7f);

    /* Dividing by 1 MiB is a right shift by 20; fold it into the scale. */
    if (log2_scale < 20)
      return (int)(count >> (20 - log2_scale));

    /* count <= 2^16, so a left shift of up to 14 always fits in an int. */
    if (log2_scale - 20 > 14)
      return 0x7fffffff;

    return (int)(count << (log2_scale - 20)); /* MB */
  }
  961. static void init_parameter(void) {
  962. #ifdef BUILD_BFLOAT16
  963. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  964. #endif
  965. #ifdef BUILD_BFLOAT16
  966. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  967. #endif
  968. #if defined(LA464)
  969. int L3_size = get_L3_size();
  970. #ifdef SMP
  971. if(blas_num_threads == 1){
  972. #endif
  973. //single thread
  974. if (L3_size == 32){ // 3C5000 and 3D5000
  975. TABLE_NAME.sgemm_p = 256;
  976. TABLE_NAME.sgemm_q = 384;
  977. TABLE_NAME.sgemm_r = 8192;
  978. TABLE_NAME.dgemm_p = 112;
  979. TABLE_NAME.dgemm_q = 289;
  980. TABLE_NAME.dgemm_r = 4096;
  981. TABLE_NAME.cgemm_p = 128;
  982. TABLE_NAME.cgemm_q = 256;
  983. TABLE_NAME.cgemm_r = 4096;
  984. TABLE_NAME.zgemm_p = 128;
  985. TABLE_NAME.zgemm_q = 128;
  986. TABLE_NAME.zgemm_r = 2048;
  987. } else { // 3A5000 and 3C5000L
  988. TABLE_NAME.sgemm_p = 256;
  989. TABLE_NAME.sgemm_q = 384;
  990. TABLE_NAME.sgemm_r = 4096;
  991. TABLE_NAME.dgemm_p = 112;
  992. TABLE_NAME.dgemm_q = 300;
  993. TABLE_NAME.dgemm_r = 3024;
  994. TABLE_NAME.cgemm_p = 128;
  995. TABLE_NAME.cgemm_q = 256;
  996. TABLE_NAME.cgemm_r = 2048;
  997. TABLE_NAME.zgemm_p = 128;
  998. TABLE_NAME.zgemm_q = 128;
  999. TABLE_NAME.zgemm_r = 1024;
  1000. }
  1001. #ifdef SMP
  1002. }else{
  1003. //multi thread
  1004. if (L3_size == 32){ // 3C5000 and 3D5000
  1005. TABLE_NAME.sgemm_p = 256;
  1006. TABLE_NAME.sgemm_q = 384;
  1007. TABLE_NAME.sgemm_r = 1024;
  1008. TABLE_NAME.dgemm_p = 112;
  1009. TABLE_NAME.dgemm_q = 289;
  1010. TABLE_NAME.dgemm_r = 342;
  1011. TABLE_NAME.cgemm_p = 128;
  1012. TABLE_NAME.cgemm_q = 256;
  1013. TABLE_NAME.cgemm_r = 512;
  1014. TABLE_NAME.zgemm_p = 128;
  1015. TABLE_NAME.zgemm_q = 128;
  1016. TABLE_NAME.zgemm_r = 512;
  1017. } else { // 3A5000 and 3C5000L
  1018. TABLE_NAME.sgemm_p = 256;
  1019. TABLE_NAME.sgemm_q = 384;
  1020. TABLE_NAME.sgemm_r = 2048;
  1021. TABLE_NAME.dgemm_p = 112;
  1022. TABLE_NAME.dgemm_q = 300;
  1023. TABLE_NAME.dgemm_r = 738;
  1024. TABLE_NAME.cgemm_p = 128;
  1025. TABLE_NAME.cgemm_q = 256;
  1026. TABLE_NAME.cgemm_r = 1024;
  1027. TABLE_NAME.zgemm_p = 128;
  1028. TABLE_NAME.zgemm_q = 128;
  1029. TABLE_NAME.zgemm_r = 1024;
  1030. }
  1031. }
  1032. #endif
  1033. #else
  1034. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1035. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1036. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1037. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1038. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1039. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1040. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1041. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1042. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1043. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1044. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1045. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1046. #endif
  1047. #ifdef BUILD_BFLOAT16
  1048. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1049. #endif
  1050. }
  1051. #else // (ARCH_LOONGARCH64)
  1052. #if (ARCH_POWER)
  1053. static void init_parameter(void) {
  1054. #ifdef BUILD_BFLOAT16
  1055. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1056. #endif
  1057. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1058. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1059. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1060. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1061. #ifdef BUILD_BFLOAT16
  1062. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1063. #endif
  1064. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1065. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1066. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1067. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1068. #ifdef BUILD_BFLOAT16
  1069. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1070. #endif
  1071. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1072. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1073. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1074. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1075. }
  1076. #else //POWER
  1077. #if (ARCH_ZARCH)
  1078. static void init_parameter(void) {
  1079. #ifdef BUILD_BFLOAT16
  1080. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1081. #endif
  1082. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1083. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1084. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1085. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1086. #ifdef BUILD_BFLOAT16
  1087. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1088. #endif
  1089. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1090. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1091. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1092. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1093. #ifdef BUILD_BFLOAT16
  1094. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1095. #endif
  1096. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1097. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1098. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1099. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1100. }
  1101. #else //ZARCH
  1102. #if (ARCH_RISCV64)
  1103. static void init_parameter(void) {
  1104. #ifdef BUILD_BFLOAT16
  1105. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1106. #endif
  1107. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1108. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1109. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1110. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1111. #ifdef BUILD_BFLOAT16
  1112. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1113. #endif
  1114. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1115. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1116. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1117. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1118. #ifdef BUILD_BFLOAT16
  1119. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1120. #endif
  1121. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1122. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1123. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1124. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1125. }
  1126. #else //RISCV64
  1127. #ifdef ARCH_X86
  /*
   * Legacy L2 size probe based on the CPUID leaf 2 descriptor bytes.
   *
   * Scans the descriptor bytes of EAX (bytes 1-3; byte 0 is skipped), then
   * EBX, ECX and EDX (bytes 0-3) in that order and returns the L2 size in
   * KB for the first descriptor found in the table below.  A register whose
   * bit 31 is set is marked reserved by the CPU and carries no valid
   * descriptors, so it is skipped entirely.
   *
   * Returns 256 (after printing a warning to stderr) when no known
   * descriptor is present.
   */
  static int get_l2_size_old(void){
    int eax, ebx, ecx, edx;
    int regs[4];
    int r, b, shift;

    cpuid(2, &eax, &ebx, &ecx, &edx);

    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;

    for (r = 0; r < 4; r++){

      /* bit 31 set: register contents are reserved, not descriptors */
      if (regs[r] < 0) continue;

      /* the low byte of EAX is not a descriptor, start at byte 1 there */
      for (b = (r == 0) ? 1 : 0; b < 4; b++){

        shift = 8 * b;

        switch (BITMASK(regs[r], shift, 0xff)){

          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          return 3072;   /* 3 MB; the former value 3184 was a typo */

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;
        }
      }
    }

    /* Nothing recognised: warn and fall back to a conservative default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1198. #endif
  /*
   * Determine the L2 cache size used for tuning.
   *
   * Order of preference:
   *   1. a non-zero value from readenv_atoi("OPENBLAS_L2_SIZE"), returned
   *      as-is;
   *   2. the field extracted from ECX of CPUID leaf 0x80000006, when it is
   *      positive;
   *   3. on ARCH_X86 builds, whatever get_l2_size_old() reports; on all
   *      other builds, 256 after printing a warning to stderr.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int size = readenv_atoi("OPENBLAS_L2_SIZE");

    if (size == 0) {
      /* No user override: ask the CPU. */
      cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
      size = BITMASK(ecx, 16, 0xffff);

      if (size <= 0) {
  #ifdef ARCH_X86
        /* 32-bit x86: fall back to the descriptor-based probe. */
        size = get_l2_size_old();
  #else
        fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
        size = 256;
  #endif
      }
    }

    return size;
  }
  /*
   * Report the L3 cache size derived from CPUID leaf 0x80000006.
   *
   * The value is the field BITMASK(edx, 18, 0x3fff) multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the
   * result a size in KB - confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    units = BITMASK(edx, 18, 0x3fff);
    return units * 512;
  }
  1222. static void init_parameter(void) {
  1223. int l2 = get_l2_size();
  1224. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1225. /* where the GEMM unrolling parameters do not depend on l2 */
  1226. #ifdef BUILD_BFLOAT16
  1227. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1228. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1229. #endif
  1230. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1231. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1232. #endif
  1233. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1234. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1235. #endif
  1236. #if BUILD_COMPLEX == 1
  1237. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1238. #endif
  1239. #if BUILD_COMPLEX16==1
  1240. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1241. #endif
  1242. #if BUILD_COMPLEX == 1
  1243. #ifdef CGEMM3M_DEFAULT_Q
  1244. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1245. #else
  1246. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1247. #endif
  1248. #endif
  1249. #if BUILD_COMPLEX16 == 1
  1250. #ifdef ZGEMM3M_DEFAULT_Q
  1251. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1252. #else
  1253. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1254. #endif
  1255. #endif
  1256. #ifdef EXPRECISION
  1257. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1258. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1259. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1260. #endif
  1261. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1262. #ifdef DEBUG
  1263. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1264. #endif
  1265. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1266. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1267. #endif
  1268. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1269. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1270. #endif
  1271. #if BUILD_COMPLEX==1
  1272. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1273. #endif
  1274. #if BUILD_COMPLEX16==1
  1275. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1276. #endif
  1277. #ifdef EXPRECISION
  1278. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1279. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1280. #endif
  1281. #endif
  1282. #ifdef CORE_NORTHWOOD
  1283. #ifdef DEBUG
  1284. fprintf(stderr, "Northwood\n");
  1285. #endif
  1286. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1287. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1288. #endif
  1289. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1290. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1291. #endif
  1292. #if BUILD_COMPLEX==1
  1293. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1294. #endif
  1295. #if BUILD_COMPLEX16==1
  1296. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1297. #endif
  1298. #ifdef EXPRECISION
  1299. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1300. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1301. #endif
  1302. #endif
  1303. #ifdef ATOM
  1304. #ifdef DEBUG
  1305. fprintf(stderr, "Atom\n");
  1306. #endif
  1307. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1308. TABLE_NAME.sgemm_p = 256;
  1309. #endif
  1310. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1311. TABLE_NAME.dgemm_p = 128;
  1312. #endif
  1313. #if BUILD_COMPLEX==1
  1314. TABLE_NAME.cgemm_p = 128;
  1315. #endif
  1316. #if BUILD_COMPLEX16==1
  1317. TABLE_NAME.zgemm_p = 64;
  1318. #endif
  1319. #ifdef EXPRECISION
  1320. TABLE_NAME.qgemm_p = 64;
  1321. TABLE_NAME.xgemm_p = 32;
  1322. #endif
  1323. #endif
  1324. #ifdef CORE_PRESCOTT
  1325. #ifdef DEBUG
  1326. fprintf(stderr, "Prescott\n");
  1327. #endif
  1328. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1329. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1330. #endif
  1331. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1332. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1333. #endif
  1334. #if BUILD_COMPLEX==1
  1335. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1336. #endif
  1337. #if BUILD_COMPLEX16 == 1
  1338. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1339. #endif
  1340. #ifdef EXPRECISION
  1341. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1342. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1343. #endif
  1344. #endif
  1345. #ifdef CORE2
  1346. #ifdef DEBUG
  1347. fprintf(stderr, "Core2\n");
  1348. #endif
  1349. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1350. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1351. #endif
  1352. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1353. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1354. #endif
  1355. #if BUILD_COMPLEX==1
  1356. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1357. #endif
  1358. #if BUILD_COMPLEX16==1
  1359. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1360. #endif
  1361. #ifdef EXPRECISION
  1362. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1363. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1364. #endif
  1365. #endif
  1366. #ifdef PENRYN
  1367. #ifdef DEBUG
  1368. fprintf(stderr, "Penryn\n");
  1369. #endif
  1370. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1371. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1372. #endif
  1373. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1374. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1375. #endif
  1376. #if BUILD_COMPLEX==1
  1377. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1378. #endif
  1379. #if BUILD_COMPLEX16==1
  1380. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1381. #endif
  1382. #ifdef EXPRECISION
  1383. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1384. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1385. #endif
  1386. #endif
  1387. #ifdef DUNNINGTON
  1388. #ifdef DEBUG
  1389. fprintf(stderr, "Dunnington\n");
  1390. #endif
  1391. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1392. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1393. #endif
  1394. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1395. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1396. #endif
  1397. #if BUILD_COMPLEX==1
  1398. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1399. #endif
  1400. #if BUILD_COMPLEX16==1
  1401. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1402. #endif
  1403. #ifdef EXPRECISION
  1404. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1405. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1406. #endif
  1407. #endif
  1408. #ifdef NEHALEM
  1409. #ifdef DEBUG
  1410. fprintf(stderr, "Nehalem\n");
  1411. #endif
  1412. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1413. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1414. #endif
  1415. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1416. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1417. #endif
  1418. #if BUILD_COMPLEX
  1419. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1420. #endif
  1421. #if BUILD_COMPLEX16
  1422. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1423. #endif
  1424. #ifdef EXPRECISION
  1425. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1426. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1427. #endif
  1428. #endif
  1429. #ifdef SANDYBRIDGE
  1430. #ifdef DEBUG
  1431. fprintf(stderr, "Sandybridge\n");
  1432. #endif
  1433. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1434. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1435. #endif
  1436. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1437. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1438. #endif
  1439. #if BUILD_COMPLEX
  1440. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1441. #endif
  1442. #if BUILD_COMPLEX16
  1443. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1444. #endif
  1445. #ifdef EXPRECISION
  1446. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1447. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1448. #endif
  1449. #endif
  1450. #ifdef HASWELL
  1451. #ifdef DEBUG
  1452. fprintf(stderr, "Haswell\n");
  1453. #endif
  1454. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1455. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1456. #endif
  1457. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1458. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1459. #endif
  1460. #if BUILD_COMPLEX
  1461. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1462. #endif
  1463. #if BUILD_COMPLEX16
  1464. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1465. #endif
  1466. #ifdef EXPRECISION
  1467. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1468. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1469. #endif
  1470. #endif
  1471. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1472. #ifdef DEBUG
  1473. fprintf(stderr, "SkylakeX\n");
  1474. #endif
  1475. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1476. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1477. #endif
  1478. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1479. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1480. #endif
  1481. #if BUILD_COMPLEX
  1482. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1483. #endif
  1484. #if BUILD_COMPLEX16
  1485. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1486. #endif
  1487. #ifdef EXPRECISION
  1488. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1489. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1490. #endif
  1491. #endif
  1492. #ifdef OPTERON
  1493. #ifdef DEBUG
  1494. fprintf(stderr, "Opteron\n");
  1495. #endif
  1496. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1497. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1498. #endif
  1499. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1500. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1501. #endif
  1502. #if BUILD_COMPLEX
  1503. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1504. #endif
  1505. #if BUILD_COMPLEX16
  1506. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1507. #endif
  1508. #ifdef EXPRECISION
  1509. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1510. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1511. #endif
  1512. #endif
  1513. #ifdef BARCELONA
  1514. #ifdef DEBUG
  1515. fprintf(stderr, "Barcelona\n");
  1516. #endif
  1517. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1518. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1519. #endif
  1520. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1521. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1522. #endif
  1523. #if BUILD_COMPLEX
  1524. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1525. #endif
  1526. #if BUILD_COMPLEX16
  1527. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1528. #endif
  1529. #ifdef EXPRECISION
  1530. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1531. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1532. #endif
  1533. #endif
  1534. #ifdef BOBCAT
  1535. #ifdef DEBUG
  1536. fprintf(stderr, "Bobcate\n");
  1537. #endif
  1538. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1539. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1540. #endif
  1541. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1542. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1543. #endif
  1544. #if BUILD_COMPLEX
  1545. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1546. #endif
  1547. #if BUILD_COMPLEX16
  1548. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1549. #endif
  1550. #ifdef EXPRECISION
  1551. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1552. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1553. #endif
  1554. #endif
  1555. #ifdef BULLDOZER
  1556. #ifdef DEBUG
  1557. fprintf(stderr, "Bulldozer\n");
  1558. #endif
  1559. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1560. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1561. #endif
  1562. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1563. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1564. #endif
  1565. #if BUILD_COMPLEX
  1566. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1567. #endif
  1568. #if BUILD_COMPLEX16
  1569. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1570. #endif
  1571. #ifdef EXPRECISION
  1572. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1573. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1574. #endif
  1575. #endif
  1576. #ifdef EXCAVATOR
  1577. #ifdef DEBUG
  1578. fprintf(stderr, "Excavator\n");
  1579. #endif
  1580. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1581. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1582. #endif
  1583. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1584. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1585. #endif
  1586. #if BUILD_COMPLEX
  1587. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1588. #endif
  1589. #if BUILD_COMPLEX16
  1590. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1591. #endif
  1592. #ifdef EXPRECISION
  1593. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1594. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1595. #endif
  1596. #endif
  1597. #ifdef PILEDRIVER
  1598. #ifdef DEBUG
  1599. fprintf(stderr, "Piledriver\n");
  1600. #endif
  1601. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1602. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1603. #endif
  1604. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1605. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1606. #endif
  1607. #if BUILD_COMPLEX
  1608. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1609. #endif
  1610. #if BUILD_COMPLEX16
  1611. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1612. #endif
  1613. #ifdef EXPRECISION
  1614. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1615. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1616. #endif
  1617. #endif
  1618. #ifdef STEAMROLLER
  1619. #ifdef DEBUG
  1620. fprintf(stderr, "Steamroller\n");
  1621. #endif
  1622. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1623. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1624. #endif
  1625. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1626. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1627. #endif
  1628. #if BUILD_COMPLEX
  1629. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1630. #endif
  1631. #if BUILD_COMPLEX16
  1632. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1633. #endif
  1634. #ifdef EXPRECISION
  1635. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1636. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1637. #endif
  1638. #endif
  1639. #ifdef ZEN
  1640. #ifdef DEBUG
  1641. fprintf(stderr, "Zen\n");
  1642. #endif
  1643. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1644. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1645. #endif
  1646. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1647. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1648. #endif
  1649. #if BUILD_COMPLEX
  1650. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1651. #endif
  1652. #if BUILD_COMPLEX16
  1653. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1654. #endif
  1655. #ifdef EXPRECISION
  1656. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1657. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1658. #endif
  1659. #endif
  1660. #ifdef NANO
  1661. #ifdef DEBUG
  1662. fprintf(stderr, "NANO\n");
  1663. #endif
  1664. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1665. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1666. #endif
  1667. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1668. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1669. #endif
  1670. #if (BUILD_COMPLEX==1)
  1671. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1672. #endif
  1673. #if (BUILD_COMPLEX16==1)
  1674. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1675. #endif
  1676. #ifdef EXPRECISION
  1677. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1678. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1679. #endif
  1680. #endif
  1681. #ifdef SAPPHIRERAPIDS
  1682. #if (BUILD_BFLOAT16 == 1)
  1683. TABLE_NAME.need_amxtile_permission = 1;
  1684. #endif
  1685. #endif
  1686. #if BUILD_COMPLEX==1
  1687. #ifdef CGEMM3M_DEFAULT_P
  1688. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1689. #else
  1690. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1691. #endif
  1692. #endif
  1693. #if BUILD_COMPLEX16==1
  1694. #ifdef ZGEMM3M_DEFAULT_P
  1695. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1696. #else
  1697. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1698. #endif
  1699. #endif
  1700. #ifdef EXPRECISION
  1701. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1702. #endif
  1703. #if BUILD_SINGLE == 1
  1704. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1705. #endif
  1706. #if BUILD_DOUBLE== 1
  1707. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1708. #endif
  1709. #if BUILD_COMPLEX==1
  1710. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1711. #endif
  1712. #if BUILD_COMPLEX16==1
  1713. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1714. #endif
  1715. #if BUILD_COMPLEX==1
  1716. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1717. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1718. #else
  1719. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1720. #endif
  1721. #endif
  1722. #if BUILD_COMPLEX16==1
  1723. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1724. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1725. #else
  1726. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1727. #endif
  1728. #endif
  1729. #ifdef QUAD_PRECISION
  1730. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1731. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1732. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1733. #endif
  1734. #ifdef DEBUG
  1735. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1736. #endif
  1737. #if BUILD_BFLOAT16==1
  1738. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1739. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1740. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1741. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1742. #endif
  1743. #if BUILD_SINGLE==1
  1744. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1745. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1746. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1747. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1748. #endif
  1749. #if BUILD_DOUBLE==1
  1750. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1751. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1752. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1753. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1754. #endif
  1755. #ifdef EXPRECISION
  1756. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1757. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1758. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1759. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1760. #endif
  1761. #if BUILD_COMPLEX ==1
  1762. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1763. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1764. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1765. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1766. #endif
  1767. #if BUILD_COMPLEX16 ==1
  1768. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1769. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1770. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1771. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1772. #endif
  1773. #if BUILD_COMPLEX == 1
  1774. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1775. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1776. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1777. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1778. #endif
  1779. #if BUILD_COMPLEX16 == 1
  1780. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1781. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1782. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1783. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1784. #endif
  1785. #ifdef EXPRECISION
  1786. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1787. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1788. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1789. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1790. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1791. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1792. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1793. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1794. #endif
  1795. }
  1796. #endif //RISCV64
  1797. #endif //POWER
  1798. #endif //ZARCH
  1799. #endif //(ARCH_LOONGARCH64)
  1800. #endif //(ARCH_MIPS64)
  1801. #endif //(ARCH_ARM64)