You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 59 kB

7 years ago
7 years ago
7 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG /* make sure DEBUG is not defined from here on, whatever was defined before this point */
  46. static void init_parameter(void); /* forward declaration: file-local (static), takes no arguments, returns nothing; NOTE(review): presumably defined further down in this file - confirm */
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. 0, // need_amxtile_permission
  61. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  62. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  63. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  64. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  65. dsdot_kTS,
  66. srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  67. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  68. ssymv_LTS, ssymv_UTS,
  69. sbgemm_kernelTS, sbgemm_betaTS,
  70. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  71. sbgemm_incopyTS, sbgemm_itcopyTS,
  72. #else
  73. sbgemm_oncopyTS, sbgemm_otcopyTS,
  74. #endif
  75. sbgemm_oncopyTS, sbgemm_otcopyTS,
  76. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  77. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  78. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  79. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  80. #else
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. #endif
  84. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  85. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  86. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  87. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  88. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  89. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  90. #else
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #endif
  94. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  95. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  96. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  97. ssymm_iutcopyTS, ssymm_iltcopyTS,
  98. #else
  99. ssymm_outcopyTS, ssymm_oltcopyTS,
  100. #endif
  101. ssymm_outcopyTS, ssymm_oltcopyTS,
  102. #ifndef NO_LAPACK
  103. sneg_tcopyTS, slaswp_ncopyTS,
  104. #else
  105. NULL,NULL,
  106. #endif
  107. #ifdef SMALL_MATRIX_OPT
  108. sbgemm_small_matrix_permitTS,
  109. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  110. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  111. #endif
  112. #endif
  113. #ifdef BUILD_HFLOAT16
  114. 0, 0, 0,
  115. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  116. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  117. SHGEMM_DEFAULT_UNROLL_MN,
  118. #else
  119. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  120. #endif
  121. shgemm_kernelTS, shgemm_betaTS,
  122. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  123. shgemm_incopyTS, shgemm_itcopyTS,
  124. #else
  125. shgemm_oncopyTS, shgemm_otcopyTS,
  126. #endif
  127. shgemm_oncopyTS, shgemm_otcopyTS,
  128. #endif
  129. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  130. 0, 0, 0,
  131. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  132. #ifdef SGEMM_DEFAULT_UNROLL_MN
  133. SGEMM_DEFAULT_UNROLL_MN,
  134. #else
  135. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  136. #endif
  137. #endif
  138. #ifdef HAVE_EXCLUSIVE_CACHE
  139. 1,
  140. #else
  141. 0,
  142. #endif
  143. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  144. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  145. #endif
  146. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  147. isamax_kTS,
  148. #endif
  149. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  150. isamin_kTS, ismax_kTS, ismin_kTS,
  151. snrm2_kTS, sasum_kTS,
  152. #endif
  153. #if BUILD_SINGLE == 1
  154. ssum_kTS,
  155. #endif
  156. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  157. scopy_kTS, sdot_kTS,
  158. // dsdot_kTS,
  159. srot_kTS, srotm_kTS, saxpy_kTS,
  160. #endif
  161. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  162. sscal_kTS,
  163. #endif
  164. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  165. sswap_kTS,
  166. sgemv_nTS, sgemv_tTS,
  167. #endif
  168. #if BUILD_SINGLE == 1
  169. sger_kTS,
  170. #endif
  171. #if BUILD_SINGLE == 1
  172. ssymv_LTS, ssymv_UTS,
  173. #endif
  174. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  175. #ifdef ARCH_X86_64
  176. sgemm_directTS,
  177. sgemm_direct_performantTS,
  178. #endif
  179. #ifdef ARCH_ARM64
  180. sgemm_directTS,
  181. sgemm_direct_alpha_betaTS,
  182. #endif
  183. sgemm_kernelTS, sgemm_betaTS,
  184. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  185. sgemm_incopyTS, sgemm_itcopyTS,
  186. #else
  187. sgemm_oncopyTS, sgemm_otcopyTS,
  188. #endif
  189. sgemm_oncopyTS, sgemm_otcopyTS,
  190. #endif
  191. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  192. #ifdef SMALL_MATRIX_OPT
  193. sgemm_small_matrix_permitTS,
  194. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  195. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  196. #endif
  197. #endif
  198. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  199. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  200. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  201. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  202. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  203. #else
  204. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  205. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  206. #endif
  207. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  208. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  209. #endif
  210. #if (BUILD_SINGLE==1)
  211. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  212. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  213. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  214. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  215. #else
  216. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  217. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  218. #endif
  219. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  220. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  221. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  222. ssymm_iutcopyTS, ssymm_iltcopyTS,
  223. #else
  224. ssymm_outcopyTS, ssymm_oltcopyTS,
  225. #endif
  226. ssymm_outcopyTS, ssymm_oltcopyTS,
  227. #ifndef NO_LAPACK
  228. sneg_tcopyTS, slaswp_ncopyTS,
  229. #else
  230. NULL,NULL,
  231. #endif
  232. #endif
  233. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  234. 0, 0, 0,
  235. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  236. #ifdef DGEMM_DEFAULT_UNROLL_MN
  237. DGEMM_DEFAULT_UNROLL_MN,
  238. #else
  239. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  240. #endif
  241. #endif
  242. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  243. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  244. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  245. dnrm2_kTS, dasum_kTS,
  246. #endif
  247. #if (BUILD_DOUBLE==1)
  248. dsum_kTS,
  249. #endif
  250. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  251. dcopy_kTS, ddot_kTS,
  252. #endif
  253. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  254. dsdot_kTS,
  255. #endif
  256. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  257. drot_kTS,
  258. drotm_kTS,
  259. daxpy_kTS,
  260. dscal_kTS,
  261. dswap_kTS,
  262. dgemv_nTS, dgemv_tTS,
  263. #endif
  264. #if (BUILD_DOUBLE==1)
  265. dger_kTS,
  266. dsymv_LTS, dsymv_UTS,
  267. #endif
  268. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  269. dgemm_kernelTS, dgemm_betaTS,
  270. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  271. dgemm_incopyTS, dgemm_itcopyTS,
  272. #else
  273. dgemm_oncopyTS, dgemm_otcopyTS,
  274. #endif
  275. dgemm_oncopyTS, dgemm_otcopyTS,
  276. #endif
  277. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  278. #ifdef SMALL_MATRIX_OPT
  279. dgemm_small_matrix_permitTS,
  280. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  281. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  282. #endif
  283. #endif
  284. #if (BUILD_DOUBLE==1)
  285. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  286. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  287. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  288. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  289. #else
  290. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  291. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  292. #endif
  293. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  294. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  295. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  296. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  297. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  298. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  299. #else
  300. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  301. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  302. #endif
  303. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  304. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  305. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  306. dsymm_iutcopyTS, dsymm_iltcopyTS,
  307. #else
  308. dsymm_outcopyTS, dsymm_oltcopyTS,
  309. #endif
  310. dsymm_outcopyTS, dsymm_oltcopyTS,
  311. #ifndef NO_LAPACK
  312. dneg_tcopyTS, dlaswp_ncopyTS,
  313. #else
  314. NULL, NULL,
  315. #endif
  316. #endif
  317. #ifdef EXPRECISION
  318. 0, 0, 0,
  319. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  320. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  321. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  322. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  323. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  324. qgemv_nTS, qgemv_tTS, qger_kTS,
  325. qsymv_LTS, qsymv_UTS,
  326. qgemm_kernelTS, qgemm_betaTS,
  327. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  328. qgemm_incopyTS, qgemm_itcopyTS,
  329. #else
  330. qgemm_oncopyTS, qgemm_otcopyTS,
  331. #endif
  332. qgemm_oncopyTS, qgemm_otcopyTS,
  333. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  334. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  335. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  336. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  337. #else
  338. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  339. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  340. #endif
  341. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  342. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  343. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  344. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  345. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  346. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  347. #else
  348. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  349. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  350. #endif
  351. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  352. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  353. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  354. qsymm_iutcopyTS, qsymm_iltcopyTS,
  355. #else
  356. qsymm_outcopyTS, qsymm_oltcopyTS,
  357. #endif
  358. qsymm_outcopyTS, qsymm_oltcopyTS,
  359. #ifndef NO_LAPACK
  360. qneg_tcopyTS, qlaswp_ncopyTS,
  361. #else
  362. NULL, NULL,
  363. #endif
  364. #endif
  365. #if (BUILD_COMPLEX)
  366. 0, 0, 0,
  367. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  368. #ifdef CGEMM_DEFAULT_UNROLL_MN
  369. CGEMM_DEFAULT_UNROLL_MN,
  370. #else
  371. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  372. #endif
  373. #if (BUILD_COMPLEX)
  374. camax_kTS, camin_kTS,
  375. #endif
  376. #if (BUILD_COMPLEX)
  377. icamax_kTS,
  378. #endif
  379. #if (BUILD_COMPLEX)
  380. icamin_kTS,
  381. cnrm2_kTS, casum_kTS, csum_kTS,
  382. #endif
  383. #if (BUILD_COMPLEX)
  384. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  385. #endif
  386. #if (BUILD_COMPLEX)
  387. csrot_kTS,
  388. #endif
  389. #if (BUILD_COMPLEX)
  390. caxpy_kTS,
  391. caxpyc_kTS,
  392. cscal_kTS,
  393. cswap_kTS,
  394. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  395. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  396. #endif
  397. #if (BUILD_COMPLEX)
  398. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  399. csymv_LTS, csymv_UTS,
  400. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  401. #endif
  402. #if (BUILD_COMPLEX)
  403. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  404. cgemm_betaTS,
  405. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  406. cgemm_incopyTS, cgemm_itcopyTS,
  407. #else
  408. cgemm_oncopyTS, cgemm_otcopyTS,
  409. #endif
  410. cgemm_oncopyTS, cgemm_otcopyTS,
  411. #ifdef SMALL_MATRIX_OPT
  412. cgemm_small_matrix_permitTS,
  413. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  414. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  415. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  416. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  417. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  418. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  419. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  420. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  421. #endif
  422. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  423. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  424. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  425. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  426. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  427. #else
  428. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  429. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  430. #endif
  431. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  432. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  433. #endif
  434. #endif
  435. #if (BUILD_COMPLEX)
  436. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  437. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  438. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  439. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  440. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  441. #else
  442. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  443. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  444. #endif
  445. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  446. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  447. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  448. csymm_iutcopyTS, csymm_iltcopyTS,
  449. #else
  450. csymm_outcopyTS, csymm_oltcopyTS,
  451. #endif
  452. csymm_outcopyTS, csymm_oltcopyTS,
  453. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  454. chemm_iutcopyTS, chemm_iltcopyTS,
  455. #else
  456. chemm_outcopyTS, chemm_oltcopyTS,
  457. #endif
  458. chemm_outcopyTS, chemm_oltcopyTS,
  459. 0, 0, 0,
  460. #if (USE_GEMM3M)
  461. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  462. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  463. #else
  464. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  465. #endif
  466. cgemm3m_kernelTS,
  467. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  468. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  469. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  470. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  471. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  472. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  473. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  474. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  475. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  476. csymm3m_oucopybTS, csymm3m_olcopybTS,
  477. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  478. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  479. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  480. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  481. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  482. chemm3m_oucopybTS, chemm3m_olcopybTS,
  483. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  484. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  485. #else
  486. 0, 0, 0,
  487. NULL,
  488. NULL, NULL,
  489. NULL, NULL,
  490. NULL, NULL,
  491. NULL, NULL,
  492. NULL, NULL,
  493. NULL, NULL,
  494. NULL, NULL,
  495. NULL, NULL,
  496. NULL, NULL,
  497. NULL, NULL,
  498. NULL, NULL,
  499. NULL, NULL,
  500. NULL, NULL,
  501. NULL, NULL,
  502. NULL, NULL,
  503. NULL, NULL,
  504. NULL, NULL,
  505. NULL, NULL,
  506. #endif
  507. #endif
  508. #if (BUILD_COMPLEX)
  509. #ifndef NO_LAPACK
  510. cneg_tcopyTS,
  511. claswp_ncopyTS,
  512. #else
  513. NULL, NULL,
  514. #endif
  515. #endif
  516. #if BUILD_COMPLEX16 == 1
  517. 0, 0, 0,
  518. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  519. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  520. ZGEMM_DEFAULT_UNROLL_MN,
  521. #else
  522. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  523. #endif
  524. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  525. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  526. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  527. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  528. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  529. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  530. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  531. zsymv_LTS, zsymv_UTS,
  532. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  533. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  534. zgemm_betaTS,
  535. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  536. zgemm_incopyTS, zgemm_itcopyTS,
  537. #else
  538. zgemm_oncopyTS, zgemm_otcopyTS,
  539. #endif
  540. zgemm_oncopyTS, zgemm_otcopyTS,
  541. #ifdef SMALL_MATRIX_OPT
  542. zgemm_small_matrix_permitTS,
  543. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  544. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  545. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  546. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  547. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  548. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  549. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  550. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  551. #endif
  552. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  553. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  554. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  555. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  556. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  557. #else
  558. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  559. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  560. #endif
  561. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  562. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  563. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  564. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  565. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  566. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  567. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  568. #else
  569. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  570. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  571. #endif
  572. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  573. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  574. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  575. zsymm_iutcopyTS, zsymm_iltcopyTS,
  576. #else
  577. zsymm_outcopyTS, zsymm_oltcopyTS,
  578. #endif
  579. zsymm_outcopyTS, zsymm_oltcopyTS,
  580. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  581. zhemm_iutcopyTS, zhemm_iltcopyTS,
  582. #else
  583. zhemm_outcopyTS, zhemm_oltcopyTS,
  584. #endif
  585. zhemm_outcopyTS, zhemm_oltcopyTS,
  586. 0, 0, 0,
  587. #if (USE_GEMM3M)
  588. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  589. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  590. #else
  591. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  592. #endif
  593. zgemm3m_kernelTS,
  594. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  595. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  596. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  597. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  598. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  599. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  600. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  601. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  602. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  603. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  604. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  605. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  606. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  607. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  608. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  609. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  610. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  611. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  612. #else
  613. 0, 0, 0,
  614. NULL,
  615. NULL, NULL,
  616. NULL, NULL,
  617. NULL, NULL,
  618. NULL, NULL,
  619. NULL, NULL,
  620. NULL, NULL,
  621. NULL, NULL,
  622. NULL, NULL,
  623. NULL, NULL,
  624. NULL, NULL,
  625. NULL, NULL,
  626. NULL, NULL,
  627. NULL, NULL,
  628. NULL, NULL,
  629. NULL, NULL,
  630. NULL, NULL,
  631. NULL, NULL,
  632. NULL, NULL,
  633. #endif
  634. #ifndef NO_LAPACK
  635. zneg_tcopyTS, zlaswp_ncopyTS,
  636. #else
  637. NULL, NULL,
  638. #endif
  639. #endif
  640. #ifdef EXPRECISION
  641. 0, 0, 0,
  642. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  643. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  644. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  645. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  646. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  647. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  648. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  649. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  650. xsymv_LTS, xsymv_UTS,
  651. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  652. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  653. xgemm_betaTS,
  654. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  655. xgemm_incopyTS, xgemm_itcopyTS,
  656. #else
  657. xgemm_oncopyTS, xgemm_otcopyTS,
  658. #endif
  659. xgemm_oncopyTS, xgemm_otcopyTS,
  660. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  661. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  662. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  663. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  664. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  665. #else
  666. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  667. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  668. #endif
  669. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  670. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  671. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  672. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  673. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  674. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  675. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  676. #else
  677. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  678. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  679. #endif
  680. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  681. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  682. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  683. xsymm_iutcopyTS, xsymm_iltcopyTS,
  684. #else
  685. xsymm_outcopyTS, xsymm_oltcopyTS,
  686. #endif
  687. xsymm_outcopyTS, xsymm_oltcopyTS,
  688. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  689. xhemm_iutcopyTS, xhemm_iltcopyTS,
  690. #else
  691. xhemm_outcopyTS, xhemm_oltcopyTS,
  692. #endif
  693. xhemm_outcopyTS, xhemm_oltcopyTS,
  694. 0, 0, 0,
  695. #if (USE_GEMM3M)
  696. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  697. xgemm3m_kernelTS,
  698. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  699. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  700. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  701. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  702. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  703. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  704. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  705. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  706. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  707. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  708. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  709. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  710. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  711. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  712. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  713. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  714. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  715. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  716. #else
  717. 0, 0, 0,
  718. NULL,
  719. NULL, NULL,
  720. NULL, NULL,
  721. NULL, NULL,
  722. NULL, NULL,
  723. NULL, NULL,
  724. NULL, NULL,
  725. NULL, NULL,
  726. NULL, NULL,
  727. NULL, NULL,
  728. NULL, NULL,
  729. NULL, NULL,
  730. NULL, NULL,
  731. NULL, NULL,
  732. NULL, NULL,
  733. NULL, NULL,
  734. NULL, NULL,
  735. NULL, NULL,
  736. NULL, NULL,
  737. #endif
  738. #ifndef NO_LAPACK
  739. xneg_tcopyTS, xlaswp_ncopyTS,
  740. #else
  741. NULL, NULL,
  742. #endif
  743. #endif
  744. init_parameter,
  745. SNUMOPT, DNUMOPT, QNUMOPT,
  746. #if BUILD_SINGLE == 1
  747. saxpby_kTS,
  748. #endif
  749. #if BUILD_DOUBLE == 1
  750. daxpby_kTS,
  751. #endif
  752. #if BUILD_COMPLEX == 1
  753. caxpby_kTS,
  754. #endif
  755. #if BUILD_COMPLEX16== 1
  756. zaxpby_kTS,
  757. #endif
  758. #if BUILD_SINGLE == 1
  759. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  760. #endif
  761. #if BUILD_DOUBLE== 1
  762. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  763. #endif
  764. #if BUILD_COMPLEX == 1
  765. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  766. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  767. #endif
  768. #if BUILD_COMPLEX16 == 1
  769. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  770. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  771. #endif
  772. #if BUILD_SINGLE == 1
  773. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  774. #endif
  775. #if BUILD_DOUBLE== 1
  776. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  777. #endif
  778. #if BUILD_COMPLEX== 1
  779. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  780. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  781. #endif
  782. #if BUILD_COMPLEX16==1
  783. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  784. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  785. #endif
  786. #if BUILD_SINGLE == 1
  787. sgeadd_kTS,
  788. #endif
  789. #if BUILD_DOUBLE==1
  790. dgeadd_kTS,
  791. #endif
  792. #if BUILD_COMPLEX==1
  793. cgeadd_kTS,
  794. #endif
  795. #if BUILD_COMPLEX16==1
  796. zgeadd_kTS,
  797. #endif
  798. };
  799. #if (ARCH_ARM64)
  /*
   * ARM64 variant of init_parameter().
   *
   * Copies the compile-time GEMM blocking defaults (the *_DEFAULT_P/Q/R macros)
   * into the matching TABLE_NAME fields.  Each precision is only written when
   * its BUILD_* switch enables it.  The GEMM3M fields fall back to the values
   * just stored for the corresponding real precision when no dedicated
   * *GEMM3M_DEFAULT_* macro exists, so that section must stay last.
   */
  static void init_parameter(void) {

    /* bfloat16 */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* single precision real (also needed by the single complex build) */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* double precision real (also needed by the double complex build) */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* single precision complex */
  #if BUILD_COMPLEX == 1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* double precision complex */
  #if BUILD_COMPLEX16 == 1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* extended precision, real and complex */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if (USE_GEMM3M)
    /* cgemm3m: dedicated default if provided, otherwise reuse sgemm's value */
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

    /* zgemm3m: dedicated default if provided, otherwise reuse dgemm's value */
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* xgemm3m always mirrors the qgemm values stored above */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  892. #else // (ARCH_ARM64)
  893. #if defined(ARCH_MIPS64)
  894. static void init_parameter(void) {
  895. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  896. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  897. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  898. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  899. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  900. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  901. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  902. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  903. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  904. TABLE_NAME.dgemm_r = 640;
  905. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  906. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  907. #ifdef EXPRECISION
  908. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  909. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  910. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  911. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  912. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  913. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  914. #endif
  915. #if defined(USE_GEMM3M)
  916. #ifdef CGEMM3M_DEFAULT_P
  917. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  918. #else
  919. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  920. #endif
  921. #ifdef ZGEMM3M_DEFAULT_P
  922. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  923. #else
  924. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  925. #endif
  926. #ifdef CGEMM3M_DEFAULT_Q
  927. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  928. #else
  929. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  930. #endif
  931. #ifdef ZGEMM3M_DEFAULT_Q
  932. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  933. #else
  934. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  935. #endif
  936. #ifdef CGEMM3M_DEFAULT_R
  937. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  938. #else
  939. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  940. #endif
  941. #ifdef ZGEMM3M_DEFAULT_R
  942. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  943. #else
  944. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  945. #endif
  946. #ifdef EXPRECISION
  947. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  948. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  949. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  950. #endif
  951. #endif
  952. }
  953. #else // (ARCH_MIPS64)
  954. #if (ARCH_LOONGARCH64)
  /*
   * Return the L3 cache size in MB as reported by LoongArch CPUCFG word 0x14.
   *
   * The size in bytes is (bits[15:0] + 1) * 2^bits[23:16] * 2^bits[30:24];
   * the result is that value divided by 2^20, rounded toward zero.
   * NOTE(review): the three fields are presumably "ways - 1", log2(sets per
   * way) and log2(line size) -- confirm against the LoongArch manual.
   *
   * The computation uses integer shifts only (no pow(), hence no libm and no
   * double -> int conversion).  If the reported size would not fit in a
   * 32-bit int, 0 is returned instead of invoking undefined behaviour.
   */
  static int get_L3_size(void) {
    int cfg = 0, word = 0x14;
    unsigned long long factor, size_mb;
    int log2_scale;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(cfg)
      : [id]"r"(word)
      : "memory"
    );

    /* factor is at most 0x10000; log2_scale is the sum of both exponents */
    factor     = (unsigned long long)((cfg & 0xffff) + 1);
    log2_scale = ((cfg >> 16) & 0xff) + ((cfg >> 24) & 0x7f);

    /* Less than 2^20 of scaling: dividing down to MB is a right shift. */
    if (log2_scale < 20)
      return (int)(factor >> (20 - log2_scale));

    /* factor < 2^17, so shifting left by up to 40 cannot overflow 64 bits. */
    if (log2_scale - 20 > 40)
      return 0;

    size_mb = factor << (log2_scale - 20);

    /* 0x7fffffff == INT_MAX for the 32-bit int used on this target */
    return (size_mb > 0x7fffffffULL) ? 0 : (int)size_mb;
  }
  965. static void init_parameter(void) {
  966. #ifdef BUILD_BFLOAT16
  967. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  968. #endif
  969. #ifdef BUILD_BFLOAT16
  970. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  971. #endif
  972. #if defined(LA464)
  973. int L3_size = get_L3_size();
  974. #ifdef SMP
  975. if(blas_num_threads == 1){
  976. #endif
  977. //single thread
  978. if (L3_size == 32){ // 3C5000 and 3D5000
  979. TABLE_NAME.sgemm_p = 256;
  980. TABLE_NAME.sgemm_q = 384;
  981. TABLE_NAME.sgemm_r = 8192;
  982. TABLE_NAME.dgemm_p = 112;
  983. TABLE_NAME.dgemm_q = 289;
  984. TABLE_NAME.dgemm_r = 4096;
  985. TABLE_NAME.cgemm_p = 128;
  986. TABLE_NAME.cgemm_q = 256;
  987. TABLE_NAME.cgemm_r = 4096;
  988. TABLE_NAME.zgemm_p = 128;
  989. TABLE_NAME.zgemm_q = 128;
  990. TABLE_NAME.zgemm_r = 2048;
  991. } else { // 3A5000 and 3C5000L
  992. TABLE_NAME.sgemm_p = 256;
  993. TABLE_NAME.sgemm_q = 384;
  994. TABLE_NAME.sgemm_r = 4096;
  995. TABLE_NAME.dgemm_p = 112;
  996. TABLE_NAME.dgemm_q = 300;
  997. TABLE_NAME.dgemm_r = 3024;
  998. TABLE_NAME.cgemm_p = 128;
  999. TABLE_NAME.cgemm_q = 256;
  1000. TABLE_NAME.cgemm_r = 2048;
  1001. TABLE_NAME.zgemm_p = 128;
  1002. TABLE_NAME.zgemm_q = 128;
  1003. TABLE_NAME.zgemm_r = 1024;
  1004. }
  1005. #ifdef SMP
  1006. }else{
  1007. //multi thread
  1008. if (L3_size == 32){ // 3C5000 and 3D5000
  1009. TABLE_NAME.sgemm_p = 256;
  1010. TABLE_NAME.sgemm_q = 384;
  1011. TABLE_NAME.sgemm_r = 1024;
  1012. TABLE_NAME.dgemm_p = 112;
  1013. TABLE_NAME.dgemm_q = 289;
  1014. TABLE_NAME.dgemm_r = 342;
  1015. TABLE_NAME.cgemm_p = 128;
  1016. TABLE_NAME.cgemm_q = 256;
  1017. TABLE_NAME.cgemm_r = 512;
  1018. TABLE_NAME.zgemm_p = 128;
  1019. TABLE_NAME.zgemm_q = 128;
  1020. TABLE_NAME.zgemm_r = 512;
  1021. } else { // 3A5000 and 3C5000L
  1022. TABLE_NAME.sgemm_p = 256;
  1023. TABLE_NAME.sgemm_q = 384;
  1024. TABLE_NAME.sgemm_r = 2048;
  1025. TABLE_NAME.dgemm_p = 112;
  1026. TABLE_NAME.dgemm_q = 300;
  1027. TABLE_NAME.dgemm_r = 738;
  1028. TABLE_NAME.cgemm_p = 128;
  1029. TABLE_NAME.cgemm_q = 256;
  1030. TABLE_NAME.cgemm_r = 1024;
  1031. TABLE_NAME.zgemm_p = 128;
  1032. TABLE_NAME.zgemm_q = 128;
  1033. TABLE_NAME.zgemm_r = 1024;
  1034. }
  1035. }
  1036. #endif
  1037. #else
  1038. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1039. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1040. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1041. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1042. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1043. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1044. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1045. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1046. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1047. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1048. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1049. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1050. #endif
  1051. #ifdef BUILD_BFLOAT16
  1052. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1053. #endif
  1054. }
  1055. #else // (ARCH_LOONGARCH64)
  1056. #if (ARCH_POWER)
  1057. static void init_parameter(void) {
  1058. #ifdef BUILD_BFLOAT16
  1059. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1060. #endif
  1061. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1062. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1063. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1064. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1065. #ifdef BUILD_BFLOAT16
  1066. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1067. #endif
  1068. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1069. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1070. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1071. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1072. #ifdef BUILD_BFLOAT16
  1073. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1074. #endif
  1075. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1076. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1077. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1078. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1079. }
  1080. #else //POWER
  1081. #if (ARCH_ZARCH)
  1082. static void init_parameter(void) {
  1083. #ifdef BUILD_BFLOAT16
  1084. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1085. #endif
  1086. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1087. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1088. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1089. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1090. #ifdef BUILD_BFLOAT16
  1091. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1092. #endif
  1093. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1094. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1095. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1096. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1097. #ifdef BUILD_BFLOAT16
  1098. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1099. #endif
  1100. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1101. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1102. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1103. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1104. }
  1105. #else //ZARCH
  1106. #if (ARCH_RISCV64)
  1107. static void init_parameter(void) {
  1108. #ifdef BUILD_BFLOAT16
  1109. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1110. #endif
  1111. #ifdef BUILD_HFLOAT16
  1112. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1113. #endif
  1114. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1115. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1116. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1117. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1118. #ifdef BUILD_BFLOAT16
  1119. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1120. #endif
  1121. #ifdef BUILD_HFLOAT16
  1122. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  1123. #endif
  1124. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1125. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1126. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1127. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1128. #ifdef BUILD_BFLOAT16
  1129. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1130. #endif
  1131. #ifdef BUILD_HFLOAT16
  1132. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1133. #endif
  1134. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1135. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1136. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1137. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1138. }
  1139. #else //RISCV64
  1140. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup based on the CPUID leaf 2 descriptor bytes.
   *
   * The descriptor bytes are examined in this order: EAX bytes 1..3 (byte 0
   * of EAX is not a descriptor and is skipped), then all four bytes of EBX,
   * ECX and EDX.  The first byte that matches a known L2 descriptor decides
   * the result.
   *
   * Returns the L2 size in KB.  If no descriptor matches, a warning is
   * printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int eax, ebx, ecx, edx;
    int regs[4];
    int r, shift;

    cpuid(2, &eax, &ebx, &ecx, &edx);

    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;

    for (r = 0; r < 4; r++) {
      /* start at bit 8 for EAX only: its low byte is not a descriptor */
      for (shift = (r == 0) ? 8 : 0; shift < 32; shift += 8) {
        switch (BITMASK(regs[r], shift, 0xff)) {
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
          case 0x1a :
            return 96;
          case 0x39 :
          case 0x3b :
          case 0x41 :
          case 0x79 :
          case 0x81 :
            return 128;
          case 0x3a :
            return 192;
          case 0x21 :
          case 0x3c :
          case 0x42 :
          case 0x7a :
          case 0x7e :
          case 0x82 :
            return 256;
          case 0x3d :
            return 384;
          case 0x3e :
          case 0x43 :
          case 0x7b :
          case 0x7f :
          case 0x83 :
          case 0x86 :
            return 512;
          case 0x44 :
          case 0x78 :
          case 0x7c :
          case 0x84 :
          case 0x87 :
            return 1024;
          case 0x45 :
          case 0x7d :
          case 0x85 :
            return 2048;
          case 0x48 :
            /* 3 MB L2 = 3 * 1024 KB (was mistyped as 3184) */
            return 3072;
          case 0x49 :
            return 4096;
          case 0x4e :
            return 6144;
        }
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1211. #endif
  /*
   * Determine the L2 cache size (in KB, per the warning text below).
   *
   * Lookup order:
   *   1. A non-zero OPENBLAS_L2_SIZE environment value is returned as-is.
   *   2. Bits 16..31 of ECX from CPUID leaf 0x80000006, when positive.
   *   3. On ARCH_X86 builds, the descriptor-based get_l2_size_old();
   *      on all other builds, a warning on stderr and a default of 256.
   */
  static __inline__ int get_l2_size(void){
    int ra, rb, rc, rd;
    int kb = readenv_atoi("OPENBLAS_L2_SIZE");

    /* explicit user override wins */
    if (kb != 0)
      return kb;

    cpuid(0x80000006, &ra, &rb, &rc, &rd);
    kb = BITMASK(rc, 16, 0xffff);
    if (kb > 0)
      return kb;

    /* the extended leaf gave nothing usable */
  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Derive the L3 cache size from CPUID leaf 0x80000006: the 14-bit field
   * starting at bit 18 of EDX, multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the result
   * a size in KB -- confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int ra, rb, rc, rd;
    int units;

    cpuid(0x80000006, &ra, &rb, &rc, &rd);
    units = BITMASK(rd, 18, 0x3fff);

    return units * 512;
  }
  1235. static void init_parameter(void) {
  1236. int l2 = get_l2_size();
  1237. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1238. /* where the GEMM unrolling parameters do not depend on l2 */
  1239. #ifdef BUILD_BFLOAT16
  1240. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1241. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1242. #endif
  1243. #ifdef BUILD_HFLOAT16
  1244. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1245. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1246. #endif
  1247. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1248. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1249. #endif
  1250. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1251. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1252. #endif
  1253. #if BUILD_COMPLEX == 1
  1254. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1255. #endif
  1256. #if BUILD_COMPLEX16==1
  1257. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1258. #endif
  1259. #if BUILD_COMPLEX == 1
  1260. #ifdef CGEMM3M_DEFAULT_Q
  1261. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1262. #else
  1263. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1264. #endif
  1265. #endif
  1266. #if BUILD_COMPLEX16 == 1
  1267. #ifdef ZGEMM3M_DEFAULT_Q
  1268. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1269. #else
  1270. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1271. #endif
  1272. #endif
  1273. #ifdef EXPRECISION
  1274. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1275. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1276. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1277. #endif
  1278. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1279. #ifdef DEBUG
  1280. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1281. #endif
  1282. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1283. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1284. #endif
  1285. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1286. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1287. #endif
  1288. #if BUILD_COMPLEX==1
  1289. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1290. #endif
  1291. #if BUILD_COMPLEX16==1
  1292. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1293. #endif
  1294. #ifdef EXPRECISION
  1295. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1296. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1297. #endif
  1298. #endif
  1299. #ifdef CORE_NORTHWOOD
  1300. #ifdef DEBUG
  1301. fprintf(stderr, "Northwood\n");
  1302. #endif
  1303. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1304. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1305. #endif
  1306. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1307. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1308. #endif
  1309. #if BUILD_COMPLEX==1
  1310. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1311. #endif
  1312. #if BUILD_COMPLEX16==1
  1313. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1314. #endif
  1315. #ifdef EXPRECISION
  1316. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1317. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1318. #endif
  1319. #endif
  1320. #ifdef ATOM
  1321. #ifdef DEBUG
  1322. fprintf(stderr, "Atom\n");
  1323. #endif
  1324. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1325. TABLE_NAME.sgemm_p = 256;
  1326. #endif
  1327. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1328. TABLE_NAME.dgemm_p = 128;
  1329. #endif
  1330. #if BUILD_COMPLEX==1
  1331. TABLE_NAME.cgemm_p = 128;
  1332. #endif
  1333. #if BUILD_COMPLEX16==1
  1334. TABLE_NAME.zgemm_p = 64;
  1335. #endif
  1336. #ifdef EXPRECISION
  1337. TABLE_NAME.qgemm_p = 64;
  1338. TABLE_NAME.xgemm_p = 32;
  1339. #endif
  1340. #endif
  1341. #ifdef CORE_PRESCOTT
  1342. #ifdef DEBUG
  1343. fprintf(stderr, "Prescott\n");
  1344. #endif
  1345. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1346. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1347. #endif
  1348. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1349. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1350. #endif
  1351. #if BUILD_COMPLEX==1
  1352. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1353. #endif
  1354. #if BUILD_COMPLEX16 == 1
  1355. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1356. #endif
  1357. #ifdef EXPRECISION
  1358. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1359. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1360. #endif
  1361. #endif
  1362. #ifdef CORE2
  1363. #ifdef DEBUG
  1364. fprintf(stderr, "Core2\n");
  1365. #endif
  1366. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1367. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1368. #endif
  1369. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1370. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1371. #endif
  1372. #if BUILD_COMPLEX==1
  1373. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1374. #endif
  1375. #if BUILD_COMPLEX16==1
  1376. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1377. #endif
  1378. #ifdef EXPRECISION
  1379. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1380. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1381. #endif
  1382. #endif
  1383. #ifdef PENRYN
  1384. #ifdef DEBUG
  1385. fprintf(stderr, "Penryn\n");
  1386. #endif
  1387. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1388. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1389. #endif
  1390. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1391. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1392. #endif
  1393. #if BUILD_COMPLEX==1
  1394. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1395. #endif
  1396. #if BUILD_COMPLEX16==1
  1397. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1398. #endif
  1399. #ifdef EXPRECISION
  1400. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1401. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1402. #endif
  1403. #endif
  1404. #ifdef DUNNINGTON
  1405. #ifdef DEBUG
  1406. fprintf(stderr, "Dunnington\n");
  1407. #endif
  1408. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1409. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1410. #endif
  1411. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1412. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1413. #endif
  1414. #if BUILD_COMPLEX==1
  1415. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1416. #endif
  1417. #if BUILD_COMPLEX16==1
  1418. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1419. #endif
  1420. #ifdef EXPRECISION
  1421. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1422. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1423. #endif
  1424. #endif
  1425. #ifdef NEHALEM
  1426. #ifdef DEBUG
  1427. fprintf(stderr, "Nehalem\n");
  1428. #endif
  1429. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1430. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1431. #endif
  1432. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1433. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1434. #endif
  1435. #if BUILD_COMPLEX
  1436. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1437. #endif
  1438. #if BUILD_COMPLEX16
  1439. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1440. #endif
  1441. #ifdef EXPRECISION
  1442. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1443. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1444. #endif
  1445. #endif
  1446. #ifdef SANDYBRIDGE
  1447. #ifdef DEBUG
  1448. fprintf(stderr, "Sandybridge\n");
  1449. #endif
  1450. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1451. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1452. #endif
  1453. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1454. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1455. #endif
  1456. #if BUILD_COMPLEX
  1457. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1458. #endif
  1459. #if BUILD_COMPLEX16
  1460. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1461. #endif
  1462. #ifdef EXPRECISION
  1463. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1464. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1465. #endif
  1466. #endif
  1467. #ifdef HASWELL
  1468. #ifdef DEBUG
  1469. fprintf(stderr, "Haswell\n");
  1470. #endif
  1471. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1472. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1473. #endif
  1474. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1475. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1476. #endif
  1477. #if BUILD_COMPLEX
  1478. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1479. #endif
  1480. #if BUILD_COMPLEX16
  1481. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1482. #endif
  1483. #ifdef EXPRECISION
  1484. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1485. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1486. #endif
  1487. #endif
  1488. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1489. #ifdef DEBUG
  1490. fprintf(stderr, "SkylakeX\n");
  1491. #endif
  1492. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1493. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1494. #endif
  1495. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1496. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1497. #endif
  1498. #if BUILD_COMPLEX
  1499. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1500. #endif
  1501. #if BUILD_COMPLEX16
  1502. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1503. #endif
  1504. #ifdef EXPRECISION
  1505. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1506. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1507. #endif
  1508. #endif
  1509. #ifdef OPTERON
  1510. #ifdef DEBUG
  1511. fprintf(stderr, "Opteron\n");
  1512. #endif
  1513. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1514. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1515. #endif
  1516. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1517. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1518. #endif
  1519. #if BUILD_COMPLEX
  1520. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1521. #endif
  1522. #if BUILD_COMPLEX16
  1523. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1524. #endif
  1525. #ifdef EXPRECISION
  1526. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1527. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1528. #endif
  1529. #endif
  1530. #ifdef BARCELONA
  1531. #ifdef DEBUG
  1532. fprintf(stderr, "Barcelona\n");
  1533. #endif
  1534. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1535. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1536. #endif
  1537. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1538. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1539. #endif
  1540. #if BUILD_COMPLEX
  1541. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1542. #endif
  1543. #if BUILD_COMPLEX16
  1544. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1545. #endif
  1546. #ifdef EXPRECISION
  1547. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1548. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1549. #endif
  1550. #endif
  1551. #ifdef BOBCAT
  1552. #ifdef DEBUG
  1553. fprintf(stderr, "Bobcate\n");
  1554. #endif
  1555. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1556. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1557. #endif
  1558. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1559. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1560. #endif
  1561. #if BUILD_COMPLEX
  1562. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1563. #endif
  1564. #if BUILD_COMPLEX16
  1565. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1566. #endif
  1567. #ifdef EXPRECISION
  1568. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1569. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1570. #endif
  1571. #endif
  1572. #ifdef BULLDOZER
  1573. #ifdef DEBUG
  1574. fprintf(stderr, "Bulldozer\n");
  1575. #endif
  1576. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1577. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1578. #endif
  1579. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1580. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1581. #endif
  1582. #if BUILD_COMPLEX
  1583. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1584. #endif
  1585. #if BUILD_COMPLEX16
  1586. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1587. #endif
  1588. #ifdef EXPRECISION
  1589. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1590. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1591. #endif
  1592. #endif
  1593. #ifdef EXCAVATOR
  1594. #ifdef DEBUG
  1595. fprintf(stderr, "Excavator\n");
  1596. #endif
  1597. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1598. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1599. #endif
  1600. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1601. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1602. #endif
  1603. #if BUILD_COMPLEX
  1604. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1605. #endif
  1606. #if BUILD_COMPLEX16
  1607. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1608. #endif
  1609. #ifdef EXPRECISION
  1610. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1611. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1612. #endif
  1613. #endif
  1614. #ifdef PILEDRIVER
  1615. #ifdef DEBUG
  1616. fprintf(stderr, "Piledriver\n");
  1617. #endif
  1618. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1619. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1620. #endif
  1621. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1622. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1623. #endif
  1624. #if BUILD_COMPLEX
  1625. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1626. #endif
  1627. #if BUILD_COMPLEX16
  1628. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1629. #endif
  1630. #ifdef EXPRECISION
  1631. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1632. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1633. #endif
  1634. #endif
  1635. #ifdef STEAMROLLER
  1636. #ifdef DEBUG
  1637. fprintf(stderr, "Steamroller\n");
  1638. #endif
  1639. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1640. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1641. #endif
  1642. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1643. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1644. #endif
  1645. #if BUILD_COMPLEX
  1646. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1647. #endif
  1648. #if BUILD_COMPLEX16
  1649. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1650. #endif
  1651. #ifdef EXPRECISION
  1652. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1653. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1654. #endif
  1655. #endif
  1656. #ifdef ZEN
  1657. #ifdef DEBUG
  1658. fprintf(stderr, "Zen\n");
  1659. #endif
  1660. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1661. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1662. #endif
  1663. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1664. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1665. #endif
  1666. #if BUILD_COMPLEX
  1667. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1668. #endif
  1669. #if BUILD_COMPLEX16
  1670. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1671. #endif
  1672. #ifdef EXPRECISION
  1673. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1674. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1675. #endif
  1676. #endif
  1677. #ifdef NANO
  1678. #ifdef DEBUG
  1679. fprintf(stderr, "NANO\n");
  1680. #endif
  1681. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1682. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1683. #endif
  1684. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1685. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1686. #endif
  1687. #if (BUILD_COMPLEX==1)
  1688. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1689. #endif
  1690. #if (BUILD_COMPLEX16==1)
  1691. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1692. #endif
  1693. #ifdef EXPRECISION
  1694. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1695. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1696. #endif
  1697. #endif
  1698. #ifdef SAPPHIRERAPIDS
  1699. #if (BUILD_BFLOAT16 == 1)
  1700. TABLE_NAME.need_amxtile_permission = 1;
  1701. #endif
  1702. #endif
  1703. #if BUILD_COMPLEX==1
  1704. #ifdef CGEMM3M_DEFAULT_P
  1705. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1706. #else
  1707. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1708. #endif
  1709. #endif
  1710. #if BUILD_COMPLEX16==1
  1711. #ifdef ZGEMM3M_DEFAULT_P
  1712. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1713. #else
  1714. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1715. #endif
  1716. #endif
  1717. #ifdef EXPRECISION
  1718. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1719. #endif
  1720. #if BUILD_SINGLE == 1
  1721. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1722. #endif
  1723. #if BUILD_DOUBLE== 1
  1724. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1725. #endif
  1726. #if BUILD_COMPLEX==1
  1727. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1728. #endif
  1729. #if BUILD_COMPLEX16==1
  1730. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1731. #endif
  1732. #if BUILD_COMPLEX==1
  1733. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1734. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1735. #else
  1736. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1737. #endif
  1738. #endif
  1739. #if BUILD_COMPLEX16==1
  1740. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1741. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1742. #else
  1743. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1744. #endif
  1745. #endif
  1746. #ifdef QUAD_PRECISION
  1747. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1748. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1749. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1750. #endif
  1751. #ifdef DEBUG
  1752. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1753. #endif
  1754. #if BUILD_BFLOAT16==1
  1755. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1756. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1757. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1758. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1759. #endif
  1760. #if BUILD_HFLOAT16==1
  1761. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1762. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1763. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1764. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1765. #endif
  1766. #if BUILD_SINGLE==1
  1767. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1768. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1769. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1770. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1771. #endif
  1772. #if BUILD_DOUBLE==1
  1773. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1774. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1775. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1776. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1777. #endif
  1778. #ifdef EXPRECISION
  1779. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1780. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1781. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1782. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1783. #endif
  1784. #if BUILD_COMPLEX ==1
  1785. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1786. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1787. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1788. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1789. #endif
  1790. #if BUILD_COMPLEX16 ==1
  1791. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1792. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1793. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1794. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1795. #endif
  1796. #if BUILD_COMPLEX == 1
  1797. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1798. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1799. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1800. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1801. #endif
  1802. #if BUILD_COMPLEX16 == 1
  1803. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1804. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1805. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1806. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1807. #endif
  1808. #ifdef EXPRECISION
  1809. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1810. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1811. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1812. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1813. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1814. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1815. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1816. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1817. #endif
  1818. }
  1819. #endif //RISCV64
  1820. #endif //POWER
  1821. #endif //ZARCH
  1822. #endif //(ARCH_LOONGARCH64)
  1823. #endif //(ARCH_MIPS64)
  1824. #endif //(ARCH_ARM64)