You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or number, can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 54 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void);
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  61. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  62. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  63. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  64. dsdot_kTS,
  65. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  66. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  67. ssymv_LTS, ssymv_UTS,
  68. sbgemm_kernelTS, sbgemm_betaTS,
  69. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  70. sbgemm_incopyTS, sbgemm_itcopyTS,
  71. #else
  72. sbgemm_oncopyTS, sbgemm_otcopyTS,
  73. #endif
  74. sbgemm_oncopyTS, sbgemm_otcopyTS,
  75. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  76. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  77. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  78. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  79. #else
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. #endif
  83. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  84. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  85. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  86. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  87. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  88. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  89. #else
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #endif
  93. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  94. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  95. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  96. ssymm_iutcopyTS, ssymm_iltcopyTS,
  97. #else
  98. ssymm_outcopyTS, ssymm_oltcopyTS,
  99. #endif
  100. ssymm_outcopyTS, ssymm_oltcopyTS,
  101. #ifndef NO_LAPACK
  102. sneg_tcopyTS, slaswp_ncopyTS,
  103. #else
  104. NULL,NULL,
  105. #endif
  106. #ifdef SMALL_MATRIX_OPT
  107. sbgemm_small_matrix_permitTS,
  108. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  109. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  110. #endif
  111. #endif
  112. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  113. 0, 0, 0,
  114. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  115. #ifdef SGEMM_DEFAULT_UNROLL_MN
  116. SGEMM_DEFAULT_UNROLL_MN,
  117. #else
  118. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  119. #endif
  120. #endif
  121. #ifdef HAVE_EXCLUSIVE_CACHE
  122. 1,
  123. #else
  124. 0,
  125. #endif
  126. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  127. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  128. #endif
  129. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  130. isamax_kTS,
  131. #endif
  132. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  133. isamin_kTS, ismax_kTS, ismin_kTS,
  134. snrm2_kTS, sasum_kTS,
  135. #endif
  136. #if BUILD_SINGLE == 1
  137. ssum_kTS,
  138. #endif
  139. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  140. scopy_kTS, sdot_kTS,
  141. // dsdot_kTS,
  142. srot_kTS, saxpy_kTS,
  143. #endif
  144. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  145. sscal_kTS,
  146. #endif
  147. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  148. sswap_kTS,
  149. sgemv_nTS, sgemv_tTS,
  150. #endif
  151. #if BUILD_SINGLE == 1
  152. sger_kTS,
  153. #endif
  154. #if BUILD_SINGLE == 1
  155. ssymv_LTS, ssymv_UTS,
  156. #endif
  157. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  158. #ifdef ARCH_X86_64
  159. sgemm_directTS,
  160. sgemm_direct_performantTS,
  161. #endif
  162. sgemm_kernelTS, sgemm_betaTS,
  163. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  164. sgemm_incopyTS, sgemm_itcopyTS,
  165. #else
  166. sgemm_oncopyTS, sgemm_otcopyTS,
  167. #endif
  168. sgemm_oncopyTS, sgemm_otcopyTS,
  169. #endif
  170. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  171. #ifdef SMALL_MATRIX_OPT
  172. sgemm_small_matrix_permitTS,
  173. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  174. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  175. #endif
  176. #endif
  177. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  178. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  179. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  180. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  181. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  182. #else
  183. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  184. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  185. #endif
  186. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  187. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  188. #endif
  189. #if (BUILD_SINGLE==1)
  190. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  191. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  192. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  193. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  194. #else
  195. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  196. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  197. #endif
  198. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  199. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  200. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  201. ssymm_iutcopyTS, ssymm_iltcopyTS,
  202. #else
  203. ssymm_outcopyTS, ssymm_oltcopyTS,
  204. #endif
  205. ssymm_outcopyTS, ssymm_oltcopyTS,
  206. #ifndef NO_LAPACK
  207. sneg_tcopyTS, slaswp_ncopyTS,
  208. #else
  209. NULL,NULL,
  210. #endif
  211. #endif
  212. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  213. 0, 0, 0,
  214. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  215. #ifdef DGEMM_DEFAULT_UNROLL_MN
  216. DGEMM_DEFAULT_UNROLL_MN,
  217. #else
  218. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  219. #endif
  220. #endif
  221. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  222. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  223. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  224. dnrm2_kTS, dasum_kTS,
  225. #endif
  226. #if (BUILD_DOUBLE==1)
  227. dsum_kTS,
  228. #endif
  229. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  230. dcopy_kTS, ddot_kTS,
  231. #endif
  232. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  233. dsdot_kTS,
  234. #endif
  235. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  236. drot_kTS,
  237. daxpy_kTS,
  238. dscal_kTS,
  239. dswap_kTS,
  240. dgemv_nTS, dgemv_tTS,
  241. #endif
  242. #if (BUILD_DOUBLE==1)
  243. dger_kTS,
  244. dsymv_LTS, dsymv_UTS,
  245. #endif
  246. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  247. dgemm_kernelTS, dgemm_betaTS,
  248. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  249. dgemm_incopyTS, dgemm_itcopyTS,
  250. #else
  251. dgemm_oncopyTS, dgemm_otcopyTS,
  252. #endif
  253. dgemm_oncopyTS, dgemm_otcopyTS,
  254. #endif
  255. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  256. #ifdef SMALL_MATRIX_OPT
  257. dgemm_small_matrix_permitTS,
  258. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  259. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  260. #endif
  261. #endif
  262. #if (BUILD_DOUBLE==1)
  263. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  264. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  265. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  266. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  267. #else
  268. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  269. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  270. #endif
  271. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  272. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  273. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  274. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  275. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  276. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  277. #else
  278. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  279. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  280. #endif
  281. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  282. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  283. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  284. dsymm_iutcopyTS, dsymm_iltcopyTS,
  285. #else
  286. dsymm_outcopyTS, dsymm_oltcopyTS,
  287. #endif
  288. dsymm_outcopyTS, dsymm_oltcopyTS,
  289. #ifndef NO_LAPACK
  290. dneg_tcopyTS, dlaswp_ncopyTS,
  291. #else
  292. NULL, NULL,
  293. #endif
  294. #endif
  295. #ifdef EXPRECISION
  296. 0, 0, 0,
  297. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  298. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  299. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  300. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  301. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  302. qgemv_nTS, qgemv_tTS, qger_kTS,
  303. qsymv_LTS, qsymv_UTS,
  304. qgemm_kernelTS, qgemm_betaTS,
  305. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  306. qgemm_incopyTS, qgemm_itcopyTS,
  307. #else
  308. qgemm_oncopyTS, qgemm_otcopyTS,
  309. #endif
  310. qgemm_oncopyTS, qgemm_otcopyTS,
  311. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  312. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  313. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  314. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  315. #else
  316. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  317. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  318. #endif
  319. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  320. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  321. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  322. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  323. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  324. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  325. #else
  326. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  327. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  328. #endif
  329. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  330. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  331. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  332. qsymm_iutcopyTS, qsymm_iltcopyTS,
  333. #else
  334. qsymm_outcopyTS, qsymm_oltcopyTS,
  335. #endif
  336. qsymm_outcopyTS, qsymm_oltcopyTS,
  337. #ifndef NO_LAPACK
  338. qneg_tcopyTS, qlaswp_ncopyTS,
  339. #else
  340. NULL, NULL,
  341. #endif
  342. #endif
  343. #if (BUILD_COMPLEX)
  344. 0, 0, 0,
  345. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  346. #ifdef CGEMM_DEFAULT_UNROLL_MN
  347. CGEMM_DEFAULT_UNROLL_MN,
  348. #else
  349. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  350. #endif
  351. #if (BUILD_COMPLEX)
  352. camax_kTS, camin_kTS,
  353. #endif
  354. #if (BUILD_COMPLEX)
  355. icamax_kTS,
  356. #endif
  357. #if (BUILD_COMPLEX)
  358. icamin_kTS,
  359. cnrm2_kTS, casum_kTS, csum_kTS,
  360. #endif
  361. #if (BUILD_COMPLEX)
  362. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  363. #endif
  364. #if (BUILD_COMPLEX)
  365. csrot_kTS,
  366. #endif
  367. #if (BUILD_COMPLEX)
  368. caxpy_kTS,
  369. caxpyc_kTS,
  370. cscal_kTS,
  371. cswap_kTS,
  372. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  373. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  374. #endif
  375. #if (BUILD_COMPLEX)
  376. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  377. csymv_LTS, csymv_UTS,
  378. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  379. #endif
  380. #if (BUILD_COMPLEX)
  381. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  382. cgemm_betaTS,
  383. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  384. cgemm_incopyTS, cgemm_itcopyTS,
  385. #else
  386. cgemm_oncopyTS, cgemm_otcopyTS,
  387. #endif
  388. cgemm_oncopyTS, cgemm_otcopyTS,
  389. #ifdef SMALL_MATRIX_OPT
  390. cgemm_small_matrix_permitTS,
  391. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  392. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  393. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  394. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  395. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  396. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  397. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  398. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  399. #endif
  400. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  401. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  402. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  403. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  404. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  405. #else
  406. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  407. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  408. #endif
  409. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  410. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  411. #endif
  412. #endif
  413. #if (BUILD_COMPLEX)
  414. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  415. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  416. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  417. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  418. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  419. #else
  420. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  421. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  422. #endif
  423. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  424. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  425. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  426. csymm_iutcopyTS, csymm_iltcopyTS,
  427. #else
  428. csymm_outcopyTS, csymm_oltcopyTS,
  429. #endif
  430. csymm_outcopyTS, csymm_oltcopyTS,
  431. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  432. chemm_iutcopyTS, chemm_iltcopyTS,
  433. #else
  434. chemm_outcopyTS, chemm_oltcopyTS,
  435. #endif
  436. chemm_outcopyTS, chemm_oltcopyTS,
  437. 0, 0, 0,
  438. #if (USE_GEMM3M)
  439. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  440. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  441. #else
  442. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  443. #endif
  444. cgemm3m_kernelTS,
  445. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  446. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  447. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  448. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  449. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  450. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  451. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  452. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  453. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  454. csymm3m_oucopybTS, csymm3m_olcopybTS,
  455. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  456. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  457. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  458. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  459. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  460. chemm3m_oucopybTS, chemm3m_olcopybTS,
  461. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  462. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  463. #else
  464. 0, 0, 0,
  465. NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. NULL, NULL,
  483. NULL, NULL,
  484. #endif
  485. #endif
  486. #if (BUILD_COMPLEX)
  487. #ifndef NO_LAPACK
  488. cneg_tcopyTS,
  489. claswp_ncopyTS,
  490. #else
  491. NULL, NULL,
  492. #endif
  493. #endif
  494. #if BUILD_COMPLEX16 == 1
  495. 0, 0, 0,
  496. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  497. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  498. ZGEMM_DEFAULT_UNROLL_MN,
  499. #else
  500. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  501. #endif
  502. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  503. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  504. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  505. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  506. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  507. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  508. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  509. zsymv_LTS, zsymv_UTS,
  510. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  511. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  512. zgemm_betaTS,
  513. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  514. zgemm_incopyTS, zgemm_itcopyTS,
  515. #else
  516. zgemm_oncopyTS, zgemm_otcopyTS,
  517. #endif
  518. zgemm_oncopyTS, zgemm_otcopyTS,
  519. #ifdef SMALL_MATRIX_OPT
  520. zgemm_small_matrix_permitTS,
  521. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  522. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  523. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  524. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  525. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  526. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  527. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  528. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  529. #endif
  530. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  531. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  532. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  533. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  534. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  535. #else
  536. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  537. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  538. #endif
  539. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  540. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  541. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  542. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  543. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  544. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  545. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  546. #else
  547. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  548. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  549. #endif
  550. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  551. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  552. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  553. zsymm_iutcopyTS, zsymm_iltcopyTS,
  554. #else
  555. zsymm_outcopyTS, zsymm_oltcopyTS,
  556. #endif
  557. zsymm_outcopyTS, zsymm_oltcopyTS,
  558. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  559. zhemm_iutcopyTS, zhemm_iltcopyTS,
  560. #else
  561. zhemm_outcopyTS, zhemm_oltcopyTS,
  562. #endif
  563. zhemm_outcopyTS, zhemm_oltcopyTS,
  564. 0, 0, 0,
  565. #if (USE_GEMM3M)
  566. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  567. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  568. #else
  569. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  570. #endif
  571. zgemm3m_kernelTS,
  572. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  573. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  574. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  575. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  576. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  577. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  578. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  579. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  580. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  581. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  582. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  583. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  584. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  585. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  586. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  587. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  588. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  589. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  590. #else
  591. 0, 0, 0,
  592. NULL,
  593. NULL, NULL,
  594. NULL, NULL,
  595. NULL, NULL,
  596. NULL, NULL,
  597. NULL, NULL,
  598. NULL, NULL,
  599. NULL, NULL,
  600. NULL, NULL,
  601. NULL, NULL,
  602. NULL, NULL,
  603. NULL, NULL,
  604. NULL, NULL,
  605. NULL, NULL,
  606. NULL, NULL,
  607. NULL, NULL,
  608. NULL, NULL,
  609. NULL, NULL,
  610. NULL, NULL,
  611. #endif
  612. #ifndef NO_LAPACK
  613. zneg_tcopyTS, zlaswp_ncopyTS,
  614. #else
  615. NULL, NULL,
  616. #endif
  617. #endif
  618. #ifdef EXPRECISION
  619. 0, 0, 0,
  620. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  621. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  622. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  623. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  624. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  625. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  626. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  627. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  628. xsymv_LTS, xsymv_UTS,
  629. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  630. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  631. xgemm_betaTS,
  632. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  633. xgemm_incopyTS, xgemm_itcopyTS,
  634. #else
  635. xgemm_oncopyTS, xgemm_otcopyTS,
  636. #endif
  637. xgemm_oncopyTS, xgemm_otcopyTS,
  638. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  639. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  640. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  641. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  642. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  643. #else
  644. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  645. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  646. #endif
  647. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  648. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  649. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  650. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  651. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  652. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  653. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  654. #else
  655. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  656. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  657. #endif
  658. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  659. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  660. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  661. xsymm_iutcopyTS, xsymm_iltcopyTS,
  662. #else
  663. xsymm_outcopyTS, xsymm_oltcopyTS,
  664. #endif
  665. xsymm_outcopyTS, xsymm_oltcopyTS,
  666. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  667. xhemm_iutcopyTS, xhemm_iltcopyTS,
  668. #else
  669. xhemm_outcopyTS, xhemm_oltcopyTS,
  670. #endif
  671. xhemm_outcopyTS, xhemm_oltcopyTS,
  672. 0, 0, 0,
  673. #if (USE_GEMM3M)
  674. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  675. xgemm3m_kernelTS,
  676. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  677. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  678. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  679. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  680. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  681. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  682. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  683. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  684. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  685. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  686. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  687. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  688. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  689. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  690. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  691. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  692. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  693. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  694. #else
  695. 0, 0, 0,
  696. NULL,
  697. NULL, NULL,
  698. NULL, NULL,
  699. NULL, NULL,
  700. NULL, NULL,
  701. NULL, NULL,
  702. NULL, NULL,
  703. NULL, NULL,
  704. NULL, NULL,
  705. NULL, NULL,
  706. NULL, NULL,
  707. NULL, NULL,
  708. NULL, NULL,
  709. NULL, NULL,
  710. NULL, NULL,
  711. NULL, NULL,
  712. NULL, NULL,
  713. NULL, NULL,
  714. NULL, NULL,
  715. #endif
  716. #ifndef NO_LAPACK
  717. xneg_tcopyTS, xlaswp_ncopyTS,
  718. #else
  719. NULL, NULL,
  720. #endif
  721. #endif
  722. init_parameter,
  723. SNUMOPT, DNUMOPT, QNUMOPT,
  724. #if BUILD_SINGLE == 1
  725. saxpby_kTS,
  726. #endif
  727. #if BUILD_DOUBLE == 1
  728. daxpby_kTS,
  729. #endif
  730. #if BUILD_COMPLEX == 1
  731. caxpby_kTS,
  732. #endif
  733. #if BUILD_COMPLEX16== 1
  734. zaxpby_kTS,
  735. #endif
  736. #if BUILD_SINGLE == 1
  737. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  738. #endif
  739. #if BUILD_DOUBLE== 1
  740. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  741. #endif
  742. #if BUILD_COMPLEX == 1
  743. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  744. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  745. #endif
  746. #if BUILD_COMPLEX16 == 1
  747. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  748. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  749. #endif
  750. #if BUILD_SINGLE == 1
  751. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  752. #endif
  753. #if BUILD_DOUBLE== 1
  754. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  755. #endif
  756. #if BUILD_COMPLEX== 1
  757. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  758. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  759. #endif
  760. #if BUILD_COMPLEX16==1
  761. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  762. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  763. #endif
  764. #if BUILD_SINGLE == 1
  765. sgeadd_kTS,
  766. #endif
  767. #if BUILD_DOUBLE==1
  768. dgeadd_kTS,
  769. #endif
  770. #if BUILD_COMPLEX==1
  771. cgeadd_kTS,
  772. #endif
  773. #if BUILD_COMPLEX16==1
  774. zgeadd_kTS,
  775. #endif
  776. };
  777. #if (ARCH_ARM64)
  /*
   * init_parameter -- ARCH_ARM64 variant.
   *
   * Fills the *_p / *_q / *_r fields of TABLE_NAME (presumably the GEMM
   * blocking sizes -- confirm against the consumers of the table) from the
   * compile-time *_DEFAULT_P / *_DEFAULT_Q / *_DEFAULT_R macros.  Only the
   * precisions enabled through the BUILD_* switches are written.
   *
   * The GEMM3M fields are filled last because, when no dedicated
   * CGEMM3M_ / ZGEMM3M_ default is defined, they copy the real-precision
   * value that was stored earlier in this function.
   */
  static void init_parameter(void) {

    /* ---- bfloat16 ---------------------------------------------------- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* ---- single precision (also needed when only complex is built) --- */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* ---- double precision (also needed when only complex16 is built) - */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* ---- single complex ---------------------------------------------- */
  #if BUILD_COMPLEX == 1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* ---- double complex ---------------------------------------------- */
  #if BUILD_COMPLEX16 == 1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- extended precision (real q*, complex x*) -------------------- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /* ---- GEMM3M: dedicated default if present, else the real value --- */
  #if (USE_GEMM3M)

    /* cgemm3m falls back to the sgemm fields */
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

    /* zgemm3m falls back to the dgemm fields */
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* xgemm3m always mirrors the qgemm fields */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif

  #endif /* USE_GEMM3M */
  }
  870. #else // (ARCH_ARM64)
  871. #if defined(ARCH_MIPS64)
  872. static void init_parameter(void) {
  873. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  874. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  875. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  876. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  877. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  878. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  879. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  880. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  881. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  882. TABLE_NAME.dgemm_r = 640;
  883. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  884. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  885. #ifdef EXPRECISION
  886. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  887. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  888. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  889. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  890. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  891. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  892. #endif
  893. #if defined(USE_GEMM3M)
  894. #ifdef CGEMM3M_DEFAULT_P
  895. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  896. #else
  897. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  898. #endif
  899. #ifdef ZGEMM3M_DEFAULT_P
  900. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  901. #else
  902. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  903. #endif
  904. #ifdef CGEMM3M_DEFAULT_Q
  905. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  906. #else
  907. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  908. #endif
  909. #ifdef ZGEMM3M_DEFAULT_Q
  910. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  911. #else
  912. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  913. #endif
  914. #ifdef CGEMM3M_DEFAULT_R
  915. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  916. #else
  917. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  918. #endif
  919. #ifdef ZGEMM3M_DEFAULT_R
  920. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  921. #else
  922. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  923. #endif
  924. #ifdef EXPRECISION
  925. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  926. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  927. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  928. #endif
  929. #endif
  930. }
  931. #else // (ARCH_MIPS64)
  932. #if (ARCH_LOONGARCH64)
  933. static void init_parameter(void) {
  934. #ifdef BUILD_BFLOAT16
  935. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  936. #endif
  937. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  938. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  939. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  940. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  941. #ifdef BUILD_BFLOAT16
  942. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  943. #endif
  944. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  945. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  946. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  947. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  948. #ifdef BUILD_BFLOAT16
  949. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  950. #endif
  951. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  952. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  953. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  954. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  955. }
  956. #else // (ARCH_LOONGARCH64)
  957. #if (ARCH_POWER)
  958. static void init_parameter(void) {
  959. #ifdef BUILD_BFLOAT16
  960. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  961. #endif
  962. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  963. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  964. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  965. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  966. #ifdef BUILD_BFLOAT16
  967. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  968. #endif
  969. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  970. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  971. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  972. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  973. #ifdef BUILD_BFLOAT16
  974. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  975. #endif
  976. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  977. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  978. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  979. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  980. }
  981. #else //POWER
  982. #if (ARCH_ZARCH)
  983. static void init_parameter(void) {
  984. #ifdef BUILD_BFLOAT16
  985. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  986. #endif
  987. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  988. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  989. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  990. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  991. #ifdef BUILD_BFLOAT16
  992. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  993. #endif
  994. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  995. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  996. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  997. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  998. #ifdef BUILD_BFLOAT16
  999. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1000. #endif
  1001. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1002. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1003. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1004. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1005. }
  1006. #else //ZARCH
  1007. #ifdef ARCH_X86
  /*
   * get_l2_size_old -- legacy L2 cache size probe via CPUID leaf 2.
   *
   * Executes cpuid(2) and scans the 15 descriptor bytes it returns (the low
   * byte of EAX is skipped; it is not a descriptor).  The first byte that
   * matches a known L2 descriptor determines the result.
   *
   * Returns the L2 cache size in KB.  If no descriptor is recognised, a
   * warning is printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /* Per the CPUID leaf 2 convention, bit 31 set in a register means that
       register carries no valid descriptors.  Zero such registers so their
       bytes decode as the null descriptor (0x00, not listed below). */
    if (BITMASK(eax, 31, 1)) eax = 0;
    if (BITMASK(ebx, 31, 1)) ebx = 0;
    if (BITMASK(ecx, 31, 1)) ecx = 0;
    if (BITMASK(edx, 31, 1)) edx = 0;

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* 3 MB L2 = 3 * 1024 KB (was 3184, which is not a multiple of 1024) */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* Nothing recognised: warn and fall back to a conservative default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1078. #endif
  /*
   * get_l2_size -- L2 cache size in KB.
   *
   * Reads CPUID leaf 0x80000006 and takes the 16-bit field in ECX[31:16].
   * A positive value is returned as is.  Otherwise:
   *   - with ARCH_X86 defined, the CPUID leaf 2 based get_l2_size_old() is
   *     consulted;
   *   - without it, a warning is written to stderr and 256 is returned.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int l2_kb;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2_kb = BITMASK(ecx, 16, 0xffff);

    /* Common case: the extended leaf reported a usable size. */
    if (l2_kb > 0) return l2_kb;

  #ifdef ARCH_X86
    /* 32-bit x86: retry with the legacy descriptor-based probe. */
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * get_l3_size -- L3 cache size derived from CPUID leaf 0x80000006.
   *
   * Extracts the 14-bit field in EDX[31:18] and multiplies it by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the
   * result a size in KB -- confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){

    int eax, ebx, ecx, edx;
    int l3_units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l3_units = BITMASK(edx, 18, 0x3fff);

    return l3_units * 512;
  }
  1099. static void init_parameter(void) {
  1100. int l2 = get_l2_size();
  1101. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1102. /* where the GEMM unrolling parameters do not depend on l2 */
  1103. #ifdef BUILD_BFLOAT16
  1104. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1105. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1106. #endif
  1107. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1108. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1109. #endif
  1110. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1111. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1112. #endif
  1113. #if BUILD_COMPLEX == 1
  1114. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1115. #endif
  1116. #if BUILD_COMPLEX16==1
  1117. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1118. #endif
  1119. #if BUILD_COMPLEX == 1
  1120. #ifdef CGEMM3M_DEFAULT_Q
  1121. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1122. #else
  1123. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1124. #endif
  1125. #endif
  1126. #if BUILD_COMPLEX16 == 1
  1127. #ifdef ZGEMM3M_DEFAULT_Q
  1128. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1129. #else
  1130. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1131. #endif
  1132. #endif
  1133. #ifdef EXPRECISION
  1134. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1135. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1136. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1137. #endif
  1138. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1139. #ifdef DEBUG
  1140. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1141. #endif
  1142. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1143. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1144. #endif
  1145. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1146. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1147. #endif
  1148. #if BUILD_COMPLEX==1
  1149. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1150. #endif
  1151. #if BUILD_COMPLEX16==1
  1152. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1153. #endif
  1154. #ifdef EXPRECISION
  1155. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1156. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1157. #endif
  1158. #endif
  1159. #ifdef CORE_NORTHWOOD
  1160. #ifdef DEBUG
  1161. fprintf(stderr, "Northwood\n");
  1162. #endif
  1163. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1164. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1165. #endif
  1166. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1167. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1168. #endif
  1169. #if BUILD_COMPLEX==1
  1170. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1171. #endif
  1172. #if BUILD_COMPLEX16==1
  1173. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1174. #endif
  1175. #ifdef EXPRECISION
  1176. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1177. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1178. #endif
  1179. #endif
  1180. #ifdef ATOM
  1181. #ifdef DEBUG
  1182. fprintf(stderr, "Atom\n");
  1183. #endif
  1184. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1185. TABLE_NAME.sgemm_p = 256;
  1186. #endif
  1187. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1188. TABLE_NAME.dgemm_p = 128;
  1189. #endif
  1190. #if BUILD_COMPLEX==1
  1191. TABLE_NAME.cgemm_p = 128;
  1192. #endif
  1193. #if BUILD_COMPLEX16==1
  1194. TABLE_NAME.zgemm_p = 64;
  1195. #endif
  1196. #ifdef EXPRECISION
  1197. TABLE_NAME.qgemm_p = 64;
  1198. TABLE_NAME.xgemm_p = 32;
  1199. #endif
  1200. #endif
  1201. #ifdef CORE_PRESCOTT
  1202. #ifdef DEBUG
  1203. fprintf(stderr, "Prescott\n");
  1204. #endif
  1205. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1206. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1207. #endif
  1208. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1209. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1210. #endif
  1211. #if BUILD_COMPLEX==1
  1212. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1213. #endif
  1214. #if BUILD_COMPLEX16 == 1
  1215. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1216. #endif
  1217. #ifdef EXPRECISION
  1218. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1219. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1220. #endif
  1221. #endif
  1222. #ifdef CORE2
  1223. #ifdef DEBUG
  1224. fprintf(stderr, "Core2\n");
  1225. #endif
  1226. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1227. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1228. #endif
  1229. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1230. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1231. #endif
  1232. #if BUILD_COMPLEX==1
  1233. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1234. #endif
  1235. #if BUILD_COMPLEX16==1
  1236. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1237. #endif
  1238. #ifdef EXPRECISION
  1239. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1240. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1241. #endif
  1242. #endif
  1243. #ifdef PENRYN
  1244. #ifdef DEBUG
  1245. fprintf(stderr, "Penryn\n");
  1246. #endif
  1247. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1248. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1249. #endif
  1250. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1251. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1252. #endif
  1253. #if BUILD_COMPLEX==1
  1254. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1255. #endif
  1256. #if BUILD_COMPLEX16==1
  1257. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1258. #endif
  1259. #ifdef EXPRECISION
  1260. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1261. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1262. #endif
  1263. #endif
  1264. #ifdef DUNNINGTON
  1265. #ifdef DEBUG
  1266. fprintf(stderr, "Dunnington\n");
  1267. #endif
  1268. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1269. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1270. #endif
  1271. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1272. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1273. #endif
  1274. #if BUILD_COMPLEX==1
  1275. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1276. #endif
  1277. #if BUILD_COMPLEX16==1
  1278. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1279. #endif
  1280. #ifdef EXPRECISION
  1281. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1282. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1283. #endif
  1284. #endif
  1285. #ifdef NEHALEM
  1286. #ifdef DEBUG
  1287. fprintf(stderr, "Nehalem\n");
  1288. #endif
  1289. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1290. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1291. #endif
  1292. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1293. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1294. #endif
  1295. #if BUILD_COMPLEX
  1296. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1297. #endif
  1298. #if BUILD_COMPLEX16
  1299. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1300. #endif
  1301. #ifdef EXPRECISION
  1302. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1303. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1304. #endif
  1305. #endif
  1306. #ifdef SANDYBRIDGE
  1307. #ifdef DEBUG
  1308. fprintf(stderr, "Sandybridge\n");
  1309. #endif
  1310. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1311. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1312. #endif
  1313. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1314. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1315. #endif
  1316. #if BUILD_COMPLEX
  1317. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1318. #endif
  1319. #if BUILD_COMPLEX16
  1320. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1321. #endif
  1322. #ifdef EXPRECISION
  1323. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1324. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1325. #endif
  1326. #endif
  1327. #ifdef HASWELL
  1328. #ifdef DEBUG
  1329. fprintf(stderr, "Haswell\n");
  1330. #endif
  1331. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1332. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1333. #endif
  1334. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1335. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1336. #endif
  1337. #if BUILD_COMPLEX
  1338. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1339. #endif
  1340. #if BUILD_COMPLEX16
  1341. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1342. #endif
  1343. #ifdef EXPRECISION
  1344. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1345. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1346. #endif
  1347. #endif
  1348. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1349. #ifdef DEBUG
  1350. fprintf(stderr, "SkylakeX\n");
  1351. #endif
  1352. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1353. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1354. #endif
  1355. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1356. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1357. #endif
  1358. #if BUILD_COMPLEX
  1359. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1360. #endif
  1361. #if BUILD_COMPLEX16
  1362. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1363. #endif
  1364. #ifdef EXPRECISION
  1365. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1366. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1367. #endif
  1368. #endif
  1369. #ifdef OPTERON
  1370. #ifdef DEBUG
  1371. fprintf(stderr, "Opteron\n");
  1372. #endif
  1373. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1374. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1375. #endif
  1376. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1377. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1378. #endif
  1379. #if BUILD_COMPLEX
  1380. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1381. #endif
  1382. #if BUILD_COMPLEX16
  1383. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1384. #endif
  1385. #ifdef EXPRECISION
  1386. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1387. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1388. #endif
  1389. #endif
  1390. #ifdef BARCELONA
  1391. #ifdef DEBUG
  1392. fprintf(stderr, "Barcelona\n");
  1393. #endif
  1394. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1395. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1396. #endif
  1397. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1398. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1399. #endif
  1400. #if BUILD_COMPLEX
  1401. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1402. #endif
  1403. #if BUILD_COMPLEX16
  1404. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1405. #endif
  1406. #ifdef EXPRECISION
  1407. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1408. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1409. #endif
  1410. #endif
  1411. #ifdef BOBCAT
  1412. #ifdef DEBUG
  1413. fprintf(stderr, "Bobcate\n");
  1414. #endif
  1415. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1416. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1417. #endif
  1418. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1419. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1420. #endif
  1421. #if BUILD_COMPLEX
  1422. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1423. #endif
  1424. #if BUILD_COMPLEX16
  1425. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1426. #endif
  1427. #ifdef EXPRECISION
  1428. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1429. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1430. #endif
  1431. #endif
  1432. #ifdef BULLDOZER
  1433. #ifdef DEBUG
  1434. fprintf(stderr, "Bulldozer\n");
  1435. #endif
  1436. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1437. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1438. #endif
  1439. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1440. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1441. #endif
  1442. #if BUILD_COMPLEX
  1443. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1444. #endif
  1445. #if BUILD_COMPLEX16
  1446. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1447. #endif
  1448. #ifdef EXPRECISION
  1449. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1450. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1451. #endif
  1452. #endif
  1453. #ifdef EXCAVATOR
  1454. #ifdef DEBUG
  1455. fprintf(stderr, "Excavator\n");
  1456. #endif
  1457. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1458. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1459. #endif
  1460. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1461. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1462. #endif
  1463. #if BUILD_COMPLEX
  1464. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1465. #endif
  1466. #if BUILD_COMPLEX16
  1467. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1468. #endif
  1469. #ifdef EXPRECISION
  1470. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1471. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1472. #endif
  1473. #endif
  1474. #ifdef PILEDRIVER
  1475. #ifdef DEBUG
  1476. fprintf(stderr, "Piledriver\n");
  1477. #endif
  1478. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1479. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1480. #endif
  1481. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1482. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1483. #endif
  1484. #if BUILD_COMPLEX
  1485. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1486. #endif
  1487. #if BUILD_COMPLEX16
  1488. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1489. #endif
  1490. #ifdef EXPRECISION
  1491. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1492. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1493. #endif
  1494. #endif
  1495. #ifdef STEAMROLLER
  1496. #ifdef DEBUG
  1497. fprintf(stderr, "Steamroller\n");
  1498. #endif
  1499. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1500. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1501. #endif
  1502. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1503. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1504. #endif
  1505. #if BUILD_COMPLEX
  1506. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1507. #endif
  1508. #if BUILD_COMPLEX16
  1509. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1510. #endif
  1511. #ifdef EXPRECISION
  1512. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1513. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1514. #endif
  1515. #endif
  1516. #ifdef ZEN
  1517. #ifdef DEBUG
  1518. fprintf(stderr, "Zen\n");
  1519. #endif
  1520. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1521. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1522. #endif
  1523. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1524. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1525. #endif
  1526. #if BUILD_COMPLEX
  1527. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1528. #endif
  1529. #if BUILD_COMPLEX16
  1530. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1531. #endif
  1532. #ifdef EXPRECISION
  1533. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1534. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1535. #endif
  1536. #endif
  1537. #ifdef NANO
  1538. #ifdef DEBUG
  1539. fprintf(stderr, "NANO\n");
  1540. #endif
  1541. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1542. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1543. #endif
  1544. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1545. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1546. #endif
  1547. #if (BUILD_COMPLEX==1)
  1548. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1549. #endif
  1550. #if (BUILD_COMPLEX16==1)
  1551. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1552. #endif
  1553. #ifdef EXPRECISION
  1554. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1555. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1556. #endif
  1557. #endif
  1558. #if BUILD_COMPLEX==1
  1559. #ifdef CGEMM3M_DEFAULT_P
  1560. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1561. #else
  1562. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1563. #endif
  1564. #endif
  1565. #if BUILD_COMPLEX16==1
  1566. #ifdef ZGEMM3M_DEFAULT_P
  1567. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1568. #else
  1569. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1570. #endif
  1571. #endif
  1572. #ifdef EXPRECISION
  1573. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1574. #endif
  1575. #if BUILD_SINGLE == 1
  1576. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1577. #endif
  1578. #if BUILD_DOUBLE== 1
  1579. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1580. #endif
  1581. #if BUILD_COMPLEX==1
  1582. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1583. #endif
  1584. #if BUILD_COMPLEX16==1
  1585. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1586. #endif
  1587. #if BUILD_COMPLEX==1
  1588. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1589. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1590. #else
  1591. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1592. #endif
  1593. #endif
  1594. #if BUILD_COMPLEX16==1
  1595. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1596. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1597. #else
  1598. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1599. #endif
  1600. #endif
  1601. #ifdef QUAD_PRECISION
  1602. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1603. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1604. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1605. #endif
  1606. #ifdef DEBUG
  1607. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1608. #endif
  1609. #if BUILD_BFLOAT16==1
  1610. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1611. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1612. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1613. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1614. #endif
  1615. #if BUILD_SINGLE==1
  1616. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1617. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1618. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1619. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1620. #endif
  1621. #if BUILD_DOUBLE==1
  1622. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1623. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1624. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1625. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1626. #endif
  1627. #ifdef EXPRECISION
  1628. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1629. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1630. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1631. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1632. #endif
  1633. #if BUILD_COMPLEX ==1
  1634. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1635. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1636. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1637. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1638. #endif
  1639. #if BUILD_COMPLEX16 ==1
  1640. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1641. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1642. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1643. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1644. #endif
  1645. #if BUILD_COMPLEX == 1
  1646. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1647. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1648. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1649. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1650. #endif
  1651. #if BUILD_COMPLEX16 == 1
  1652. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1653. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1654. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1655. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1656. #endif
  1657. #ifdef EXPRECISION
  1658. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1659. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1660. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1661. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1662. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1663. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1664. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1665. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1666. #endif
  1667. }
  1668. #endif //POWER
  1669. #endif //ZARCH
  1670. #endif //(ARCH_LOONGARCH64)
  1671. #endif //(ARCH_MIPS64)
  1672. #endif //(ARCH_ARM64)