You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 58 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void);
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. 0, // need_amxtile_permission
  61. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  62. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  63. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  64. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  65. dsdot_kTS,
  66. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  67. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  68. ssymv_LTS, ssymv_UTS,
  69. sbgemm_kernelTS, sbgemm_betaTS,
  70. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  71. sbgemm_incopyTS, sbgemm_itcopyTS,
  72. #else
  73. sbgemm_oncopyTS, sbgemm_otcopyTS,
  74. #endif
  75. sbgemm_oncopyTS, sbgemm_otcopyTS,
  76. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  77. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  78. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  79. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  80. #else
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. #endif
  84. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  85. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  86. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  87. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  88. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  89. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  90. #else
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #endif
  94. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  95. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  96. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  97. ssymm_iutcopyTS, ssymm_iltcopyTS,
  98. #else
  99. ssymm_outcopyTS, ssymm_oltcopyTS,
  100. #endif
  101. ssymm_outcopyTS, ssymm_oltcopyTS,
  102. #ifndef NO_LAPACK
  103. sneg_tcopyTS, slaswp_ncopyTS,
  104. #else
  105. NULL,NULL,
  106. #endif
  107. #ifdef SMALL_MATRIX_OPT
  108. sbgemm_small_matrix_permitTS,
  109. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  110. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  111. #endif
  112. #endif
  113. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  114. 0, 0, 0,
  115. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  116. #ifdef SGEMM_DEFAULT_UNROLL_MN
  117. SGEMM_DEFAULT_UNROLL_MN,
  118. #else
  119. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  120. #endif
  121. #endif
  122. #ifdef HAVE_EXCLUSIVE_CACHE
  123. 1,
  124. #else
  125. 0,
  126. #endif
  127. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  128. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  129. #endif
  130. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  131. isamax_kTS,
  132. #endif
  133. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  134. isamin_kTS, ismax_kTS, ismin_kTS,
  135. snrm2_kTS, sasum_kTS,
  136. #endif
  137. #if BUILD_SINGLE == 1
  138. ssum_kTS,
  139. #endif
  140. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  141. scopy_kTS, sdot_kTS,
  142. // dsdot_kTS,
  143. srot_kTS, saxpy_kTS, srotm_kTS,
  144. #endif
  145. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  146. sscal_kTS,
  147. #endif
  148. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  149. sswap_kTS,
  150. sgemv_nTS, sgemv_tTS,
  151. #endif
  152. #if BUILD_SINGLE == 1
  153. sger_kTS,
  154. #endif
  155. #if BUILD_SINGLE == 1
  156. ssymv_LTS, ssymv_UTS,
  157. #endif
  158. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  159. #ifdef ARCH_X86_64
  160. sgemm_directTS,
  161. sgemm_direct_performantTS,
  162. #endif
  163. sgemm_kernelTS, sgemm_betaTS,
  164. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  165. sgemm_incopyTS, sgemm_itcopyTS,
  166. #else
  167. sgemm_oncopyTS, sgemm_otcopyTS,
  168. #endif
  169. sgemm_oncopyTS, sgemm_otcopyTS,
  170. #endif
  171. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  172. #ifdef SMALL_MATRIX_OPT
  173. sgemm_small_matrix_permitTS,
  174. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  175. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  176. #endif
  177. #endif
  178. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  179. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  180. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  181. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  182. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  183. #else
  184. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  185. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  186. #endif
  187. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  188. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  189. #endif
  190. #if (BUILD_SINGLE==1)
  191. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  192. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  193. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  194. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  195. #else
  196. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  197. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  198. #endif
  199. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  200. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  201. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  202. ssymm_iutcopyTS, ssymm_iltcopyTS,
  203. #else
  204. ssymm_outcopyTS, ssymm_oltcopyTS,
  205. #endif
  206. ssymm_outcopyTS, ssymm_oltcopyTS,
  207. #ifndef NO_LAPACK
  208. sneg_tcopyTS, slaswp_ncopyTS,
  209. #else
  210. NULL,NULL,
  211. #endif
  212. #endif
  213. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  214. 0, 0, 0,
  215. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  216. #ifdef DGEMM_DEFAULT_UNROLL_MN
  217. DGEMM_DEFAULT_UNROLL_MN,
  218. #else
  219. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  220. #endif
  221. #endif
  222. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  223. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  224. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  225. dnrm2_kTS, dasum_kTS,
  226. #endif
  227. #if (BUILD_DOUBLE==1)
  228. dsum_kTS,
  229. #endif
  230. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  231. dcopy_kTS, ddot_kTS,
  232. #endif
  233. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  234. dsdot_kTS,
  235. #endif
  236. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  237. drot_kTS,
  238. drotm_kTS,
  239. daxpy_kTS,
  240. dscal_kTS,
  241. dswap_kTS,
  242. dgemv_nTS, dgemv_tTS,
  243. #endif
  244. #if (BUILD_DOUBLE==1)
  245. dger_kTS,
  246. dsymv_LTS, dsymv_UTS,
  247. #endif
  248. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  249. dgemm_kernelTS, dgemm_betaTS,
  250. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  251. dgemm_incopyTS, dgemm_itcopyTS,
  252. #else
  253. dgemm_oncopyTS, dgemm_otcopyTS,
  254. #endif
  255. dgemm_oncopyTS, dgemm_otcopyTS,
  256. #endif
  257. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  258. #ifdef SMALL_MATRIX_OPT
  259. dgemm_small_matrix_permitTS,
  260. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  261. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  262. #endif
  263. #endif
  264. #if (BUILD_DOUBLE==1)
  265. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  266. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  267. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  268. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  269. #else
  270. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  271. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  272. #endif
  273. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  274. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  275. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  276. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  277. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  278. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  279. #else
  280. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  281. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  282. #endif
  283. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  284. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  285. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  286. dsymm_iutcopyTS, dsymm_iltcopyTS,
  287. #else
  288. dsymm_outcopyTS, dsymm_oltcopyTS,
  289. #endif
  290. dsymm_outcopyTS, dsymm_oltcopyTS,
  291. #ifndef NO_LAPACK
  292. dneg_tcopyTS, dlaswp_ncopyTS,
  293. #else
  294. NULL, NULL,
  295. #endif
  296. #endif
  297. #ifdef EXPRECISION
  298. 0, 0, 0,
  299. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  300. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  301. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  302. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  303. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  304. qgemv_nTS, qgemv_tTS, qger_kTS,
  305. qsymv_LTS, qsymv_UTS,
  306. qrotm_kTS,
  307. qgemm_kernelTS, qgemm_betaTS,
  308. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  309. qgemm_incopyTS, qgemm_itcopyTS,
  310. #else
  311. qgemm_oncopyTS, qgemm_otcopyTS,
  312. #endif
  313. qgemm_oncopyTS, qgemm_otcopyTS,
  314. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  315. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  316. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  317. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  318. #else
  319. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  320. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  321. #endif
  322. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  323. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  324. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  325. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  326. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  327. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  328. #else
  329. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  330. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  331. #endif
  332. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  333. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  334. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  335. qsymm_iutcopyTS, qsymm_iltcopyTS,
  336. #else
  337. qsymm_outcopyTS, qsymm_oltcopyTS,
  338. #endif
  339. qsymm_outcopyTS, qsymm_oltcopyTS,
  340. #ifndef NO_LAPACK
  341. qneg_tcopyTS, qlaswp_ncopyTS,
  342. #else
  343. NULL, NULL,
  344. #endif
  345. #endif
  346. #if (BUILD_COMPLEX)
  347. 0, 0, 0,
  348. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  349. #ifdef CGEMM_DEFAULT_UNROLL_MN
  350. CGEMM_DEFAULT_UNROLL_MN,
  351. #else
  352. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  353. #endif
  354. #if (BUILD_COMPLEX)
  355. camax_kTS, camin_kTS,
  356. #endif
  357. #if (BUILD_COMPLEX)
  358. icamax_kTS,
  359. #endif
  360. #if (BUILD_COMPLEX)
  361. icamin_kTS,
  362. cnrm2_kTS, casum_kTS, csum_kTS,
  363. #endif
  364. #if (BUILD_COMPLEX)
  365. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  366. #endif
  367. #if (BUILD_COMPLEX)
  368. csrot_kTS,
  369. #endif
  370. #if (BUILD_COMPLEX)
  371. caxpy_kTS,
  372. caxpyc_kTS,
  373. cscal_kTS,
  374. cswap_kTS,
  375. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  376. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  377. #endif
  378. #if (BUILD_COMPLEX)
  379. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  380. csymv_LTS, csymv_UTS,
  381. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  382. #endif
  383. #if (BUILD_COMPLEX)
  384. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  385. cgemm_betaTS,
  386. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  387. cgemm_incopyTS, cgemm_itcopyTS,
  388. #else
  389. cgemm_oncopyTS, cgemm_otcopyTS,
  390. #endif
  391. cgemm_oncopyTS, cgemm_otcopyTS,
  392. #ifdef SMALL_MATRIX_OPT
  393. cgemm_small_matrix_permitTS,
  394. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  395. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  396. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  397. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  398. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  399. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  400. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  401. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  402. #endif
  403. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  404. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  405. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  406. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  407. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  408. #else
  409. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  410. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  411. #endif
  412. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  413. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  414. #endif
  415. #endif
  416. #if (BUILD_COMPLEX)
  417. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  418. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  419. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  420. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  421. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  422. #else
  423. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  424. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  425. #endif
  426. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  427. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  428. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  429. csymm_iutcopyTS, csymm_iltcopyTS,
  430. #else
  431. csymm_outcopyTS, csymm_oltcopyTS,
  432. #endif
  433. csymm_outcopyTS, csymm_oltcopyTS,
  434. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  435. chemm_iutcopyTS, chemm_iltcopyTS,
  436. #else
  437. chemm_outcopyTS, chemm_oltcopyTS,
  438. #endif
  439. chemm_outcopyTS, chemm_oltcopyTS,
  440. 0, 0, 0,
  441. #if (USE_GEMM3M)
  442. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  443. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  444. #else
  445. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  446. #endif
  447. cgemm3m_kernelTS,
  448. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  449. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  450. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  451. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  452. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  453. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  454. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  455. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  456. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  457. csymm3m_oucopybTS, csymm3m_olcopybTS,
  458. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  459. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  460. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  461. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  462. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  463. chemm3m_oucopybTS, chemm3m_olcopybTS,
  464. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  465. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  466. #else
  467. 0, 0, 0,
  468. NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. NULL, NULL,
  483. NULL, NULL,
  484. NULL, NULL,
  485. NULL, NULL,
  486. NULL, NULL,
  487. #endif
  488. #endif
  489. #if (BUILD_COMPLEX)
  490. #ifndef NO_LAPACK
  491. cneg_tcopyTS,
  492. claswp_ncopyTS,
  493. #else
  494. NULL, NULL,
  495. #endif
  496. #endif
  497. #if BUILD_COMPLEX16 == 1
  498. 0, 0, 0,
  499. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  500. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  501. ZGEMM_DEFAULT_UNROLL_MN,
  502. #else
  503. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  504. #endif
  505. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  506. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  507. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  508. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  509. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  510. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  511. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  512. zsymv_LTS, zsymv_UTS,
  513. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  514. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  515. zgemm_betaTS,
  516. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  517. zgemm_incopyTS, zgemm_itcopyTS,
  518. #else
  519. zgemm_oncopyTS, zgemm_otcopyTS,
  520. #endif
  521. zgemm_oncopyTS, zgemm_otcopyTS,
  522. #ifdef SMALL_MATRIX_OPT
  523. zgemm_small_matrix_permitTS,
  524. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  525. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  526. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  527. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  528. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  529. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  530. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  531. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  532. #endif
  533. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  534. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  535. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  536. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  537. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  538. #else
  539. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  540. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  541. #endif
  542. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  543. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  544. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  545. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  546. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  547. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  548. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  549. #else
  550. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  551. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  552. #endif
  553. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  554. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  555. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  556. zsymm_iutcopyTS, zsymm_iltcopyTS,
  557. #else
  558. zsymm_outcopyTS, zsymm_oltcopyTS,
  559. #endif
  560. zsymm_outcopyTS, zsymm_oltcopyTS,
  561. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  562. zhemm_iutcopyTS, zhemm_iltcopyTS,
  563. #else
  564. zhemm_outcopyTS, zhemm_oltcopyTS,
  565. #endif
  566. zhemm_outcopyTS, zhemm_oltcopyTS,
  567. 0, 0, 0,
  568. #if (USE_GEMM3M)
  569. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  570. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  571. #else
  572. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  573. #endif
  574. zgemm3m_kernelTS,
  575. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  576. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  577. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  578. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  579. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  580. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  581. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  582. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  583. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  584. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  585. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  586. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  587. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  588. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  589. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  590. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  591. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  592. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  593. #else
  594. 0, 0, 0,
  595. NULL,
  596. NULL, NULL,
  597. NULL, NULL,
  598. NULL, NULL,
  599. NULL, NULL,
  600. NULL, NULL,
  601. NULL, NULL,
  602. NULL, NULL,
  603. NULL, NULL,
  604. NULL, NULL,
  605. NULL, NULL,
  606. NULL, NULL,
  607. NULL, NULL,
  608. NULL, NULL,
  609. NULL, NULL,
  610. NULL, NULL,
  611. NULL, NULL,
  612. NULL, NULL,
  613. NULL, NULL,
  614. #endif
  615. #ifndef NO_LAPACK
  616. zneg_tcopyTS, zlaswp_ncopyTS,
  617. #else
  618. NULL, NULL,
  619. #endif
  620. #endif
  621. #ifdef EXPRECISION
  622. 0, 0, 0,
  623. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  624. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  625. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  626. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  627. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  628. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  629. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  630. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  631. xsymv_LTS, xsymv_UTS,
  632. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  633. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  634. xgemm_betaTS,
  635. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  636. xgemm_incopyTS, xgemm_itcopyTS,
  637. #else
  638. xgemm_oncopyTS, xgemm_otcopyTS,
  639. #endif
  640. xgemm_oncopyTS, xgemm_otcopyTS,
  641. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  642. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  643. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  644. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  645. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  646. #else
  647. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  648. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  649. #endif
  650. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  651. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  652. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  653. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  654. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  655. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  656. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  657. #else
  658. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  659. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  660. #endif
  661. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  662. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  663. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  664. xsymm_iutcopyTS, xsymm_iltcopyTS,
  665. #else
  666. xsymm_outcopyTS, xsymm_oltcopyTS,
  667. #endif
  668. xsymm_outcopyTS, xsymm_oltcopyTS,
  669. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  670. xhemm_iutcopyTS, xhemm_iltcopyTS,
  671. #else
  672. xhemm_outcopyTS, xhemm_oltcopyTS,
  673. #endif
  674. xhemm_outcopyTS, xhemm_oltcopyTS,
  675. 0, 0, 0,
  676. #if (USE_GEMM3M)
  677. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  678. xgemm3m_kernelTS,
  679. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  680. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  681. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  682. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  683. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  684. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  685. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  686. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  687. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  688. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  689. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  690. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  691. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  692. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  693. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  694. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  695. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  696. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  697. #else
  698. 0, 0, 0,
  699. NULL,
  700. NULL, NULL,
  701. NULL, NULL,
  702. NULL, NULL,
  703. NULL, NULL,
  704. NULL, NULL,
  705. NULL, NULL,
  706. NULL, NULL,
  707. NULL, NULL,
  708. NULL, NULL,
  709. NULL, NULL,
  710. NULL, NULL,
  711. NULL, NULL,
  712. NULL, NULL,
  713. NULL, NULL,
  714. NULL, NULL,
  715. NULL, NULL,
  716. NULL, NULL,
  717. NULL, NULL,
  718. #endif
  719. #ifndef NO_LAPACK
  720. xneg_tcopyTS, xlaswp_ncopyTS,
  721. #else
  722. NULL, NULL,
  723. #endif
  724. #endif
  725. init_parameter,
  726. SNUMOPT, DNUMOPT, QNUMOPT,
  727. #if BUILD_SINGLE == 1
  728. saxpby_kTS,
  729. #endif
  730. #if BUILD_DOUBLE == 1
  731. daxpby_kTS,
  732. #endif
  733. #if BUILD_COMPLEX == 1
  734. caxpby_kTS,
  735. #endif
  736. #if BUILD_COMPLEX16== 1
  737. zaxpby_kTS,
  738. #endif
  739. #if BUILD_SINGLE == 1
  740. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  741. #endif
  742. #if BUILD_DOUBLE== 1
  743. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  744. #endif
  745. #if BUILD_COMPLEX == 1
  746. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  747. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  748. #endif
  749. #if BUILD_COMPLEX16 == 1
  750. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  751. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  752. #endif
  753. #if BUILD_SINGLE == 1
  754. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  755. #endif
  756. #if BUILD_DOUBLE== 1
  757. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  758. #endif
  759. #if BUILD_COMPLEX== 1
  760. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  761. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  762. #endif
  763. #if BUILD_COMPLEX16==1
  764. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  765. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  766. #endif
  767. #if BUILD_SINGLE == 1
  768. sgeadd_kTS,
  769. #endif
  770. #if BUILD_DOUBLE==1
  771. dgeadd_kTS,
  772. #endif
  773. #if BUILD_COMPLEX==1
  774. cgeadd_kTS,
  775. #endif
  776. #if BUILD_COMPLEX16==1
  777. zgeadd_kTS,
  778. #endif
  779. };
  780. #if (ARCH_ARM64)
  /*
   * init_parameter (ARM64 build)
   *
   * Stores the GEMM blocking fields (p, q, r) of TABLE_NAME from the
   * matching *_DEFAULT_* macros. A precision is only written when its
   * BUILD_* switch enables it; the real single/double fields are also
   * written when only the corresponding complex type is built.
   */
  static void init_parameter(void) {

    /* bfloat16 */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* single precision real (needed by complex single as well) */
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* double precision real (needed by complex double as well) */
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* single precision complex */
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* double precision complex */
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* extended precision, real (q*) and complex (x*) */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /*
     * GEMM3M: use a dedicated *GEMM3M_DEFAULT_* macro when one exists,
     * otherwise reuse the sgemm/dgemm value stored above. This section
     * therefore has to stay after the plain GEMM assignments.
     */
  #if (USE_GEMM3M)

  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* extended precision 3M simply mirrors the qgemm blocking */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif

  #endif /* USE_GEMM3M */
  }
  873. #else // (ARCH_ARM64)
  874. #if defined(ARCH_MIPS64)
  875. static void init_parameter(void) {
  876. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  877. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  878. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  879. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  880. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  881. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  882. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  883. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  884. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  885. TABLE_NAME.dgemm_r = 640;
  886. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  887. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  888. #ifdef EXPRECISION
  889. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  890. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  891. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  892. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  893. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  894. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  895. #endif
  896. #if defined(USE_GEMM3M)
  897. #ifdef CGEMM3M_DEFAULT_P
  898. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  899. #else
  900. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  901. #endif
  902. #ifdef ZGEMM3M_DEFAULT_P
  903. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  904. #else
  905. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  906. #endif
  907. #ifdef CGEMM3M_DEFAULT_Q
  908. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  909. #else
  910. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  911. #endif
  912. #ifdef ZGEMM3M_DEFAULT_Q
  913. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  914. #else
  915. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  916. #endif
  917. #ifdef CGEMM3M_DEFAULT_R
  918. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  919. #else
  920. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  921. #endif
  922. #ifdef ZGEMM3M_DEFAULT_R
  923. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  924. #else
  925. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  926. #endif
  927. #ifdef EXPRECISION
  928. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  929. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  930. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  931. #endif
  932. #endif
  933. }
  934. #else // (ARCH_MIPS64)
  935. #if (ARCH_LOONGARCH64)
  /*
   * Reads CPUCFG configuration word 0x14 and turns it into a cache size in MB.
   *
   * The word is split into three fields:
   *   bits 15..0  -> a count stored minus one,
   *   bits 23..16 -> a power-of-two exponent,
   *   bits 30..24 -> a second power-of-two exponent.
   * NOTE(review): presumably ways-1 / log2(sets) / log2(line size) of the L3
   * cache as described in the LoongArch manual - confirm before relying on it.
   *
   * The product is computed in double precision and truncated to int on return.
   */
  static int get_L3_size() {
    int cfg_word = 0;
    int cfg_index = 0x14;

    __asm__ volatile (
      "cpucfg %[word], %[index]"
      : [word]"=r"(cfg_word)
      : [index]"r"(cfg_index)
      : "memory"
    );

    const int count     = (cfg_word & 0xffff) + 1;
    const int exp_first = (cfg_word >> 16) & 0xff;
    const int exp_second = (cfg_word >> 24) & 0x7f;

    /* bytes = count * 2^exp_first * 2^exp_second; two divisions by 1024 give MB */
    return count * pow(2, exp_first) * pow(2, exp_second) / 1024 / 1024;
  }
  946. static void init_parameter(void) {
  947. #ifdef BUILD_BFLOAT16
  948. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  949. #endif
  950. #ifdef BUILD_BFLOAT16
  951. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  952. #endif
  953. #if defined(LA464)
  954. int L3_size = get_L3_size();
  955. #ifdef SMP
  956. if(blas_num_threads == 1){
  957. #endif
  958. //single thread
  959. if (L3_size == 32){ // 3C5000 and 3D5000
  960. TABLE_NAME.sgemm_p = 256;
  961. TABLE_NAME.sgemm_q = 384;
  962. TABLE_NAME.sgemm_r = 8192;
  963. TABLE_NAME.dgemm_p = 112;
  964. TABLE_NAME.dgemm_q = 289;
  965. TABLE_NAME.dgemm_r = 4096;
  966. TABLE_NAME.cgemm_p = 128;
  967. TABLE_NAME.cgemm_q = 256;
  968. TABLE_NAME.cgemm_r = 4096;
  969. TABLE_NAME.zgemm_p = 128;
  970. TABLE_NAME.zgemm_q = 128;
  971. TABLE_NAME.zgemm_r = 2048;
  972. } else { // 3A5000 and 3C5000L
  973. TABLE_NAME.sgemm_p = 256;
  974. TABLE_NAME.sgemm_q = 384;
  975. TABLE_NAME.sgemm_r = 4096;
  976. TABLE_NAME.dgemm_p = 112;
  977. TABLE_NAME.dgemm_q = 300;
  978. TABLE_NAME.dgemm_r = 3024;
  979. TABLE_NAME.cgemm_p = 128;
  980. TABLE_NAME.cgemm_q = 256;
  981. TABLE_NAME.cgemm_r = 2048;
  982. TABLE_NAME.zgemm_p = 128;
  983. TABLE_NAME.zgemm_q = 128;
  984. TABLE_NAME.zgemm_r = 1024;
  985. }
  986. #ifdef SMP
  987. }else{
  988. //multi thread
  989. if (L3_size == 32){ // 3C5000 and 3D5000
  990. TABLE_NAME.sgemm_p = 256;
  991. TABLE_NAME.sgemm_q = 384;
  992. TABLE_NAME.sgemm_r = 1024;
  993. TABLE_NAME.dgemm_p = 112;
  994. TABLE_NAME.dgemm_q = 289;
  995. TABLE_NAME.dgemm_r = 342;
  996. TABLE_NAME.cgemm_p = 128;
  997. TABLE_NAME.cgemm_q = 256;
  998. TABLE_NAME.cgemm_r = 512;
  999. TABLE_NAME.zgemm_p = 128;
  1000. TABLE_NAME.zgemm_q = 128;
  1001. TABLE_NAME.zgemm_r = 512;
  1002. } else { // 3A5000 and 3C5000L
  1003. TABLE_NAME.sgemm_p = 256;
  1004. TABLE_NAME.sgemm_q = 384;
  1005. TABLE_NAME.sgemm_r = 2048;
  1006. TABLE_NAME.dgemm_p = 112;
  1007. TABLE_NAME.dgemm_q = 300;
  1008. TABLE_NAME.dgemm_r = 738;
  1009. TABLE_NAME.cgemm_p = 128;
  1010. TABLE_NAME.cgemm_q = 256;
  1011. TABLE_NAME.cgemm_r = 1024;
  1012. TABLE_NAME.zgemm_p = 128;
  1013. TABLE_NAME.zgemm_q = 128;
  1014. TABLE_NAME.zgemm_r = 1024;
  1015. }
  1016. }
  1017. #endif
  1018. #else
  1019. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1020. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1021. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1022. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1023. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1024. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1025. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1026. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1027. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1028. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1029. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1030. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1031. #endif
  1032. #ifdef BUILD_BFLOAT16
  1033. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1034. #endif
  1035. }
  1036. #else // (ARCH_LOONGARCH64)
  1037. #if (ARCH_POWER)
  1038. static void init_parameter(void) {
  1039. #ifdef BUILD_BFLOAT16
  1040. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1041. #endif
  1042. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1043. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1044. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1045. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1046. #ifdef BUILD_BFLOAT16
  1047. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1048. #endif
  1049. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1050. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1051. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1052. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1053. #ifdef BUILD_BFLOAT16
  1054. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1055. #endif
  1056. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1057. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1058. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1059. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1060. }
  1061. #else //POWER
  1062. #if (ARCH_ZARCH)
  1063. static void init_parameter(void) {
  1064. #ifdef BUILD_BFLOAT16
  1065. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1066. #endif
  1067. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1068. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1069. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1070. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1071. #ifdef BUILD_BFLOAT16
  1072. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1073. #endif
  1074. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1075. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1076. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1077. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1078. #ifdef BUILD_BFLOAT16
  1079. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1080. #endif
  1081. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1082. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1083. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1084. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1085. }
  1086. #else //ZARCH
  1087. #if (ARCH_RISCV64)
  1088. static void init_parameter(void) {
  1089. #ifdef BUILD_BFLOAT16
  1090. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1091. #endif
  1092. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1093. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1094. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1095. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1096. #ifdef BUILD_BFLOAT16
  1097. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1098. #endif
  1099. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1100. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1101. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1102. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1103. #ifdef BUILD_BFLOAT16
  1104. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1105. #endif
  1106. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1107. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1108. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1109. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1110. }
  1111. #else //RISCV64
  1112. #ifdef ARCH_X86
  /*
   * Legacy L2 size lookup through CPUID leaf 2 (cache descriptor bytes).
   *
   * Leaf 2 returns up to 15 one-byte descriptors spread over EAX (upper three
   * bytes only; the low byte of EAX is not a descriptor and is skipped), EBX,
   * ECX and EDX. Each descriptor is matched against a table of known L2
   * encodings and the size of the first match is returned.
   *
   * Returns the L2 cache size in KB. If no descriptor matches, a warning is
   * printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){

    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /*
     * A register whose bit 31 is set is reserved and carries no valid
     * descriptors. Clearing it turns all of its bytes into the null
     * descriptor (0x00), which matches nothing below.
     */
    if (eax & 0x80000000) eax = 0;
    if (ebx & 0x80000000) ebx = 0;
    if (ecx & 0x80000000) ecx = 0;
    if (edx & 0x80000000) edx = 0;

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* 3 MB L2 = 3 * 1024 KB (was mistyped as 3184) */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* No known L2 descriptor found: warn and fall back to a default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1183. #endif
  /*
   * Returns the L2 cache size used for tuning (in KB, judging by the "256k"
   * fallback message).
   *
   * Lookup order:
   *   1. a non-zero value of the OPENBLAS_L2_SIZE environment setting,
   *   2. bits 31..16 of ECX from CPUID leaf 0x80000006,
   *   3. when that field is not positive:
   *        - ARCH_X86 builds: the result of get_l2_size_old(),
   *        - other builds:    a warning on stderr and the value 256.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int size = readenv_atoi("OPENBLAS_L2_SIZE");

    if (size == 0) {
      /* no override given: ask the CPU */
      cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
      size = BITMASK(ecx, 16, 0xffff);

  #ifdef ARCH_X86
      if (size <= 0) {
        size = get_l2_size_old();
      }
  #else
      if (size <= 0) {
        fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
        size = 256;
      }
  #endif
    }

    return size;
  }
  /*
   * Returns the 14-bit field at bits 31..18 of EDX from CPUID leaf 0x80000006,
   * multiplied by 512.
   * NOTE(review): presumably the L3 size in KB (field counted in 512 KB
   * units) - confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){

    int eax, ebx, ecx, edx;
    int units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    units = BITMASK(edx, 18, 0x3fff);

    return units * 512;
  }
  1207. static void init_parameter(void) {
  1208. int l2 = get_l2_size();
  1209. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1210. /* where the GEMM unrolling parameters do not depend on l2 */
  1211. #ifdef BUILD_BFLOAT16
  1212. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1213. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1214. #endif
  1215. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1216. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1217. #endif
  1218. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1219. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1220. #endif
  1221. #if BUILD_COMPLEX == 1
  1222. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1223. #endif
  1224. #if BUILD_COMPLEX16==1
  1225. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1226. #endif
  1227. #if BUILD_COMPLEX == 1
  1228. #ifdef CGEMM3M_DEFAULT_Q
  1229. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1230. #else
  1231. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1232. #endif
  1233. #endif
  1234. #if BUILD_COMPLEX16 == 1
  1235. #ifdef ZGEMM3M_DEFAULT_Q
  1236. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1237. #else
  1238. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1239. #endif
  1240. #endif
  1241. #ifdef EXPRECISION
  1242. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1243. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1244. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1245. #endif
  1246. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1247. #ifdef DEBUG
  1248. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1249. #endif
  1250. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1251. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1252. #endif
  1253. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1254. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1255. #endif
  1256. #if BUILD_COMPLEX==1
  1257. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1258. #endif
  1259. #if BUILD_COMPLEX16==1
  1260. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1261. #endif
  1262. #ifdef EXPRECISION
  1263. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1264. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1265. #endif
  1266. #endif
  1267. #ifdef CORE_NORTHWOOD
  1268. #ifdef DEBUG
  1269. fprintf(stderr, "Northwood\n");
  1270. #endif
  1271. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1272. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1273. #endif
  1274. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1275. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1276. #endif
  1277. #if BUILD_COMPLEX==1
  1278. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1279. #endif
  1280. #if BUILD_COMPLEX16==1
  1281. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1282. #endif
  1283. #ifdef EXPRECISION
  1284. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1285. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1286. #endif
  1287. #endif
  1288. #ifdef ATOM
  1289. #ifdef DEBUG
  1290. fprintf(stderr, "Atom\n");
  1291. #endif
  1292. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1293. TABLE_NAME.sgemm_p = 256;
  1294. #endif
  1295. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1296. TABLE_NAME.dgemm_p = 128;
  1297. #endif
  1298. #if BUILD_COMPLEX==1
  1299. TABLE_NAME.cgemm_p = 128;
  1300. #endif
  1301. #if BUILD_COMPLEX16==1
  1302. TABLE_NAME.zgemm_p = 64;
  1303. #endif
  1304. #ifdef EXPRECISION
  1305. TABLE_NAME.qgemm_p = 64;
  1306. TABLE_NAME.xgemm_p = 32;
  1307. #endif
  1308. #endif
  1309. #ifdef CORE_PRESCOTT
  1310. #ifdef DEBUG
  1311. fprintf(stderr, "Prescott\n");
  1312. #endif
  1313. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1314. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1315. #endif
  1316. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1317. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1318. #endif
  1319. #if BUILD_COMPLEX==1
  1320. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1321. #endif
  1322. #if BUILD_COMPLEX16 == 1
  1323. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1324. #endif
  1325. #ifdef EXPRECISION
  1326. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1327. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1328. #endif
  1329. #endif
  1330. #ifdef CORE2
  1331. #ifdef DEBUG
  1332. fprintf(stderr, "Core2\n");
  1333. #endif
  1334. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1335. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1336. #endif
  1337. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1338. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1339. #endif
  1340. #if BUILD_COMPLEX==1
  1341. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1342. #endif
  1343. #if BUILD_COMPLEX16==1
  1344. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1345. #endif
  1346. #ifdef EXPRECISION
  1347. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1348. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1349. #endif
  1350. #endif
  1351. #ifdef PENRYN
  1352. #ifdef DEBUG
  1353. fprintf(stderr, "Penryn\n");
  1354. #endif
  1355. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1356. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1357. #endif
  1358. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1359. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1360. #endif
  1361. #if BUILD_COMPLEX==1
  1362. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1363. #endif
  1364. #if BUILD_COMPLEX16==1
  1365. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1366. #endif
  1367. #ifdef EXPRECISION
  1368. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1369. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1370. #endif
  1371. #endif
  1372. #ifdef DUNNINGTON
  1373. #ifdef DEBUG
  1374. fprintf(stderr, "Dunnington\n");
  1375. #endif
  1376. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1377. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1378. #endif
  1379. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1380. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1381. #endif
  1382. #if BUILD_COMPLEX==1
  1383. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1384. #endif
  1385. #if BUILD_COMPLEX16==1
  1386. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1387. #endif
  1388. #ifdef EXPRECISION
  1389. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1390. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1391. #endif
  1392. #endif
  1393. #ifdef NEHALEM
  1394. #ifdef DEBUG
  1395. fprintf(stderr, "Nehalem\n");
  1396. #endif
  1397. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1398. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1399. #endif
  1400. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1401. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1402. #endif
  1403. #if BUILD_COMPLEX
  1404. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1405. #endif
  1406. #if BUILD_COMPLEX16
  1407. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1408. #endif
  1409. #ifdef EXPRECISION
  1410. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1411. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1412. #endif
  1413. #endif
  1414. #ifdef SANDYBRIDGE
  1415. #ifdef DEBUG
  1416. fprintf(stderr, "Sandybridge\n");
  1417. #endif
  1418. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1419. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1420. #endif
  1421. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1422. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1423. #endif
  1424. #if BUILD_COMPLEX
  1425. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1426. #endif
  1427. #if BUILD_COMPLEX16
  1428. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1429. #endif
  1430. #ifdef EXPRECISION
  1431. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1432. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1433. #endif
  1434. #endif
  1435. #ifdef HASWELL
  1436. #ifdef DEBUG
  1437. fprintf(stderr, "Haswell\n");
  1438. #endif
  1439. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1440. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1441. #endif
  1442. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1443. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1444. #endif
  1445. #if BUILD_COMPLEX
  1446. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1447. #endif
  1448. #if BUILD_COMPLEX16
  1449. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1450. #endif
  1451. #ifdef EXPRECISION
  1452. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1453. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1454. #endif
  1455. #endif
  1456. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1457. #ifdef DEBUG
  1458. fprintf(stderr, "SkylakeX\n");
  1459. #endif
  1460. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1461. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1462. #endif
  1463. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1464. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1465. #endif
  1466. #if BUILD_COMPLEX
  1467. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1468. #endif
  1469. #if BUILD_COMPLEX16
  1470. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1471. #endif
  1472. #ifdef EXPRECISION
  1473. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1474. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1475. #endif
  1476. #endif
  1477. #ifdef OPTERON
  1478. #ifdef DEBUG
  1479. fprintf(stderr, "Opteron\n");
  1480. #endif
  1481. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1482. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1483. #endif
  1484. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1485. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1486. #endif
  1487. #if BUILD_COMPLEX
  1488. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1489. #endif
  1490. #if BUILD_COMPLEX16
  1491. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1492. #endif
  1493. #ifdef EXPRECISION
  1494. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1495. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1496. #endif
  1497. #endif
  1498. #ifdef BARCELONA
  1499. #ifdef DEBUG
  1500. fprintf(stderr, "Barcelona\n");
  1501. #endif
  1502. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1503. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1504. #endif
  1505. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1506. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1507. #endif
  1508. #if BUILD_COMPLEX
  1509. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1510. #endif
  1511. #if BUILD_COMPLEX16
  1512. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1513. #endif
  1514. #ifdef EXPRECISION
  1515. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1516. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1517. #endif
  1518. #endif
  1519. #ifdef BOBCAT
  1520. #ifdef DEBUG
  1521. fprintf(stderr, "Bobcate\n");
  1522. #endif
  1523. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1524. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1525. #endif
  1526. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1527. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1528. #endif
  1529. #if BUILD_COMPLEX
  1530. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1531. #endif
  1532. #if BUILD_COMPLEX16
  1533. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1534. #endif
  1535. #ifdef EXPRECISION
  1536. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1537. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1538. #endif
  1539. #endif
  1540. #ifdef BULLDOZER
  1541. #ifdef DEBUG
  1542. fprintf(stderr, "Bulldozer\n");
  1543. #endif
  1544. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1545. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1546. #endif
  1547. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1548. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1549. #endif
  1550. #if BUILD_COMPLEX
  1551. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1552. #endif
  1553. #if BUILD_COMPLEX16
  1554. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1555. #endif
  1556. #ifdef EXPRECISION
  1557. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1558. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1559. #endif
  1560. #endif
  1561. #ifdef EXCAVATOR
  1562. #ifdef DEBUG
  1563. fprintf(stderr, "Excavator\n");
  1564. #endif
  1565. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1566. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1567. #endif
  1568. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1569. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1570. #endif
  1571. #if BUILD_COMPLEX
  1572. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1573. #endif
  1574. #if BUILD_COMPLEX16
  1575. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1576. #endif
  1577. #ifdef EXPRECISION
  1578. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1579. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1580. #endif
  1581. #endif
  1582. #ifdef PILEDRIVER
  1583. #ifdef DEBUG
  1584. fprintf(stderr, "Piledriver\n");
  1585. #endif
  1586. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1587. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1588. #endif
  1589. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1590. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1591. #endif
  1592. #if BUILD_COMPLEX
  1593. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1594. #endif
  1595. #if BUILD_COMPLEX16
  1596. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1597. #endif
  1598. #ifdef EXPRECISION
  1599. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1600. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1601. #endif
  1602. #endif
  1603. #ifdef STEAMROLLER
  1604. #ifdef DEBUG
  1605. fprintf(stderr, "Steamroller\n");
  1606. #endif
  1607. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1608. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1609. #endif
  1610. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1611. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1612. #endif
  1613. #if BUILD_COMPLEX
  1614. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1615. #endif
  1616. #if BUILD_COMPLEX16
  1617. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1618. #endif
  1619. #ifdef EXPRECISION
  1620. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1621. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1622. #endif
  1623. #endif
  1624. #ifdef ZEN
  1625. #ifdef DEBUG
  1626. fprintf(stderr, "Zen\n");
  1627. #endif
  1628. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1629. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1630. #endif
  1631. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1632. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1633. #endif
  1634. #if BUILD_COMPLEX
  1635. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1636. #endif
  1637. #if BUILD_COMPLEX16
  1638. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1639. #endif
  1640. #ifdef EXPRECISION
  1641. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1642. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1643. #endif
  1644. #endif
  1645. #ifdef NANO
  1646. #ifdef DEBUG
  1647. fprintf(stderr, "NANO\n");
  1648. #endif
  1649. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1650. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1651. #endif
  1652. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1653. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1654. #endif
  1655. #if (BUILD_COMPLEX==1)
  1656. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1657. #endif
  1658. #if (BUILD_COMPLEX16==1)
  1659. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1660. #endif
  1661. #ifdef EXPRECISION
  1662. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1663. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1664. #endif
  1665. #endif
  1666. #ifdef SAPPHIRERAPIDS
  1667. #if (BUILD_BFLOAT16 == 1)
  1668. TABLE_NAME.need_amxtile_permission = 1;
  1669. #endif
  1670. #endif
  1671. #if BUILD_COMPLEX==1
  1672. #ifdef CGEMM3M_DEFAULT_P
  1673. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1674. #else
  1675. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1676. #endif
  1677. #endif
  1678. #if BUILD_COMPLEX16==1
  1679. #ifdef ZGEMM3M_DEFAULT_P
  1680. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1681. #else
  1682. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1683. #endif
  1684. #endif
  1685. #ifdef EXPRECISION
  1686. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1687. #endif
  1688. #if BUILD_SINGLE == 1
  1689. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1690. #endif
  1691. #if BUILD_DOUBLE== 1
  1692. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1693. #endif
  1694. #if BUILD_COMPLEX==1
  1695. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1696. #endif
  1697. #if BUILD_COMPLEX16==1
  1698. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1699. #endif
  1700. #if BUILD_COMPLEX==1
  1701. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1702. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1703. #else
  1704. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1705. #endif
  1706. #endif
  1707. #if BUILD_COMPLEX16==1
  1708. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1709. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1710. #else
  1711. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1712. #endif
  1713. #endif
  1714. #ifdef QUAD_PRECISION
  1715. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1716. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1717. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1718. #endif
  1719. #ifdef DEBUG
  1720. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1721. #endif
  1722. #if BUILD_BFLOAT16==1
  1723. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1724. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1725. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1726. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1727. #endif
  1728. #if BUILD_SINGLE==1
  1729. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1730. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1731. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1732. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1733. #endif
  1734. #if BUILD_DOUBLE==1
  1735. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1736. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1737. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1738. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1739. #endif
  1740. #ifdef EXPRECISION
  1741. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1742. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1743. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1744. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1745. #endif
  1746. #if BUILD_COMPLEX ==1
  1747. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1748. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1749. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1750. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1751. #endif
  1752. #if BUILD_COMPLEX16 ==1
  1753. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1754. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1755. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1756. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1757. #endif
  1758. #if BUILD_COMPLEX == 1
  1759. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1760. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1761. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1762. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1763. #endif
  1764. #if BUILD_COMPLEX16 == 1
  1765. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1766. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1767. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1768. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1769. #endif
  1770. #ifdef EXPRECISION
  1771. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1772. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1773. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1774. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1775. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1776. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1777. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1778. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1779. #endif
  1780. }
  1781. #endif //RISCV64
  1782. #endif //POWER
  1783. #endif //ZARCH
  1784. #endif //(ARCH_LOONGARCH64)
  1785. #endif //(ARCH_MIPS64)
  1786. #endif //(ARCH_ARM64)