You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 42 kB

7 years ago
5 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void);
  /*
   * Static definition of the kernel/parameter table TABLE_NAME (type gotoblas_t).
   *
   * The initializer is purely positional: every value below is matched to a
   * struct field by its position, so entries must not be added, removed or
   * reordered without making the same change to the gotoblas_t declaration
   * (declared outside this file view -- verify against it).
   *
   * Layout visible here: three leading default values, then one section per
   * precision (optional BUILD_HALF, s, d, optional q, c, z, optional x), then
   * a common tail that stores the init_parameter callback and extension kernels.
   * NOTE(review): the "TS" suffix on kernel names is presumably expanded by
   * macros from the included headers -- confirm.
   */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /*
   * ---- BUILD_HALF section (compiled only when BUILD_HALF is defined) ----
   * Starts with "0, 0, 0," -- presumably the shgemm_p/q/r slots that
   * init_parameter assigns at run time (TODO confirm against gotoblas_t) --
   * followed by the SHGEMM unroll values (MN falls back to MAX(M, N)).
   * NOTE(review): apart from shgemm_kernel/beta/copy, this section lists the
   * single-precision s* kernels and selects the trsm/trmm/symm copy routines
   * with SGEMM_DEFAULT_UNROLL_*, not SHGEMM_* -- confirm this is intended.
   */
  49. #ifdef BUILD_HALF
  50. 0, 0, 0,
  51. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  53. SHGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  58. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  59. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  60. dsdot_kTS,
  61. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  62. sgemv_nTS, sgemv_tTS, sger_kTS,
  63. ssymv_LTS, ssymv_UTS,
  64. shgemm_kernelTS, shgemm_betaTS,
  /*
   * Copy-routine pattern used throughout this table: when UNROLL_M differs
   * from UNROLL_N the first slot group gets the i*copy routines, otherwise it
   * gets the o*copy routines; the o*copy routines are then listed
   * unconditionally for the following slot group.
   */
  65. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  66. shgemm_incopyTS, shgemm_itcopyTS,
  67. #else
  68. shgemm_oncopyTS, shgemm_otcopyTS,
  69. #endif
  70. shgemm_oncopyTS, shgemm_otcopyTS,
  71. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  72. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  73. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  74. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  75. #else
  76. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  77. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  78. #endif
  79. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  80. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  81. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  82. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  83. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  84. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  85. #else
  86. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  87. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  88. #endif
  89. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  90. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  91. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  92. ssymm_iutcopyTS, ssymm_iltcopyTS,
  93. #else
  94. ssymm_outcopyTS, ssymm_oltcopyTS,
  95. #endif
  96. ssymm_outcopyTS, ssymm_oltcopyTS,
  /* The last two slots of a section are NULL when NO_LAPACK is defined. */
  97. #ifndef NO_LAPACK
  98. sneg_tcopyTS, slaswp_ncopyTS,
  99. #else
  100. NULL,NULL,
  101. #endif
  102. #endif
  /*
   * ---- Single-precision real section (s*) ----
   * "0, 0, 0," presumably reserves sgemm_p/q/r, which init_parameter assigns
   * later (TODO confirm against gotoblas_t); then the SGEMM unroll values.
   */
  103. 0, 0, 0,
  104. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  105. #ifdef SGEMM_DEFAULT_UNROLL_MN
  106. SGEMM_DEFAULT_UNROLL_MN,
  107. #else
  108. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  109. #endif
  /* Flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  110. #ifdef HAVE_EXCLUSIVE_CACHE
  111. 1,
  112. #else
  113. 0,
  114. #endif
  115. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  116. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  117. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  118. dsdot_kTS,
  119. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  120. sgemv_nTS, sgemv_tTS, sger_kTS,
  121. ssymv_LTS, ssymv_UTS,
  /*
   * sgemm_direct entries: TS-suffixed names on ARCH_X86_64, the plain
   * (unsuffixed) names on every other architecture.
   */
  122. #ifdef ARCH_X86_64
  123. sgemm_directTS,
  124. sgemm_direct_performantTS,
  125. #else
  126. sgemm_direct,
  127. sgemm_direct_performant,
  128. #endif
  129. sgemm_kernelTS, sgemm_betaTS,
  130. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  131. sgemm_incopyTS, sgemm_itcopyTS,
  132. #else
  133. sgemm_oncopyTS, sgemm_otcopyTS,
  134. #endif
  135. sgemm_oncopyTS, sgemm_otcopyTS,
  136. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  137. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  138. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  139. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  140. #else
  141. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  142. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  143. #endif
  144. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  145. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  146. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  147. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  148. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  149. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  150. #else
  151. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  152. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  153. #endif
  154. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  155. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  156. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  157. ssymm_iutcopyTS, ssymm_iltcopyTS,
  158. #else
  159. ssymm_outcopyTS, ssymm_oltcopyTS,
  160. #endif
  161. ssymm_outcopyTS, ssymm_oltcopyTS,
  162. #ifndef NO_LAPACK
  163. sneg_tcopyTS, slaswp_ncopyTS,
  164. #else
  165. NULL,NULL,
  166. #endif
  /*
   * ---- Double-precision real section (d*) ----
   * Same shape as the s* section, but without the HAVE_EXCLUSIVE_CACHE flag,
   * the dsdot entry and the *_direct entries.
   */
  167. 0, 0, 0,
  168. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  169. #ifdef DGEMM_DEFAULT_UNROLL_MN
  170. DGEMM_DEFAULT_UNROLL_MN,
  171. #else
  172. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  173. #endif
  174. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  175. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  176. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  177. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  178. dgemv_nTS, dgemv_tTS, dger_kTS,
  179. dsymv_LTS, dsymv_UTS,
  180. dgemm_kernelTS, dgemm_betaTS,
  181. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  182. dgemm_incopyTS, dgemm_itcopyTS,
  183. #else
  184. dgemm_oncopyTS, dgemm_otcopyTS,
  185. #endif
  186. dgemm_oncopyTS, dgemm_otcopyTS,
  187. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  188. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  189. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  190. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  191. #else
  192. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  193. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  194. #endif
  195. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  196. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  197. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  198. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  199. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  200. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  201. #else
  202. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  203. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  204. #endif
  205. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  206. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  207. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  208. dsymm_iutcopyTS, dsymm_iltcopyTS,
  209. #else
  210. dsymm_outcopyTS, dsymm_oltcopyTS,
  211. #endif
  212. dsymm_outcopyTS, dsymm_oltcopyTS,
  213. #ifndef NO_LAPACK
  214. dneg_tcopyTS, dlaswp_ncopyTS,
  215. #else
  216. NULL, NULL,
  217. #endif
  /*
   * ---- q* real section, compiled only when EXPRECISION is defined ----
   * The MN unroll value is always MAX(M, N) here (no *_UNROLL_MN override).
   */
  218. #ifdef EXPRECISION
  219. 0, 0, 0,
  220. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  221. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  222. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  223. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  224. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  225. qgemv_nTS, qgemv_tTS, qger_kTS,
  226. qsymv_LTS, qsymv_UTS,
  227. qgemm_kernelTS, qgemm_betaTS,
  228. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  229. qgemm_incopyTS, qgemm_itcopyTS,
  230. #else
  231. qgemm_oncopyTS, qgemm_otcopyTS,
  232. #endif
  233. qgemm_oncopyTS, qgemm_otcopyTS,
  234. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  235. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  236. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  237. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  238. #else
  239. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  240. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  241. #endif
  242. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  243. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  244. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  245. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  246. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  247. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  248. #else
  249. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  250. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  251. #endif
  252. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  253. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  254. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  255. qsymm_iutcopyTS, qsymm_iltcopyTS,
  256. #else
  257. qsymm_outcopyTS, qsymm_oltcopyTS,
  258. #endif
  259. qsymm_outcopyTS, qsymm_oltcopyTS,
  260. #ifndef NO_LAPACK
  261. qneg_tcopyTS, qlaswp_ncopyTS,
  262. #else
  263. NULL, NULL,
  264. #endif
  265. #endif
  /*
   * ---- Single-precision complex section (c*) ----
   * Compared with the real sections it lists additional gemv/ger/hemv
   * variants, four gemm kernels, eight trsm/trmm kernels and hemm copies.
   */
  266. 0, 0, 0,
  267. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  268. #ifdef CGEMM_DEFAULT_UNROLL_MN
  269. CGEMM_DEFAULT_UNROLL_MN,
  270. #else
  271. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  272. #endif
  273. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  274. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  275. cdotu_kTS, cdotc_kTS, csrot_kTS,
  276. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  277. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  278. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  279. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  280. csymv_LTS, csymv_UTS,
  281. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  282. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  283. cgemm_betaTS,
  284. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  285. cgemm_incopyTS, cgemm_itcopyTS,
  286. #else
  287. cgemm_oncopyTS, cgemm_otcopyTS,
  288. #endif
  289. cgemm_oncopyTS, cgemm_otcopyTS,
  290. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  291. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  292. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  293. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  294. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  295. #else
  296. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  297. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  298. #endif
  299. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  300. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  301. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  302. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  303. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  304. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  305. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  306. #else
  307. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  308. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  309. #endif
  310. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  311. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  312. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  313. csymm_iutcopyTS, csymm_iltcopyTS,
  314. #else
  315. csymm_outcopyTS, csymm_oltcopyTS,
  316. #endif
  317. csymm_outcopyTS, csymm_oltcopyTS,
  318. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  319. chemm_iutcopyTS, chemm_iltcopyTS,
  320. #else
  321. chemm_outcopyTS, chemm_oltcopyTS,
  322. #endif
  323. chemm_outcopyTS, chemm_oltcopyTS,
  /*
   * ---- cgemm3m sub-section ----
   * "0, 0, 0," presumably reserves cgemm3m_p/q/r, which init_parameter assigns
   * under USE_GEMM3M (TODO confirm). With USE_GEMM3M the unroll values come
   * from CGEMM3M_DEFAULT_UNROLL_* when CGEMM3M_DEFAULT_UNROLL_M is defined and
   * from the SGEMM values otherwise, followed by 37 kernel/copy pointers.
   */
  324. 0, 0, 0,
  325. #if defined(USE_GEMM3M)
  326. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  327. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  328. #else
  329. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  330. #endif
  331. cgemm3m_kernelTS,
  332. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  333. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  334. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  335. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  336. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  337. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  338. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  339. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  340. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  341. csymm3m_oucopybTS, csymm3m_olcopybTS,
  342. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  343. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  344. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  345. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  346. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  347. chemm3m_oucopybTS, chemm3m_olcopybTS,
  348. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  349. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  /*
   * Without USE_GEMM3M: placeholders with the same entry count as the branch
   * above (three zeros for the unroll values, then 1 + 18 * 2 = 37 NULLs), so
   * every later positional entry keeps its slot.
   */
  350. #else
  351. 0, 0, 0,
  352. NULL,
  353. NULL, NULL,
  354. NULL, NULL,
  355. NULL, NULL,
  356. NULL, NULL,
  357. NULL, NULL,
  358. NULL, NULL,
  359. NULL, NULL,
  360. NULL, NULL,
  361. NULL, NULL,
  362. NULL, NULL,
  363. NULL, NULL,
  364. NULL, NULL,
  365. NULL, NULL,
  366. NULL, NULL,
  367. NULL, NULL,
  368. NULL, NULL,
  369. NULL, NULL,
  370. NULL, NULL,
  371. #endif
  372. #ifndef NO_LAPACK
  373. cneg_tcopyTS, claswp_ncopyTS,
  374. #else
  375. NULL, NULL,
  376. #endif
  /* ---- Double-precision complex section (z*): mirrors the c* section ---- */
  377. 0, 0, 0,
  378. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  379. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  380. ZGEMM_DEFAULT_UNROLL_MN,
  381. #else
  382. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  383. #endif
  384. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  385. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  386. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  387. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  388. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  389. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  390. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  391. zsymv_LTS, zsymv_UTS,
  392. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  393. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  394. zgemm_betaTS,
  395. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  396. zgemm_incopyTS, zgemm_itcopyTS,
  397. #else
  398. zgemm_oncopyTS, zgemm_otcopyTS,
  399. #endif
  400. zgemm_oncopyTS, zgemm_otcopyTS,
  401. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  402. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  403. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  404. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  405. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  406. #else
  407. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  408. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  409. #endif
  410. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  411. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  412. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  413. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  414. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  415. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  416. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  417. #else
  418. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  419. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  420. #endif
  421. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  422. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  423. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  424. zsymm_iutcopyTS, zsymm_iltcopyTS,
  425. #else
  426. zsymm_outcopyTS, zsymm_oltcopyTS,
  427. #endif
  428. zsymm_outcopyTS, zsymm_oltcopyTS,
  429. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  430. zhemm_iutcopyTS, zhemm_iltcopyTS,
  431. #else
  432. zhemm_outcopyTS, zhemm_oltcopyTS,
  433. #endif
  434. zhemm_outcopyTS, zhemm_oltcopyTS,
  /*
   * ---- zgemm3m sub-section ----
   * Same structure as cgemm3m; the fallback unroll values are the DGEMM ones
   * when ZGEMM3M_DEFAULT_UNROLL_M is not defined.
   */
  435. 0, 0, 0,
  436. #if defined(USE_GEMM3M)
  437. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  438. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  439. #else
  440. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  441. #endif
  442. zgemm3m_kernelTS,
  443. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  444. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  445. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  446. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  447. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  448. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  449. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  450. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  451. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  452. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  453. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  454. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  455. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  456. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  457. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  458. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  459. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  460. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  /* Placeholders: same entry count as the USE_GEMM3M branch above. */
  461. #else
  462. 0, 0, 0,
  463. NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. #endif
  483. #ifndef NO_LAPACK
  484. zneg_tcopyTS, zlaswp_ncopyTS,
  485. #else
  486. NULL, NULL,
  487. #endif
  /*
   * ---- x* complex section, compiled only when EXPRECISION is defined ----
   * Mirrors the c*/z* sections; MN unroll is always MAX(M, N).
   */
  488. #ifdef EXPRECISION
  489. 0, 0, 0,
  490. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  491. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  492. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  493. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  494. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  495. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  496. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  497. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  498. xsymv_LTS, xsymv_UTS,
  499. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  500. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  501. xgemm_betaTS,
  502. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  503. xgemm_incopyTS, xgemm_itcopyTS,
  504. #else
  505. xgemm_oncopyTS, xgemm_otcopyTS,
  506. #endif
  507. xgemm_oncopyTS, xgemm_otcopyTS,
  508. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  509. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  510. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  511. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  512. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  513. #else
  514. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  515. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  516. #endif
  517. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  518. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  519. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  520. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  521. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  522. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  523. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  524. #else
  525. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  526. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  527. #endif
  528. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  529. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  530. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  531. xsymm_iutcopyTS, xsymm_iltcopyTS,
  532. #else
  533. xsymm_outcopyTS, xsymm_oltcopyTS,
  534. #endif
  535. xsymm_outcopyTS, xsymm_oltcopyTS,
  536. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  537. xhemm_iutcopyTS, xhemm_iltcopyTS,
  538. #else
  539. xhemm_outcopyTS, xhemm_oltcopyTS,
  540. #endif
  541. xhemm_outcopyTS, xhemm_oltcopyTS,
  /*
   * ---- xgemm3m sub-section ----
   * NOTE(review): with USE_GEMM3M the unroll values are taken from the
   * QGEMM_DEFAULT_UNROLL_* macros (there is no XGEMM3M_* override here).
   */
  542. 0, 0, 0,
  543. #if defined(USE_GEMM3M)
  544. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  545. xgemm3m_kernelTS,
  546. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  547. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  548. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  549. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  550. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  551. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  552. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  553. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  554. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  555. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  556. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  557. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  558. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  559. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  560. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  561. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  562. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  563. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  /* Placeholders: same entry count as the USE_GEMM3M branch above. */
  564. #else
  565. 0, 0, 0,
  566. NULL,
  567. NULL, NULL,
  568. NULL, NULL,
  569. NULL, NULL,
  570. NULL, NULL,
  571. NULL, NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. NULL, NULL,
  577. NULL, NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. NULL, NULL,
  584. NULL, NULL,
  585. #endif
  586. #ifndef NO_LAPACK
  587. xneg_tcopyTS, xlaswp_ncopyTS,
  588. #else
  589. NULL, NULL,
  590. #endif
  591. #endif
  /*
   * ---- Common tail ----
   * Stores the address of init_parameter (forward-declared above the table),
   * the SNUMOPT/DNUMOPT/QNUMOPT values, and then the axpby, omatcopy,
   * imatcopy and geadd kernels for the s/d/c/z precisions.
   */
  592. init_parameter,
  593. SNUMOPT, DNUMOPT, QNUMOPT,
  594. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  595. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  596. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  597. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  598. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  599. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  600. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  601. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  602. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  603. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  604. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  605. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  606. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  607. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  608. };
  609. #if defined(ARCH_ARM64)
  610. static void init_parameter(void) {
  611. #if defined(BUILD_HALF)
  612. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  613. #endif
  614. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  615. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  616. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  617. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  618. #if defined(BUILD_HALF)
  619. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  620. #endif
  621. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  622. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  623. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  624. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  625. #if defined(BUILD_HALF)
  626. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  627. #endif
  628. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  629. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  630. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  631. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  632. #ifdef EXPRECISION
  633. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  634. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  635. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  636. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  637. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  638. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  639. #endif
  640. #if defined(USE_GEMM3M)
  641. #ifdef CGEMM3M_DEFAULT_P
  642. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  643. #else
  644. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  645. #endif
  646. #ifdef ZGEMM3M_DEFAULT_P
  647. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  648. #else
  649. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  650. #endif
  651. #ifdef CGEMM3M_DEFAULT_Q
  652. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  653. #else
  654. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  655. #endif
  656. #ifdef ZGEMM3M_DEFAULT_Q
  657. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  658. #else
  659. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  660. #endif
  661. #ifdef CGEMM3M_DEFAULT_R
  662. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  663. #else
  664. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  665. #endif
  666. #ifdef ZGEMM3M_DEFAULT_R
  667. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  668. #else
  669. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  670. #endif
  671. #ifdef EXPRECISION
  672. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  673. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  674. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  675. #endif
  676. #endif
  677. }
  678. #else // defined(ARCH_ARM64)
  679. #if defined(ARCH_POWER)
  680. static void init_parameter(void) {
  681. #ifdef BUILD_HALF
  682. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  683. #endif
  684. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  685. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  686. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  687. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  688. #ifdef BUILD_HALF
  689. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  690. #endif
  691. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  692. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  693. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  694. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  695. #ifdef BUILD_HALF
  696. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  697. #endif
  698. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  699. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  700. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  701. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  702. }
  703. #else //POWER
  704. #if defined(ARCH_ZARCH)
  705. static void init_parameter(void) {
  706. #ifdef BUILD_HALF
  707. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  708. #endif
  709. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  710. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  711. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  712. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  713. #ifdef BUILD_HALF
  714. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  715. #endif
  716. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  717. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  718. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  719. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  720. #ifdef BUILD_HALF
  721. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  722. #endif
  723. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  724. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  725. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  726. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  727. }
  728. #else //ZARCH
  729. #ifdef ARCH_X86
  /*
   * Legacy L2 size probe (compiled only when ARCH_X86 is defined).
   *
   * Executes CPUID leaf 2 and scans the returned one-byte cache descriptors
   * for a known L2 descriptor.
   *
   * Returns the L2 size in KB.  If no descriptor matches, a warning is
   * written to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){

    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /*
     * Unpack the 15 descriptor bytes.  The low byte of EAX is deliberately
     * skipped: it is not a descriptor.
     * NOTE(review): bit 31 of each register (descriptor bytes reserved when
     * set, per the Intel SDM) is not checked here -- confirm whether that
     * matters for the CPUs that reach this fallback.
     */
    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    /* The first descriptor that maps to an L2 size wins. */
    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* Descriptor 0x48 is a 3 MB L2: 3 * 1024 KB (was mistyped as 3184). */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* Nothing matched: warn and fall back to a conservative 256 KB. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  800. #endif
  /*
   * Report the L2 cache size in KB.
   *
   * Reads bits 31:16 of ECX from CPUID leaf 0x80000006.  A positive value is
   * returned as-is.  Otherwise:
   *   - with ARCH_X86 defined, the CPUID leaf 2 descriptor scan
   *     (get_l2_size_old) supplies the answer;
   *   - without it, a warning goes to stderr and 256 is assumed.
   */
  static __inline__ int get_l2_size(void){

    int reg_a, reg_b, reg_c, reg_d;
    int l2_kb;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

    l2_kb = BITMASK(reg_c, 16, 0xffff);

    /* Common case: the extended leaf reported a usable size. */
    if (l2_kb > 0) return l2_kb;

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Report the L3 cache size derived from CPUID leaf 0x80000006.
   *
   * Takes the 14-bit field at EDX[31:18] and multiplies it by 512.
   * NOTE(review): AMD documents this field as the L3 size in 512 KB units,
   * which would make the result KB -- confirm behaviour on CPUs that leave
   * EDX of this leaf reserved.
   */
  static __inline__ int get_l3_size(void){

    int reg_a, reg_b, reg_c, reg_d;
    int l3_size;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

    l3_size = BITMASK(reg_d, 18, 0x3fff) * 512;

    return l3_size;
  }
  821. static void init_parameter(void) {
  822. int l2 = get_l2_size();
  823. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  824. /* where the GEMM unrolling parameters do not depend on l2 */
  825. #ifdef BUILD_HALF
  826. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  827. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  828. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  829. #endif
  830. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  831. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  832. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  833. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  834. #ifdef CGEMM3M_DEFAULT_Q
  835. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  836. #else
  837. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  838. #endif
  839. #ifdef ZGEMM3M_DEFAULT_Q
  840. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  841. #else
  842. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  843. #endif
  844. #ifdef EXPRECISION
  845. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  846. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  847. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  848. #endif
  849. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  850. #ifdef DEBUG
  851. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  852. #endif
  853. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  854. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  855. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  856. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  857. #ifdef EXPRECISION
  858. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  859. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  860. #endif
  861. #endif
  862. #ifdef CORE_NORTHWOOD
  863. #ifdef DEBUG
  864. fprintf(stderr, "Northwood\n");
  865. #endif
  866. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  867. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  868. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  869. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  870. #ifdef EXPRECISION
  871. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  872. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  873. #endif
  874. #endif
  875. #ifdef ATOM
  876. #ifdef DEBUG
  877. fprintf(stderr, "Atom\n");
  878. #endif
  879. TABLE_NAME.sgemm_p = 256;
  880. TABLE_NAME.dgemm_p = 128;
  881. TABLE_NAME.cgemm_p = 128;
  882. TABLE_NAME.zgemm_p = 64;
  883. #ifdef EXPRECISION
  884. TABLE_NAME.qgemm_p = 64;
  885. TABLE_NAME.xgemm_p = 32;
  886. #endif
  887. #endif
  888. #ifdef CORE_PRESCOTT
  889. #ifdef DEBUG
  890. fprintf(stderr, "Prescott\n");
  891. #endif
  892. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  893. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  894. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  895. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  896. #ifdef EXPRECISION
  897. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  898. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  899. #endif
  900. #endif
  901. #ifdef CORE2
  902. #ifdef DEBUG
  903. fprintf(stderr, "Core2\n");
  904. #endif
  905. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  906. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  907. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  908. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  909. #ifdef EXPRECISION
  910. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  911. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  912. #endif
  913. #endif
  914. #ifdef PENRYN
  915. #ifdef DEBUG
  916. fprintf(stderr, "Penryn\n");
  917. #endif
  918. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  919. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  920. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  921. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  922. #ifdef EXPRECISION
  923. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  924. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  925. #endif
  926. #endif
  927. #ifdef DUNNINGTON
  928. #ifdef DEBUG
  929. fprintf(stderr, "Dunnington\n");
  930. #endif
  931. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  932. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  933. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  934. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  935. #ifdef EXPRECISION
  936. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  937. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  938. #endif
  939. #endif
  940. #ifdef NEHALEM
  941. #ifdef DEBUG
  942. fprintf(stderr, "Nehalem\n");
  943. #endif
  944. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  945. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  946. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  947. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  948. #ifdef EXPRECISION
  949. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  950. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  951. #endif
  952. #endif
  953. #ifdef SANDYBRIDGE
  954. #ifdef DEBUG
  955. fprintf(stderr, "Sandybridge\n");
  956. #endif
  957. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  958. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  959. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  960. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  961. #ifdef EXPRECISION
  962. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  963. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  964. #endif
  965. #endif
  966. #ifdef HASWELL
  967. #ifdef DEBUG
  968. fprintf(stderr, "Haswell\n");
  969. #endif
  970. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  971. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  972. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  973. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  974. #ifdef EXPRECISION
  975. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  976. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  977. #endif
  978. #endif
  979. #ifdef SKYLAKEX
  980. #ifdef DEBUG
  981. fprintf(stderr, "SkylakeX\n");
  982. #endif
  983. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  984. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  985. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  986. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  987. #ifdef EXPRECISION
  988. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  989. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  990. #endif
  991. #endif
  992. #ifdef OPTERON
  993. #ifdef DEBUG
  994. fprintf(stderr, "Opteron\n");
  995. #endif
  996. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  997. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  998. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  999. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1000. #ifdef EXPRECISION
  1001. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1002. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1003. #endif
  1004. #endif
  1005. #ifdef BARCELONA
  1006. #ifdef DEBUG
  1007. fprintf(stderr, "Barcelona\n");
  1008. #endif
  1009. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1010. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1011. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1012. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1013. #ifdef EXPRECISION
  1014. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1015. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1016. #endif
  1017. #endif
  1018. #ifdef BOBCAT
  1019. #ifdef DEBUG
  1020. fprintf(stderr, "Bobcate\n");
  1021. #endif
  1022. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1023. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1024. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1025. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1026. #ifdef EXPRECISION
  1027. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1028. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1029. #endif
  1030. #endif
  1031. #ifdef BULLDOZER
  1032. #ifdef DEBUG
  1033. fprintf(stderr, "Bulldozer\n");
  1034. #endif
  1035. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1036. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1037. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1038. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1039. #ifdef EXPRECISION
  1040. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1041. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1042. #endif
  1043. #endif
  1044. #ifdef EXCAVATOR
  1045. #ifdef DEBUG
  1046. fprintf(stderr, "Excavator\n");
  1047. #endif
  1048. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1049. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1050. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1051. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1052. #ifdef EXPRECISION
  1053. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1054. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1055. #endif
  1056. #endif
  1057. #ifdef PILEDRIVER
  1058. #ifdef DEBUG
  1059. fprintf(stderr, "Piledriver\n");
  1060. #endif
  1061. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1062. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1063. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1064. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1065. #ifdef EXPRECISION
  1066. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1067. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1068. #endif
  1069. #endif
  1070. #ifdef STEAMROLLER
  1071. #ifdef DEBUG
  1072. fprintf(stderr, "Steamroller\n");
  1073. #endif
  1074. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1075. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1076. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1077. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1078. #ifdef EXPRECISION
  1079. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1080. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1081. #endif
  1082. #endif
  1083. #ifdef ZEN
  1084. #ifdef DEBUG
  1085. fprintf(stderr, "Zen\n");
  1086. #endif
  1087. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1088. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1089. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1090. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1091. #ifdef EXPRECISION
  1092. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1093. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1094. #endif
  1095. #endif
  1096. #ifdef NANO
  1097. #ifdef DEBUG
  1098. fprintf(stderr, "NANO\n");
  1099. #endif
  1100. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1101. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1102. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1103. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1104. #ifdef EXPRECISION
  1105. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1106. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1107. #endif
  1108. #endif
  1109. #ifdef CGEMM3M_DEFAULT_P
  1110. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1111. #else
  1112. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1113. #endif
  1114. #ifdef ZGEMM3M_DEFAULT_P
  1115. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1116. #else
  1117. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1118. #endif
  1119. #ifdef EXPRECISION
  1120. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1121. #endif
  1122. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1123. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1124. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1125. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1126. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1127. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1128. #else
  1129. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1130. #endif
  1131. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1132. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1133. #else
  1134. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1135. #endif
  1136. #ifdef QUAD_PRECISION
  1137. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1138. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1139. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1140. #endif
  1141. #ifdef DEBUG
  1142. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1143. #endif
  1144. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1145. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1146. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1147. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1148. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1149. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1150. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1151. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1152. #ifdef EXPRECISION
  1153. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1154. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1155. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1156. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1157. #endif
  1158. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1159. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1160. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1161. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1162. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1163. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1164. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1165. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1166. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1167. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1168. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1169. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1170. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1171. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1172. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1173. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1174. #ifdef EXPRECISION
  1175. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1176. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1177. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1178. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1179. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1180. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1181. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1182. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1183. #endif
  1184. }
  1185. #endif //ZARCH
  1186. #endif //POWER
  1187. #endif //defined(ARCH_ARM64)