You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 42 kB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  /* Make sure DEBUG is not defined for the remainder of this file. */
  44. #undef DEBUG
  /* Forward declaration: the TABLE_NAME initializer below stores a pointer to
   * init_parameter, so the function must be declared before the table even
   * though its per-architecture definitions follow the table. */
  45. static void init_parameter(void);
  /*
   * TABLE_NAME: the statically initialized gotoblas_t dispatch table.
   *
   * This is a purely positional initializer (no designated fields), so every
   * value must stay in exactly this order; the matching field order lives in
   * the gotoblas_t declaration, which is not part of this file
   * (NOTE(review): presumably pulled in through common.h -- confirm).
   *
   * Recurring patterns:
   *  - Each precision section opens with "0, 0, 0," followed by the
   *    <X>GEMM_DEFAULT_UNROLL_M / _N values and an UNROLL_MN value that falls
   *    back to MAX(UNROLL_M, UNROLL_N) when no explicit macro is defined.
   *    NOTE(review): the three zeros are presumably the <x>gemm_p/q/r slots
   *    that init_parameter() assigns at run time -- confirm against the
   *    gotoblas_t declaration.
   *  - Where <X>GEMM_DEFAULT_UNROLL_M != <X>GEMM_DEFAULT_UNROLL_N the "i..."
   *    copy routines are listed, followed by the "o..." ones; when the two
   *    unroll factors are equal the "o..." routines are listed for both.
   *  - Entries under "#ifndef NO_LAPACK" are replaced by NULL when NO_LAPACK
   *    is defined, so the number of entries is the same either way.
   */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* ---- Half-precision (shgemm) section: only emitted when BUILD_HALF is
   * defined. Apart from the shgemm_* kernel/beta/copy entries it lists the
   * same single-precision s* symbols as the section that follows. ---- */
  49. #ifdef BUILD_HALF
  50. 0, 0, 0,
  51. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  53. SHGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  58. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  59. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  60. dsdot_kTS,
  61. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  62. sgemv_nTS, sgemv_tTS, sger_kTS,
  63. ssymv_LTS, ssymv_UTS,
  64. shgemm_kernelTS, shgemm_betaTS,
  65. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  66. shgemm_incopyTS, shgemm_itcopyTS,
  67. #else
  68. shgemm_oncopyTS, shgemm_otcopyTS,
  69. #endif
  70. shgemm_oncopyTS, shgemm_otcopyTS,
  71. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  /* Note: the trsm/trmm/symm copy selection in this half-precision section is
   * keyed on the SGEMM (not SHGEMM) unroll factors. */
  72. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  73. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  74. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  75. #else
  76. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  77. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  78. #endif
  79. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  80. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  81. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  82. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  83. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  84. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  85. #else
  86. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  87. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  88. #endif
  89. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  90. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  91. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  92. ssymm_iutcopyTS, ssymm_iltcopyTS,
  93. #else
  94. ssymm_outcopyTS, ssymm_oltcopyTS,
  95. #endif
  96. ssymm_outcopyTS, ssymm_oltcopyTS,
  97. #ifndef NO_LAPACK
  98. sneg_tcopyTS, slaswp_ncopyTS,
  99. #else
  100. NULL,NULL,
  101. #endif
  102. #endif
  /* ---- Single-precision real (s*) section ---- */
  103. 0, 0, 0,
  104. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  105. #ifdef SGEMM_DEFAULT_UNROLL_MN
  106. SGEMM_DEFAULT_UNROLL_MN,
  107. #else
  108. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  109. #endif
  /* Flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, otherwise 0. This
   * entry appears only in this section of the table. */
  110. #ifdef HAVE_EXCLUSIVE_CACHE
  111. 1,
  112. #else
  113. 0,
  114. #endif
  115. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  116. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  117. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  118. dsdot_kTS,
  119. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  120. sgemv_nTS, sgemv_tTS, sger_kTS,
  121. ssymv_LTS, ssymv_UTS,
  122. sgemm_kernelTS, sgemm_betaTS,
  123. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  124. sgemm_incopyTS, sgemm_itcopyTS,
  125. #else
  126. sgemm_oncopyTS, sgemm_otcopyTS,
  127. #endif
  128. sgemm_oncopyTS, sgemm_otcopyTS,
  129. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  130. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  131. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  132. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  133. #else
  134. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  135. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  136. #endif
  137. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  138. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  139. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  140. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  141. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  142. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  143. #else
  144. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  145. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  146. #endif
  147. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  148. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  149. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  150. ssymm_iutcopyTS, ssymm_iltcopyTS,
  151. #else
  152. ssymm_outcopyTS, ssymm_oltcopyTS,
  153. #endif
  154. ssymm_outcopyTS, ssymm_oltcopyTS,
  155. #ifndef NO_LAPACK
  156. sneg_tcopyTS, slaswp_ncopyTS,
  157. #else
  158. NULL,NULL,
  159. #endif
  /* ---- Double-precision real (d*) section ---- */
  160. 0, 0, 0,
  161. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  162. #ifdef DGEMM_DEFAULT_UNROLL_MN
  163. DGEMM_DEFAULT_UNROLL_MN,
  164. #else
  165. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  166. #endif
  167. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  168. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  169. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  170. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  171. dgemv_nTS, dgemv_tTS, dger_kTS,
  172. dsymv_LTS, dsymv_UTS,
  173. dgemm_kernelTS, dgemm_betaTS,
  174. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  175. dgemm_incopyTS, dgemm_itcopyTS,
  176. #else
  177. dgemm_oncopyTS, dgemm_otcopyTS,
  178. #endif
  179. dgemm_oncopyTS, dgemm_otcopyTS,
  180. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  181. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  182. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  183. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  184. #else
  185. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  186. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  187. #endif
  188. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  189. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  190. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  191. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  192. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  193. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  194. #else
  195. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  196. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  197. #endif
  198. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  199. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  200. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  201. dsymm_iutcopyTS, dsymm_iltcopyTS,
  202. #else
  203. dsymm_outcopyTS, dsymm_oltcopyTS,
  204. #endif
  205. dsymm_outcopyTS, dsymm_oltcopyTS,
  206. #ifndef NO_LAPACK
  207. dneg_tcopyTS, dlaswp_ncopyTS,
  208. #else
  209. NULL, NULL,
  210. #endif
  /* ---- Extended-precision real (q*) section: only emitted when EXPRECISION
   * is defined. UNROLL_MN is always MAX(UNROLL_M, UNROLL_N) here. ---- */
  211. #ifdef EXPRECISION
  212. 0, 0, 0,
  213. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  214. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  215. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  216. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  217. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  218. qgemv_nTS, qgemv_tTS, qger_kTS,
  219. qsymv_LTS, qsymv_UTS,
  220. qgemm_kernelTS, qgemm_betaTS,
  221. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  222. qgemm_incopyTS, qgemm_itcopyTS,
  223. #else
  224. qgemm_oncopyTS, qgemm_otcopyTS,
  225. #endif
  226. qgemm_oncopyTS, qgemm_otcopyTS,
  227. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  228. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  229. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  230. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  231. #else
  232. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  233. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  234. #endif
  235. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  236. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  237. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  238. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  239. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  240. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  241. #else
  242. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  243. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  244. #endif
  245. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  246. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  247. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  248. qsymm_iutcopyTS, qsymm_iltcopyTS,
  249. #else
  250. qsymm_outcopyTS, qsymm_oltcopyTS,
  251. #endif
  252. qsymm_outcopyTS, qsymm_oltcopyTS,
  253. #ifndef NO_LAPACK
  254. qneg_tcopyTS, qlaswp_ncopyTS,
  255. #else
  256. NULL, NULL,
  257. #endif
  258. #endif
  /* ---- Single-precision complex (c*) section ---- */
  259. 0, 0, 0,
  260. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  261. #ifdef CGEMM_DEFAULT_UNROLL_MN
  262. CGEMM_DEFAULT_UNROLL_MN,
  263. #else
  264. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  265. #endif
  266. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  267. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  268. cdotu_kTS, cdotc_kTS, csrot_kTS,
  269. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  270. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  271. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  272. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  273. csymv_LTS, csymv_UTS,
  274. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  275. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  276. cgemm_betaTS,
  277. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  278. cgemm_incopyTS, cgemm_itcopyTS,
  279. #else
  280. cgemm_oncopyTS, cgemm_otcopyTS,
  281. #endif
  282. cgemm_oncopyTS, cgemm_otcopyTS,
  283. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  284. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  285. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  286. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  287. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  288. #else
  289. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  290. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  291. #endif
  292. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  293. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  294. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  295. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  296. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  297. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  298. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  299. #else
  300. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  301. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  302. #endif
  303. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  304. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  305. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  306. csymm_iutcopyTS, csymm_iltcopyTS,
  307. #else
  308. csymm_outcopyTS, csymm_oltcopyTS,
  309. #endif
  310. csymm_outcopyTS, csymm_oltcopyTS,
  311. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  312. chemm_iutcopyTS, chemm_iltcopyTS,
  313. #else
  314. chemm_outcopyTS, chemm_oltcopyTS,
  315. #endif
  316. chemm_outcopyTS, chemm_oltcopyTS,
  /* ---- cgemm3m section. With USE_GEMM3M the unroll values come from the
   * CGEMM3M_DEFAULT_UNROLL_* macros when CGEMM3M_DEFAULT_UNROLL_M is defined,
   * otherwise from the SGEMM ones. Without USE_GEMM3M every slot is filled
   * with 0 / NULL so the entry count is unchanged. ---- */
  317. 0, 0, 0,
  318. #if defined(USE_GEMM3M)
  319. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  320. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  321. #else
  322. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  323. #endif
  324. cgemm3m_kernelTS,
  325. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  326. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  327. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  328. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  329. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  330. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  331. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  332. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  333. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  334. csymm3m_oucopybTS, csymm3m_olcopybTS,
  335. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  336. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  337. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  338. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  339. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  340. chemm3m_oucopybTS, chemm3m_olcopybTS,
  341. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  342. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  343. #else
  344. 0, 0, 0,
  345. NULL,
  346. NULL, NULL,
  347. NULL, NULL,
  348. NULL, NULL,
  349. NULL, NULL,
  350. NULL, NULL,
  351. NULL, NULL,
  352. NULL, NULL,
  353. NULL, NULL,
  354. NULL, NULL,
  355. NULL, NULL,
  356. NULL, NULL,
  357. NULL, NULL,
  358. NULL, NULL,
  359. NULL, NULL,
  360. NULL, NULL,
  361. NULL, NULL,
  362. NULL, NULL,
  363. NULL, NULL,
  364. #endif
  365. #ifndef NO_LAPACK
  366. cneg_tcopyTS, claswp_ncopyTS,
  367. #else
  368. NULL, NULL,
  369. #endif
  /* ---- Double-precision complex (z*) section ---- */
  370. 0, 0, 0,
  371. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  372. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  373. ZGEMM_DEFAULT_UNROLL_MN,
  374. #else
  375. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  376. #endif
  377. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  378. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  379. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  380. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  381. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  382. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  383. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  384. zsymv_LTS, zsymv_UTS,
  385. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  386. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  387. zgemm_betaTS,
  388. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  389. zgemm_incopyTS, zgemm_itcopyTS,
  390. #else
  391. zgemm_oncopyTS, zgemm_otcopyTS,
  392. #endif
  393. zgemm_oncopyTS, zgemm_otcopyTS,
  394. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  395. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  396. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  397. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  398. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  399. #else
  400. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  401. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  402. #endif
  403. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  404. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  405. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  406. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  407. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  408. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  409. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  410. #else
  411. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  412. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  413. #endif
  414. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  415. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  416. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  417. zsymm_iutcopyTS, zsymm_iltcopyTS,
  418. #else
  419. zsymm_outcopyTS, zsymm_oltcopyTS,
  420. #endif
  421. zsymm_outcopyTS, zsymm_oltcopyTS,
  422. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  423. zhemm_iutcopyTS, zhemm_iltcopyTS,
  424. #else
  425. zhemm_outcopyTS, zhemm_oltcopyTS,
  426. #endif
  427. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* ---- zgemm3m section. Same layout as the cgemm3m section; the fallback
   * unroll values come from the DGEMM macros. ---- */
  428. 0, 0, 0,
  429. #if defined(USE_GEMM3M)
  430. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  431. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  432. #else
  433. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  434. #endif
  435. zgemm3m_kernelTS,
  436. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  437. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  438. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  439. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  440. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  441. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  442. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  443. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  444. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  445. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  446. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  447. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  448. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  449. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  450. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  451. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  452. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  453. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  454. #else
  455. 0, 0, 0,
  456. NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. #endif
  476. #ifndef NO_LAPACK
  477. zneg_tcopyTS, zlaswp_ncopyTS,
  478. #else
  479. NULL, NULL,
  480. #endif
  /* ---- Extended-precision complex (x*) section: only emitted when
   * EXPRECISION is defined. ---- */
  481. #ifdef EXPRECISION
  482. 0, 0, 0,
  483. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  484. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  485. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  486. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  487. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  488. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  489. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  490. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  491. xsymv_LTS, xsymv_UTS,
  492. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  493. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  494. xgemm_betaTS,
  495. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  496. xgemm_incopyTS, xgemm_itcopyTS,
  497. #else
  498. xgemm_oncopyTS, xgemm_otcopyTS,
  499. #endif
  500. xgemm_oncopyTS, xgemm_otcopyTS,
  501. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  502. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  503. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  504. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  505. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  506. #else
  507. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  508. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  509. #endif
  510. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  511. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  512. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  513. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  514. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  515. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  516. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  517. #else
  518. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  519. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  520. #endif
  521. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  522. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  523. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  524. xsymm_iutcopyTS, xsymm_iltcopyTS,
  525. #else
  526. xsymm_outcopyTS, xsymm_oltcopyTS,
  527. #endif
  528. xsymm_outcopyTS, xsymm_oltcopyTS,
  529. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  530. xhemm_iutcopyTS, xhemm_iltcopyTS,
  531. #else
  532. xhemm_outcopyTS, xhemm_oltcopyTS,
  533. #endif
  534. xhemm_outcopyTS, xhemm_oltcopyTS,
  /* ---- xgemm3m section (still inside EXPRECISION). The unroll values here
   * are taken from the QGEMM macros. ---- */
  535. 0, 0, 0,
  536. #if defined(USE_GEMM3M)
  537. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  538. xgemm3m_kernelTS,
  539. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  540. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  541. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  542. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  543. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  544. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  545. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  546. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  547. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  548. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  549. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  550. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  551. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  552. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  553. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  554. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  555. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  556. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  557. #else
  558. 0, 0, 0,
  559. NULL,
  560. NULL, NULL,
  561. NULL, NULL,
  562. NULL, NULL,
  563. NULL, NULL,
  564. NULL, NULL,
  565. NULL, NULL,
  566. NULL, NULL,
  567. NULL, NULL,
  568. NULL, NULL,
  569. NULL, NULL,
  570. NULL, NULL,
  571. NULL, NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. NULL, NULL,
  577. NULL, NULL,
  578. #endif
  579. #ifndef NO_LAPACK
  580. xneg_tcopyTS, xlaswp_ncopyTS,
  581. #else
  582. NULL, NULL,
  583. #endif
  584. #endif
  /* ---- Tail of the table: pointer to the per-architecture init_parameter()
   * defined later in this file, the *NUMOPT constants, and then the axpby,
   * omatcopy, imatcopy and geadd kernels for the s/d/c/z types. ---- */
  585. init_parameter,
  586. SNUMOPT, DNUMOPT, QNUMOPT,
  587. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  588. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  589. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  590. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  591. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  592. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  593. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  594. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  595. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  596. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  597. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  598. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  599. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  600. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  601. };
  602. #if defined(ARCH_ARM64)
  603. static void init_parameter(void) {
  604. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  605. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  606. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  607. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  608. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  609. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  610. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  611. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  612. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  613. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  614. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  615. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  616. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  617. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  618. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  619. #ifdef EXPRECISION
  620. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  621. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  622. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  623. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  624. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  625. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  626. #endif
  627. #if defined(USE_GEMM3M)
  628. #ifdef CGEMM3M_DEFAULT_P
  629. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  630. #else
  631. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  632. #endif
  633. #ifdef ZGEMM3M_DEFAULT_P
  634. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  635. #else
  636. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  637. #endif
  638. #ifdef CGEMM3M_DEFAULT_Q
  639. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  640. #else
  641. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  642. #endif
  643. #ifdef ZGEMM3M_DEFAULT_Q
  644. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  645. #else
  646. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  647. #endif
  648. #ifdef CGEMM3M_DEFAULT_R
  649. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  650. #else
  651. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  652. #endif
  653. #ifdef ZGEMM3M_DEFAULT_R
  654. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  655. #else
  656. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  657. #endif
  658. #ifdef EXPRECISION
  659. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  660. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  661. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  662. #endif
  663. #endif
  664. }
  665. #else // defined(ARCH_ARM64)
  666. #if defined(ARCH_POWER)
  667. static void init_parameter(void) {
  668. #ifdef BUILD_HALF
  669. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  670. #endif
  671. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  672. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  673. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  674. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  675. #ifdef BUILD_HALF
  676. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  677. #endif
  678. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  679. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  680. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  681. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  682. #ifdef BUILD_HALF
  683. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  684. #endif
  685. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  686. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  687. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  688. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  689. }
  690. #else //POWER
  691. #if defined(ARCH_ZARCH)
  692. static void init_parameter(void) {
  693. #ifdef BUILD_HALF
  694. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  695. #endif
  696. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  697. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  698. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  699. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  700. #ifdef BUILD_HALF
  701. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  702. #endif
  703. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  704. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  705. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  706. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  707. #ifdef BUILD_HALF
  708. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  709. #endif
  710. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  711. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  712. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  713. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  714. }
  715. #else //ZARCH
  716. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup based on the CPUID leaf 2 descriptor bytes.
   *
   * Executes cpuid(2), walks the 15 descriptor bytes returned in
   * EAX[31:8], EBX, ECX and EDX (in that order) and returns the L2 size in
   * KB for the first byte that matches a known L2 descriptor.
   *
   * Returns 256 after printing a warning to stderr when no byte matches.
   */
  static int get_l2_size_old(void){

    int regs[4];   /* eax, ebx, ecx, edx as filled in by cpuid(2) */
    int r, b;

    cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

    for (r = 0; r < 4; r++){
      /* The low byte of EAX is not a cache descriptor, so the walk over
         EAX starts at its second byte. */
      for (b = (r == 0) ? 1 : 0; b < 4; b++){

        switch (BITMASK(regs[r], 8 * b, 0xff)){

          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          /* 3 MB = 3 * 1024 KB (the previous value 3184 was a typo). */
          return 3072;

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;
        }
      }
    }

    /* No known L2 descriptor found: warn and fall back to a fixed size. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  787. #endif
  /*
   * Returns the L2 cache size in KB.
   *
   * Reads bits 16..31 of ECX from CPUID leaf 0x80000006.  A positive value
   * is returned as is.  Otherwise:
   *   - with ARCH_X86 defined, the result of get_l2_size_old() is returned;
   *   - without it, a warning is printed to stderr and 256 is returned.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int l2_kb;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2_kb = BITMASK(ecx, 16, 0xffff);

    if (l2_kb > 0) return l2_kb;

  #ifdef ARCH_X86
    /* The extended leaf gave nothing usable: try the descriptor table. */
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Returns bits 18..31 of EDX from CPUID leaf 0x80000006, multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, which would make
   * the result an L3 size in KB -- confirm against the vendor CPUID
   * documentation.
   */
  static __inline__ int get_l3_size(void){

    int eax, ebx, ecx, edx;
    int l3_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l3_field = BITMASK(edx, 18, 0x3fff);

    return l3_field * 512;
  }
  808. static void init_parameter(void) {
  809. int l2 = get_l2_size();
  810. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  811. /* where the GEMM unrolling parameters do not depend on l2 */
  812. #ifdef BUILD_HALF
  813. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  814. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  815. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  816. #endif
  817. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  818. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  819. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  820. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  821. #ifdef CGEMM3M_DEFAULT_Q
  822. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  823. #else
  824. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  825. #endif
  826. #ifdef ZGEMM3M_DEFAULT_Q
  827. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  828. #else
  829. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  830. #endif
  831. #ifdef EXPRECISION
  832. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  833. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  834. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  835. #endif
  836. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  837. #ifdef DEBUG
  838. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  839. #endif
  840. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  841. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  842. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  843. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  844. #ifdef EXPRECISION
  845. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  846. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  847. #endif
  848. #endif
  849. #ifdef CORE_NORTHWOOD
  850. #ifdef DEBUG
  851. fprintf(stderr, "Northwood\n");
  852. #endif
  853. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  854. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  855. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  856. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  857. #ifdef EXPRECISION
  858. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  859. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  860. #endif
  861. #endif
  862. #ifdef ATOM
  863. #ifdef DEBUG
  864. fprintf(stderr, "Atom\n");
  865. #endif
  866. TABLE_NAME.sgemm_p = 256;
  867. TABLE_NAME.dgemm_p = 128;
  868. TABLE_NAME.cgemm_p = 128;
  869. TABLE_NAME.zgemm_p = 64;
  870. #ifdef EXPRECISION
  871. TABLE_NAME.qgemm_p = 64;
  872. TABLE_NAME.xgemm_p = 32;
  873. #endif
  874. #endif
  875. #ifdef CORE_PRESCOTT
  876. #ifdef DEBUG
  877. fprintf(stderr, "Prescott\n");
  878. #endif
  879. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  880. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  881. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  882. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  883. #ifdef EXPRECISION
  884. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  885. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  886. #endif
  887. #endif
  888. #ifdef CORE2
  889. #ifdef DEBUG
  890. fprintf(stderr, "Core2\n");
  891. #endif
  892. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  893. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  894. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  895. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  896. #ifdef EXPRECISION
  897. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  898. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  899. #endif
  900. #endif
  901. #ifdef PENRYN
  902. #ifdef DEBUG
  903. fprintf(stderr, "Penryn\n");
  904. #endif
  905. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  906. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  907. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  908. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  909. #ifdef EXPRECISION
  910. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  911. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  912. #endif
  913. #endif
  914. #ifdef DUNNINGTON
  915. #ifdef DEBUG
  916. fprintf(stderr, "Dunnington\n");
  917. #endif
  918. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  919. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  920. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  921. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  922. #ifdef EXPRECISION
  923. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  924. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  925. #endif
  926. #endif
  927. #ifdef NEHALEM
  928. #ifdef DEBUG
  929. fprintf(stderr, "Nehalem\n");
  930. #endif
  931. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  932. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  933. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  934. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  935. #ifdef EXPRECISION
  936. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  937. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  938. #endif
  939. #endif
  940. #ifdef SANDYBRIDGE
  941. #ifdef DEBUG
  942. fprintf(stderr, "Sandybridge\n");
  943. #endif
  944. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  945. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  946. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  947. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  948. #ifdef EXPRECISION
  949. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  950. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  951. #endif
  952. #endif
  953. #ifdef HASWELL
  954. #ifdef DEBUG
  955. fprintf(stderr, "Haswell\n");
  956. #endif
  957. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  958. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  959. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  960. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  961. #ifdef EXPRECISION
  962. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  963. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  964. #endif
  965. #endif
  966. #ifdef SKYLAKEX
  967. #ifdef DEBUG
  968. fprintf(stderr, "SkylakeX\n");
  969. #endif
  970. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  971. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  972. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  973. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  974. #ifdef EXPRECISION
  975. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  976. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  977. #endif
  978. #endif
  979. #ifdef OPTERON
  980. #ifdef DEBUG
  981. fprintf(stderr, "Opteron\n");
  982. #endif
  983. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  984. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  985. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  986. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  987. #ifdef EXPRECISION
  988. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  989. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  990. #endif
  991. #endif
  992. #ifdef BARCELONA
  993. #ifdef DEBUG
  994. fprintf(stderr, "Barcelona\n");
  995. #endif
  996. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  997. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  998. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  999. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1000. #ifdef EXPRECISION
  1001. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1002. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1003. #endif
  1004. #endif
  1005. #ifdef BOBCAT
  1006. #ifdef DEBUG
  1007. fprintf(stderr, "Bobcate\n");
  1008. #endif
  1009. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1010. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1011. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1012. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1013. #ifdef EXPRECISION
  1014. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1015. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1016. #endif
  1017. #endif
  1018. #ifdef BULLDOZER
  1019. #ifdef DEBUG
  1020. fprintf(stderr, "Bulldozer\n");
  1021. #endif
  1022. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1023. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1024. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1025. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1026. #ifdef EXPRECISION
  1027. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1028. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1029. #endif
  1030. #endif
  1031. #ifdef EXCAVATOR
  1032. #ifdef DEBUG
  1033. fprintf(stderr, "Excavator\n");
  1034. #endif
  1035. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1036. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1037. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1038. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1039. #ifdef EXPRECISION
  1040. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1041. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1042. #endif
  1043. #endif
  1044. #ifdef PILEDRIVER
  1045. #ifdef DEBUG
  1046. fprintf(stderr, "Piledriver\n");
  1047. #endif
  1048. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1049. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1050. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1051. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1052. #ifdef EXPRECISION
  1053. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1054. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1055. #endif
  1056. #endif
  1057. #ifdef STEAMROLLER
  1058. #ifdef DEBUG
  1059. fprintf(stderr, "Steamroller\n");
  1060. #endif
  1061. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1062. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1063. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1064. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1065. #ifdef EXPRECISION
  1066. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1067. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1068. #endif
  1069. #endif
  1070. #ifdef ZEN
  1071. #ifdef DEBUG
  1072. fprintf(stderr, "Zen\n");
  1073. #endif
  1074. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1075. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1076. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1077. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1078. #ifdef EXPRECISION
  1079. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1080. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1081. #endif
  1082. #endif
  1083. #ifdef NANO
  1084. #ifdef DEBUG
  1085. fprintf(stderr, "NANO\n");
  1086. #endif
  1087. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1088. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1089. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1090. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1091. #ifdef EXPRECISION
  1092. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1093. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1094. #endif
  1095. #endif
  1096. #ifdef CGEMM3M_DEFAULT_P
  1097. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1098. #else
  1099. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1100. #endif
  1101. #ifdef ZGEMM3M_DEFAULT_P
  1102. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1103. #else
  1104. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1105. #endif
  1106. #ifdef EXPRECISION
  1107. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1108. #endif
  1109. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1110. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1111. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1112. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1113. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1114. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1115. #else
  1116. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1117. #endif
  1118. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1119. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1120. #else
  1121. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1122. #endif
  1123. #ifdef QUAD_PRECISION
  1124. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1125. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1126. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1127. #endif
  1128. #ifdef DEBUG
  1129. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1130. #endif
  1131. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1132. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1133. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1134. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1135. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1136. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1137. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1138. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1139. #ifdef EXPRECISION
  1140. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1141. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1142. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1143. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1144. #endif
  1145. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1146. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1147. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1148. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1149. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1150. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1151. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1152. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1153. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1154. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1155. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1156. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1157. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1158. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1159. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1160. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1161. #ifdef EXPRECISION
  1162. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1163. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1164. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1165. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1166. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1167. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1168. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1169. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1170. #endif
  1171. }
  1172. #endif //ZARCH
  1173. #endif //POWER
  1174. #endif //defined(ARCH_ARM64)