You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 42 kB

7 years ago
5 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  /* Forward declaration: the table below stores a pointer to init_parameter
     before the function itself is defined further down in this file. */
  45. static void init_parameter(void);
  /* Statically initialised kernel dispatch table for this build target.
     Every initializer is positional (no designated initializers), so the
     order of entries must match the member order of gotoblas_t, which is
     declared outside this file.  The preprocessor conditionals only choose
     WHICH value fills a slot (or whether an optional group of slots exists).
     NOTE(review): the "TS" suffix on the routine names is presumably expanded
     to a per-target suffix by kernelTS.h / common.h -- confirm there. */
  46. gotoblas_t TABLE_NAME = {
  /* Leading scalar parameters (names only; meaning defined by gotoblas_t). */
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* ---- Half-precision (SHGEMM) group: emitted only with BUILD_HALF. ---- */
  49. #ifdef BUILD_HALF
  /* Three zero placeholders -- presumably the shgemm p/q/r members that
     init_parameter() assigns at run time; TODO confirm against gotoblas_t. */
  50. 0, 0, 0,
  51. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  /* Combined unroll value: explicit macro if provided, else MAX(M, N). */
  52. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  53. SHGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  56. #endif
  /* This group is filled with the single-precision (s-prefixed) level-1/2
     routines; only the gemm kernel/beta/copy entries are shgemm-specific. */
  57. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  58. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  59. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  60. dsdot_kTS,
  61. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  62. sgemv_nTS, sgemv_tTS, sger_kTS,
  63. ssymv_LTS, ssymv_UTS,
  64. shgemm_kernelTS, shgemm_betaTS,
  /* Recurring pattern throughout this table: when UNROLL_M != UNROLL_N the
     first pair/group of copy slots gets the dedicated "i..copy" routines;
     otherwise those slots reuse the "o..copy" routines.  The "o..copy"
     routines always fill the slots that follow the conditional. */
  65. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  66. shgemm_incopyTS, shgemm_itcopyTS,
  67. #else
  68. shgemm_oncopyTS, shgemm_otcopyTS,
  69. #endif
  70. shgemm_oncopyTS, shgemm_otcopyTS,
  71. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  /* Note: from here on this group tests the SGEMM (not SHGEMM) unroll macros. */
  72. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  73. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  74. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  75. #else
  76. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  77. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  78. #endif
  79. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  80. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  81. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  82. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  83. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  84. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  85. #else
  86. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  87. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  88. #endif
  89. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  90. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  91. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  92. ssymm_iutcopyTS, ssymm_iltcopyTS,
  93. #else
  94. ssymm_outcopyTS, ssymm_oltcopyTS,
  95. #endif
  96. ssymm_outcopyTS, ssymm_oltcopyTS,
  /* The two slots guarded by NO_LAPACK are NULL when LAPACK is disabled. */
  97. #ifndef NO_LAPACK
  98. sneg_tcopyTS, slaswp_ncopyTS,
  99. #else
  100. NULL,NULL,
  101. #endif
  102. #endif
  /* ---- Single-precision real (s / SGEMM) group. ---- */
  103. 0, 0, 0,
  104. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  105. #ifdef SGEMM_DEFAULT_UNROLL_MN
  106. SGEMM_DEFAULT_UNROLL_MN,
  107. #else
  108. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  109. #endif
  /* Flag slot: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  110. #ifdef HAVE_EXCLUSIVE_CACHE
  111. 1,
  112. #else
  113. 0,
  114. #endif
  115. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  116. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  117. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  118. dsdot_kTS,
  119. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  120. sgemv_nTS, sgemv_tTS, sger_kTS,
  121. ssymv_LTS, ssymv_UTS,
  122. sgemm_directTS,
  123. sgemm_direct_performantTS,
  124. sgemm_kernelTS, sgemm_betaTS,
  125. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  126. sgemm_incopyTS, sgemm_itcopyTS,
  127. #else
  128. sgemm_oncopyTS, sgemm_otcopyTS,
  129. #endif
  130. sgemm_oncopyTS, sgemm_otcopyTS,
  131. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  132. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  133. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  134. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  135. #else
  136. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  137. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  138. #endif
  139. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  140. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  141. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  142. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  143. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  144. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  145. #else
  146. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  147. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  148. #endif
  149. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  150. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  151. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  152. ssymm_iutcopyTS, ssymm_iltcopyTS,
  153. #else
  154. ssymm_outcopyTS, ssymm_oltcopyTS,
  155. #endif
  156. ssymm_outcopyTS, ssymm_oltcopyTS,
  157. #ifndef NO_LAPACK
  158. sneg_tcopyTS, slaswp_ncopyTS,
  159. #else
  160. NULL,NULL,
  161. #endif
  /* ---- Double-precision real (d / DGEMM) group. ---- */
  162. 0, 0, 0,
  163. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  164. #ifdef DGEMM_DEFAULT_UNROLL_MN
  165. DGEMM_DEFAULT_UNROLL_MN,
  166. #else
  167. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  168. #endif
  169. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  170. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  171. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  172. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  173. dgemv_nTS, dgemv_tTS, dger_kTS,
  174. dsymv_LTS, dsymv_UTS,
  175. dgemm_kernelTS, dgemm_betaTS,
  176. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  177. dgemm_incopyTS, dgemm_itcopyTS,
  178. #else
  179. dgemm_oncopyTS, dgemm_otcopyTS,
  180. #endif
  181. dgemm_oncopyTS, dgemm_otcopyTS,
  182. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  183. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  184. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  185. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  186. #else
  187. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  188. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  189. #endif
  190. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  191. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  192. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  193. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  194. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  195. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  196. #else
  197. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  198. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  199. #endif
  200. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  201. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  202. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  203. dsymm_iutcopyTS, dsymm_iltcopyTS,
  204. #else
  205. dsymm_outcopyTS, dsymm_oltcopyTS,
  206. #endif
  207. dsymm_outcopyTS, dsymm_oltcopyTS,
  208. #ifndef NO_LAPACK
  209. dneg_tcopyTS, dlaswp_ncopyTS,
  210. #else
  211. NULL, NULL,
  212. #endif
  /* ---- Extended-precision real (q / QGEMM) group: only with EXPRECISION.
     Unlike the s/d groups there is no *_UNROLL_MN override here; MAX(M, N)
     is always used. ---- */
  213. #ifdef EXPRECISION
  214. 0, 0, 0,
  215. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  216. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  217. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  218. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  219. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  220. qgemv_nTS, qgemv_tTS, qger_kTS,
  221. qsymv_LTS, qsymv_UTS,
  222. qgemm_kernelTS, qgemm_betaTS,
  223. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  224. qgemm_incopyTS, qgemm_itcopyTS,
  225. #else
  226. qgemm_oncopyTS, qgemm_otcopyTS,
  227. #endif
  228. qgemm_oncopyTS, qgemm_otcopyTS,
  229. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  230. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  231. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  232. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  233. #else
  234. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  235. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  236. #endif
  237. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  238. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  239. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  240. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  241. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  242. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  243. #else
  244. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  245. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  246. #endif
  247. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  248. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  249. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  250. qsymm_iutcopyTS, qsymm_iltcopyTS,
  251. #else
  252. qsymm_outcopyTS, qsymm_oltcopyTS,
  253. #endif
  254. qsymm_outcopyTS, qsymm_oltcopyTS,
  255. #ifndef NO_LAPACK
  256. qneg_tcopyTS, qlaswp_ncopyTS,
  257. #else
  258. NULL, NULL,
  259. #endif
  260. #endif
  /* ---- Single-precision complex (c / CGEMM) group. ---- */
  261. 0, 0, 0,
  262. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  263. #ifdef CGEMM_DEFAULT_UNROLL_MN
  264. CGEMM_DEFAULT_UNROLL_MN,
  265. #else
  266. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  267. #endif
  268. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  269. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  270. cdotu_kTS, cdotc_kTS, csrot_kTS,
  271. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  272. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  273. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  274. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  275. csymv_LTS, csymv_UTS,
  276. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  277. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  278. cgemm_betaTS,
  279. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  280. cgemm_incopyTS, cgemm_itcopyTS,
  281. #else
  282. cgemm_oncopyTS, cgemm_otcopyTS,
  283. #endif
  284. cgemm_oncopyTS, cgemm_otcopyTS,
  285. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  286. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  287. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  288. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  289. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  290. #else
  291. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  292. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  293. #endif
  294. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  295. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  296. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  297. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  298. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  299. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  300. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  301. #else
  302. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  303. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  304. #endif
  305. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  306. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  307. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  308. csymm_iutcopyTS, csymm_iltcopyTS,
  309. #else
  310. csymm_outcopyTS, csymm_oltcopyTS,
  311. #endif
  312. csymm_outcopyTS, csymm_oltcopyTS,
  313. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  314. chemm_iutcopyTS, chemm_iltcopyTS,
  315. #else
  316. chemm_outcopyTS, chemm_oltcopyTS,
  317. #endif
  318. chemm_outcopyTS, chemm_oltcopyTS,
  /* ---- CGEMM3M group.  With USE_GEMM3M the unroll values come from the
     CGEMM3M macros when defined, else from the SGEMM ones; without
     USE_GEMM3M the #else branch supplies the same number of slots
     (3 zeros + 37 NULLs) so later entries keep their positions. ---- */
  319. 0, 0, 0,
  320. #if defined(USE_GEMM3M)
  321. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  322. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  323. #else
  324. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  325. #endif
  326. cgemm3m_kernelTS,
  327. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  328. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  329. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  330. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  331. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  332. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  333. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  334. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  335. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  336. csymm3m_oucopybTS, csymm3m_olcopybTS,
  337. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  338. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  339. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  340. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  341. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  342. chemm3m_oucopybTS, chemm3m_olcopybTS,
  343. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  344. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  345. #else
  346. 0, 0, 0,
  347. NULL,
  348. NULL, NULL,
  349. NULL, NULL,
  350. NULL, NULL,
  351. NULL, NULL,
  352. NULL, NULL,
  353. NULL, NULL,
  354. NULL, NULL,
  355. NULL, NULL,
  356. NULL, NULL,
  357. NULL, NULL,
  358. NULL, NULL,
  359. NULL, NULL,
  360. NULL, NULL,
  361. NULL, NULL,
  362. NULL, NULL,
  363. NULL, NULL,
  364. NULL, NULL,
  365. NULL, NULL,
  366. #endif
  367. #ifndef NO_LAPACK
  368. cneg_tcopyTS, claswp_ncopyTS,
  369. #else
  370. NULL, NULL,
  371. #endif
  /* ---- Double-precision complex (z / ZGEMM) group. ---- */
  372. 0, 0, 0,
  373. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  374. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  375. ZGEMM_DEFAULT_UNROLL_MN,
  376. #else
  377. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  378. #endif
  379. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  380. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  381. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  382. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  383. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  384. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  385. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  386. zsymv_LTS, zsymv_UTS,
  387. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  388. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  389. zgemm_betaTS,
  390. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  391. zgemm_incopyTS, zgemm_itcopyTS,
  392. #else
  393. zgemm_oncopyTS, zgemm_otcopyTS,
  394. #endif
  395. zgemm_oncopyTS, zgemm_otcopyTS,
  396. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  397. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  398. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  399. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  400. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  401. #else
  402. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  403. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  404. #endif
  405. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  406. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  407. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  408. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  409. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  410. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  411. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  412. #else
  413. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  414. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  415. #endif
  416. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  417. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  418. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  419. zsymm_iutcopyTS, zsymm_iltcopyTS,
  420. #else
  421. zsymm_outcopyTS, zsymm_oltcopyTS,
  422. #endif
  423. zsymm_outcopyTS, zsymm_oltcopyTS,
  424. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  425. zhemm_iutcopyTS, zhemm_iltcopyTS,
  426. #else
  427. zhemm_outcopyTS, zhemm_oltcopyTS,
  428. #endif
  429. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* ---- ZGEMM3M group: same layout as the CGEMM3M group, with the DGEMM
     unroll macros as the fallback when ZGEMM3M_DEFAULT_UNROLL_M is absent. ---- */
  430. 0, 0, 0,
  431. #if defined(USE_GEMM3M)
  432. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  433. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  434. #else
  435. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  436. #endif
  437. zgemm3m_kernelTS,
  438. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  439. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  440. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  441. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  442. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  443. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  444. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  445. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  446. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  447. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  448. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  449. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  450. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  451. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  452. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  453. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  454. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  455. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  456. #else
  457. 0, 0, 0,
  458. NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. #endif
  478. #ifndef NO_LAPACK
  479. zneg_tcopyTS, zlaswp_ncopyTS,
  480. #else
  481. NULL, NULL,
  482. #endif
  /* ---- Extended-precision complex (x / XGEMM) group: only with
     EXPRECISION.  Its GEMM3M sub-group uses the QGEMM unroll macros. ---- */
  483. #ifdef EXPRECISION
  484. 0, 0, 0,
  485. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  486. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  487. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  488. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  489. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  490. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  491. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  492. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  493. xsymv_LTS, xsymv_UTS,
  494. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  495. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  496. xgemm_betaTS,
  497. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  498. xgemm_incopyTS, xgemm_itcopyTS,
  499. #else
  500. xgemm_oncopyTS, xgemm_otcopyTS,
  501. #endif
  502. xgemm_oncopyTS, xgemm_otcopyTS,
  503. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  504. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  505. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  506. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  507. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  508. #else
  509. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  510. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  511. #endif
  512. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  513. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  514. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  515. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  516. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  517. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  518. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  519. #else
  520. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  521. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  522. #endif
  523. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  524. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  525. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  526. xsymm_iutcopyTS, xsymm_iltcopyTS,
  527. #else
  528. xsymm_outcopyTS, xsymm_oltcopyTS,
  529. #endif
  530. xsymm_outcopyTS, xsymm_oltcopyTS,
  531. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  532. xhemm_iutcopyTS, xhemm_iltcopyTS,
  533. #else
  534. xhemm_outcopyTS, xhemm_oltcopyTS,
  535. #endif
  536. xhemm_outcopyTS, xhemm_oltcopyTS,
  537. 0, 0, 0,
  538. #if defined(USE_GEMM3M)
  539. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  540. xgemm3m_kernelTS,
  541. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  542. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  543. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  544. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  545. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  546. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  547. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  548. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  549. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  550. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  551. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  552. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  553. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  554. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  555. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  556. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  557. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  558. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  559. #else
  560. 0, 0, 0,
  561. NULL,
  562. NULL, NULL,
  563. NULL, NULL,
  564. NULL, NULL,
  565. NULL, NULL,
  566. NULL, NULL,
  567. NULL, NULL,
  568. NULL, NULL,
  569. NULL, NULL,
  570. NULL, NULL,
  571. NULL, NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. NULL, NULL,
  577. NULL, NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. #endif
  581. #ifndef NO_LAPACK
  582. xneg_tcopyTS, xlaswp_ncopyTS,
  583. #else
  584. NULL, NULL,
  585. #endif
  586. #endif
  /* ---- Trailing entries: pointer to init_parameter (defined later in this
     file), the *NUMOPT values, then the axpby, omatcopy, imatcopy and geadd
     routines for the s/d/c/z types.  The final entry has no trailing comma. ---- */
  587. init_parameter,
  588. SNUMOPT, DNUMOPT, QNUMOPT,
  589. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  590. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  591. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  592. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  593. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  594. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  595. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  596. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  597. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  598. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  599. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  600. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  601. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  602. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  603. };
  604. #if defined(ARCH_ARM64)
  605. static void init_parameter(void) {
  606. #if defined(BUILD_HALF)
  607. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  608. #endif
  609. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  610. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  611. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  612. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  613. #if defined(BUILD_HALF)
  614. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  615. #endif
  616. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  617. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  618. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  619. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  620. #if defined(BUILD_HALF)
  621. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  622. #endif
  623. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  624. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  625. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  626. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  627. #ifdef EXPRECISION
  628. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  629. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  630. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  631. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  632. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  633. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  634. #endif
  635. #if defined(USE_GEMM3M)
  636. #ifdef CGEMM3M_DEFAULT_P
  637. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  638. #else
  639. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  640. #endif
  641. #ifdef ZGEMM3M_DEFAULT_P
  642. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  643. #else
  644. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  645. #endif
  646. #ifdef CGEMM3M_DEFAULT_Q
  647. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  648. #else
  649. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  650. #endif
  651. #ifdef ZGEMM3M_DEFAULT_Q
  652. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  653. #else
  654. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  655. #endif
  656. #ifdef CGEMM3M_DEFAULT_R
  657. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  658. #else
  659. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  660. #endif
  661. #ifdef ZGEMM3M_DEFAULT_R
  662. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  663. #else
  664. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  665. #endif
  666. #ifdef EXPRECISION
  667. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  668. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  669. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  670. #endif
  671. #endif
  672. }
  673. #else // defined(ARCH_ARM64)
  674. #if defined(ARCH_POWER)
  675. static void init_parameter(void) {
  676. #ifdef BUILD_HALF
  677. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  678. #endif
  679. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  680. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  681. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  682. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  683. #ifdef BUILD_HALF
  684. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  685. #endif
  686. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  687. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  688. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  689. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  690. #ifdef BUILD_HALF
  691. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  692. #endif
  693. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  694. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  695. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  696. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  697. }
  698. #else //POWER
  699. #if defined(ARCH_ZARCH)
  700. static void init_parameter(void) {
  701. #ifdef BUILD_HALF
  702. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  703. #endif
  704. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  705. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  706. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  707. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  708. #ifdef BUILD_HALF
  709. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  710. #endif
  711. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  712. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  713. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  714. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  715. #ifdef BUILD_HALF
  716. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  717. #endif
  718. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  719. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  720. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  721. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  722. }
  723. #else //ZARCH
  724. #ifdef ARCH_X86
  /*
   * Legacy L2 size lookup through CPUID leaf 2 (cache descriptor bytes).
   *
   * Scans the descriptor bytes returned in EAX..EDX in register order and
   * returns the L2 size, in KB, of the first descriptor found in the table
   * below.  If no descriptor matches, a warning is printed to stderr and
   * 256 is returned.
   *
   * Details that follow the Intel CPUID leaf 02H description:
   *   - the low byte of EAX is not a descriptor and is skipped;
   *   - a register whose bit 31 is set holds no valid descriptors and is
   *     skipped entirely;
   *   - descriptor 0x48 is a 3 MB L2, i.e. 3072 KB.
   */
  static int get_l2_size_old(void){
    int regs[4];          /* eax, ebx, ecx, edx as returned by cpuid(2) */
    int r, shift;

    cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

    for (r = 0; r < 4; r++) {

      /* Bit 31 set: register is reserved, its bytes are not descriptors. */
      if (((unsigned int) regs[r] >> 31) != 0) continue;

      /* Byte 0 of EAX is skipped; every other byte is a descriptor. */
      for (shift = (r == 0) ? 8 : 0; shift < 32; shift += 8) {

        switch (BITMASK(regs[r], shift, 0xff)) {

          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          return 3072;   /* 3 MB */

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;
        }
      }
    }

    /* Nothing recognised: warn and fall back to a fixed 256 KB. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  795. #endif
  /*
   * Return the L2 cache size read from CPUID leaf 0x80000006 (the 16-bit
   * field extracted from ECX with BITMASK(ecx, 16, 0xffff)).
   *
   * When that field is not positive:
   *   - on ARCH_X86 builds the legacy get_l2_size_old() lookup is used;
   *   - on all other builds a warning is printed to stderr and 256 is
   *     returned.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int l2_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2_field = BITMASK(ecx, 16, 0xffff);

    /* Common case: the extended leaf reported a usable value. */
    if (l2_field > 0) return l2_field;

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Return the L3 cache size derived from CPUID leaf 0x80000006: the 14-bit
   * field extracted with BITMASK(edx, 18, 0x3fff), multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the
   * result KB -- confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l3_units = BITMASK(edx, 18, 0x3fff);
    return l3_units * 512;
  }
  816. static void init_parameter(void) {
  817. int l2 = get_l2_size();
  818. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  819. /* where the GEMM unrolling parameters do not depend on l2 */
  820. #ifdef BUILD_HALF
  821. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  822. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  823. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  824. #endif
  825. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  826. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  827. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  828. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  829. #ifdef CGEMM3M_DEFAULT_Q
  830. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  831. #else
  832. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  833. #endif
  834. #ifdef ZGEMM3M_DEFAULT_Q
  835. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  836. #else
  837. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  838. #endif
  839. #ifdef EXPRECISION
  840. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  841. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  842. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  843. #endif
  844. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  845. #ifdef DEBUG
  846. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  847. #endif
  848. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  849. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  850. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  851. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  852. #ifdef EXPRECISION
  853. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  854. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  855. #endif
  856. #endif
  857. #ifdef CORE_NORTHWOOD
  858. #ifdef DEBUG
  859. fprintf(stderr, "Northwood\n");
  860. #endif
  861. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  862. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  863. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  864. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  865. #ifdef EXPRECISION
  866. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  867. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  868. #endif
  869. #endif
  870. #ifdef ATOM
  871. #ifdef DEBUG
  872. fprintf(stderr, "Atom\n");
  873. #endif
  874. TABLE_NAME.sgemm_p = 256;
  875. TABLE_NAME.dgemm_p = 128;
  876. TABLE_NAME.cgemm_p = 128;
  877. TABLE_NAME.zgemm_p = 64;
  878. #ifdef EXPRECISION
  879. TABLE_NAME.qgemm_p = 64;
  880. TABLE_NAME.xgemm_p = 32;
  881. #endif
  882. #endif
  883. #ifdef CORE_PRESCOTT
  884. #ifdef DEBUG
  885. fprintf(stderr, "Prescott\n");
  886. #endif
  887. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  888. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  889. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  890. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  891. #ifdef EXPRECISION
  892. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  893. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  894. #endif
  895. #endif
  896. #ifdef CORE2
  897. #ifdef DEBUG
  898. fprintf(stderr, "Core2\n");
  899. #endif
  900. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  901. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  902. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  903. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  904. #ifdef EXPRECISION
  905. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  906. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  907. #endif
  908. #endif
  909. #ifdef PENRYN
  910. #ifdef DEBUG
  911. fprintf(stderr, "Penryn\n");
  912. #endif
  913. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  914. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  915. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  916. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  917. #ifdef EXPRECISION
  918. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  919. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  920. #endif
  921. #endif
  922. #ifdef DUNNINGTON
  923. #ifdef DEBUG
  924. fprintf(stderr, "Dunnington\n");
  925. #endif
  926. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  927. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  928. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  929. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  930. #ifdef EXPRECISION
  931. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  932. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  933. #endif
  934. #endif
  935. #ifdef NEHALEM
  936. #ifdef DEBUG
  937. fprintf(stderr, "Nehalem\n");
  938. #endif
  939. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  940. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  941. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  942. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  943. #ifdef EXPRECISION
  944. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  945. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  946. #endif
  947. #endif
  948. #ifdef SANDYBRIDGE
  949. #ifdef DEBUG
  950. fprintf(stderr, "Sandybridge\n");
  951. #endif
  952. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  953. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  954. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  955. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  956. #ifdef EXPRECISION
  957. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  958. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  959. #endif
  960. #endif
  961. #ifdef HASWELL
  962. #ifdef DEBUG
  963. fprintf(stderr, "Haswell\n");
  964. #endif
  965. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  966. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  967. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  968. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  969. #ifdef EXPRECISION
  970. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  971. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  972. #endif
  973. #endif
  974. #ifdef SKYLAKEX
  975. #ifdef DEBUG
  976. fprintf(stderr, "SkylakeX\n");
  977. #endif
  978. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  979. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  980. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  981. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  982. #ifdef EXPRECISION
  983. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  984. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  985. #endif
  986. #endif
  987. #ifdef OPTERON
  988. #ifdef DEBUG
  989. fprintf(stderr, "Opteron\n");
  990. #endif
  991. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  992. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  993. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  994. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  995. #ifdef EXPRECISION
  996. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  997. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  998. #endif
  999. #endif
  1000. #ifdef BARCELONA
  1001. #ifdef DEBUG
  1002. fprintf(stderr, "Barcelona\n");
  1003. #endif
  1004. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1005. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1006. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1007. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1008. #ifdef EXPRECISION
  1009. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1010. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1011. #endif
  1012. #endif
  1013. #ifdef BOBCAT
  1014. #ifdef DEBUG
  1015. fprintf(stderr, "Bobcate\n");
  1016. #endif
  1017. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1018. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1019. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1020. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1021. #ifdef EXPRECISION
  1022. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1023. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1024. #endif
  1025. #endif
  1026. #ifdef BULLDOZER
  1027. #ifdef DEBUG
  1028. fprintf(stderr, "Bulldozer\n");
  1029. #endif
  1030. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1031. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1032. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1033. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1034. #ifdef EXPRECISION
  1035. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1036. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1037. #endif
  1038. #endif
  1039. #ifdef EXCAVATOR
  1040. #ifdef DEBUG
  1041. fprintf(stderr, "Excavator\n");
  1042. #endif
  1043. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1044. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1045. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1046. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1047. #ifdef EXPRECISION
  1048. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1049. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1050. #endif
  1051. #endif
  1052. #ifdef PILEDRIVER
  1053. #ifdef DEBUG
  1054. fprintf(stderr, "Piledriver\n");
  1055. #endif
  1056. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1057. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1058. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1059. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1060. #ifdef EXPRECISION
  1061. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1062. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1063. #endif
  1064. #endif
  1065. #ifdef STEAMROLLER
  1066. #ifdef DEBUG
  1067. fprintf(stderr, "Steamroller\n");
  1068. #endif
  1069. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1070. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1071. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1072. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1073. #ifdef EXPRECISION
  1074. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1075. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1076. #endif
  1077. #endif
  1078. #ifdef ZEN
  1079. #ifdef DEBUG
  1080. fprintf(stderr, "Zen\n");
  1081. #endif
  1082. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1083. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1084. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1085. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1086. #ifdef EXPRECISION
  1087. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1088. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1089. #endif
  1090. #endif
  1091. #ifdef NANO
  1092. #ifdef DEBUG
  1093. fprintf(stderr, "NANO\n");
  1094. #endif
  1095. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1096. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1097. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1098. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1099. #ifdef EXPRECISION
  1100. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1101. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1102. #endif
  1103. #endif
  1104. #ifdef CGEMM3M_DEFAULT_P
  1105. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1106. #else
  1107. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1108. #endif
  1109. #ifdef ZGEMM3M_DEFAULT_P
  1110. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1111. #else
  1112. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1113. #endif
  1114. #ifdef EXPRECISION
  1115. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1116. #endif
  1117. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1118. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1119. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1120. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1121. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1122. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1123. #else
  1124. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1125. #endif
  1126. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1127. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1128. #else
  1129. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1130. #endif
  1131. #ifdef QUAD_PRECISION
  1132. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1133. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1134. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1135. #endif
  1136. #ifdef DEBUG
  1137. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1138. #endif
  1139. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1140. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1141. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1142. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1143. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1144. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1145. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1146. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1147. #ifdef EXPRECISION
  1148. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1149. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1150. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1151. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1152. #endif
  1153. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1154. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1155. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1156. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1157. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1158. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1159. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1160. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1161. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1162. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1163. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1164. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1165. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1166. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1167. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1168. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1169. #ifdef EXPRECISION
  1170. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1171. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1172. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1173. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1174. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1175. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1176. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1177. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1178. #endif
  1179. }
  1180. #endif //ZARCH
  1181. #endif //POWER
  1182. #endif //defined(ARCH_ARM64)