You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 54 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void);
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  61. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  62. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  63. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  64. dsdot_kTS,
  65. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  66. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  67. ssymv_LTS, ssymv_UTS,
  68. sbgemm_kernelTS, sbgemm_betaTS,
  69. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  70. sbgemm_incopyTS, sbgemm_itcopyTS,
  71. #else
  72. sbgemm_oncopyTS, sbgemm_otcopyTS,
  73. #endif
  74. sbgemm_oncopyTS, sbgemm_otcopyTS,
  75. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  76. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  77. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  78. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  79. #else
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. #endif
  83. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  84. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  85. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  86. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  87. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  88. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  89. #else
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #endif
  93. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  94. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  95. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  96. ssymm_iutcopyTS, ssymm_iltcopyTS,
  97. #else
  98. ssymm_outcopyTS, ssymm_oltcopyTS,
  99. #endif
  100. ssymm_outcopyTS, ssymm_oltcopyTS,
  101. #ifndef NO_LAPACK
  102. sneg_tcopyTS, slaswp_ncopyTS,
  103. #else
  104. NULL,NULL,
  105. #endif
  106. #ifdef SMALL_MATRIX_OPT
  107. sbgemm_small_matrix_permitTS,
  108. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  109. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  110. #endif
  111. #endif
  112. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  113. 0, 0, 0,
  114. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  115. #ifdef SGEMM_DEFAULT_UNROLL_MN
  116. SGEMM_DEFAULT_UNROLL_MN,
  117. #else
  118. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  119. #endif
  120. #endif
  121. #ifdef HAVE_EXCLUSIVE_CACHE
  122. 1,
  123. #else
  124. 0,
  125. #endif
  126. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  127. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  128. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  129. snrm2_kTS, sasum_kTS,
  130. #endif
  131. #if BUILD_SINGLE == 1
  132. ssum_kTS,
  133. #endif
  134. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  135. scopy_kTS, sdot_kTS,
  136. // dsdot_kTS,
  137. srot_kTS, saxpy_kTS,
  138. #endif
  139. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  140. sscal_kTS,
  141. #endif
  142. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  143. sswap_kTS,
  144. sgemv_nTS, sgemv_tTS,
  145. #endif
  146. #if BUILD_SINGLE == 1
  147. sger_kTS,
  148. ssymv_LTS, ssymv_UTS,
  149. #endif
  150. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  151. #ifdef ARCH_X86_64
  152. sgemm_directTS,
  153. sgemm_direct_performantTS,
  154. #endif
  155. sgemm_kernelTS, sgemm_betaTS,
  156. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  157. sgemm_incopyTS, sgemm_itcopyTS,
  158. #else
  159. sgemm_oncopyTS, sgemm_otcopyTS,
  160. #endif
  161. sgemm_oncopyTS, sgemm_otcopyTS,
  162. #endif
  163. #if BUILD_SINGLE == 1
  164. #ifdef SMALL_MATRIX_OPT
  165. sgemm_small_matrix_permitTS,
  166. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  167. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  168. #endif
  169. #endif
  170. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  171. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  172. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  173. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  174. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  175. #else
  176. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  177. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  178. #endif
  179. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  180. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  181. #endif
  182. #if BUILD_SINGLE == 1
  183. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  184. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  185. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  186. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  187. #else
  188. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  189. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  190. #endif
  191. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  192. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  193. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  194. ssymm_iutcopyTS, ssymm_iltcopyTS,
  195. #else
  196. ssymm_outcopyTS, ssymm_oltcopyTS,
  197. #endif
  198. ssymm_outcopyTS, ssymm_oltcopyTS,
  199. #endif
  200. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  201. #ifndef NO_LAPACK
  202. sneg_tcopyTS, slaswp_ncopyTS,
  203. #else
  204. NULL,NULL,
  205. #endif
  206. #endif
  207. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  208. 0, 0, 0,
  209. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  210. #ifdef DGEMM_DEFAULT_UNROLL_MN
  211. DGEMM_DEFAULT_UNROLL_MN,
  212. #else
  213. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  214. #endif
  215. #endif
  216. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  217. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  218. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  219. dnrm2_kTS, dasum_kTS,
  220. #endif
  221. #if (BUILD_DOUBLE==1)
  222. dsum_kTS,
  223. #endif
  224. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  225. dcopy_kTS, ddot_kTS,
  226. #endif
  227. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  228. dsdot_kTS,
  229. #endif
  230. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  231. drot_kTS,
  232. daxpy_kTS,
  233. dscal_kTS,
  234. dswap_kTS,
  235. dgemv_nTS, dgemv_tTS,
  236. #endif
  237. #if (BUILD_DOUBLE==1)
  238. dger_kTS,
  239. dsymv_LTS, dsymv_UTS,
  240. #endif
  241. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  242. dgemm_kernelTS, dgemm_betaTS,
  243. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  244. dgemm_incopyTS, dgemm_itcopyTS,
  245. #else
  246. dgemm_oncopyTS, dgemm_otcopyTS,
  247. #endif
  248. dgemm_oncopyTS, dgemm_otcopyTS,
  249. #endif
  250. #if (BUILD_DOUBLE==1)
  251. #ifdef SMALL_MATRIX_OPT
  252. dgemm_small_matrix_permitTS,
  253. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  254. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  255. #endif
  256. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  257. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  258. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  259. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  260. #else
  261. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  262. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  263. #endif
  264. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  265. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  266. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  267. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  268. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  269. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  270. #else
  271. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  272. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  273. #endif
  274. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  275. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  276. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  277. dsymm_iutcopyTS, dsymm_iltcopyTS,
  278. #else
  279. dsymm_outcopyTS, dsymm_oltcopyTS,
  280. #endif
  281. dsymm_outcopyTS, dsymm_oltcopyTS,
  282. #ifndef NO_LAPACK
  283. dneg_tcopyTS, dlaswp_ncopyTS,
  284. #else
  285. NULL, NULL,
  286. #endif
  287. #endif
  288. #ifdef EXPRECISION
  289. 0, 0, 0,
  290. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  291. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  292. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  293. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  294. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  295. qgemv_nTS, qgemv_tTS, qger_kTS,
  296. qsymv_LTS, qsymv_UTS,
  297. qgemm_kernelTS, qgemm_betaTS,
  298. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  299. qgemm_incopyTS, qgemm_itcopyTS,
  300. #else
  301. qgemm_oncopyTS, qgemm_otcopyTS,
  302. #endif
  303. qgemm_oncopyTS, qgemm_otcopyTS,
  304. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  305. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  306. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  307. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  308. #else
  309. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  310. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  311. #endif
  312. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  313. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  314. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  315. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  316. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  317. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  318. #else
  319. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  320. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  321. #endif
  322. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  323. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  324. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  325. qsymm_iutcopyTS, qsymm_iltcopyTS,
  326. #else
  327. qsymm_outcopyTS, qsymm_oltcopyTS,
  328. #endif
  329. qsymm_outcopyTS, qsymm_oltcopyTS,
  330. #ifndef NO_LAPACK
  331. qneg_tcopyTS, qlaswp_ncopyTS,
  332. #else
  333. NULL, NULL,
  334. #endif
  335. #endif
  336. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  337. 0, 0, 0,
  338. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  339. #ifdef CGEMM_DEFAULT_UNROLL_MN
  340. CGEMM_DEFAULT_UNROLL_MN,
  341. #else
  342. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  343. #endif
  344. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  345. #endif
  346. #if (BUILD_COMPLEX)
  347. cnrm2_kTS, casum_kTS, csum_kTS,
  348. #endif
  349. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  350. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  351. #endif
  352. #if (BUILD_COMPLEX)
  353. csrot_kTS,
  354. #endif
  355. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  356. caxpy_kTS,
  357. caxpyc_kTS,
  358. cscal_kTS,
  359. cswap_kTS,
  360. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  361. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  362. #endif
  363. #if (BUILD_COMPLEX)
  364. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  365. csymv_LTS, csymv_UTS,
  366. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  367. #endif
  368. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  369. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  370. cgemm_betaTS,
  371. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  372. cgemm_incopyTS, cgemm_itcopyTS,
  373. #else
  374. cgemm_oncopyTS, cgemm_otcopyTS,
  375. #endif
  376. cgemm_oncopyTS, cgemm_otcopyTS,
  377. #ifdef SMALL_MATRIX_OPT
  378. cgemm_small_matrix_permitTS,
  379. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  380. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  381. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  382. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  383. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  384. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  385. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  386. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  387. #endif
  388. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  389. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  390. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  391. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  392. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  393. #else
  394. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  395. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  396. #endif
  397. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  398. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  399. #endif
  400. #if (BUILD_COMPLEX)
  401. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  402. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  403. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  404. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  405. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  406. #else
  407. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  408. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  409. #endif
  410. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  411. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  412. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  413. csymm_iutcopyTS, csymm_iltcopyTS,
  414. #else
  415. csymm_outcopyTS, csymm_oltcopyTS,
  416. #endif
  417. csymm_outcopyTS, csymm_oltcopyTS,
  418. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  419. chemm_iutcopyTS, chemm_iltcopyTS,
  420. #else
  421. chemm_outcopyTS, chemm_oltcopyTS,
  422. #endif
  423. chemm_outcopyTS, chemm_oltcopyTS,
  424. 0, 0, 0,
  425. #if (USE_GEMM3M)
  426. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  427. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  428. #else
  429. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  430. #endif
  431. cgemm3m_kernelTS,
  432. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  433. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  434. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  435. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  436. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  437. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  438. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  439. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  440. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  441. csymm3m_oucopybTS, csymm3m_olcopybTS,
  442. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  443. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  444. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  445. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  446. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  447. chemm3m_oucopybTS, chemm3m_olcopybTS,
  448. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  449. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  450. #else
  451. 0, 0, 0,
  452. NULL,
  453. NULL, NULL,
  454. NULL, NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. #endif
  472. #endif
  473. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  474. #ifndef NO_LAPACK
  475. cneg_tcopyTS,
  476. claswp_ncopyTS,
  477. #else
  478. NULL, NULL,
  479. #endif
  480. #endif
  481. #if BUILD_COMPLEX16 == 1
  482. 0, 0, 0,
  483. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  484. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  485. ZGEMM_DEFAULT_UNROLL_MN,
  486. #else
  487. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  488. #endif
  489. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  490. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  491. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  492. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  493. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  494. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  495. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  496. zsymv_LTS, zsymv_UTS,
  497. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  498. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  499. zgemm_betaTS,
  500. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  501. zgemm_incopyTS, zgemm_itcopyTS,
  502. #else
  503. zgemm_oncopyTS, zgemm_otcopyTS,
  504. #endif
  505. zgemm_oncopyTS, zgemm_otcopyTS,
  506. #ifdef SMALL_MATRIX_OPT
  507. zgemm_small_matrix_permitTS,
  508. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  509. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  510. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  511. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  512. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  513. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  514. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  515. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  516. #endif
  517. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  518. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  519. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  520. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  521. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  522. #else
  523. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  524. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  525. #endif
  526. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  527. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  528. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  529. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  530. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  531. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  532. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  533. #else
  534. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  535. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  536. #endif
  537. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  538. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  539. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  540. zsymm_iutcopyTS, zsymm_iltcopyTS,
  541. #else
  542. zsymm_outcopyTS, zsymm_oltcopyTS,
  543. #endif
  544. zsymm_outcopyTS, zsymm_oltcopyTS,
  545. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  546. zhemm_iutcopyTS, zhemm_iltcopyTS,
  547. #else
  548. zhemm_outcopyTS, zhemm_oltcopyTS,
  549. #endif
  550. zhemm_outcopyTS, zhemm_oltcopyTS,
  551. 0, 0, 0,
  552. #if (USE_GEMM3M)
  553. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  554. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  555. #else
  556. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  557. #endif
  558. zgemm3m_kernelTS,
  559. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  560. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  561. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  562. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  563. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  564. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  565. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  566. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  567. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  568. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  569. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  570. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  571. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  572. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  573. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  574. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  575. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  576. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  577. #else
  578. 0, 0, 0,
  579. NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. NULL, NULL,
  584. NULL, NULL,
  585. NULL, NULL,
  586. NULL, NULL,
  587. NULL, NULL,
  588. NULL, NULL,
  589. NULL, NULL,
  590. NULL, NULL,
  591. NULL, NULL,
  592. NULL, NULL,
  593. NULL, NULL,
  594. NULL, NULL,
  595. NULL, NULL,
  596. NULL, NULL,
  597. NULL, NULL,
  598. #endif
  599. #ifndef NO_LAPACK
  600. zneg_tcopyTS, zlaswp_ncopyTS,
  601. #else
  602. NULL, NULL,
  603. #endif
  604. #endif
  605. #ifdef EXPRECISION
  606. 0, 0, 0,
  607. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  608. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  609. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  610. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  611. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  612. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  613. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  614. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  615. xsymv_LTS, xsymv_UTS,
  616. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  617. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  618. xgemm_betaTS,
  619. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  620. xgemm_incopyTS, xgemm_itcopyTS,
  621. #else
  622. xgemm_oncopyTS, xgemm_otcopyTS,
  623. #endif
  624. xgemm_oncopyTS, xgemm_otcopyTS,
  625. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  626. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  627. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  628. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  629. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  630. #else
  631. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  632. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  633. #endif
  634. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  635. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  636. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  637. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  638. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  639. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  640. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  641. #else
  642. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  643. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  644. #endif
  645. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  646. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  647. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  648. xsymm_iutcopyTS, xsymm_iltcopyTS,
  649. #else
  650. xsymm_outcopyTS, xsymm_oltcopyTS,
  651. #endif
  652. xsymm_outcopyTS, xsymm_oltcopyTS,
  653. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  654. xhemm_iutcopyTS, xhemm_iltcopyTS,
  655. #else
  656. xhemm_outcopyTS, xhemm_oltcopyTS,
  657. #endif
  658. xhemm_outcopyTS, xhemm_oltcopyTS,
  659. 0, 0, 0,
  660. #if (USE_GEMM3M)
  661. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  662. xgemm3m_kernelTS,
  663. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  664. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  665. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  666. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  667. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  668. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  669. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  670. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  671. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  672. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  673. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  674. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  675. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  676. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  677. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  678. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  679. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  680. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  681. #else
  682. 0, 0, 0,
  683. NULL,
  684. NULL, NULL,
  685. NULL, NULL,
  686. NULL, NULL,
  687. NULL, NULL,
  688. NULL, NULL,
  689. NULL, NULL,
  690. NULL, NULL,
  691. NULL, NULL,
  692. NULL, NULL,
  693. NULL, NULL,
  694. NULL, NULL,
  695. NULL, NULL,
  696. NULL, NULL,
  697. NULL, NULL,
  698. NULL, NULL,
  699. NULL, NULL,
  700. NULL, NULL,
  701. NULL, NULL,
  702. #endif
  703. #ifndef NO_LAPACK
  704. xneg_tcopyTS, xlaswp_ncopyTS,
  705. #else
  706. NULL, NULL,
  707. #endif
  708. #endif
  709. init_parameter,
  710. SNUMOPT, DNUMOPT, QNUMOPT,
  711. #if BUILD_SINGLE == 1
  712. saxpby_kTS,
  713. #endif
  714. #if BUILD_DOUBLE == 1
  715. daxpby_kTS,
  716. #endif
  717. #if BUILD_COMPLEX == 1
  718. caxpby_kTS,
  719. #endif
  720. #if BUILD_COMPLEX16== 1
  721. zaxpby_kTS,
  722. #endif
  723. #if BUILD_SINGLE == 1
  724. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  725. #endif
  726. #if BUILD_DOUBLE== 1
  727. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  728. #endif
  729. #if BUILD_COMPLEX == 1
  730. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  731. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  732. #endif
  733. #if BUILD_COMPLEX16 == 1
  734. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  735. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  736. #endif
  737. #if BUILD_SINGLE == 1
  738. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  739. #endif
  740. #if BUILD_DOUBLE== 1
  741. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  742. #endif
  743. #if BUILD_COMPLEX== 1
  744. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  745. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  746. #endif
  747. #if BUILD_COMPLEX16==1
  748. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  749. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  750. #endif
  751. #if BUILD_SINGLE == 1
  752. sgeadd_kTS,
  753. #endif
  754. #if BUILD_DOUBLE==1
  755. dgeadd_kTS,
  756. #endif
  757. #if BUILD_COMPLEX==1
  758. cgeadd_kTS,
  759. #endif
  760. #if BUILD_COMPLEX16==1
  761. zgeadd_kTS,
  762. #endif
  763. };
  764. #if (ARCH_ARM64)
  /*
   * ARM64 variant of init_parameter.
   *
   * Copies the compile-time *GEMM_DEFAULT_{P,Q,R} values into TABLE_NAME
   * and, when USE_GEMM3M is enabled, fills in the GEMM3M P/Q/R entries
   * either from the dedicated CGEMM3M_/ZGEMM3M_DEFAULT_* macros or, when
   * those are absent, from the sgemm_* / dgemm_* entries set just above.
   *
   * Because of that fallback, sgemm_{p,q,r} are assigned whenever either
   * the single or the single-complex precision is built, and
   * dgemm_{p,q,r} whenever either the double or the double-complex
   * precision is built.  All three of P, Q and R use the same guard so a
   * complex-only build never reads an entry that was left unassigned.
   */
  static void init_parameter(void) {

    /* ---- P ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  #endif
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  #endif

    /* ---- Q ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  #endif

    /* ---- R ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /*
     * GEMM3M parameters.  These must stay after the sgemm_* / dgemm_* /
     * qgemm_* assignments above, since the #else branches copy from them.
     */
  #if (USE_GEMM3M)
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif
  }
  857. #else // (ARCH_ARM64)
  858. #if defined(ARCH_MIPS64)
  859. static void init_parameter(void) {
  860. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  861. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  862. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  863. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  864. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  865. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  866. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  867. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  868. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  869. TABLE_NAME.dgemm_r = 640;
  870. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  871. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  872. #ifdef EXPRECISION
  873. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  874. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  875. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  876. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  877. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  878. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  879. #endif
  880. #if defined(USE_GEMM3M)
  881. #ifdef CGEMM3M_DEFAULT_P
  882. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  883. #else
  884. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  885. #endif
  886. #ifdef ZGEMM3M_DEFAULT_P
  887. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  888. #else
  889. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  890. #endif
  891. #ifdef CGEMM3M_DEFAULT_Q
  892. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  893. #else
  894. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  895. #endif
  896. #ifdef ZGEMM3M_DEFAULT_Q
  897. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  898. #else
  899. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  900. #endif
  901. #ifdef CGEMM3M_DEFAULT_R
  902. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  903. #else
  904. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  905. #endif
  906. #ifdef ZGEMM3M_DEFAULT_R
  907. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  908. #else
  909. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  910. #endif
  911. #ifdef EXPRECISION
  912. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  913. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  914. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  915. #endif
  916. #endif
  917. }
  918. #else // (ARCH_MIPS64)
  919. #if (ARCH_LOONGARCH64)
  920. static void init_parameter(void) {
  921. #ifdef BUILD_BFLOAT16
  922. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  923. #endif
  924. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  925. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  926. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  927. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  928. #ifdef BUILD_BFLOAT16
  929. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  930. #endif
  931. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  932. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  933. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  934. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  935. #ifdef BUILD_BFLOAT16
  936. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  937. #endif
  938. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  939. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  940. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  941. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  942. }
  943. #else // (ARCH_LOONGARCH64)
  944. #if (ARCH_POWER)
  945. static void init_parameter(void) {
  946. #ifdef BUILD_BFLOAT16
  947. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  948. #endif
  949. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  950. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  951. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  952. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  953. #ifdef BUILD_BFLOAT16
  954. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  955. #endif
  956. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  957. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  958. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  959. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  960. #ifdef BUILD_BFLOAT16
  961. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  962. #endif
  963. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  964. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  965. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  966. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  967. }
  968. #else //POWER
  969. #if (ARCH_ZARCH)
  970. static void init_parameter(void) {
  971. #ifdef BUILD_BFLOAT16
  972. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  973. #endif
  974. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  975. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  976. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  977. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  978. #ifdef BUILD_BFLOAT16
  979. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  980. #endif
  981. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  982. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  983. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  984. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  985. #ifdef BUILD_BFLOAT16
  986. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  987. #endif
  988. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  989. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  990. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  991. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  992. }
  993. #else //ZARCH
  994. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup based on the CPUID leaf 2 cache descriptors.
   *
   * Executes cpuid(2) and walks the descriptor bytes in the order
   * eax[15:8], eax[23:16], eax[31:24], then all four bytes of ebx, ecx and
   * edx.  The low byte of eax is not a descriptor and is skipped.  The
   * first byte that matches a known L2 descriptor decides the result.
   *
   * A register whose bit 31 is set carries no valid descriptors and is
   * skipped entirely, so stray bytes in it cannot be mistaken for a cache
   * descriptor.
   *
   * Returns the L2 size in kilobytes.  If no descriptor matches, a warning
   * is written to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){

    int reg[4];   /* eax, ebx, ecx, edx as returned by cpuid(2) */
    int r, b;

    cpuid(2, &reg[0], &reg[1], &reg[2], &reg[3]);

    for (r = 0; r < 4; r++){

      /* Bit 31 set: this register holds no valid descriptor bytes. */
      if (BITMASK(reg[r], 31, 0x1)) continue;

      /* Byte 0 of eax (r == 0) is not a descriptor; start at byte 1 there. */
      for (b = (r == 0) ? 1 : 0; b < 4; b++){

        switch (BITMASK(reg[r], 8 * b, 0xff)){

          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          /* 3 MB = 3 * 1024 KB */
          return 3072;

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;
        }
      }
    }

    /* No known L2 descriptor found: warn and fall back to a default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1065. #endif
  /*
   * Returns the L2 cache size taken from bits 31..16 of ecx as reported by
   * cpuid(0x80000006).
   *
   * When that field is zero the fallback depends on the build: with
   * ARCH_X86 defined the result of get_l2_size_old() is returned; otherwise
   * a warning is written to stderr and 256 is returned.
   */
  static __inline__ int get_l2_size(void){

    int ra, rb, rc, rd;
    int kbytes;

    cpuid(0x80000006, &ra, &rb, &rc, &rd);

    kbytes = BITMASK(rc, 16, 0xffff);

    /* Usable value reported: nothing more to do. */
    if (kbytes > 0) return kbytes;

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Returns bits 31..18 of edx, as reported by cpuid(0x80000006),
   * multiplied by 512.
   * NOTE(review): presumably this is the L3 size in KB, with the field
   * counted in 512 KB units - confirm against the vendor CPUID manual.
   */
  static __inline__ int get_l3_size(void){

    int ra, rb, rc, rd;
    int units;

    cpuid(0x80000006, &ra, &rb, &rc, &rd);

    units = BITMASK(rd, 18, 0x3fff);

    return units * 512;
  }
  1086. static void init_parameter(void) {
  1087. int l2 = get_l2_size();
  1088. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1089. /* where the GEMM unrolling parameters do not depend on l2 */
  1090. #ifdef BUILD_BFLOAT16
  1091. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1092. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1093. #endif
  1094. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1095. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1096. #endif
  1097. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1098. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1099. #endif
  1100. #if BUILD_COMPLEX == 1
  1101. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1102. #endif
  1103. #if BUILD_COMPLEX16==1
  1104. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1105. #endif
  1106. #if BUILD_COMPLEX == 1
  1107. #ifdef CGEMM3M_DEFAULT_Q
  1108. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1109. #else
  1110. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1111. #endif
  1112. #endif
  1113. #if BUILD_COMPLEX16 == 1
  1114. #ifdef ZGEMM3M_DEFAULT_Q
  1115. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1116. #else
  1117. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1118. #endif
  1119. #endif
  1120. #ifdef EXPRECISION
  1121. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1122. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1123. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1124. #endif
  1125. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1126. #ifdef DEBUG
  1127. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1128. #endif
  1129. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1130. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1131. #endif
  1132. #if BUILD_DOUBLE == 1
  1133. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1134. #endif
  1135. #if BUILD_COMPLEX==1
  1136. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1137. #endif
  1138. #if BUILD_COMPLEX16==1
  1139. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1140. #endif
  1141. #ifdef EXPRECISION
  1142. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1143. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1144. #endif
  1145. #endif
  1146. #ifdef CORE_NORTHWOOD
  1147. #ifdef DEBUG
  1148. fprintf(stderr, "Northwood\n");
  1149. #endif
  1150. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1151. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1152. #endif
  1153. #if BUILD_DOUBLE == 1
  1154. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1155. #endif
  1156. #if BUILD_COMPLEX==1
  1157. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1158. #endif
  1159. #if BUILD_COMPLEX16==1
  1160. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1161. #endif
  1162. #ifdef EXPRECISION
  1163. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1164. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1165. #endif
  1166. #endif
  1167. #ifdef ATOM
  1168. #ifdef DEBUG
  1169. fprintf(stderr, "Atom\n");
  1170. #endif
  1171. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1172. TABLE_NAME.sgemm_p = 256;
  1173. #endif
  1174. #if BUILD_DOUBLE ==1
  1175. TABLE_NAME.dgemm_p = 128;
  1176. #endif
  1177. #if BUILD_COMPLEX==1
  1178. TABLE_NAME.cgemm_p = 128;
  1179. #endif
  1180. #if BUILD_COMPLEX16==1
  1181. TABLE_NAME.zgemm_p = 64;
  1182. #endif
  1183. #ifdef EXPRECISION
  1184. TABLE_NAME.qgemm_p = 64;
  1185. TABLE_NAME.xgemm_p = 32;
  1186. #endif
  1187. #endif
  1188. #ifdef CORE_PRESCOTT
  1189. #ifdef DEBUG
  1190. fprintf(stderr, "Prescott\n");
  1191. #endif
  1192. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1193. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1194. #endif
  1195. #if BUILD_DOUBLE ==1
  1196. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1197. #endif
  1198. #if BUILD_COMPLEX==1
  1199. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1200. #endif
  1201. #if BUILD_COMPLEX16 == 1
  1202. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1203. #endif
  1204. #ifdef EXPRECISION
  1205. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1206. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1207. #endif
  1208. #endif
  1209. #ifdef CORE2
  1210. #ifdef DEBUG
  1211. fprintf(stderr, "Core2\n");
  1212. #endif
  1213. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1214. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1215. #endif
  1216. #if BUILD_DOUBLE==1
  1217. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1218. #endif
  1219. #if BUILD_COMPLEX==1
  1220. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1221. #endif
  1222. #if BUILD_COMPLEX16==1
  1223. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1224. #endif
  1225. #ifdef EXPRECISION
  1226. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1227. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1228. #endif
  1229. #endif
  1230. #ifdef PENRYN
  1231. #ifdef DEBUG
  1232. fprintf(stderr, "Penryn\n");
  1233. #endif
  1234. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1235. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1236. #endif
  1237. #if BUILD_DOUBLE == 1
  1238. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1239. #endif
  1240. #if BUILD_COMPLEX==1
  1241. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1242. #endif
  1243. #if BUILD_COMPLEX16==1
  1244. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1245. #endif
  1246. #ifdef EXPRECISION
  1247. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1248. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1249. #endif
  1250. #endif
  1251. #ifdef DUNNINGTON
  1252. #ifdef DEBUG
  1253. fprintf(stderr, "Dunnington\n");
  1254. #endif
  1255. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1256. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1257. #endif
  1258. #if BUILD_DOUBLE ==1
  1259. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1260. #endif
  1261. #if BUILD_COMPLEX==1
  1262. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1263. #endif
  1264. #if BUILD_COMPLEX16==1
  1265. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1266. #endif
  1267. #ifdef EXPRECISION
  1268. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1269. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1270. #endif
  1271. #endif
  1272. #ifdef NEHALEM
  1273. #ifdef DEBUG
  1274. fprintf(stderr, "Nehalem\n");
  1275. #endif
  1276. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1277. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1278. #endif
  1279. #if BUILD_DOUBLE
  1280. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1281. #endif
  1282. #if BUILD_COMPLEX
  1283. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1284. #endif
  1285. #if BUILD_COMPLEX16
  1286. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1287. #endif
  1288. #ifdef EXPRECISION
  1289. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1290. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1291. #endif
  1292. #endif
  1293. #ifdef SANDYBRIDGE
  1294. #ifdef DEBUG
  1295. fprintf(stderr, "Sandybridge\n");
  1296. #endif
  1297. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1298. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1299. #endif
  1300. #if BUILD_DOUBLE
  1301. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1302. #endif
  1303. #if BUILD_COMPLEX
  1304. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1305. #endif
  1306. #if BUILD_COMPLEX16
  1307. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1308. #endif
  1309. #ifdef EXPRECISION
  1310. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1311. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1312. #endif
  1313. #endif
  1314. #ifdef HASWELL
  1315. #ifdef DEBUG
  1316. fprintf(stderr, "Haswell\n");
  1317. #endif
  1318. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1319. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1320. #endif
  1321. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1322. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1323. #endif
  1324. #if BUILD_COMPLEX
  1325. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1326. #endif
  1327. #if BUILD_COMPLEX16
  1328. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1329. #endif
  1330. #ifdef EXPRECISION
  1331. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1332. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1333. #endif
  1334. #endif
  1335. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1336. #ifdef DEBUG
  1337. fprintf(stderr, "SkylakeX\n");
  1338. #endif
  1339. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1340. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1341. #endif
  1342. #if BUILD_DOUBLE
  1343. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1344. #endif
  1345. #if BUILD_COMPLEX
  1346. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1347. #endif
  1348. #if BUILD_COMPLEX16
  1349. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1350. #endif
  1351. #ifdef EXPRECISION
  1352. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1353. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1354. #endif
  1355. #endif
  1356. #ifdef OPTERON
  1357. #ifdef DEBUG
  1358. fprintf(stderr, "Opteron\n");
  1359. #endif
  1360. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1361. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1362. #endif
  1363. #if BUILD_DOUBLE
  1364. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1365. #endif
  1366. #if BUILD_COMPLEX
  1367. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1368. #endif
  1369. #if BUILD_COMPLEX16
  1370. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1371. #endif
  1372. #ifdef EXPRECISION
  1373. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1374. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1375. #endif
  1376. #endif
  1377. #ifdef BARCELONA
  1378. #ifdef DEBUG
  1379. fprintf(stderr, "Barcelona\n");
  1380. #endif
  1381. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1382. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1383. #endif
  1384. #if BUILD_DOUBLE
  1385. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1386. #endif
  1387. #if BUILD_COMPLEX
  1388. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1389. #endif
  1390. #if BUILD_COMPLEX16
  1391. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1392. #endif
  1393. #ifdef EXPRECISION
  1394. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1395. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1396. #endif
  1397. #endif
  1398. #ifdef BOBCAT
  1399. #ifdef DEBUG
  1400. fprintf(stderr, "Bobcate\n");
  1401. #endif
  1402. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1403. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1404. #endif
  1405. #if BUILD_DOUBLE
  1406. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1407. #endif
  1408. #if BUILD_COMPLEX
  1409. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1410. #endif
  1411. #if BUILD_COMPLEX16
  1412. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1413. #endif
  1414. #ifdef EXPRECISION
  1415. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1416. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1417. #endif
  1418. #endif
  1419. #ifdef BULLDOZER
  1420. #ifdef DEBUG
  1421. fprintf(stderr, "Bulldozer\n");
  1422. #endif
  1423. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1424. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1425. #endif
  1426. #if BUILD_DOUBLE
  1427. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1428. #endif
  1429. #if BUILD_COMPLEX
  1430. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1431. #endif
  1432. #if BUILD_COMPLEX16
  1433. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1434. #endif
  1435. #ifdef EXPRECISION
  1436. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1437. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1438. #endif
  1439. #endif
  1440. #ifdef EXCAVATOR
  1441. #ifdef DEBUG
  1442. fprintf(stderr, "Excavator\n");
  1443. #endif
  1444. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1445. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1446. #endif
  1447. #if BUILD_DOUBLE
  1448. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1449. #endif
  1450. #if BUILD_COMPLEX
  1451. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1452. #endif
  1453. #if BUILD_COMPLEX16
  1454. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1455. #endif
  1456. #ifdef EXPRECISION
  1457. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1458. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1459. #endif
  1460. #endif
  1461. #ifdef PILEDRIVER
  1462. #ifdef DEBUG
  1463. fprintf(stderr, "Piledriver\n");
  1464. #endif
  1465. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1466. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1467. #endif
  1468. #if BUILD_DOUBLE
  1469. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1470. #endif
  1471. #if BUILD_COMPLEX
  1472. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1473. #endif
  1474. #if BUILD_COMPLEX16
  1475. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1476. #endif
  1477. #ifdef EXPRECISION
  1478. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1479. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1480. #endif
  1481. #endif
  1482. #ifdef STEAMROLLER
  1483. #ifdef DEBUG
  1484. fprintf(stderr, "Steamroller\n");
  1485. #endif
  1486. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1487. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1488. #endif
  1489. #if BUILD_DOUBLE
  1490. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1491. #endif
  1492. #if BUILD_COMPLEX
  1493. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1494. #endif
  1495. #if BUILD_COMPLEX16
  1496. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1497. #endif
  1498. #ifdef EXPRECISION
  1499. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1500. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1501. #endif
  1502. #endif
  1503. #ifdef ZEN
  1504. #ifdef DEBUG
  1505. fprintf(stderr, "Zen\n");
  1506. #endif
  1507. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1508. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1509. #endif
  1510. #if BUILD_DOUBLE
  1511. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1512. #endif
  1513. #if BUILD_COMPLEX
  1514. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1515. #endif
  1516. #if BUILD_COMPLEX16
  1517. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1518. #endif
  1519. #ifdef EXPRECISION
  1520. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1521. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1522. #endif
  1523. #endif
  1524. #ifdef NANO
  1525. #ifdef DEBUG
  1526. fprintf(stderr, "NANO\n");
  1527. #endif
  1528. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1529. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1530. #endif
  1531. #if (BUILD_DOUBLE==1)
  1532. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1533. #endif
  1534. #if (BUILD_COMPLEX==1)
  1535. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1536. #endif
  1537. #if (BUILD_COMPLEX16==1)
  1538. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1539. #endif
  1540. #ifdef EXPRECISION
  1541. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1542. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1543. #endif
  1544. #endif
  1545. #if BUILD_COMPLEX==1
  1546. #ifdef CGEMM3M_DEFAULT_P
  1547. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1548. #else
  1549. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1550. #endif
  1551. #endif
  1552. #if BUILD_COMPLEX16==1
  1553. #ifdef ZGEMM3M_DEFAULT_P
  1554. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1555. #else
  1556. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1557. #endif
  1558. #endif
  1559. #ifdef EXPRECISION
  1560. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1561. #endif
  1562. #if BUILD_SINGLE == 1
  1563. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1564. #endif
  1565. #if BUILD_DOUBLE== 1
  1566. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1567. #endif
  1568. #if BUILD_COMPLEX==1
  1569. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1570. #endif
  1571. #if BUILD_COMPLEX16==1
  1572. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1573. #endif
  1574. #if BUILD_COMPLEX==1
  1575. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1576. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1577. #else
  1578. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1579. #endif
  1580. #endif
  1581. #if BUILD_COMPLEX16==1
  1582. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1583. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1584. #else
  1585. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1586. #endif
  1587. #endif
  1588. #ifdef QUAD_PRECISION
  1589. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1590. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1591. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1592. #endif
  1593. #ifdef DEBUG
  1594. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1595. #endif
  1596. #if BUILD_BFLOAT16==1
  1597. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1598. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1599. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1600. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1601. #endif
  1602. #if BUILD_SINGLE==1
  1603. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1604. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1605. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1606. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1607. #endif
  1608. #if BUILD_DOUBLE==1
  1609. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1610. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1611. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1612. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1613. #endif
  1614. #ifdef EXPRECISION
  1615. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1616. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1617. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1618. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1619. #endif
  1620. #if BUILD_COMPLEX ==1
  1621. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1622. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1623. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1624. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1625. #endif
  1626. #if BUILD_COMPLEX16 ==1
  1627. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1628. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1629. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1630. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1631. #endif
  1632. #if BUILD_COMPLEX == 1
  1633. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1634. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1635. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1636. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1637. #endif
  1638. #if BUILD_COMPLEX16 == 1
  1639. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1640. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1641. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1642. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1643. #endif
  1644. #ifdef EXPRECISION
  1645. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1646. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1647. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1648. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1649. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1650. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1651. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1652. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1653. #endif
  1654. }
  1655. #endif //POWER
  1656. #endif //ZARCH
  1657. #endif //(ARCH_LOONGARCH64)
  1658. #endif //(ARCH_MIPS64)
  1659. #endif //(ARCH_ARM64)