You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 60 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023, 2025 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void); /* declared ahead of use: the TABLE_NAME initializer below lists init_parameter as one of its entries */
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N,
  54. #ifdef BGEMM_DEFAULT_UNROLL_MN
  55. BGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. BGEMM_ALIGN_K,
  60. 0, 0, 0,
  61. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  62. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  63. SBGEMM_DEFAULT_UNROLL_MN,
  64. #else
  65. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  66. #endif
  67. SBGEMM_ALIGN_K,
  68. 0, // need_amxtile_permission
  69. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  70. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  71. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  72. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  73. dsdot_kTS,
  74. srot_kTS, srotm_kTS, bscal_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  75. bgemv_nTS, bgemv_tTS, sbgemv_nTS, sbgemv_tTS, sger_kTS,
  76. ssymv_LTS, ssymv_UTS,
  77. bgemm_kernelTS, bgemm_betaTS,
  78. #if BGEMM_DEFAULT_UNROLL_M != BGEMM_DEFAULT_UNROLL_N
  79. bgemm_incopyTS, bgemm_itcopyTS,
  80. #else
  81. bgemm_oncopyTS, bgemm_otcopyTS,
  82. #endif
  83. bgemm_oncopyTS, bgemm_otcopyTS,
  84. sbgemm_kernelTS, sbgemm_betaTS,
  85. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  86. sbgemm_incopyTS, sbgemm_itcopyTS,
  87. #else
  88. sbgemm_oncopyTS, sbgemm_otcopyTS,
  89. #endif
  90. sbgemm_oncopyTS, sbgemm_otcopyTS,
  91. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  94. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  95. #else
  96. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  97. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  98. #endif
  99. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  100. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  101. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  102. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  103. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  104. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  105. #else
  106. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  107. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  108. #endif
  109. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  110. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  111. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  112. ssymm_iutcopyTS, ssymm_iltcopyTS,
  113. #else
  114. ssymm_outcopyTS, ssymm_oltcopyTS,
  115. #endif
  116. ssymm_outcopyTS, ssymm_oltcopyTS,
  117. #ifndef NO_LAPACK
  118. sneg_tcopyTS, slaswp_ncopyTS,
  119. #else
  120. NULL,NULL,
  121. #endif
  122. #ifdef SMALL_MATRIX_OPT
  123. sbgemm_small_matrix_permitTS,
  124. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  125. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  126. #endif
  127. #endif
  128. #ifdef BUILD_HFLOAT16
  129. 0, 0, 0,
  130. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  131. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  132. SHGEMM_DEFAULT_UNROLL_MN,
  133. #else
  134. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  135. #endif
  136. shgemm_kernelTS, shgemm_betaTS,
  137. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  138. shgemm_incopyTS, shgemm_itcopyTS,
  139. #else
  140. shgemm_oncopyTS, shgemm_otcopyTS,
  141. #endif
  142. shgemm_oncopyTS, shgemm_otcopyTS,
  143. #endif
  144. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  145. 0, 0, 0,
  146. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  147. #ifdef SGEMM_DEFAULT_UNROLL_MN
  148. SGEMM_DEFAULT_UNROLL_MN,
  149. #else
  150. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  151. #endif
  152. #endif
  153. #ifdef HAVE_EXCLUSIVE_CACHE
  154. 1,
  155. #else
  156. 0,
  157. #endif
  158. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  159. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  160. #endif
  161. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  162. isamax_kTS,
  163. #endif
  164. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  165. isamin_kTS, ismax_kTS, ismin_kTS,
  166. snrm2_kTS, sasum_kTS,
  167. #endif
  168. #if BUILD_SINGLE == 1
  169. ssum_kTS,
  170. #endif
  171. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  172. scopy_kTS, sdot_kTS,
  173. // dsdot_kTS,
  174. srot_kTS, srotm_kTS, saxpy_kTS,
  175. #endif
  176. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  177. sscal_kTS,
  178. #endif
  179. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  180. sswap_kTS,
  181. sgemv_nTS, sgemv_tTS,
  182. #endif
  183. #if BUILD_SINGLE == 1
  184. sger_kTS,
  185. #endif
  186. #if BUILD_SINGLE == 1
  187. ssymv_LTS, ssymv_UTS,
  188. #endif
  189. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  190. #ifdef ARCH_X86_64
  191. sgemm_directTS,
  192. sgemm_direct_performantTS,
  193. #endif
  194. #ifdef ARCH_ARM64
  195. sgemm_directTS,
  196. sgemm_direct_alpha_betaTS,
  197. sgemm_direct_performantTS,
  198. #endif
  199. sgemm_kernelTS, sgemm_betaTS,
  200. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  201. sgemm_incopyTS, sgemm_itcopyTS,
  202. #else
  203. sgemm_oncopyTS, sgemm_otcopyTS,
  204. #endif
  205. sgemm_oncopyTS, sgemm_otcopyTS,
  206. #endif
  207. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  208. #ifdef SMALL_MATRIX_OPT
  209. sgemm_small_matrix_permitTS,
  210. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  211. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  212. #endif
  213. #endif
  214. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  215. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  216. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  217. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  218. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  219. #else
  220. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  221. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  222. #endif
  223. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  224. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  225. #endif
  226. #if (BUILD_SINGLE==1)
  227. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  228. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  229. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  230. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  231. #else
  232. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  233. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  234. #endif
  235. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  236. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  237. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  238. ssymm_iutcopyTS, ssymm_iltcopyTS,
  239. #else
  240. ssymm_outcopyTS, ssymm_oltcopyTS,
  241. #endif
  242. ssymm_outcopyTS, ssymm_oltcopyTS,
  243. #ifndef NO_LAPACK
  244. sneg_tcopyTS, slaswp_ncopyTS,
  245. #else
  246. NULL,NULL,
  247. #endif
  248. #endif
  249. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  250. 0, 0, 0,
  251. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  252. #ifdef DGEMM_DEFAULT_UNROLL_MN
  253. DGEMM_DEFAULT_UNROLL_MN,
  254. #else
  255. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  256. #endif
  257. #endif
  258. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  259. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  260. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  261. dnrm2_kTS, dasum_kTS,
  262. #endif
  263. #if (BUILD_DOUBLE==1)
  264. dsum_kTS,
  265. #endif
  266. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  267. dcopy_kTS, ddot_kTS,
  268. #endif
  269. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  270. dsdot_kTS,
  271. #endif
  272. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  273. drot_kTS,
  274. drotm_kTS,
  275. daxpy_kTS,
  276. dscal_kTS,
  277. dswap_kTS,
  278. dgemv_nTS, dgemv_tTS,
  279. #endif
  280. #if (BUILD_DOUBLE==1)
  281. dger_kTS,
  282. dsymv_LTS, dsymv_UTS,
  283. #endif
  284. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  285. dgemm_kernelTS, dgemm_betaTS,
  286. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  287. dgemm_incopyTS, dgemm_itcopyTS,
  288. #else
  289. dgemm_oncopyTS, dgemm_otcopyTS,
  290. #endif
  291. dgemm_oncopyTS, dgemm_otcopyTS,
  292. #endif
  293. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  294. #ifdef SMALL_MATRIX_OPT
  295. dgemm_small_matrix_permitTS,
  296. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  297. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  298. #endif
  299. #endif
  300. #if (BUILD_DOUBLE==1)
  301. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  302. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  303. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  304. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  305. #else
  306. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  307. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  308. #endif
  309. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  310. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  311. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  312. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  313. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  314. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  315. #else
  316. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  317. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  318. #endif
  319. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  320. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  321. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  322. dsymm_iutcopyTS, dsymm_iltcopyTS,
  323. #else
  324. dsymm_outcopyTS, dsymm_oltcopyTS,
  325. #endif
  326. dsymm_outcopyTS, dsymm_oltcopyTS,
  327. #ifndef NO_LAPACK
  328. dneg_tcopyTS, dlaswp_ncopyTS,
  329. #else
  330. NULL, NULL,
  331. #endif
  332. #endif
  333. #ifdef EXPRECISION
  334. 0, 0, 0,
  335. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  336. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  337. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  338. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  339. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  340. qgemv_nTS, qgemv_tTS, qger_kTS,
  341. qsymv_LTS, qsymv_UTS,
  342. qgemm_kernelTS, qgemm_betaTS,
  343. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  344. qgemm_incopyTS, qgemm_itcopyTS,
  345. #else
  346. qgemm_oncopyTS, qgemm_otcopyTS,
  347. #endif
  348. qgemm_oncopyTS, qgemm_otcopyTS,
  349. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  350. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  351. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  352. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  353. #else
  354. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  355. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  356. #endif
  357. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  358. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  359. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  360. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  361. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  362. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  363. #else
  364. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  365. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  366. #endif
  367. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  368. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  369. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  370. qsymm_iutcopyTS, qsymm_iltcopyTS,
  371. #else
  372. qsymm_outcopyTS, qsymm_oltcopyTS,
  373. #endif
  374. qsymm_outcopyTS, qsymm_oltcopyTS,
  375. #ifndef NO_LAPACK
  376. qneg_tcopyTS, qlaswp_ncopyTS,
  377. #else
  378. NULL, NULL,
  379. #endif
  380. #endif
  381. #if (BUILD_COMPLEX)
  382. 0, 0, 0,
  383. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  384. #ifdef CGEMM_DEFAULT_UNROLL_MN
  385. CGEMM_DEFAULT_UNROLL_MN,
  386. #else
  387. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  388. #endif
  389. #if (BUILD_COMPLEX)
  390. camax_kTS, camin_kTS,
  391. #endif
  392. #if (BUILD_COMPLEX)
  393. icamax_kTS,
  394. #endif
  395. #if (BUILD_COMPLEX)
  396. icamin_kTS,
  397. cnrm2_kTS, casum_kTS, csum_kTS,
  398. #endif
  399. #if (BUILD_COMPLEX)
  400. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  401. #endif
  402. #if (BUILD_COMPLEX)
  403. csrot_kTS,
  404. #endif
  405. #if (BUILD_COMPLEX)
  406. caxpy_kTS,
  407. caxpyc_kTS,
  408. cscal_kTS,
  409. cswap_kTS,
  410. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  411. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  412. #endif
  413. #if (BUILD_COMPLEX)
  414. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  415. csymv_LTS, csymv_UTS,
  416. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  417. #endif
  418. #if (BUILD_COMPLEX)
  419. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  420. cgemm_betaTS,
  421. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  422. cgemm_incopyTS, cgemm_itcopyTS,
  423. #else
  424. cgemm_oncopyTS, cgemm_otcopyTS,
  425. #endif
  426. cgemm_oncopyTS, cgemm_otcopyTS,
  427. #ifdef SMALL_MATRIX_OPT
  428. cgemm_small_matrix_permitTS,
  429. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  430. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  431. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  432. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  433. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  434. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  435. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  436. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  437. #endif
  438. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  439. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  440. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  441. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  442. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  443. #else
  444. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  445. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  446. #endif
  447. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  448. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  449. #endif
  450. #endif
  451. #if (BUILD_COMPLEX)
  452. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  453. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  454. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  455. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  456. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  457. #else
  458. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  459. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  460. #endif
  461. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  462. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  463. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  464. csymm_iutcopyTS, csymm_iltcopyTS,
  465. #else
  466. csymm_outcopyTS, csymm_oltcopyTS,
  467. #endif
  468. csymm_outcopyTS, csymm_oltcopyTS,
  469. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  470. chemm_iutcopyTS, chemm_iltcopyTS,
  471. #else
  472. chemm_outcopyTS, chemm_oltcopyTS,
  473. #endif
  474. chemm_outcopyTS, chemm_oltcopyTS,
  475. 0, 0, 0,
  476. #if (USE_GEMM3M)
  477. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  478. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  479. #else
  480. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  481. #endif
  482. cgemm3m_kernelTS,
  483. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  484. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  485. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  486. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  487. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  488. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  489. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  490. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  491. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  492. csymm3m_oucopybTS, csymm3m_olcopybTS,
  493. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  494. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  495. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  496. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  497. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  498. chemm3m_oucopybTS, chemm3m_olcopybTS,
  499. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  500. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  501. #else
  502. 0, 0, 0,
  503. NULL,
  504. NULL, NULL,
  505. NULL, NULL,
  506. NULL, NULL,
  507. NULL, NULL,
  508. NULL, NULL,
  509. NULL, NULL,
  510. NULL, NULL,
  511. NULL, NULL,
  512. NULL, NULL,
  513. NULL, NULL,
  514. NULL, NULL,
  515. NULL, NULL,
  516. NULL, NULL,
  517. NULL, NULL,
  518. NULL, NULL,
  519. NULL, NULL,
  520. NULL, NULL,
  521. NULL, NULL,
  522. #endif
  523. #endif
  524. #if (BUILD_COMPLEX)
  525. #ifndef NO_LAPACK
  526. cneg_tcopyTS,
  527. claswp_ncopyTS,
  528. #else
  529. NULL, NULL,
  530. #endif
  531. #endif
  532. #if BUILD_COMPLEX16 == 1
  533. 0, 0, 0,
  534. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  535. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  536. ZGEMM_DEFAULT_UNROLL_MN,
  537. #else
  538. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  539. #endif
  540. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  541. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  542. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  543. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  544. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  545. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  546. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  547. zsymv_LTS, zsymv_UTS,
  548. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  549. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  550. zgemm_betaTS,
  551. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  552. zgemm_incopyTS, zgemm_itcopyTS,
  553. #else
  554. zgemm_oncopyTS, zgemm_otcopyTS,
  555. #endif
  556. zgemm_oncopyTS, zgemm_otcopyTS,
  557. #ifdef SMALL_MATRIX_OPT
  558. zgemm_small_matrix_permitTS,
  559. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  560. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  561. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  562. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  563. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  564. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  565. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  566. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  567. #endif
  568. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  569. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  570. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  571. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  572. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  573. #else
  574. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  575. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  576. #endif
  577. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  578. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  579. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  580. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  581. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  582. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  583. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  584. #else
  585. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  586. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  587. #endif
  588. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  589. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  590. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  591. zsymm_iutcopyTS, zsymm_iltcopyTS,
  592. #else
  593. zsymm_outcopyTS, zsymm_oltcopyTS,
  594. #endif
  595. zsymm_outcopyTS, zsymm_oltcopyTS,
  596. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  597. zhemm_iutcopyTS, zhemm_iltcopyTS,
  598. #else
  599. zhemm_outcopyTS, zhemm_oltcopyTS,
  600. #endif
  601. zhemm_outcopyTS, zhemm_oltcopyTS,
  602. 0, 0, 0,
  603. #if (USE_GEMM3M)
  604. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  605. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  606. #else
  607. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  608. #endif
  609. zgemm3m_kernelTS,
  610. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  611. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  612. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  613. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  614. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  615. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  616. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  617. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  618. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  619. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  620. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  621. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  622. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  623. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  624. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  625. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  626. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  627. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  628. #else
  629. 0, 0, 0,
  630. NULL,
  631. NULL, NULL,
  632. NULL, NULL,
  633. NULL, NULL,
  634. NULL, NULL,
  635. NULL, NULL,
  636. NULL, NULL,
  637. NULL, NULL,
  638. NULL, NULL,
  639. NULL, NULL,
  640. NULL, NULL,
  641. NULL, NULL,
  642. NULL, NULL,
  643. NULL, NULL,
  644. NULL, NULL,
  645. NULL, NULL,
  646. NULL, NULL,
  647. NULL, NULL,
  648. NULL, NULL,
  649. #endif
  650. #ifndef NO_LAPACK
  651. zneg_tcopyTS, zlaswp_ncopyTS,
  652. #else
  653. NULL, NULL,
  654. #endif
  655. #endif
  656. #ifdef EXPRECISION
  657. 0, 0, 0,
  658. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  659. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  660. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  661. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  662. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  663. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  664. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  665. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  666. xsymv_LTS, xsymv_UTS,
  667. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  668. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  669. xgemm_betaTS,
  670. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  671. xgemm_incopyTS, xgemm_itcopyTS,
  672. #else
  673. xgemm_oncopyTS, xgemm_otcopyTS,
  674. #endif
  675. xgemm_oncopyTS, xgemm_otcopyTS,
  676. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  677. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  678. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  679. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  680. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  681. #else
  682. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  683. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  684. #endif
  685. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  686. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  687. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  688. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  689. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  690. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  691. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  692. #else
  693. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  694. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  695. #endif
  696. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  697. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  698. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  699. xsymm_iutcopyTS, xsymm_iltcopyTS,
  700. #else
  701. xsymm_outcopyTS, xsymm_oltcopyTS,
  702. #endif
  703. xsymm_outcopyTS, xsymm_oltcopyTS,
  704. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  705. xhemm_iutcopyTS, xhemm_iltcopyTS,
  706. #else
  707. xhemm_outcopyTS, xhemm_oltcopyTS,
  708. #endif
  709. xhemm_outcopyTS, xhemm_oltcopyTS,
  710. 0, 0, 0,
  711. #if (USE_GEMM3M)
  712. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  713. xgemm3m_kernelTS,
  714. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  715. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  716. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  717. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  718. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  719. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  720. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  721. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  722. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  723. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  724. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  725. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  726. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  727. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  728. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  729. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  730. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  731. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  732. #else
  733. 0, 0, 0,
  734. NULL,
  735. NULL, NULL,
  736. NULL, NULL,
  737. NULL, NULL,
  738. NULL, NULL,
  739. NULL, NULL,
  740. NULL, NULL,
  741. NULL, NULL,
  742. NULL, NULL,
  743. NULL, NULL,
  744. NULL, NULL,
  745. NULL, NULL,
  746. NULL, NULL,
  747. NULL, NULL,
  748. NULL, NULL,
  749. NULL, NULL,
  750. NULL, NULL,
  751. NULL, NULL,
  752. NULL, NULL,
  753. #endif
  754. #ifndef NO_LAPACK
  755. xneg_tcopyTS, xlaswp_ncopyTS,
  756. #else
  757. NULL, NULL,
  758. #endif
  759. #endif
  760. init_parameter,
  761. SNUMOPT, DNUMOPT, QNUMOPT,
  762. #if BUILD_SINGLE == 1
  763. saxpby_kTS,
  764. #endif
  765. #if BUILD_DOUBLE == 1
  766. daxpby_kTS,
  767. #endif
  768. #if BUILD_COMPLEX == 1
  769. caxpby_kTS,
  770. #endif
  771. #if BUILD_COMPLEX16== 1
  772. zaxpby_kTS,
  773. #endif
  774. #if BUILD_SINGLE == 1
  775. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  776. #endif
  777. #if BUILD_DOUBLE== 1
  778. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  779. #endif
  780. #if BUILD_COMPLEX == 1
  781. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  782. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  783. #endif
  784. #if BUILD_COMPLEX16 == 1
  785. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  786. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  787. #endif
  788. #if BUILD_SINGLE == 1
  789. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  790. #endif
  791. #if BUILD_DOUBLE== 1
  792. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  793. #endif
  794. #if BUILD_COMPLEX== 1
  795. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  796. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  797. #endif
  798. #if BUILD_COMPLEX16==1
  799. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  800. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  801. #endif
  802. #if BUILD_SINGLE == 1
  803. sgeadd_kTS,
  804. #endif
  805. #if BUILD_DOUBLE==1
  806. dgeadd_kTS,
  807. #endif
  808. #if BUILD_COMPLEX==1
  809. cgeadd_kTS,
  810. #endif
  811. #if BUILD_COMPLEX16==1
  812. zgeadd_kTS,
  813. #endif
  814. };
  815. #if (ARCH_ARM64)
  816. static void init_parameter(void) {
  817. #if (BUILD_BFLOAT16)
  818. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  819. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  820. #endif
  821. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  822. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  823. #endif
  824. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  825. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  826. #endif
  827. #if BUILD_COMPLEX==1
  828. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  829. #endif
  830. #if BUILD_COMPLEX16==1
  831. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  832. #endif
  833. #if (BUILD_BFLOAT16)
  834. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  835. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  836. #endif
  837. #if BUILD_SINGLE == 1 || (BUILD_COMPLEX==1)
  838. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  839. #endif
  840. #if BUILD_DOUBLE== 1 || (BUILD_COMPLEX16==1)
  841. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  842. #endif
  843. #if BUILD_COMPLEX== 1
  844. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  845. #endif
  846. #if BUILD_COMPLEX16==1
  847. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  848. #endif
  849. #if (BUILD_BFLOAT16)
  850. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  851. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  852. #endif
  853. #if BUILD_SINGLE == 1 || (BUILD_COMPLEX==1)
  854. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  855. #endif
  856. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  857. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  858. #endif
  859. #if BUILD_COMPLEX==1
  860. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  861. #endif
  862. #if BUILD_COMPLEX16==1
  863. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  864. #endif
  865. #ifdef EXPRECISION
  866. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  867. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  868. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  869. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  870. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  871. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  872. #endif
  873. #if (USE_GEMM3M)
  874. #ifdef CGEMM3M_DEFAULT_P
  875. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  876. #else
  877. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  878. #endif
  879. #ifdef ZGEMM3M_DEFAULT_P
  880. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  881. #else
  882. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  883. #endif
  884. #ifdef CGEMM3M_DEFAULT_Q
  885. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  886. #else
  887. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  888. #endif
  889. #ifdef ZGEMM3M_DEFAULT_Q
  890. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  891. #else
  892. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  893. #endif
  894. #ifdef CGEMM3M_DEFAULT_R
  895. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  896. #else
  897. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  898. #endif
  899. #ifdef ZGEMM3M_DEFAULT_R
  900. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  901. #else
  902. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  903. #endif
  904. #ifdef EXPRECISION
  905. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  906. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  907. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  908. #endif
  909. #endif
  910. }
  911. #else // (ARCH_ARM64)
  912. #if defined(ARCH_MIPS64)
  913. static void init_parameter(void) {
  914. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  915. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  916. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  917. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  918. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  919. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  920. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  921. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  922. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  923. TABLE_NAME.dgemm_r = 640;
  924. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  925. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  926. #ifdef EXPRECISION
  927. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  928. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  929. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  930. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  931. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  932. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  933. #endif
  934. #if defined(USE_GEMM3M)
  935. #ifdef CGEMM3M_DEFAULT_P
  936. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  937. #else
  938. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  939. #endif
  940. #ifdef ZGEMM3M_DEFAULT_P
  941. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  942. #else
  943. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  944. #endif
  945. #ifdef CGEMM3M_DEFAULT_Q
  946. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  947. #else
  948. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  949. #endif
  950. #ifdef ZGEMM3M_DEFAULT_Q
  951. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  952. #else
  953. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  954. #endif
  955. #ifdef CGEMM3M_DEFAULT_R
  956. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  957. #else
  958. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  959. #endif
  960. #ifdef ZGEMM3M_DEFAULT_R
  961. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  962. #else
  963. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  964. #endif
  965. #ifdef EXPRECISION
  966. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  967. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  968. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  969. #endif
  970. #endif
  971. }
  972. #else // (ARCH_MIPS64)
  973. #if (ARCH_LOONGARCH64)
  974. static int get_L3_size() {
  975. int ret = 0, id = 0x14;
  976. __asm__ volatile (
  977. "cpucfg %[ret], %[id]"
  978. : [ret]"=r"(ret)
  979. : [id]"r"(id)
  980. : "memory"
  981. );
  982. return ((ret & 0xffff) + 1) * pow(2, ((ret >> 16) & 0xff)) * pow(2, ((ret >> 24) & 0x7f)) / 1024 / 1024; // MB
  983. }
  984. static void init_parameter(void) {
  985. #ifdef BUILD_BFLOAT16
  986. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  987. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  988. #endif
  989. #ifdef BUILD_BFLOAT16
  990. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  991. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  992. #endif
  993. #if defined(LA464)
  994. int L3_size = get_L3_size();
  995. #ifdef SMP
  996. if(blas_num_threads == 1){
  997. #endif
  998. //single thread
  999. if (L3_size == 32){ // 3C5000 and 3D5000
  1000. TABLE_NAME.sgemm_p = 256;
  1001. TABLE_NAME.sgemm_q = 384;
  1002. TABLE_NAME.sgemm_r = 8192;
  1003. TABLE_NAME.dgemm_p = 112;
  1004. TABLE_NAME.dgemm_q = 289;
  1005. TABLE_NAME.dgemm_r = 4096;
  1006. TABLE_NAME.cgemm_p = 128;
  1007. TABLE_NAME.cgemm_q = 256;
  1008. TABLE_NAME.cgemm_r = 4096;
  1009. TABLE_NAME.zgemm_p = 128;
  1010. TABLE_NAME.zgemm_q = 128;
  1011. TABLE_NAME.zgemm_r = 2048;
  1012. } else { // 3A5000 and 3C5000L
  1013. TABLE_NAME.sgemm_p = 256;
  1014. TABLE_NAME.sgemm_q = 384;
  1015. TABLE_NAME.sgemm_r = 4096;
  1016. TABLE_NAME.dgemm_p = 112;
  1017. TABLE_NAME.dgemm_q = 300;
  1018. TABLE_NAME.dgemm_r = 3024;
  1019. TABLE_NAME.cgemm_p = 128;
  1020. TABLE_NAME.cgemm_q = 256;
  1021. TABLE_NAME.cgemm_r = 2048;
  1022. TABLE_NAME.zgemm_p = 128;
  1023. TABLE_NAME.zgemm_q = 128;
  1024. TABLE_NAME.zgemm_r = 1024;
  1025. }
  1026. #ifdef SMP
  1027. }else{
  1028. //multi thread
  1029. if (L3_size == 32){ // 3C5000 and 3D5000
  1030. TABLE_NAME.sgemm_p = 256;
  1031. TABLE_NAME.sgemm_q = 384;
  1032. TABLE_NAME.sgemm_r = 1024;
  1033. TABLE_NAME.dgemm_p = 112;
  1034. TABLE_NAME.dgemm_q = 289;
  1035. TABLE_NAME.dgemm_r = 342;
  1036. TABLE_NAME.cgemm_p = 128;
  1037. TABLE_NAME.cgemm_q = 256;
  1038. TABLE_NAME.cgemm_r = 512;
  1039. TABLE_NAME.zgemm_p = 128;
  1040. TABLE_NAME.zgemm_q = 128;
  1041. TABLE_NAME.zgemm_r = 512;
  1042. } else { // 3A5000 and 3C5000L
  1043. TABLE_NAME.sgemm_p = 256;
  1044. TABLE_NAME.sgemm_q = 384;
  1045. TABLE_NAME.sgemm_r = 2048;
  1046. TABLE_NAME.dgemm_p = 112;
  1047. TABLE_NAME.dgemm_q = 300;
  1048. TABLE_NAME.dgemm_r = 738;
  1049. TABLE_NAME.cgemm_p = 128;
  1050. TABLE_NAME.cgemm_q = 256;
  1051. TABLE_NAME.cgemm_r = 1024;
  1052. TABLE_NAME.zgemm_p = 128;
  1053. TABLE_NAME.zgemm_q = 128;
  1054. TABLE_NAME.zgemm_r = 1024;
  1055. }
  1056. }
  1057. #endif
  1058. #else
  1059. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1060. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1061. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1062. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1063. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1064. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1065. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1066. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1067. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1068. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1069. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1070. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1071. #endif
  1072. #ifdef BUILD_BFLOAT16
  1073. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1074. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1075. #endif
  1076. }
  1077. #else // (ARCH_LOONGARCH64)
  1078. #if (ARCH_POWER)
  1079. static void init_parameter(void) {
  1080. #ifdef BUILD_BFLOAT16
  1081. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1082. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1083. #endif
  1084. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1085. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1086. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1087. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1088. #ifdef BUILD_BFLOAT16
  1089. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1090. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1091. #endif
  1092. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1093. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1094. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1095. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1096. #ifdef BUILD_BFLOAT16
  1097. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1098. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1099. #endif
  1100. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1101. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1102. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1103. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1104. }
  1105. #else //POWER
  1106. #if (ARCH_ZARCH)
  1107. static void init_parameter(void) {
  1108. #ifdef BUILD_BFLOAT16
  1109. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1110. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1111. #endif
  1112. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1113. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1114. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1115. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1116. #ifdef BUILD_BFLOAT16
  1117. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1118. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1119. #endif
  1120. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1121. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1122. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1123. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1124. #ifdef BUILD_BFLOAT16
  1125. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1126. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1127. #endif
  1128. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1129. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1130. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1131. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1132. }
  1133. #else //ZARCH
  1134. #if (ARCH_RISCV64)
  1135. static void init_parameter(void) {
  1136. #ifdef BUILD_BFLOAT16
  1137. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1138. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1139. #endif
  1140. #ifdef BUILD_HFLOAT16
  1141. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1142. #endif
  1143. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1144. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1145. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1146. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1147. #ifdef BUILD_BFLOAT16
  1148. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1149. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1150. #endif
  1151. #ifdef BUILD_HFLOAT16
  1152. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  1153. #endif
  1154. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1155. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1156. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1157. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1158. #ifdef BUILD_BFLOAT16
  1159. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1160. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1161. #endif
  1162. #ifdef BUILD_HFLOAT16
  1163. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1164. #endif
  1165. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1166. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1167. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1168. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1169. }
  1170. #else //RISCV64
  1171. #ifdef ARCH_X86
  1172. static int get_l2_size_old(void){
  1173. int i, eax, ebx, ecx, edx, cpuid_level;
  1174. int info[15];
  1175. cpuid(2, &eax, &ebx, &ecx, &edx);
  1176. info[ 0] = BITMASK(eax, 8, 0xff);
  1177. info[ 1] = BITMASK(eax, 16, 0xff);
  1178. info[ 2] = BITMASK(eax, 24, 0xff);
  1179. info[ 3] = BITMASK(ebx, 0, 0xff);
  1180. info[ 4] = BITMASK(ebx, 8, 0xff);
  1181. info[ 5] = BITMASK(ebx, 16, 0xff);
  1182. info[ 6] = BITMASK(ebx, 24, 0xff);
  1183. info[ 7] = BITMASK(ecx, 0, 0xff);
  1184. info[ 8] = BITMASK(ecx, 8, 0xff);
  1185. info[ 9] = BITMASK(ecx, 16, 0xff);
  1186. info[10] = BITMASK(ecx, 24, 0xff);
  1187. info[11] = BITMASK(edx, 0, 0xff);
  1188. info[12] = BITMASK(edx, 8, 0xff);
  1189. info[13] = BITMASK(edx, 16, 0xff);
  1190. info[14] = BITMASK(edx, 24, 0xff);
  1191. for (i = 0; i < 15; i++){
  1192. switch (info[i]){
  1193. /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
  1194. case 0x1a :
  1195. return 96;
  1196. case 0x39 :
  1197. case 0x3b :
  1198. case 0x41 :
  1199. case 0x79 :
  1200. case 0x81 :
  1201. return 128;
  1202. case 0x3a :
  1203. return 192;
  1204. case 0x21 :
  1205. case 0x3c :
  1206. case 0x42 :
  1207. case 0x7a :
  1208. case 0x7e :
  1209. case 0x82 :
  1210. return 256;
  1211. case 0x3d :
  1212. return 384;
  1213. case 0x3e :
  1214. case 0x43 :
  1215. case 0x7b :
  1216. case 0x7f :
  1217. case 0x83 :
  1218. case 0x86 :
  1219. return 512;
  1220. case 0x44 :
  1221. case 0x78 :
  1222. case 0x7c :
  1223. case 0x84 :
  1224. case 0x87 :
  1225. return 1024;
  1226. case 0x45 :
  1227. case 0x7d :
  1228. case 0x85 :
  1229. return 2048;
  1230. case 0x48 :
  1231. return 3184;
  1232. case 0x49 :
  1233. return 4096;
  1234. case 0x4e :
  1235. return 6144;
  1236. }
  1237. }
  1238. // return 0;
  1239. fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  1240. return 256;
  1241. }
  1242. #endif
  1243. static __inline__ int get_l2_size(void){
  1244. int eax, ebx, ecx, edx, l2;
  1245. l2 = readenv_atoi("OPENBLAS_L2_SIZE");
  1246. if (l2 != 0)
  1247. return l2;
  1248. cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
  1249. l2 = BITMASK(ecx, 16, 0xffff);
  1250. #ifndef ARCH_X86
  1251. if (l2 <= 0) {
  1252. fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  1253. return 256;
  1254. }
  1255. return l2;
  1256. #else
  1257. if (l2 > 0) return l2;
  1258. return get_l2_size_old();
  1259. #endif
  1260. }
  1261. static __inline__ int get_l3_size(void){
  1262. int eax, ebx, ecx, edx;
  1263. cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
  1264. return BITMASK(edx, 18, 0x3fff) * 512;
  1265. }
  1266. static void init_parameter(void) {
  1267. int l2 = get_l2_size();
  1268. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1269. /* where the GEMM unrolling parameters do not depend on l2 */
  1270. #ifdef BUILD_BFLOAT16
  1271. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1272. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1273. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1274. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1275. #endif
  1276. #ifdef BUILD_HFLOAT16
  1277. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1278. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1279. #endif
  1280. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1281. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1282. #endif
  1283. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1284. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1285. #endif
  1286. #if BUILD_COMPLEX == 1
  1287. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1288. #endif
  1289. #if BUILD_COMPLEX16==1
  1290. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1291. #endif
  1292. #if BUILD_COMPLEX == 1
  1293. #ifdef CGEMM3M_DEFAULT_Q
  1294. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1295. #else
  1296. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1297. #endif
  1298. #endif
  1299. #if BUILD_COMPLEX16 == 1
  1300. #ifdef ZGEMM3M_DEFAULT_Q
  1301. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1302. #else
  1303. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1304. #endif
  1305. #endif
  1306. #ifdef EXPRECISION
  1307. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1308. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1309. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1310. #endif
  1311. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1312. #ifdef DEBUG
  1313. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1314. #endif
  1315. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1316. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1317. #endif
  1318. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1319. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1320. #endif
  1321. #if BUILD_COMPLEX==1
  1322. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1323. #endif
  1324. #if BUILD_COMPLEX16==1
  1325. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1326. #endif
  1327. #ifdef EXPRECISION
  1328. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1329. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1330. #endif
  1331. #endif
  1332. #ifdef CORE_NORTHWOOD
  1333. #ifdef DEBUG
  1334. fprintf(stderr, "Northwood\n");
  1335. #endif
  1336. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1337. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1338. #endif
  1339. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1340. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1341. #endif
  1342. #if BUILD_COMPLEX==1
  1343. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1344. #endif
  1345. #if BUILD_COMPLEX16==1
  1346. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1347. #endif
  1348. #ifdef EXPRECISION
  1349. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1350. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1351. #endif
  1352. #endif
  1353. #ifdef ATOM
  1354. #ifdef DEBUG
  1355. fprintf(stderr, "Atom\n");
  1356. #endif
  1357. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1358. TABLE_NAME.sgemm_p = 256;
  1359. #endif
  1360. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1361. TABLE_NAME.dgemm_p = 128;
  1362. #endif
  1363. #if BUILD_COMPLEX==1
  1364. TABLE_NAME.cgemm_p = 128;
  1365. #endif
  1366. #if BUILD_COMPLEX16==1
  1367. TABLE_NAME.zgemm_p = 64;
  1368. #endif
  1369. #ifdef EXPRECISION
  1370. TABLE_NAME.qgemm_p = 64;
  1371. TABLE_NAME.xgemm_p = 32;
  1372. #endif
  1373. #endif
  1374. #ifdef CORE_PRESCOTT
  1375. #ifdef DEBUG
  1376. fprintf(stderr, "Prescott\n");
  1377. #endif
  1378. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1379. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1380. #endif
  1381. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1382. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1383. #endif
  1384. #if BUILD_COMPLEX==1
  1385. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1386. #endif
  1387. #if BUILD_COMPLEX16 == 1
  1388. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1389. #endif
  1390. #ifdef EXPRECISION
  1391. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1392. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1393. #endif
  1394. #endif
  1395. #ifdef CORE2
  1396. #ifdef DEBUG
  1397. fprintf(stderr, "Core2\n");
  1398. #endif
  1399. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1400. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1401. #endif
  1402. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1403. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1404. #endif
  1405. #if BUILD_COMPLEX==1
  1406. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1407. #endif
  1408. #if BUILD_COMPLEX16==1
  1409. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1410. #endif
  1411. #ifdef EXPRECISION
  1412. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1413. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1414. #endif
  1415. #endif
  1416. #ifdef PENRYN
  1417. #ifdef DEBUG
  1418. fprintf(stderr, "Penryn\n");
  1419. #endif
  1420. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1421. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1422. #endif
  1423. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1424. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1425. #endif
  1426. #if BUILD_COMPLEX==1
  1427. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1428. #endif
  1429. #if BUILD_COMPLEX16==1
  1430. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1431. #endif
  1432. #ifdef EXPRECISION
  1433. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1434. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1435. #endif
  1436. #endif
  1437. #ifdef DUNNINGTON
  1438. #ifdef DEBUG
  1439. fprintf(stderr, "Dunnington\n");
  1440. #endif
  1441. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1442. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1443. #endif
  1444. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1445. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1446. #endif
  1447. #if BUILD_COMPLEX==1
  1448. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1449. #endif
  1450. #if BUILD_COMPLEX16==1
  1451. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1452. #endif
  1453. #ifdef EXPRECISION
  1454. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1455. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1456. #endif
  1457. #endif
  1458. #ifdef NEHALEM
  1459. #ifdef DEBUG
  1460. fprintf(stderr, "Nehalem\n");
  1461. #endif
  1462. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1463. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1464. #endif
  1465. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1466. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1467. #endif
  1468. #if BUILD_COMPLEX
  1469. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1470. #endif
  1471. #if BUILD_COMPLEX16
  1472. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1473. #endif
  1474. #ifdef EXPRECISION
  1475. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1476. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1477. #endif
  1478. #endif
  1479. #ifdef SANDYBRIDGE
  1480. #ifdef DEBUG
  1481. fprintf(stderr, "Sandybridge\n");
  1482. #endif
  1483. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1484. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1485. #endif
  1486. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1487. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1488. #endif
  1489. #if BUILD_COMPLEX
  1490. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1491. #endif
  1492. #if BUILD_COMPLEX16
  1493. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1494. #endif
  1495. #ifdef EXPRECISION
  1496. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1497. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1498. #endif
  1499. #endif
  1500. #ifdef HASWELL
  1501. #ifdef DEBUG
  1502. fprintf(stderr, "Haswell\n");
  1503. #endif
  1504. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1505. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1506. #endif
  1507. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1508. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1509. #endif
  1510. #if BUILD_COMPLEX
  1511. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1512. #endif
  1513. #if BUILD_COMPLEX16
  1514. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1515. #endif
  1516. #ifdef EXPRECISION
  1517. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1518. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1519. #endif
  1520. #endif
  1521. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1522. #ifdef DEBUG
  1523. fprintf(stderr, "SkylakeX\n");
  1524. #endif
  1525. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1526. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1527. #endif
  1528. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1529. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1530. #endif
  1531. #if BUILD_COMPLEX
  1532. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1533. #endif
  1534. #if BUILD_COMPLEX16
  1535. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1536. #endif
  1537. #ifdef EXPRECISION
  1538. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1539. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1540. #endif
  1541. #endif
  1542. #ifdef OPTERON
  1543. #ifdef DEBUG
  1544. fprintf(stderr, "Opteron\n");
  1545. #endif
  1546. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1547. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1548. #endif
  1549. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1550. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1551. #endif
  1552. #if BUILD_COMPLEX
  1553. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1554. #endif
  1555. #if BUILD_COMPLEX16
  1556. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1557. #endif
  1558. #ifdef EXPRECISION
  1559. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1560. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1561. #endif
  1562. #endif
  1563. #ifdef BARCELONA
  1564. #ifdef DEBUG
  1565. fprintf(stderr, "Barcelona\n");
  1566. #endif
  1567. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1568. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1569. #endif
  1570. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1571. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1572. #endif
  1573. #if BUILD_COMPLEX
  1574. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1575. #endif
  1576. #if BUILD_COMPLEX16
  1577. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1578. #endif
  1579. #ifdef EXPRECISION
  1580. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1581. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1582. #endif
  1583. #endif
  1584. #ifdef BOBCAT
  1585. #ifdef DEBUG
  1586. fprintf(stderr, "Bobcate\n");
  1587. #endif
  1588. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1589. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1590. #endif
  1591. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1592. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1593. #endif
  1594. #if BUILD_COMPLEX
  1595. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1596. #endif
  1597. #if BUILD_COMPLEX16
  1598. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1599. #endif
  1600. #ifdef EXPRECISION
  1601. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1602. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1603. #endif
  1604. #endif
  1605. #ifdef BULLDOZER
  1606. #ifdef DEBUG
  1607. fprintf(stderr, "Bulldozer\n");
  1608. #endif
  1609. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1610. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1611. #endif
  1612. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1613. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1614. #endif
  1615. #if BUILD_COMPLEX
  1616. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1617. #endif
  1618. #if BUILD_COMPLEX16
  1619. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1620. #endif
  1621. #ifdef EXPRECISION
  1622. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1623. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1624. #endif
  1625. #endif
  1626. #ifdef EXCAVATOR
  1627. #ifdef DEBUG
  1628. fprintf(stderr, "Excavator\n");
  1629. #endif
  1630. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1631. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1632. #endif
  1633. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1634. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1635. #endif
  1636. #if BUILD_COMPLEX
  1637. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1638. #endif
  1639. #if BUILD_COMPLEX16
  1640. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1641. #endif
  1642. #ifdef EXPRECISION
  1643. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1644. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1645. #endif
  1646. #endif
  1647. #ifdef PILEDRIVER
  1648. #ifdef DEBUG
  1649. fprintf(stderr, "Piledriver\n");
  1650. #endif
  1651. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1652. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1653. #endif
  1654. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1655. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1656. #endif
  1657. #if BUILD_COMPLEX
  1658. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1659. #endif
  1660. #if BUILD_COMPLEX16
  1661. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1662. #endif
  1663. #ifdef EXPRECISION
  1664. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1665. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1666. #endif
  1667. #endif
  1668. #ifdef STEAMROLLER
  1669. #ifdef DEBUG
  1670. fprintf(stderr, "Steamroller\n");
  1671. #endif
  1672. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1673. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1674. #endif
  1675. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1676. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1677. #endif
  1678. #if BUILD_COMPLEX
  1679. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1680. #endif
  1681. #if BUILD_COMPLEX16
  1682. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1683. #endif
  1684. #ifdef EXPRECISION
  1685. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1686. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1687. #endif
  1688. #endif
  1689. #ifdef ZEN
  1690. #ifdef DEBUG
  1691. fprintf(stderr, "Zen\n");
  1692. #endif
  1693. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1694. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1695. #endif
  1696. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1697. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1698. #endif
  1699. #if BUILD_COMPLEX
  1700. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1701. #endif
  1702. #if BUILD_COMPLEX16
  1703. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1704. #endif
  1705. #ifdef EXPRECISION
  1706. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1707. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1708. #endif
  1709. #endif
  1710. #ifdef NANO
  1711. #ifdef DEBUG
  1712. fprintf(stderr, "NANO\n");
  1713. #endif
  1714. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1715. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1716. #endif
  1717. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1718. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1719. #endif
  1720. #if (BUILD_COMPLEX==1)
  1721. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1722. #endif
  1723. #if (BUILD_COMPLEX16==1)
  1724. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1725. #endif
  1726. #ifdef EXPRECISION
  1727. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1728. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1729. #endif
  1730. #endif
  1731. #ifdef SAPPHIRERAPIDS
  1732. #if (BUILD_BFLOAT16 == 1)
  1733. TABLE_NAME.need_amxtile_permission = 1;
  1734. #endif
  1735. #endif
  1736. #if BUILD_COMPLEX==1
  1737. #ifdef CGEMM3M_DEFAULT_P
  1738. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1739. #else
  1740. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1741. #endif
  1742. #endif
  1743. #if BUILD_COMPLEX16==1
  1744. #ifdef ZGEMM3M_DEFAULT_P
  1745. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1746. #else
  1747. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1748. #endif
  1749. #endif
  1750. #ifdef EXPRECISION
  1751. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1752. #endif
  1753. #if BUILD_SINGLE == 1
  1754. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1755. #endif
  1756. #if BUILD_DOUBLE== 1
  1757. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1758. #endif
  1759. #if BUILD_COMPLEX==1
  1760. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1761. #endif
  1762. #if BUILD_COMPLEX16==1
  1763. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1764. #endif
  1765. #if BUILD_COMPLEX==1
  1766. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1767. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1768. #else
  1769. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1770. #endif
  1771. #endif
  1772. #if BUILD_COMPLEX16==1
  1773. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1774. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1775. #else
  1776. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1777. #endif
  1778. #endif
  1779. #ifdef QUAD_PRECISION
  1780. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1781. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1782. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1783. #endif
  1784. #ifdef DEBUG
  1785. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1786. #endif
  1787. #if BUILD_BFLOAT16==1
  1788. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1789. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1790. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1791. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1792. TABLE_NAME.bgemm_r = (((BUFFER_SIZE -
  1793. ((TABLE_NAME.bgemm_p * TABLE_NAME.bgemm_q * 4 + TABLE_NAME.offsetA
  1794. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1795. ) / (TABLE_NAME.bgemm_q * 4) - 15) & ~15);
  1796. #endif
  1797. #if BUILD_HFLOAT16==1
  1798. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1799. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1800. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1801. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1802. #endif
  1803. #if BUILD_SINGLE==1
  1804. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1805. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1806. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1807. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1808. #endif
  1809. #if BUILD_DOUBLE==1
  1810. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1811. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1812. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1813. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1814. #endif
  1815. #ifdef EXPRECISION
  1816. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1817. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1818. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1819. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1820. #endif
  1821. #if BUILD_COMPLEX ==1
  1822. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1823. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1824. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1825. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1826. #endif
  1827. #if BUILD_COMPLEX16 ==1
  1828. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1829. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1830. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1831. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1832. #endif
  1833. #if BUILD_COMPLEX == 1
  1834. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1835. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1836. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1837. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1838. #endif
  1839. #if BUILD_COMPLEX16 == 1
  1840. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1841. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1842. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1843. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1844. #endif
  1845. #ifdef EXPRECISION
  1846. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1847. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1848. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1849. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1850. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1851. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1852. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1853. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1854. #endif
  1855. }
  1856. #endif //RISCV64
  1857. #endif //POWER
  1858. #endif //ZARCH
  1859. #endif //(ARCH_LOONGARCH64)
  1860. #endif //(ARCH_MIPS64)
  1861. #endif //(ARCH_ARM64)