You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

param.h 52 kB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218
  1. /*****************************************************************************
  2. Copyright (c) 2011-2014, The OpenBLAS Project
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the OpenBLAS project nor the names of
  14. its contributors may be used to endorse or promote products
  15. derived from this software without specific prior written
  16. permission.
  17. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  23. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  24. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  25. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  26. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. **********************************************************************************/
  28. /*********************************************************************/
  29. /* Copyright 2009, 2010 The University of Texas at Austin. */
  30. /* All rights reserved. */
  31. /* */
  32. /* Redistribution and use in source and binary forms, with or */
  33. /* without modification, are permitted provided that the following */
  34. /* conditions are met: */
  35. /* */
  36. /* 1. Redistributions of source code must retain the above */
  37. /* copyright notice, this list of conditions and the following */
  38. /* disclaimer. */
  39. /* */
  40. /* 2. Redistributions in binary form must reproduce the above */
  41. /* copyright notice, this list of conditions and the following */
  42. /* disclaimer in the documentation and/or other materials */
  43. /* provided with the distribution. */
  44. /* */
  45. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  46. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  47. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  48. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  49. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  50. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  51. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  52. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  53. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  54. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  55. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  56. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  57. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  58. /* POSSIBILITY OF SUCH DAMAGE. */
  59. /* */
  60. /* The views and conclusions contained in the software and */
  61. /* documentation are those of the authors and should not be */
  62. /* interpreted as representing official policies, either expressed */
  63. /* or implied, of The University of Texas at Austin. */
  64. /*********************************************************************/
  65. #ifndef PARAM_H
  66. #define PARAM_H
  67. #ifdef OPTERON /* Parameter set compiled only when OPTERON is defined. */
  68. #define SNUMOPT 4
  69. #define DNUMOPT 2
  70. #define GEMM_DEFAULT_OFFSET_A 64
  71. #define GEMM_DEFAULT_OFFSET_B 256
  72. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  73. #define SGEMM_DEFAULT_UNROLL_N 4 /* UNROLL_N values are the same regardless of ARCH_X86. */
  74. #define DGEMM_DEFAULT_UNROLL_N 4
  75. #define QGEMM_DEFAULT_UNROLL_N 2
  76. #define CGEMM_DEFAULT_UNROLL_N 2
  77. #define ZGEMM_DEFAULT_UNROLL_N 2
  78. #define XGEMM_DEFAULT_UNROLL_N 1
  79. #ifdef ARCH_X86 /* UNROLL_M values depend on whether ARCH_X86 is defined. */
  80. #define SGEMM_DEFAULT_UNROLL_M 4
  81. #define DGEMM_DEFAULT_UNROLL_M 2
  82. #define QGEMM_DEFAULT_UNROLL_M 2
  83. #define CGEMM_DEFAULT_UNROLL_M 2
  84. #define ZGEMM_DEFAULT_UNROLL_M 1
  85. #define XGEMM_DEFAULT_UNROLL_M 1
  86. #else /* ARCH_X86 not defined */
  87. #define SGEMM_DEFAULT_UNROLL_M 8
  88. #define DGEMM_DEFAULT_UNROLL_M 4
  89. #define QGEMM_DEFAULT_UNROLL_M 2
  90. #define CGEMM_DEFAULT_UNROLL_M 4
  91. #define ZGEMM_DEFAULT_UNROLL_M 2
  92. #define XGEMM_DEFAULT_UNROLL_M 1
  93. #endif /* ARCH_X86 */
  94. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than constants; those identifiers are not defined in the visible part of this file. */
  95. #define DGEMM_DEFAULT_P dgemm_p
  96. #define QGEMM_DEFAULT_P qgemm_p
  97. #define CGEMM_DEFAULT_P cgemm_p
  98. #define ZGEMM_DEFAULT_P zgemm_p
  99. #define XGEMM_DEFAULT_P xgemm_p
  100. #define SGEMM_DEFAULT_R sgemm_r
  101. #define DGEMM_DEFAULT_R dgemm_r
  102. #define QGEMM_DEFAULT_R qgemm_r
  103. #define CGEMM_DEFAULT_R cgemm_r
  104. #define ZGEMM_DEFAULT_R zgemm_r
  105. #define XGEMM_DEFAULT_R xgemm_r
  106. #ifdef ALLOC_HUGETLB /* Every Q value is 248 when ALLOC_HUGETLB is defined, 240 otherwise. */
  107. #define SGEMM_DEFAULT_Q 248
  108. #define DGEMM_DEFAULT_Q 248
  109. #define QGEMM_DEFAULT_Q 248
  110. #define CGEMM_DEFAULT_Q 248
  111. #define ZGEMM_DEFAULT_Q 248
  112. #define XGEMM_DEFAULT_Q 248
  113. #else /* ALLOC_HUGETLB not defined */
  114. #define SGEMM_DEFAULT_Q 240
  115. #define DGEMM_DEFAULT_Q 240
  116. #define QGEMM_DEFAULT_Q 240
  117. #define CGEMM_DEFAULT_Q 240
  118. #define ZGEMM_DEFAULT_Q 240
  119. #define XGEMM_DEFAULT_Q 240
  120. #endif /* ALLOC_HUGETLB */
  121. #define SYMV_P 16
  122. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value; usable only in #ifdef / defined() tests. */
  123. #endif /* OPTERON */
  124. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) /* One parameter set shared by BARCELONA, SHANGHAI and BOBCAT. */
  125. #define SNUMOPT 8
  126. #define DNUMOPT 4
  127. #define GEMM_DEFAULT_OFFSET_A 64
  128. #define GEMM_DEFAULT_OFFSET_B 832
  129. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  130. #define SGEMM_DEFAULT_UNROLL_N 4 /* UNROLL_N values are the same regardless of ARCH_X86. */
  131. #define DGEMM_DEFAULT_UNROLL_N 4
  132. #define QGEMM_DEFAULT_UNROLL_N 2
  133. #define CGEMM_DEFAULT_UNROLL_N 2
  134. #define ZGEMM_DEFAULT_UNROLL_N 2
  135. #define XGEMM_DEFAULT_UNROLL_N 1
  136. #ifdef ARCH_X86 /* UNROLL_M values depend on whether ARCH_X86 is defined. */
  137. #define SGEMM_DEFAULT_UNROLL_M 4
  138. #define DGEMM_DEFAULT_UNROLL_M 2
  139. #define QGEMM_DEFAULT_UNROLL_M 2
  140. #define CGEMM_DEFAULT_UNROLL_M 2
  141. #define ZGEMM_DEFAULT_UNROLL_M 1
  142. #define XGEMM_DEFAULT_UNROLL_M 1
  143. #else /* ARCH_X86 not defined */
  144. #define SGEMM_DEFAULT_UNROLL_M 8
  145. #define DGEMM_DEFAULT_UNROLL_M 4
  146. #define QGEMM_DEFAULT_UNROLL_M 2
  147. #define CGEMM_DEFAULT_UNROLL_M 4
  148. #define ZGEMM_DEFAULT_UNROLL_M 2
  149. #define XGEMM_DEFAULT_UNROLL_M 1
  150. #endif /* ARCH_X86 */
  151. #if 0 /* Disabled branch: this alternative P/Q set (Q = 248) is never compiled. */
  152. #define SGEMM_DEFAULT_P 496
  153. #define DGEMM_DEFAULT_P 248
  154. #define QGEMM_DEFAULT_P 124
  155. #define CGEMM_DEFAULT_P 248
  156. #define ZGEMM_DEFAULT_P 124
  157. #define XGEMM_DEFAULT_P 62
  158. #define SGEMM_DEFAULT_Q 248
  159. #define DGEMM_DEFAULT_Q 248
  160. #define QGEMM_DEFAULT_Q 248
  161. #define CGEMM_DEFAULT_Q 248
  162. #define ZGEMM_DEFAULT_Q 248
  163. #define XGEMM_DEFAULT_Q 248
  164. #else /* Active branch: the P/Q values that are actually compiled (Q = 224). */
  165. #define SGEMM_DEFAULT_P 448
  166. #define DGEMM_DEFAULT_P 224
  167. #define QGEMM_DEFAULT_P 112
  168. #define CGEMM_DEFAULT_P 224
  169. #define ZGEMM_DEFAULT_P 112
  170. #define XGEMM_DEFAULT_P 56
  171. #define SGEMM_DEFAULT_Q 224
  172. #define DGEMM_DEFAULT_Q 224
  173. #define QGEMM_DEFAULT_Q 224
  174. #define CGEMM_DEFAULT_Q 224
  175. #define ZGEMM_DEFAULT_Q 224
  176. #define XGEMM_DEFAULT_Q 224
  177. #endif /* #if 0 */
  178. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in the visible part of this file. */
  179. #define QGEMM_DEFAULT_R qgemm_r
  180. #define DGEMM_DEFAULT_R dgemm_r
  181. #define CGEMM_DEFAULT_R cgemm_r
  182. #define ZGEMM_DEFAULT_R zgemm_r
  183. #define XGEMM_DEFAULT_R xgemm_r
  184. #define SYMV_P 16
  185. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value; usable only in #ifdef / defined() tests. */
  186. #define GEMM_THREAD gemm_thread_mn /* gemm_thread_mn is not defined in the visible part of this file. */
  187. #endif /* BARCELONA || SHANGHAI || BOBCAT */
  188. #ifdef BULLDOZER /* Parameter set compiled only when BULLDOZER is defined. */
  189. #define SNUMOPT 8
  190. #define DNUMOPT 4
  191. #define GEMM_DEFAULT_OFFSET_A 64
  192. #define GEMM_DEFAULT_OFFSET_B 832
  193. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  194. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are fixed; S/D UNROLL_N are set in the ARCH_X86 conditional below. */
  195. #define CGEMM_DEFAULT_UNROLL_N 2
  196. #define ZGEMM_DEFAULT_UNROLL_N 2
  197. #define XGEMM_DEFAULT_UNROLL_N 1
  198. #ifdef ARCH_X86 /* S/D UNROLL_N and all UNROLL_M values depend on whether ARCH_X86 is defined. */
  199. #define SGEMM_DEFAULT_UNROLL_N 4
  200. #define DGEMM_DEFAULT_UNROLL_N 4
  201. #define SGEMM_DEFAULT_UNROLL_M 4
  202. #define DGEMM_DEFAULT_UNROLL_M 2
  203. #define QGEMM_DEFAULT_UNROLL_M 2
  204. #define CGEMM_DEFAULT_UNROLL_M 2
  205. #define ZGEMM_DEFAULT_UNROLL_M 1
  206. #define XGEMM_DEFAULT_UNROLL_M 1
  207. #else /* ARCH_X86 not defined */
  208. #define SGEMM_DEFAULT_UNROLL_N 2
  209. #define DGEMM_DEFAULT_UNROLL_N 2
  210. #define SGEMM_DEFAULT_UNROLL_M 16
  211. #define DGEMM_DEFAULT_UNROLL_M 8
  212. #define QGEMM_DEFAULT_UNROLL_M 2
  213. #define CGEMM_DEFAULT_UNROLL_M 4
  214. #define ZGEMM_DEFAULT_UNROLL_M 2
  215. #define XGEMM_DEFAULT_UNROLL_M 1
  216. #define CGEMM3M_DEFAULT_UNROLL_N 4 /* The GEMM3M unroll values and GEMV_UNROLL are defined only in this non-ARCH_X86 branch. */
  217. #define CGEMM3M_DEFAULT_UNROLL_M 8
  218. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  219. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  220. #define GEMV_UNROLL 8
  221. #endif /* ARCH_X86 */
  222. #if defined(ARCH_X86_64) /* S/D P values depend on ARCH_X86_64; Q/C/Z/X P values below do not. */
  223. #define SGEMM_DEFAULT_P 768
  224. #define DGEMM_DEFAULT_P 384
  225. #else /* ARCH_X86_64 not defined */
  226. #define SGEMM_DEFAULT_P 448
  227. #define DGEMM_DEFAULT_P 224
  228. #endif /* ARCH_X86_64 */
  229. #define QGEMM_DEFAULT_P 112
  230. #define CGEMM_DEFAULT_P 224
  231. #define ZGEMM_DEFAULT_P 112
  232. #define XGEMM_DEFAULT_P 56
  233. #if defined(ARCH_X86_64) /* S/D Q values depend on ARCH_X86_64; Q/C/Z/X Q values below do not. */
  234. #define SGEMM_DEFAULT_Q 168
  235. #define DGEMM_DEFAULT_Q 168
  236. #else /* ARCH_X86_64 not defined */
  237. #define SGEMM_DEFAULT_Q 224
  238. #define DGEMM_DEFAULT_Q 224
  239. #endif /* ARCH_X86_64 */
  240. #define QGEMM_DEFAULT_Q 224
  241. #define CGEMM_DEFAULT_Q 224
  242. #define ZGEMM_DEFAULT_Q 224
  243. #define XGEMM_DEFAULT_Q 224
  244. #define CGEMM3M_DEFAULT_P 448 /* GEMM3M P/Q/R are literal constants and are defined on every architecture path. */
  245. #define ZGEMM3M_DEFAULT_P 224
  246. #define XGEMM3M_DEFAULT_P 112
  247. #define CGEMM3M_DEFAULT_Q 224
  248. #define ZGEMM3M_DEFAULT_Q 224
  249. #define XGEMM3M_DEFAULT_Q 224
  250. #define CGEMM3M_DEFAULT_R 12288
  251. #define ZGEMM3M_DEFAULT_R 12288
  252. #define XGEMM3M_DEFAULT_R 12288
  253. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in the visible part of this file. */
  254. #define QGEMM_DEFAULT_R qgemm_r
  255. #define DGEMM_DEFAULT_R dgemm_r
  256. #define CGEMM_DEFAULT_R cgemm_r
  257. #define ZGEMM_DEFAULT_R zgemm_r
  258. #define XGEMM_DEFAULT_R xgemm_r
  259. #define SYMV_P 16
  260. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value; usable only in #ifdef / defined() tests. */
  261. #define GEMM_THREAD gemm_thread_mn /* gemm_thread_mn is not defined in the visible part of this file. */
  262. #endif /* BULLDOZER */
  263. #ifdef PILEDRIVER /* Parameter set compiled only when PILEDRIVER is defined. */
  264. #define SNUMOPT 8
  265. #define DNUMOPT 4
  266. #define GEMM_DEFAULT_OFFSET_A 64
  267. #define GEMM_DEFAULT_OFFSET_B 832
  268. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  269. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are fixed; S/D UNROLL_N are set in the ARCH_X86 conditional below. */
  270. #define CGEMM_DEFAULT_UNROLL_N 2
  271. #define ZGEMM_DEFAULT_UNROLL_N 2
  272. #define XGEMM_DEFAULT_UNROLL_N 1
  273. #ifdef ARCH_X86 /* S/D UNROLL_N and all UNROLL_M values depend on whether ARCH_X86 is defined. */
  274. #define SGEMM_DEFAULT_UNROLL_N 4
  275. #define DGEMM_DEFAULT_UNROLL_N 4
  276. #define SGEMM_DEFAULT_UNROLL_M 4
  277. #define DGEMM_DEFAULT_UNROLL_M 2
  278. #define QGEMM_DEFAULT_UNROLL_M 2
  279. #define CGEMM_DEFAULT_UNROLL_M 2
  280. #define ZGEMM_DEFAULT_UNROLL_M 1
  281. #define XGEMM_DEFAULT_UNROLL_M 1
  282. #else /* ARCH_X86 not defined */
  283. #define SGEMM_DEFAULT_UNROLL_N 2
  284. #define DGEMM_DEFAULT_UNROLL_N 2
  285. #define SGEMM_DEFAULT_UNROLL_M 16
  286. #define DGEMM_DEFAULT_UNROLL_M 8
  287. #define QGEMM_DEFAULT_UNROLL_M 2
  288. #define CGEMM_DEFAULT_UNROLL_M 4
  289. #define ZGEMM_DEFAULT_UNROLL_M 2
  290. #define XGEMM_DEFAULT_UNROLL_M 1
  291. #define CGEMM3M_DEFAULT_UNROLL_N 4 /* The GEMM3M unroll values and GEMV_UNROLL are defined only in this non-ARCH_X86 branch. */
  292. #define CGEMM3M_DEFAULT_UNROLL_M 8
  293. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  294. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  295. #define GEMV_UNROLL 8
  296. #endif /* ARCH_X86 */
  297. #if defined(ARCH_X86_64) /* S/D/Z/C P values depend on ARCH_X86_64; Q/X P values below do not. */
  298. #define SGEMM_DEFAULT_P 768
  299. #define DGEMM_DEFAULT_P 768
  300. #define ZGEMM_DEFAULT_P 384
  301. #define CGEMM_DEFAULT_P 768
  302. #else /* ARCH_X86_64 not defined */
  303. #define SGEMM_DEFAULT_P 448
  304. #define DGEMM_DEFAULT_P 480
  305. #define ZGEMM_DEFAULT_P 112
  306. #define CGEMM_DEFAULT_P 224
  307. #endif /* ARCH_X86_64 */
  308. #define QGEMM_DEFAULT_P 112
  309. #define XGEMM_DEFAULT_P 56
  310. #if defined(ARCH_X86_64) /* S/D/Z/C Q values depend on ARCH_X86_64; Q/X Q values below do not. */
  311. #define SGEMM_DEFAULT_Q 192
  312. #define DGEMM_DEFAULT_Q 168
  313. #define ZGEMM_DEFAULT_Q 168
  314. #define CGEMM_DEFAULT_Q 168
  315. #else /* ARCH_X86_64 not defined */
  316. #define SGEMM_DEFAULT_Q 224
  317. #define DGEMM_DEFAULT_Q 224
  318. #define ZGEMM_DEFAULT_Q 224
  319. #define CGEMM_DEFAULT_Q 224
  320. #endif /* ARCH_X86_64 */
  321. #define QGEMM_DEFAULT_Q 224
  322. #define XGEMM_DEFAULT_Q 224
  323. #define CGEMM3M_DEFAULT_P 448 /* GEMM3M P/Q/R are literal constants and are defined on every architecture path. */
  324. #define ZGEMM3M_DEFAULT_P 224
  325. #define XGEMM3M_DEFAULT_P 112
  326. #define CGEMM3M_DEFAULT_Q 224
  327. #define ZGEMM3M_DEFAULT_Q 224
  328. #define XGEMM3M_DEFAULT_Q 224
  329. #define CGEMM3M_DEFAULT_R 12288
  330. #define ZGEMM3M_DEFAULT_R 12288
  331. #define XGEMM3M_DEFAULT_R 12288
  332. #define SGEMM_DEFAULT_R 12288 /* Here the S and D R values are literal constants, while the Q/C/Z/X R values expand to identifiers not defined in the visible part of this file. */
  333. #define QGEMM_DEFAULT_R qgemm_r
  334. #define DGEMM_DEFAULT_R 12288
  335. #define CGEMM_DEFAULT_R cgemm_r
  336. #define ZGEMM_DEFAULT_R zgemm_r
  337. #define XGEMM_DEFAULT_R xgemm_r
  338. #define SYMV_P 16
  339. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value; usable only in #ifdef / defined() tests. */
  340. #define GEMM_THREAD gemm_thread_mn /* gemm_thread_mn is not defined in the visible part of this file. */
  341. #endif /* PILEDRIVER */
  342. #ifdef ATHLON /* Parameter set compiled only when ATHLON is defined; it contains no nested conditionals. */
  343. #define SNUMOPT 4
  344. #define DNUMOPT 2
  345. #define GEMM_DEFAULT_OFFSET_A 0
  346. #define GEMM_DEFAULT_OFFSET_B 384
  347. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  348. #define SGEMM_DEFAULT_UNROLL_N 4
  349. #define DGEMM_DEFAULT_UNROLL_N 4
  350. #define QGEMM_DEFAULT_UNROLL_N 2
  351. #define CGEMM_DEFAULT_UNROLL_N 2
  352. #define ZGEMM_DEFAULT_UNROLL_N 2
  353. #define XGEMM_DEFAULT_UNROLL_N 1
  354. #define SGEMM_DEFAULT_UNROLL_M 2
  355. #define DGEMM_DEFAULT_UNROLL_M 1
  356. #define QGEMM_DEFAULT_UNROLL_M 2
  357. #define CGEMM_DEFAULT_UNROLL_M 1
  358. #define ZGEMM_DEFAULT_UNROLL_M 1
  359. #define XGEMM_DEFAULT_UNROLL_M 1
  360. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in the visible part of this file. */
  361. #define DGEMM_DEFAULT_R dgemm_r
  362. #define QGEMM_DEFAULT_R qgemm_r
  363. #define CGEMM_DEFAULT_R cgemm_r
  364. #define ZGEMM_DEFAULT_R zgemm_r
  365. #define XGEMM_DEFAULT_R xgemm_r
  366. #define SGEMM_DEFAULT_P 208 /* P and Q are literal constants in this set. */
  367. #define DGEMM_DEFAULT_P 104
  368. #define QGEMM_DEFAULT_P 56
  369. #define CGEMM_DEFAULT_P 104
  370. #define ZGEMM_DEFAULT_P 56
  371. #define XGEMM_DEFAULT_P 28
  372. #define SGEMM_DEFAULT_Q 208
  373. #define DGEMM_DEFAULT_Q 208
  374. #define QGEMM_DEFAULT_Q 208
  375. #define CGEMM_DEFAULT_Q 208
  376. #define ZGEMM_DEFAULT_Q 208
  377. #define XGEMM_DEFAULT_Q 208
  378. #define SYMV_P 16
  379. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value; usable only in #ifdef / defined() tests. */
  380. #endif /* ATHLON */
  381. #ifdef VIAC3 /* Parameter set compiled only when VIAC3 is defined; it contains no nested conditionals. */
  382. #define SNUMOPT 2
  383. #define DNUMOPT 1
  384. #define GEMM_DEFAULT_OFFSET_A 0
  385. #define GEMM_DEFAULT_OFFSET_B 256
  386. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  387. #define SGEMM_DEFAULT_UNROLL_N 4
  388. #define DGEMM_DEFAULT_UNROLL_N 4
  389. #define QGEMM_DEFAULT_UNROLL_N 2
  390. #define CGEMM_DEFAULT_UNROLL_N 2
  391. #define ZGEMM_DEFAULT_UNROLL_N 2
  392. #define XGEMM_DEFAULT_UNROLL_N 1
  393. #define SGEMM_DEFAULT_UNROLL_M 2
  394. #define DGEMM_DEFAULT_UNROLL_M 1
  395. #define QGEMM_DEFAULT_UNROLL_M 2
  396. #define CGEMM_DEFAULT_UNROLL_M 1
  397. #define ZGEMM_DEFAULT_UNROLL_M 1
  398. #define XGEMM_DEFAULT_UNROLL_M 1
  399. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in the visible part of this file. */
  400. #define DGEMM_DEFAULT_R dgemm_r
  401. #define QGEMM_DEFAULT_R qgemm_r
  402. #define CGEMM_DEFAULT_R cgemm_r
  403. #define ZGEMM_DEFAULT_R zgemm_r
  404. #define XGEMM_DEFAULT_R xgemm_r
  405. #define SGEMM_DEFAULT_P 128 /* P is 128 for every prefix; Q varies by prefix. */
  406. #define DGEMM_DEFAULT_P 128
  407. #define QGEMM_DEFAULT_P 128
  408. #define CGEMM_DEFAULT_P 128
  409. #define ZGEMM_DEFAULT_P 128
  410. #define XGEMM_DEFAULT_P 128
  411. #define SGEMM_DEFAULT_Q 512
  412. #define DGEMM_DEFAULT_Q 256
  413. #define QGEMM_DEFAULT_Q 256
  414. #define CGEMM_DEFAULT_Q 256
  415. #define ZGEMM_DEFAULT_Q 128
  416. #define XGEMM_DEFAULT_Q 128
  417. #define SYMV_P 16
  418. #endif /* VIAC3 */
  419. #ifdef NANO /* Parameter set compiled only when NANO is defined. */
  420. #define SNUMOPT 4
  421. #define DNUMOPT 2
  422. #define GEMM_DEFAULT_OFFSET_A 64
  423. #define GEMM_DEFAULT_OFFSET_B 256
  424. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  425. #ifdef ARCH_X86 /* Both UNROLL_N and UNROLL_M depend on whether ARCH_X86 is defined. */
  426. #define SGEMM_DEFAULT_UNROLL_N 4
  427. #define DGEMM_DEFAULT_UNROLL_N 4
  428. #define QGEMM_DEFAULT_UNROLL_N 2
  429. #define CGEMM_DEFAULT_UNROLL_N 2
  430. #define ZGEMM_DEFAULT_UNROLL_N 2
  431. #define XGEMM_DEFAULT_UNROLL_N 1
  432. #define SGEMM_DEFAULT_UNROLL_M 4
  433. #define DGEMM_DEFAULT_UNROLL_M 2
  434. #define QGEMM_DEFAULT_UNROLL_M 2
  435. #define CGEMM_DEFAULT_UNROLL_M 2
  436. #define ZGEMM_DEFAULT_UNROLL_M 1
  437. #define XGEMM_DEFAULT_UNROLL_M 1
  438. #else /* ARCH_X86 not defined */
  439. #define SGEMM_DEFAULT_UNROLL_N 8
  440. #define DGEMM_DEFAULT_UNROLL_N 4
  441. #define QGEMM_DEFAULT_UNROLL_N 2
  442. #define CGEMM_DEFAULT_UNROLL_N 4
  443. #define ZGEMM_DEFAULT_UNROLL_N 2
  444. #define XGEMM_DEFAULT_UNROLL_N 1
  445. #define SGEMM_DEFAULT_UNROLL_M 4
  446. #define DGEMM_DEFAULT_UNROLL_M 4
  447. #define QGEMM_DEFAULT_UNROLL_M 2
  448. #define CGEMM_DEFAULT_UNROLL_M 2
  449. #define ZGEMM_DEFAULT_UNROLL_M 2
  450. #define XGEMM_DEFAULT_UNROLL_M 1
  451. #endif /* ARCH_X86 */
  452. #define SGEMM_DEFAULT_P 288 /* P is 288 for every prefix; Q varies by prefix. */
  453. #define DGEMM_DEFAULT_P 288
  454. #define QGEMM_DEFAULT_P 288
  455. #define CGEMM_DEFAULT_P 288
  456. #define ZGEMM_DEFAULT_P 288
  457. #define XGEMM_DEFAULT_P 288
  458. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in the visible part of this file. */
  459. #define DGEMM_DEFAULT_R dgemm_r
  460. #define QGEMM_DEFAULT_R qgemm_r
  461. #define CGEMM_DEFAULT_R cgemm_r
  462. #define ZGEMM_DEFAULT_R zgemm_r
  463. #define XGEMM_DEFAULT_R xgemm_r
  464. #define SGEMM_DEFAULT_Q 256
  465. #define DGEMM_DEFAULT_Q 128
  466. #define QGEMM_DEFAULT_Q 64
  467. #define CGEMM_DEFAULT_Q 128
  468. #define ZGEMM_DEFAULT_Q 64
  469. #define XGEMM_DEFAULT_Q 32
  470. #define SYMV_P 16
  471. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value; usable only in #ifdef / defined() tests. */
  472. #endif /* NANO */
  473. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3) /* One parameter set shared by PENTIUM, PENTIUM2 and PENTIUM3. */
  474. #ifdef HAVE_SSE /* SNUMOPT is 2 when HAVE_SSE is defined, 1 otherwise. */
  475. #define SNUMOPT 2
  476. #else /* HAVE_SSE not defined */
  477. #define SNUMOPT 1
  478. #endif /* HAVE_SSE */
  479. #define DNUMOPT 1
  480. #define GEMM_DEFAULT_OFFSET_A 0
  481. #define GEMM_DEFAULT_OFFSET_B 0
  482. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  483. #ifdef HAVE_SSE /* Only the S and C UNROLL_M values depend on HAVE_SSE. */
  484. #define SGEMM_DEFAULT_UNROLL_M 8
  485. #define CGEMM_DEFAULT_UNROLL_M 4
  486. #else /* HAVE_SSE not defined */
  487. #define SGEMM_DEFAULT_UNROLL_M 4
  488. #define CGEMM_DEFAULT_UNROLL_M 2
  489. #endif /* HAVE_SSE */
  490. #define DGEMM_DEFAULT_UNROLL_M 2 /* The remaining unroll values are unconditional. */
  491. #define SGEMM_DEFAULT_UNROLL_N 2
  492. #define DGEMM_DEFAULT_UNROLL_N 2
  493. #define QGEMM_DEFAULT_UNROLL_M 2
  494. #define QGEMM_DEFAULT_UNROLL_N 2
  495. #define CGEMM_DEFAULT_UNROLL_N 1
  496. #define ZGEMM_DEFAULT_UNROLL_M 1
  497. #define ZGEMM_DEFAULT_UNROLL_N 1
  498. #define XGEMM_DEFAULT_UNROLL_M 1
  499. #define XGEMM_DEFAULT_UNROLL_N 1
  500. #define SGEMM_DEFAULT_P sgemm_p /* For each prefix: P and R expand to identifiers not defined in the visible part of this file; Q is the literal 256. */
  501. #define SGEMM_DEFAULT_Q 256
  502. #define SGEMM_DEFAULT_R sgemm_r
  503. #define DGEMM_DEFAULT_P dgemm_p
  504. #define DGEMM_DEFAULT_Q 256
  505. #define DGEMM_DEFAULT_R dgemm_r
  506. #define QGEMM_DEFAULT_P qgemm_p
  507. #define QGEMM_DEFAULT_Q 256
  508. #define QGEMM_DEFAULT_R qgemm_r
  509. #define CGEMM_DEFAULT_P cgemm_p
  510. #define CGEMM_DEFAULT_Q 256
  511. #define CGEMM_DEFAULT_R cgemm_r
  512. #define ZGEMM_DEFAULT_P zgemm_p
  513. #define ZGEMM_DEFAULT_Q 256
  514. #define ZGEMM_DEFAULT_R zgemm_r
  515. #define XGEMM_DEFAULT_P xgemm_p
  516. #define XGEMM_DEFAULT_Q 256
  517. #define XGEMM_DEFAULT_R xgemm_r
  518. #define SYMV_P 4
  519. #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  520. #ifdef PENTIUMM /* Parameter set compiled only when PENTIUMM is defined. */
  521. #define SNUMOPT 2
  522. #define DNUMOPT 1
  523. #define GEMM_DEFAULT_OFFSET_A 0
  524. #define GEMM_DEFAULT_OFFSET_B 0
  525. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  526. #ifdef CORE_YONAH /* All UNROLL_M / UNROLL_N values depend on whether CORE_YONAH is defined. */
  527. #define SGEMM_DEFAULT_UNROLL_M 4
  528. #define SGEMM_DEFAULT_UNROLL_N 4
  529. #define DGEMM_DEFAULT_UNROLL_M 2
  530. #define DGEMM_DEFAULT_UNROLL_N 4
  531. #define QGEMM_DEFAULT_UNROLL_M 2
  532. #define QGEMM_DEFAULT_UNROLL_N 2
  533. #define CGEMM_DEFAULT_UNROLL_M 2
  534. #define CGEMM_DEFAULT_UNROLL_N 2
  535. #define ZGEMM_DEFAULT_UNROLL_M 1
  536. #define ZGEMM_DEFAULT_UNROLL_N 2
  537. #define XGEMM_DEFAULT_UNROLL_M 1
  538. #define XGEMM_DEFAULT_UNROLL_N 1
  539. #else /* CORE_YONAH not defined */
  540. #define SGEMM_DEFAULT_UNROLL_M 8
  541. #define SGEMM_DEFAULT_UNROLL_N 2
  542. #define DGEMM_DEFAULT_UNROLL_M 2
  543. #define DGEMM_DEFAULT_UNROLL_N 2
  544. #define QGEMM_DEFAULT_UNROLL_M 2
  545. #define QGEMM_DEFAULT_UNROLL_N 2
  546. #define CGEMM_DEFAULT_UNROLL_M 4
  547. #define CGEMM_DEFAULT_UNROLL_N 1
  548. #define ZGEMM_DEFAULT_UNROLL_M 1
  549. #define ZGEMM_DEFAULT_UNROLL_N 1
  550. #define XGEMM_DEFAULT_UNROLL_M 1
  551. #define XGEMM_DEFAULT_UNROLL_N 1
  552. #endif /* CORE_YONAH */
  553. #define SGEMM_DEFAULT_P sgemm_p /* For each prefix: P and R expand to identifiers not defined in the visible part of this file; Q is the literal 256. */
  554. #define SGEMM_DEFAULT_Q 256
  555. #define SGEMM_DEFAULT_R sgemm_r
  556. #define DGEMM_DEFAULT_P dgemm_p
  557. #define DGEMM_DEFAULT_Q 256
  558. #define DGEMM_DEFAULT_R dgemm_r
  559. #define QGEMM_DEFAULT_P qgemm_p
  560. #define QGEMM_DEFAULT_Q 256
  561. #define QGEMM_DEFAULT_R qgemm_r
  562. #define CGEMM_DEFAULT_P cgemm_p
  563. #define CGEMM_DEFAULT_Q 256
  564. #define CGEMM_DEFAULT_R cgemm_r
  565. #define ZGEMM_DEFAULT_P zgemm_p
  566. #define ZGEMM_DEFAULT_Q 256
  567. #define ZGEMM_DEFAULT_R zgemm_r
  568. #define XGEMM_DEFAULT_P xgemm_p
  569. #define XGEMM_DEFAULT_Q 256
  570. #define XGEMM_DEFAULT_R xgemm_r
  571. #define SYMV_P 4
  572. #endif /* PENTIUMM */
  573. #ifdef CORE_NORTHWOOD /* Parameter set compiled only when CORE_NORTHWOOD is defined; it contains no nested conditionals. */
  574. #define SNUMOPT 4
  575. #define DNUMOPT 2
  576. #define GEMM_DEFAULT_OFFSET_A 0
  577. #define GEMM_DEFAULT_OFFSET_B 32
  578. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  579. #define SYMV_P 8
  580. #define SGEMM_DEFAULT_UNROLL_M 8
  581. #define DGEMM_DEFAULT_UNROLL_M 4
  582. #define QGEMM_DEFAULT_UNROLL_M 2
  583. #define CGEMM_DEFAULT_UNROLL_M 4
  584. #define ZGEMM_DEFAULT_UNROLL_M 2
  585. #define XGEMM_DEFAULT_UNROLL_M 1
  586. #define SGEMM_DEFAULT_UNROLL_N 2
  587. #define DGEMM_DEFAULT_UNROLL_N 2
  588. #define QGEMM_DEFAULT_UNROLL_N 2
  589. #define CGEMM_DEFAULT_UNROLL_N 1
  590. #define ZGEMM_DEFAULT_UNROLL_N 1
  591. #define XGEMM_DEFAULT_UNROLL_N 1
  592. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in the visible part of this file. */
  593. #define SGEMM_DEFAULT_R sgemm_r
  594. #define DGEMM_DEFAULT_P dgemm_p
  595. #define DGEMM_DEFAULT_R dgemm_r
  596. #define QGEMM_DEFAULT_P qgemm_p
  597. #define QGEMM_DEFAULT_R qgemm_r
  598. #define CGEMM_DEFAULT_P cgemm_p
  599. #define CGEMM_DEFAULT_R cgemm_r
  600. #define ZGEMM_DEFAULT_P zgemm_p
  601. #define ZGEMM_DEFAULT_R zgemm_r
  602. #define XGEMM_DEFAULT_P xgemm_p
  603. #define XGEMM_DEFAULT_R xgemm_r
  604. #define SGEMM_DEFAULT_Q 128 /* Q is the literal 128 for every prefix. */
  605. #define DGEMM_DEFAULT_Q 128
  606. #define QGEMM_DEFAULT_Q 128
  607. #define CGEMM_DEFAULT_Q 128
  608. #define ZGEMM_DEFAULT_Q 128
  609. #define XGEMM_DEFAULT_Q 128
  610. #endif /* CORE_NORTHWOOD */
  611. #ifdef CORE_PRESCOTT /* Parameter set compiled only when CORE_PRESCOTT is defined. */
  612. #define SNUMOPT 4
  613. #define DNUMOPT 2
  614. #ifndef __64BIT__ /* The A/B offsets are 128/192 when __64BIT__ is not defined, and 0/256 when it is. */
  615. #define GEMM_DEFAULT_OFFSET_A 128
  616. #define GEMM_DEFAULT_OFFSET_B 192
  617. #else /* __64BIT__ defined */
  618. #define GEMM_DEFAULT_OFFSET_A 0
  619. #define GEMM_DEFAULT_OFFSET_B 256
  620. #endif /* __64BIT__ */
  621. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  622. #define SYMV_P 8
  623. #ifdef ARCH_X86 /* UNROLL_M values depend on whether ARCH_X86 is defined; UNROLL_N values below do not. */
  624. #define SGEMM_DEFAULT_UNROLL_M 4
  625. #define DGEMM_DEFAULT_UNROLL_M 2
  626. #define QGEMM_DEFAULT_UNROLL_M 2
  627. #define CGEMM_DEFAULT_UNROLL_M 2
  628. #define ZGEMM_DEFAULT_UNROLL_M 1
  629. #define XGEMM_DEFAULT_UNROLL_M 1
  630. #else /* ARCH_X86 not defined */
  631. #define SGEMM_DEFAULT_UNROLL_M 8
  632. #define DGEMM_DEFAULT_UNROLL_M 4
  633. #define QGEMM_DEFAULT_UNROLL_M 2
  634. #define CGEMM_DEFAULT_UNROLL_M 4
  635. #define ZGEMM_DEFAULT_UNROLL_M 2
  636. #define XGEMM_DEFAULT_UNROLL_M 1
  637. #endif /* ARCH_X86 */
  638. #define SGEMM_DEFAULT_UNROLL_N 4
  639. #define DGEMM_DEFAULT_UNROLL_N 4
  640. #define QGEMM_DEFAULT_UNROLL_N 2
  641. #define CGEMM_DEFAULT_UNROLL_N 2
  642. #define ZGEMM_DEFAULT_UNROLL_N 2
  643. #define XGEMM_DEFAULT_UNROLL_N 1
  644. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in the visible part of this file. */
  645. #define SGEMM_DEFAULT_R sgemm_r
  646. #define DGEMM_DEFAULT_P dgemm_p
  647. #define DGEMM_DEFAULT_R dgemm_r
  648. #define QGEMM_DEFAULT_P qgemm_p
  649. #define QGEMM_DEFAULT_R qgemm_r
  650. #define CGEMM_DEFAULT_P cgemm_p
  651. #define CGEMM_DEFAULT_R cgemm_r
  652. #define ZGEMM_DEFAULT_P zgemm_p
  653. #define ZGEMM_DEFAULT_R zgemm_r
  654. #define XGEMM_DEFAULT_P xgemm_p
  655. #define XGEMM_DEFAULT_R xgemm_r
  656. #define SGEMM_DEFAULT_Q 128 /* Q is the literal 128 for every prefix. */
  657. #define DGEMM_DEFAULT_Q 128
  658. #define QGEMM_DEFAULT_Q 128
  659. #define CGEMM_DEFAULT_Q 128
  660. #define ZGEMM_DEFAULT_Q 128
  661. #define XGEMM_DEFAULT_Q 128
  662. #endif /* CORE_PRESCOTT */
  /*
   * Tuning parameters selected when CORE2 is defined.
   * Both ARCH_X86 variants use the same M unroll factors and differ only in
   * the N unroll factors; the ARCH_X86 branch additionally defines MASK().
   * P/R are forwarded to identifiers; Q is the literal 256 for every prefix.
   */
  663. #ifdef CORE2
  664. #define SNUMOPT 8
  665. #define DNUMOPT 4
  666. #define GEMM_DEFAULT_OFFSET_A 448
  667. #define GEMM_DEFAULT_OFFSET_B 128
  668. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  669. #define SYMV_P 8
  670. #define SWITCH_RATIO 4
  671. #ifdef ARCH_X86
  672. #define SGEMM_DEFAULT_UNROLL_M 8
  673. #define DGEMM_DEFAULT_UNROLL_M 4
  674. #define QGEMM_DEFAULT_UNROLL_M 2
  675. #define CGEMM_DEFAULT_UNROLL_M 4
  676. #define ZGEMM_DEFAULT_UNROLL_M 2
  677. #define XGEMM_DEFAULT_UNROLL_M 1
  678. #define SGEMM_DEFAULT_UNROLL_N 2
  679. #define DGEMM_DEFAULT_UNROLL_N 2
  680. #define QGEMM_DEFAULT_UNROLL_N 2
  681. #define CGEMM_DEFAULT_UNROLL_N 1
  682. #define ZGEMM_DEFAULT_UNROLL_N 1
  683. #define XGEMM_DEFAULT_UNROLL_N 1
  /*
   * MASK(a, b) evaluates ((a + b - 1) / b) * b, i.e. a rounded up to a
   * multiple of b when both are positive integers. It is defined only in
   * this ARCH_X86 branch, not in the #else branch below.
   */
  684. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  685. #else /* !ARCH_X86 */
  686. #define SGEMM_DEFAULT_UNROLL_M 8
  687. #define DGEMM_DEFAULT_UNROLL_M 4
  688. #define QGEMM_DEFAULT_UNROLL_M 2
  689. #define CGEMM_DEFAULT_UNROLL_M 4
  690. #define ZGEMM_DEFAULT_UNROLL_M 2
  691. #define XGEMM_DEFAULT_UNROLL_M 1
  692. #define SGEMM_DEFAULT_UNROLL_N 4
  693. #define DGEMM_DEFAULT_UNROLL_N 4
  694. #define QGEMM_DEFAULT_UNROLL_N 2
  695. #define CGEMM_DEFAULT_UNROLL_N 2
  696. #define ZGEMM_DEFAULT_UNROLL_N 2
  697. #define XGEMM_DEFAULT_UNROLL_N 1
  698. #endif
  /* P and R are forwarded to identifiers not defined in this part of the file. */
  699. #define SGEMM_DEFAULT_P sgemm_p
  700. #define SGEMM_DEFAULT_R sgemm_r
  701. #define DGEMM_DEFAULT_P dgemm_p
  702. #define DGEMM_DEFAULT_R dgemm_r
  703. #define QGEMM_DEFAULT_P qgemm_p
  704. #define QGEMM_DEFAULT_R qgemm_r
  705. #define CGEMM_DEFAULT_P cgemm_p
  706. #define CGEMM_DEFAULT_R cgemm_r
  707. #define ZGEMM_DEFAULT_P zgemm_p
  708. #define ZGEMM_DEFAULT_R zgemm_r
  709. #define XGEMM_DEFAULT_P xgemm_p
  710. #define XGEMM_DEFAULT_R xgemm_r
  711. #define SGEMM_DEFAULT_Q 256
  712. #define DGEMM_DEFAULT_Q 256
  713. #define QGEMM_DEFAULT_Q 256
  714. #define CGEMM_DEFAULT_Q 256
  715. #define ZGEMM_DEFAULT_Q 256
  716. #define XGEMM_DEFAULT_Q 256
  717. #endif /* CORE2 */
  /*
   * Tuning parameters selected when PENRYN is defined.
   * The two ARCH_X86 variants share the same N unroll factors and differ
   * only in the S/D/C/Z M unroll factors. P/R are forwarded to identifiers;
   * Q is a per-prefix literal (512 / 256 / 128). GETRF_FACTOR is also set.
   */
  718. #ifdef PENRYN
  719. #define SNUMOPT 8
  720. #define DNUMOPT 4
  721. #define GEMM_DEFAULT_OFFSET_A 128
  722. #define GEMM_DEFAULT_OFFSET_B 0
  723. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  724. #define SYMV_P 8
  725. #define SWITCH_RATIO 4
  726. #ifdef ARCH_X86
  727. #define SGEMM_DEFAULT_UNROLL_M 4
  728. #define DGEMM_DEFAULT_UNROLL_M 2
  729. #define QGEMM_DEFAULT_UNROLL_M 2
  730. #define CGEMM_DEFAULT_UNROLL_M 2
  731. #define ZGEMM_DEFAULT_UNROLL_M 1
  732. #define XGEMM_DEFAULT_UNROLL_M 1
  733. #define SGEMM_DEFAULT_UNROLL_N 4
  734. #define DGEMM_DEFAULT_UNROLL_N 4
  735. #define QGEMM_DEFAULT_UNROLL_N 2
  736. #define CGEMM_DEFAULT_UNROLL_N 2
  737. #define ZGEMM_DEFAULT_UNROLL_N 2
  738. #define XGEMM_DEFAULT_UNROLL_N 1
  739. #else /* !ARCH_X86 */
  740. #define SGEMM_DEFAULT_UNROLL_M 8
  741. #define DGEMM_DEFAULT_UNROLL_M 4
  742. #define QGEMM_DEFAULT_UNROLL_M 2
  743. #define CGEMM_DEFAULT_UNROLL_M 4
  744. #define ZGEMM_DEFAULT_UNROLL_M 2
  745. #define XGEMM_DEFAULT_UNROLL_M 1
  746. #define SGEMM_DEFAULT_UNROLL_N 4
  747. #define DGEMM_DEFAULT_UNROLL_N 4
  748. #define QGEMM_DEFAULT_UNROLL_N 2
  749. #define CGEMM_DEFAULT_UNROLL_N 2
  750. #define ZGEMM_DEFAULT_UNROLL_N 2
  751. #define XGEMM_DEFAULT_UNROLL_N 1
  752. #endif
  /* P and R are forwarded to identifiers not defined in this part of the file. */
  753. #define SGEMM_DEFAULT_P sgemm_p
  754. #define SGEMM_DEFAULT_R sgemm_r
  755. #define DGEMM_DEFAULT_P dgemm_p
  756. #define DGEMM_DEFAULT_R dgemm_r
  757. #define QGEMM_DEFAULT_P qgemm_p
  758. #define QGEMM_DEFAULT_R qgemm_r
  759. #define CGEMM_DEFAULT_P cgemm_p
  760. #define CGEMM_DEFAULT_R cgemm_r
  761. #define ZGEMM_DEFAULT_P zgemm_p
  762. #define ZGEMM_DEFAULT_R zgemm_r
  763. #define XGEMM_DEFAULT_P xgemm_p
  764. #define XGEMM_DEFAULT_R xgemm_r
  /* Q halves from S/C (512) to D/Z (256) to Q/X (128). */
  765. #define SGEMM_DEFAULT_Q 512
  766. #define DGEMM_DEFAULT_Q 256
  767. #define QGEMM_DEFAULT_Q 128
  768. #define CGEMM_DEFAULT_Q 512
  769. #define ZGEMM_DEFAULT_Q 256
  770. #define XGEMM_DEFAULT_Q 128
  771. #define GETRF_FACTOR 0.75
  772. #endif /* PENRYN */
  /*
   * Tuning parameters selected when DUNNINGTON is defined.
   * The two ARCH_X86 variants share the N unroll factors and differ only in
   * the S/D/C/Z M unroll factors. P/R are forwarded to identifiers; Q is a
   * per-prefix literal (768 / 384 / 192). This block also sets GETRF_FACTOR
   * and maps GEMM_THREAD to gemm_thread_mn.
   */
  773. #ifdef DUNNINGTON
  774. #define SNUMOPT 8
  775. #define DNUMOPT 4
  776. #define GEMM_DEFAULT_OFFSET_A 128
  777. #define GEMM_DEFAULT_OFFSET_B 0
  778. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  779. #define SYMV_P 8
  780. #define SWITCH_RATIO 4
  781. #ifdef ARCH_X86
  782. #define SGEMM_DEFAULT_UNROLL_M 4
  783. #define DGEMM_DEFAULT_UNROLL_M 2
  784. #define QGEMM_DEFAULT_UNROLL_M 2
  785. #define CGEMM_DEFAULT_UNROLL_M 2
  786. #define ZGEMM_DEFAULT_UNROLL_M 1
  787. #define XGEMM_DEFAULT_UNROLL_M 1
  788. #define SGEMM_DEFAULT_UNROLL_N 4
  789. #define DGEMM_DEFAULT_UNROLL_N 4
  790. #define QGEMM_DEFAULT_UNROLL_N 2
  791. #define CGEMM_DEFAULT_UNROLL_N 2
  792. #define ZGEMM_DEFAULT_UNROLL_N 2
  793. #define XGEMM_DEFAULT_UNROLL_N 1
  794. #else /* !ARCH_X86 */
  795. #define SGEMM_DEFAULT_UNROLL_M 8
  796. #define DGEMM_DEFAULT_UNROLL_M 4
  797. #define QGEMM_DEFAULT_UNROLL_M 2
  798. #define CGEMM_DEFAULT_UNROLL_M 4
  799. #define ZGEMM_DEFAULT_UNROLL_M 2
  800. #define XGEMM_DEFAULT_UNROLL_M 1
  801. #define SGEMM_DEFAULT_UNROLL_N 4
  802. #define DGEMM_DEFAULT_UNROLL_N 4
  803. #define QGEMM_DEFAULT_UNROLL_N 2
  804. #define CGEMM_DEFAULT_UNROLL_N 2
  805. #define ZGEMM_DEFAULT_UNROLL_N 2
  806. #define XGEMM_DEFAULT_UNROLL_N 1
  807. #endif
  /* P and R are forwarded to identifiers not defined in this part of the file. */
  808. #define SGEMM_DEFAULT_P sgemm_p
  809. #define SGEMM_DEFAULT_R sgemm_r
  810. #define DGEMM_DEFAULT_P dgemm_p
  811. #define DGEMM_DEFAULT_R dgemm_r
  812. #define QGEMM_DEFAULT_P qgemm_p
  813. #define QGEMM_DEFAULT_R qgemm_r
  814. #define CGEMM_DEFAULT_P cgemm_p
  815. #define CGEMM_DEFAULT_R cgemm_r
  816. #define ZGEMM_DEFAULT_P zgemm_p
  817. #define ZGEMM_DEFAULT_R zgemm_r
  818. #define XGEMM_DEFAULT_P xgemm_p
  819. #define XGEMM_DEFAULT_R xgemm_r
  /* Q halves from S/C (768) to D/Z (384) to Q/X (192). */
  820. #define SGEMM_DEFAULT_Q 768
  821. #define DGEMM_DEFAULT_Q 384
  822. #define QGEMM_DEFAULT_Q 192
  823. #define CGEMM_DEFAULT_Q 768
  824. #define ZGEMM_DEFAULT_Q 384
  825. #define XGEMM_DEFAULT_Q 192
  826. #define GETRF_FACTOR 0.75
  827. #define GEMM_THREAD gemm_thread_mn
  828. #endif /* DUNNINGTON */
  /*
   * Tuning parameters selected when NEHALEM is defined.
   * Both ARCH_X86 variants use the same M unroll factors; the #else branch
   * uses larger S/D/C/Z N unroll factors. Unlike P in the blocks that
   * forward it to an identifier, P here is a literal (504 for S/D/Q, 252 for
   * C/Z/X); R is still forwarded to identifiers.
   */
  829. #ifdef NEHALEM
  830. #define SNUMOPT 8
  831. #define DNUMOPT 4
  832. #define GEMM_DEFAULT_OFFSET_A 32
  833. #define GEMM_DEFAULT_OFFSET_B 0
  834. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  835. #define SYMV_P 8
  836. #define SWITCH_RATIO 4
  837. #ifdef ARCH_X86
  838. #define SGEMM_DEFAULT_UNROLL_M 4
  839. #define DGEMM_DEFAULT_UNROLL_M 2
  840. #define QGEMM_DEFAULT_UNROLL_M 2
  841. #define CGEMM_DEFAULT_UNROLL_M 2
  842. #define ZGEMM_DEFAULT_UNROLL_M 1
  843. #define XGEMM_DEFAULT_UNROLL_M 1
  844. #define SGEMM_DEFAULT_UNROLL_N 4
  845. #define DGEMM_DEFAULT_UNROLL_N 4
  846. #define QGEMM_DEFAULT_UNROLL_N 2
  847. #define CGEMM_DEFAULT_UNROLL_N 2
  848. #define ZGEMM_DEFAULT_UNROLL_N 2
  849. #define XGEMM_DEFAULT_UNROLL_N 1
  850. #else /* !ARCH_X86 */
  851. #define SGEMM_DEFAULT_UNROLL_M 4
  852. #define DGEMM_DEFAULT_UNROLL_M 2
  853. #define QGEMM_DEFAULT_UNROLL_M 2
  854. #define CGEMM_DEFAULT_UNROLL_M 2
  855. #define ZGEMM_DEFAULT_UNROLL_M 1
  856. #define XGEMM_DEFAULT_UNROLL_M 1
  857. #define SGEMM_DEFAULT_UNROLL_N 8
  858. #define DGEMM_DEFAULT_UNROLL_N 8
  859. #define QGEMM_DEFAULT_UNROLL_N 2
  860. #define CGEMM_DEFAULT_UNROLL_N 4
  861. #define ZGEMM_DEFAULT_UNROLL_N 4
  862. #define XGEMM_DEFAULT_UNROLL_N 1
  863. #endif
  /* Literal P values paired with R values forwarded to identifiers. */
  864. #define SGEMM_DEFAULT_P 504
  865. #define SGEMM_DEFAULT_R sgemm_r
  866. #define DGEMM_DEFAULT_P 504
  867. #define DGEMM_DEFAULT_R dgemm_r
  868. #define QGEMM_DEFAULT_P 504
  869. #define QGEMM_DEFAULT_R qgemm_r
  870. #define CGEMM_DEFAULT_P 252
  871. #define CGEMM_DEFAULT_R cgemm_r
  872. #define ZGEMM_DEFAULT_P 252
  873. #define ZGEMM_DEFAULT_R zgemm_r
  874. #define XGEMM_DEFAULT_P 252
  875. #define XGEMM_DEFAULT_R xgemm_r
  876. #define SGEMM_DEFAULT_Q 512
  877. #define DGEMM_DEFAULT_Q 256
  878. #define QGEMM_DEFAULT_Q 128
  879. #define CGEMM_DEFAULT_Q 512
  880. #define ZGEMM_DEFAULT_Q 256
  881. #define XGEMM_DEFAULT_Q 128
  882. #define GETRF_FACTOR 0.72
  883. #endif /* NEHALEM */
  /*
   * Tuning parameters selected when SANDYBRIDGE is defined.
   * Unroll factors depend on ARCH_X86. P and Q are literals, R is forwarded
   * to identifiers (commented-out lines keep an alternative literal R of
   * 1024). The *GEMM3M_* parameters and GETRF_FACTOR sit outside the
   * ARCH_X86 conditional, so they apply to both variants.
   */
  884. #ifdef SANDYBRIDGE
  885. #define SNUMOPT 8
  886. #define DNUMOPT 4
  887. #define GEMM_DEFAULT_OFFSET_A 0
  888. #define GEMM_DEFAULT_OFFSET_B 0
  889. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  890. #define SYMV_P 8
  891. #define SWITCH_RATIO 4
  892. #ifdef ARCH_X86
  893. #define SGEMM_DEFAULT_UNROLL_M 4
  894. #define DGEMM_DEFAULT_UNROLL_M 2
  895. #define QGEMM_DEFAULT_UNROLL_M 2
  896. #define CGEMM_DEFAULT_UNROLL_M 2
  897. #define ZGEMM_DEFAULT_UNROLL_M 1
  898. #define XGEMM_DEFAULT_UNROLL_M 1
  899. #define SGEMM_DEFAULT_UNROLL_N 4
  900. #define DGEMM_DEFAULT_UNROLL_N 4
  901. #define QGEMM_DEFAULT_UNROLL_N 2
  902. #define CGEMM_DEFAULT_UNROLL_N 2
  903. #define ZGEMM_DEFAULT_UNROLL_N 2
  904. #define XGEMM_DEFAULT_UNROLL_N 1
  905. #else /* !ARCH_X86 */
  /*
   * NOTE(review): Z uses M=1 / N=4 here while S, D and C use a large M with
   * a small N — presumably intentional for the Z kernel; confirm.
   */
  906. #define SGEMM_DEFAULT_UNROLL_M 16
  907. #define DGEMM_DEFAULT_UNROLL_M 8
  908. #define QGEMM_DEFAULT_UNROLL_M 2
  909. #define CGEMM_DEFAULT_UNROLL_M 8
  910. #define ZGEMM_DEFAULT_UNROLL_M 1
  911. #define XGEMM_DEFAULT_UNROLL_M 1
  912. #define SGEMM_DEFAULT_UNROLL_N 4
  913. #define DGEMM_DEFAULT_UNROLL_N 4
  914. #define QGEMM_DEFAULT_UNROLL_N 2
  915. #define CGEMM_DEFAULT_UNROLL_N 2
  916. #define ZGEMM_DEFAULT_UNROLL_N 4
  917. #define XGEMM_DEFAULT_UNROLL_N 1
  918. #endif
  /* Literal P; R forwarded to identifiers, with disabled literal alternatives. */
  919. #define SGEMM_DEFAULT_P 768
  920. #define SGEMM_DEFAULT_R sgemm_r
  921. //#define SGEMM_DEFAULT_R 1024
  922. #define DGEMM_DEFAULT_P 512
  923. #define DGEMM_DEFAULT_R dgemm_r
  924. //#define DGEMM_DEFAULT_R 1024
  925. #define QGEMM_DEFAULT_P 504
  926. #define QGEMM_DEFAULT_R qgemm_r
  927. #define CGEMM_DEFAULT_P 768
  928. #define CGEMM_DEFAULT_R cgemm_r
  929. //#define CGEMM_DEFAULT_R 1024
  930. #define ZGEMM_DEFAULT_P 512
  931. #define ZGEMM_DEFAULT_R zgemm_r
  932. //#define ZGEMM_DEFAULT_R 1024
  933. #define XGEMM_DEFAULT_P 252
  934. #define XGEMM_DEFAULT_R xgemm_r
  935. #define SGEMM_DEFAULT_Q 384
  936. #define DGEMM_DEFAULT_Q 256
  937. #define QGEMM_DEFAULT_Q 128
  938. #define CGEMM_DEFAULT_Q 512
  939. #define ZGEMM_DEFAULT_Q 192
  940. #define XGEMM_DEFAULT_Q 128
  /* Separate parameter set for the *GEMM3M_* macros (all literals). */
  941. #define CGEMM3M_DEFAULT_UNROLL_N 8
  942. #define CGEMM3M_DEFAULT_UNROLL_M 4
  943. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  944. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  945. #define CGEMM3M_DEFAULT_P 448
  946. #define ZGEMM3M_DEFAULT_P 224
  947. #define XGEMM3M_DEFAULT_P 112
  948. #define CGEMM3M_DEFAULT_Q 224
  949. #define ZGEMM3M_DEFAULT_Q 224
  950. #define XGEMM3M_DEFAULT_Q 224
  951. #define CGEMM3M_DEFAULT_R 12288
  952. #define ZGEMM3M_DEFAULT_R 12288
  953. #define XGEMM3M_DEFAULT_R 12288
  954. #define GETRF_FACTOR 0.72
  955. #endif /* SANDYBRIDGE */
  /*
   * Tuning parameters selected when HASWELL is defined.
   * Two consecutive ARCH_X86 conditionals follow: the first picks the unroll
   * factors (the #else branch also defines DGEMM_DEFAULT_UNROLL_MN), the
   * second picks P/Q/R. In the #else branch of the second one, the S/D Q
   * values further depend on WINDOWS_ABI, and the *GEMM3M_* parameters are
   * defined there only (not for ARCH_X86).
   */
  956. #ifdef HASWELL
  957. #define SNUMOPT 16
  958. #define DNUMOPT 8
  959. #define GEMM_DEFAULT_OFFSET_A 0
  960. #define GEMM_DEFAULT_OFFSET_B 0
  961. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  962. #define SYMV_P 8
  963. #define SWITCH_RATIO 4
  /* First conditional: unroll factors. */
  964. #ifdef ARCH_X86
  965. #define SGEMM_DEFAULT_UNROLL_M 4
  966. #define DGEMM_DEFAULT_UNROLL_M 2
  967. #define QGEMM_DEFAULT_UNROLL_M 2
  968. #define CGEMM_DEFAULT_UNROLL_M 2
  969. #define ZGEMM_DEFAULT_UNROLL_M 1
  970. #define XGEMM_DEFAULT_UNROLL_M 1
  971. #define SGEMM_DEFAULT_UNROLL_N 4
  972. #define DGEMM_DEFAULT_UNROLL_N 4
  973. #define QGEMM_DEFAULT_UNROLL_N 2
  974. #define CGEMM_DEFAULT_UNROLL_N 2
  975. #define ZGEMM_DEFAULT_UNROLL_N 2
  976. #define XGEMM_DEFAULT_UNROLL_N 1
  977. #else /* !ARCH_X86 */
  978. #define SGEMM_DEFAULT_UNROLL_M 16
  979. #define DGEMM_DEFAULT_UNROLL_M 4
  980. #define QGEMM_DEFAULT_UNROLL_M 2
  981. #define CGEMM_DEFAULT_UNROLL_M 8
  982. #define ZGEMM_DEFAULT_UNROLL_M 4
  983. #define XGEMM_DEFAULT_UNROLL_M 1
  984. #define SGEMM_DEFAULT_UNROLL_N 4
  985. #define DGEMM_DEFAULT_UNROLL_N 4
  986. #define QGEMM_DEFAULT_UNROLL_N 2
  987. #define CGEMM_DEFAULT_UNROLL_N 2
  988. #define ZGEMM_DEFAULT_UNROLL_N 2
  989. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Extra D-only macro; no S/C/Z counterpart is defined in this block. */
  990. #define DGEMM_DEFAULT_UNROLL_MN 16
  991. #endif
  /* Second conditional: P / Q / R. */
  992. #ifdef ARCH_X86
  993. #define SGEMM_DEFAULT_P 512
  994. #define SGEMM_DEFAULT_R sgemm_r
  995. #define DGEMM_DEFAULT_P 512
  996. #define DGEMM_DEFAULT_R dgemm_r
  997. #define QGEMM_DEFAULT_P 504
  998. #define QGEMM_DEFAULT_R qgemm_r
  999. #define CGEMM_DEFAULT_P 128
  /* C is the only prefix in this branch with a literal R. */
  1000. #define CGEMM_DEFAULT_R 1024
  1001. #define ZGEMM_DEFAULT_P 512
  1002. #define ZGEMM_DEFAULT_R zgemm_r
  1003. #define XGEMM_DEFAULT_P 252
  1004. #define XGEMM_DEFAULT_R xgemm_r
  1005. #define SGEMM_DEFAULT_Q 256
  1006. #define DGEMM_DEFAULT_Q 256
  1007. #define QGEMM_DEFAULT_Q 128
  1008. #define CGEMM_DEFAULT_Q 256
  1009. #define ZGEMM_DEFAULT_Q 192
  1010. #define XGEMM_DEFAULT_Q 128
  1011. #else /* !ARCH_X86 */
  1012. #define SGEMM_DEFAULT_P 768
  1013. #define DGEMM_DEFAULT_P 512
  1014. #define CGEMM_DEFAULT_P 384
  1015. #define ZGEMM_DEFAULT_P 256
  /* S/D Q values are smaller when WINDOWS_ABI is defined. */
  1016. #ifdef WINDOWS_ABI
  1017. #define SGEMM_DEFAULT_Q 320
  1018. #define DGEMM_DEFAULT_Q 128
  1019. #else
  1020. #define SGEMM_DEFAULT_Q 384
  1021. #define DGEMM_DEFAULT_Q 256
  1022. #endif
  1023. #define CGEMM_DEFAULT_Q 192
  1024. #define ZGEMM_DEFAULT_Q 128
  1025. #define SGEMM_DEFAULT_R sgemm_r
  /* D is the only prefix in this branch with a literal R. */
  1026. #define DGEMM_DEFAULT_R 13824
  1027. #define CGEMM_DEFAULT_R cgemm_r
  1028. #define ZGEMM_DEFAULT_R zgemm_r
  1029. #define QGEMM_DEFAULT_Q 128
  1030. #define QGEMM_DEFAULT_P 504
  1031. #define QGEMM_DEFAULT_R qgemm_r
  1032. #define XGEMM_DEFAULT_P 252
  1033. #define XGEMM_DEFAULT_R xgemm_r
  1034. #define XGEMM_DEFAULT_Q 128
  /* *GEMM3M_* parameters: defined only in this non-ARCH_X86 branch. */
  1035. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1036. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1037. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1038. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1039. #define CGEMM3M_DEFAULT_P 448
  1040. #define ZGEMM3M_DEFAULT_P 224
  1041. #define XGEMM3M_DEFAULT_P 112
  1042. #define CGEMM3M_DEFAULT_Q 224
  1043. #define ZGEMM3M_DEFAULT_Q 224
  1044. #define XGEMM3M_DEFAULT_Q 224
  1045. #define CGEMM3M_DEFAULT_R 12288
  1046. #define ZGEMM3M_DEFAULT_R 12288
  1047. #define XGEMM3M_DEFAULT_R 12288
  1048. #endif
  1049. #endif /* HASWELL */
  /*
   * Tuning parameters selected when ATOM is defined.
   * Only the M unroll factors depend on ARCH_X86; the N unroll factors,
   * P/R (forwarded to identifiers) and Q (literal 256) are shared.
   */
  1050. #ifdef ATOM
  1051. #define SNUMOPT 2
  1052. #define DNUMOPT 1
  1053. #define GEMM_DEFAULT_OFFSET_A 64
  1054. #define GEMM_DEFAULT_OFFSET_B 0
  1055. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1056. #define SYMV_P 8
  1057. #ifdef ARCH_X86
  1058. #define SGEMM_DEFAULT_UNROLL_M 4
  1059. #define DGEMM_DEFAULT_UNROLL_M 2
  1060. #define QGEMM_DEFAULT_UNROLL_M 2
  1061. #define CGEMM_DEFAULT_UNROLL_M 2
  1062. #define ZGEMM_DEFAULT_UNROLL_M 1
  1063. #define XGEMM_DEFAULT_UNROLL_M 1
  1064. #else /* !ARCH_X86 */
  1065. #define SGEMM_DEFAULT_UNROLL_M 8
  1066. #define DGEMM_DEFAULT_UNROLL_M 4
  1067. #define QGEMM_DEFAULT_UNROLL_M 2
  1068. #define CGEMM_DEFAULT_UNROLL_M 4
  1069. #define ZGEMM_DEFAULT_UNROLL_M 2
  1070. #define XGEMM_DEFAULT_UNROLL_M 1
  1071. #endif
  /* N unroll: common to both ARCH_X86 variants. */
  1072. #define SGEMM_DEFAULT_UNROLL_N 4
  1073. #define DGEMM_DEFAULT_UNROLL_N 2
  1074. #define QGEMM_DEFAULT_UNROLL_N 2
  1075. #define CGEMM_DEFAULT_UNROLL_N 2
  1076. #define ZGEMM_DEFAULT_UNROLL_N 1
  1077. #define XGEMM_DEFAULT_UNROLL_N 1
  /* P and R are forwarded to identifiers not defined in this part of the file. */
  1078. #define SGEMM_DEFAULT_P sgemm_p
  1079. #define SGEMM_DEFAULT_R sgemm_r
  1080. #define DGEMM_DEFAULT_P dgemm_p
  1081. #define DGEMM_DEFAULT_R dgemm_r
  1082. #define QGEMM_DEFAULT_P qgemm_p
  1083. #define QGEMM_DEFAULT_R qgemm_r
  1084. #define CGEMM_DEFAULT_P cgemm_p
  1085. #define CGEMM_DEFAULT_R cgemm_r
  1086. #define ZGEMM_DEFAULT_P zgemm_p
  1087. #define ZGEMM_DEFAULT_R zgemm_r
  1088. #define XGEMM_DEFAULT_P xgemm_p
  1089. #define XGEMM_DEFAULT_R xgemm_r
  1090. #define SGEMM_DEFAULT_Q 256
  1091. #define DGEMM_DEFAULT_Q 256
  1092. #define QGEMM_DEFAULT_Q 256
  1093. #define CGEMM_DEFAULT_Q 256
  1094. #define ZGEMM_DEFAULT_Q 256
  1095. #define XGEMM_DEFAULT_Q 256
  1096. #endif /* ATOM */
  /*
   * Tuning parameters selected when ITANIUM2 is defined.
   * No inner conditionals: unroll factors are 8x8 for S/D/Q and 4x4 for
   * C/Z/X, P and R are forwarded to identifiers, Q is the literal 1024 for
   * every prefix. SYMV_P and GETRF_FACTOR are set at the end of the block.
   */
  1097. #ifdef ITANIUM2
  1098. #define SNUMOPT 4
  1099. #define DNUMOPT 4
  1100. #define GEMM_DEFAULT_OFFSET_A 0
  1101. #define GEMM_DEFAULT_OFFSET_B 128
  1102. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1103. #define SGEMM_DEFAULT_UNROLL_M 8
  1104. #define SGEMM_DEFAULT_UNROLL_N 8
  1105. #define DGEMM_DEFAULT_UNROLL_M 8
  1106. #define DGEMM_DEFAULT_UNROLL_N 8
  1107. #define QGEMM_DEFAULT_UNROLL_M 8
  1108. #define QGEMM_DEFAULT_UNROLL_N 8
  1109. #define CGEMM_DEFAULT_UNROLL_M 4
  1110. #define CGEMM_DEFAULT_UNROLL_N 4
  1111. #define ZGEMM_DEFAULT_UNROLL_M 4
  1112. #define ZGEMM_DEFAULT_UNROLL_N 4
  1113. #define XGEMM_DEFAULT_UNROLL_M 4
  1114. #define XGEMM_DEFAULT_UNROLL_N 4
  /* P is forwarded to identifiers not defined in this part of the file. */
  1115. #define SGEMM_DEFAULT_P sgemm_p
  1116. #define DGEMM_DEFAULT_P dgemm_p
  1117. #define QGEMM_DEFAULT_P qgemm_p
  1118. #define CGEMM_DEFAULT_P cgemm_p
  1119. #define ZGEMM_DEFAULT_P zgemm_p
  1120. #define XGEMM_DEFAULT_P xgemm_p
  1121. #define SGEMM_DEFAULT_Q 1024
  1122. #define DGEMM_DEFAULT_Q 1024
  1123. #define QGEMM_DEFAULT_Q 1024
  1124. #define CGEMM_DEFAULT_Q 1024
  1125. #define ZGEMM_DEFAULT_Q 1024
  1126. #define XGEMM_DEFAULT_Q 1024
  /* R is likewise forwarded to identifiers. */
  1127. #define SGEMM_DEFAULT_R sgemm_r
  1128. #define DGEMM_DEFAULT_R dgemm_r
  1129. #define QGEMM_DEFAULT_R qgemm_r
  1130. #define CGEMM_DEFAULT_R cgemm_r
  1131. #define ZGEMM_DEFAULT_R zgemm_r
  1132. #define XGEMM_DEFAULT_R xgemm_r
  1133. #define SYMV_P 16
  1134. #define GETRF_FACTOR 0.65
  1135. #endif /* ITANIUM2 */
  /*
   * Tuning parameters shared by EV4, EV5 and EV6.
   * Offsets, alignment, unroll factors and SYMV_P are common; SNUMOPT /
   * DNUMOPT are 1 for EV4 and 2 otherwise; P and Q are chosen per variant.
   * Only the S/D/C/Z prefixes are configured here (no Q/X macros).
   * NOTE(review): *GEMM_DEFAULT_R is defined only in the EV4 sub-block;
   * EV5 and EV6 presumably get R from elsewhere in the file — confirm.
   */
  1136. #if defined(EV4) || defined(EV5) || defined(EV6)
  1137. #ifdef EV4
  1138. #define SNUMOPT 1
  1139. #define DNUMOPT 1
  1140. #else /* EV5 or EV6 */
  1141. #define SNUMOPT 2
  1142. #define DNUMOPT 2
  1143. #endif
  1144. #define GEMM_DEFAULT_OFFSET_A 512
  1145. #define GEMM_DEFAULT_OFFSET_B 512
  1146. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1147. #define SGEMM_DEFAULT_UNROLL_M 4
  1148. #define SGEMM_DEFAULT_UNROLL_N 4
  1149. #define DGEMM_DEFAULT_UNROLL_M 4
  1150. #define DGEMM_DEFAULT_UNROLL_N 4
  1151. #define CGEMM_DEFAULT_UNROLL_M 2
  1152. #define CGEMM_DEFAULT_UNROLL_N 2
  1153. #define ZGEMM_DEFAULT_UNROLL_M 2
  1154. #define ZGEMM_DEFAULT_UNROLL_N 2
  1155. #define SYMV_P 8
  /* EV4: literal P, Q and R. */
  1156. #ifdef EV4
  1157. #define SGEMM_DEFAULT_P 32
  1158. #define SGEMM_DEFAULT_Q 112
  1159. #define SGEMM_DEFAULT_R 256
  1160. #define DGEMM_DEFAULT_P 32
  1161. #define DGEMM_DEFAULT_Q 56
  1162. #define DGEMM_DEFAULT_R 256
  1163. #define CGEMM_DEFAULT_P 32
  1164. #define CGEMM_DEFAULT_Q 64
  1165. #define CGEMM_DEFAULT_R 240
  1166. #define ZGEMM_DEFAULT_P 32
  1167. #define ZGEMM_DEFAULT_Q 32
  1168. #define ZGEMM_DEFAULT_R 240
  1169. #endif
  /* EV5: literal P and Q only. */
  1170. #ifdef EV5
  1171. #define SGEMM_DEFAULT_P 64
  1172. #define SGEMM_DEFAULT_Q 256
  1173. #define DGEMM_DEFAULT_P 64
  1174. #define DGEMM_DEFAULT_Q 128
  1175. #define CGEMM_DEFAULT_P 64
  1176. #define CGEMM_DEFAULT_Q 128
  1177. #define ZGEMM_DEFAULT_P 64
  1178. #define ZGEMM_DEFAULT_Q 64
  1179. #endif
  /* EV6: literal P and Q only. */
  1180. #ifdef EV6
  1181. #define SGEMM_DEFAULT_P 256
  1182. #define SGEMM_DEFAULT_Q 512
  1183. #define DGEMM_DEFAULT_P 256
  1184. #define DGEMM_DEFAULT_Q 256
  1185. #define CGEMM_DEFAULT_P 256
  1186. #define CGEMM_DEFAULT_Q 256
  1187. #define ZGEMM_DEFAULT_P 128
  1188. #define ZGEMM_DEFAULT_Q 256
  1189. #endif
  1190. #endif /* EV4 || EV5 || EV6 */
  /*
   * Tuning parameters selected when CELL is defined.
   * Only the S/D/C/Z prefixes are configured, all with literal values.
   * P is 128 for every prefix; no *GEMM_DEFAULT_R is defined in this block.
   */
  1191. #ifdef CELL
  1192. #define SNUMOPT 2
  1193. #define DNUMOPT 2
  1194. #define GEMM_DEFAULT_OFFSET_A 0
  1195. #define GEMM_DEFAULT_OFFSET_B 8192
  1196. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1197. #define SGEMM_DEFAULT_UNROLL_M 16
  1198. #define SGEMM_DEFAULT_UNROLL_N 4
  1199. #define DGEMM_DEFAULT_UNROLL_M 4
  1200. #define DGEMM_DEFAULT_UNROLL_N 4
  1201. #define CGEMM_DEFAULT_UNROLL_M 8
  1202. #define CGEMM_DEFAULT_UNROLL_N 2
  1203. #define ZGEMM_DEFAULT_UNROLL_M 2
  1204. #define ZGEMM_DEFAULT_UNROLL_N 2
  1205. #define SGEMM_DEFAULT_P 128
  1206. #define DGEMM_DEFAULT_P 128
  1207. #define CGEMM_DEFAULT_P 128
  1208. #define ZGEMM_DEFAULT_P 128
  1209. #define SGEMM_DEFAULT_Q 512
  1210. #define DGEMM_DEFAULT_Q 256
  1211. #define CGEMM_DEFAULT_Q 256
  1212. #define ZGEMM_DEFAULT_Q 128
  1213. #define SYMV_P 4
  1214. #endif /* CELL */
  /*
   * Tuning parameters selected when PPCG4 is defined.
   * Only the S/D/C/Z prefixes are configured, all with literal values.
   * NOTE(review): unlike the neighbouring blocks, this one defines neither
   * SNUMOPT / DNUMOPT nor any *GEMM_DEFAULT_R — presumably supplied
   * elsewhere in the file; confirm.
   */
  1215. #ifdef PPCG4
  1216. #define GEMM_DEFAULT_OFFSET_A 0
  1217. #define GEMM_DEFAULT_OFFSET_B 1024
  1218. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1219. #define SGEMM_DEFAULT_UNROLL_M 16
  1220. #define SGEMM_DEFAULT_UNROLL_N 4
  1221. #define DGEMM_DEFAULT_UNROLL_M 4
  1222. #define DGEMM_DEFAULT_UNROLL_N 4
  1223. #define CGEMM_DEFAULT_UNROLL_M 8
  1224. #define CGEMM_DEFAULT_UNROLL_N 2
  1225. #define ZGEMM_DEFAULT_UNROLL_M 2
  1226. #define ZGEMM_DEFAULT_UNROLL_N 2
  1227. #define SGEMM_DEFAULT_P 256
  1228. #define DGEMM_DEFAULT_P 128
  1229. #define CGEMM_DEFAULT_P 128
  1230. #define ZGEMM_DEFAULT_P 64
  1231. #define SGEMM_DEFAULT_Q 256
  1232. #define DGEMM_DEFAULT_Q 256
  1233. #define CGEMM_DEFAULT_Q 256
  1234. #define ZGEMM_DEFAULT_Q 256
  1235. #define SYMV_P 4
  1236. #endif /* PPCG4 */
  /*
   * Tuning parameters selected when PPC970 is defined.
   * Unroll factors and Q are unconditional literals. P is defined only when
   * OS_LINUX is defined, and its value then depends on L2_SIZE.
   * NOTE(review): with OS_LINUX undefined, no *GEMM_DEFAULT_P is defined by
   * this block — confirm another part of the file covers that case.
   */
  1237. #ifdef PPC970
  1238. #define SNUMOPT 4
  1239. #define DNUMOPT 4
  1240. #define GEMM_DEFAULT_OFFSET_A 2688
  1241. #define GEMM_DEFAULT_OFFSET_B 3072
  1242. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1243. #define SGEMM_DEFAULT_UNROLL_M 16
  1244. #define SGEMM_DEFAULT_UNROLL_N 4
  1245. #define DGEMM_DEFAULT_UNROLL_M 4
  1246. #define DGEMM_DEFAULT_UNROLL_N 4
  1247. #define CGEMM_DEFAULT_UNROLL_M 8
  1248. #define CGEMM_DEFAULT_UNROLL_N 2
  1249. #define ZGEMM_DEFAULT_UNROLL_M 2
  1250. #define ZGEMM_DEFAULT_UNROLL_N 2
  1251. #ifdef OS_LINUX
  /*
   * NOTE(review): 1024976 is not 1024 * 1024 (1048576). Confirm it matches
   * the exact L2_SIZE value the build supplies before "correcting" it;
   * otherwise the larger P values in this branch are never selected.
   */
  1252. #if L2_SIZE == 1024976
  1253. #define SGEMM_DEFAULT_P 320
  1254. #define DGEMM_DEFAULT_P 256
  1255. #define CGEMM_DEFAULT_P 256
  1256. #define ZGEMM_DEFAULT_P 256
  1257. #else /* any other L2_SIZE */
  1258. #define SGEMM_DEFAULT_P 176
  1259. #define DGEMM_DEFAULT_P 176
  1260. #define CGEMM_DEFAULT_P 176
  1261. #define ZGEMM_DEFAULT_P 176
  1262. #endif
  1263. #endif /* OS_LINUX */
  1264. #define SGEMM_DEFAULT_Q 512
  1265. #define DGEMM_DEFAULT_Q 256
  1266. #define CGEMM_DEFAULT_Q 256
  1267. #define ZGEMM_DEFAULT_Q 128
  1268. #define SYMV_P 4
  1269. #endif /* PPC970 */
  /*
   * Tuning parameters selected when PPC440 is defined.
   * Only the S/D/C/Z prefixes are configured. Both offsets are written as
   * (32 * 0), i.e. zero. Each *GEMM_DEFAULT_R is defined as the matching
   * *GEMM_DEFAULT_P macro, so R expands to the same value as P (512).
   */
  1270. #ifdef PPC440
  1271. #define SNUMOPT 2
  1272. #define DNUMOPT 2
  1273. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1274. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1275. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1276. #define SGEMM_DEFAULT_UNROLL_M 4
  1277. #define SGEMM_DEFAULT_UNROLL_N 4
  1278. #define DGEMM_DEFAULT_UNROLL_M 4
  1279. #define DGEMM_DEFAULT_UNROLL_N 4
  1280. #define CGEMM_DEFAULT_UNROLL_M 2
  1281. #define CGEMM_DEFAULT_UNROLL_N 2
  1282. #define ZGEMM_DEFAULT_UNROLL_M 2
  1283. #define ZGEMM_DEFAULT_UNROLL_N 2
  1284. #define SGEMM_DEFAULT_P 512
  1285. #define DGEMM_DEFAULT_P 512
  1286. #define CGEMM_DEFAULT_P 512
  1287. #define ZGEMM_DEFAULT_P 512
  1288. #define SGEMM_DEFAULT_Q 1024
  1289. #define DGEMM_DEFAULT_Q 512
  1290. #define CGEMM_DEFAULT_Q 512
  1291. #define ZGEMM_DEFAULT_Q 256
  /* R aliases P for each prefix. */
  1292. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1293. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1294. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1295. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1296. #define SYMV_P 4
  1297. #endif /* PPC440 */
  /*
   * Tuning parameters selected when PPC440FP2 is defined.
   * Only the S/D/C/Z prefixes are configured. Both offsets are written as
   * (32 * 0), i.e. zero. Q is chosen by a hard-wired "#if 1", so the large
   * values are always used and the #else values are currently dead.
   */
  1298. #ifdef PPC440FP2
  1299. #define SNUMOPT 4
  1300. #define DNUMOPT 4
  1301. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1302. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1303. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1304. #define SGEMM_DEFAULT_UNROLL_M 8
  1305. #define SGEMM_DEFAULT_UNROLL_N 4
  1306. #define DGEMM_DEFAULT_UNROLL_M 8
  1307. #define DGEMM_DEFAULT_UNROLL_N 4
  1308. #define CGEMM_DEFAULT_UNROLL_M 4
  1309. #define CGEMM_DEFAULT_UNROLL_N 2
  1310. #define ZGEMM_DEFAULT_UNROLL_M 4
  1311. #define ZGEMM_DEFAULT_UNROLL_N 2
  1312. #define SGEMM_DEFAULT_P 128
  1313. #define DGEMM_DEFAULT_P 128
  1314. #define CGEMM_DEFAULT_P 128
  1315. #define ZGEMM_DEFAULT_P 128
  /* Always-true switch: flip to 0 to select the smaller Q set below. */
  1316. #if 1
  1317. #define SGEMM_DEFAULT_Q 4096
  1318. #define DGEMM_DEFAULT_Q 3072
  1319. #define CGEMM_DEFAULT_Q 2048
  1320. #define ZGEMM_DEFAULT_Q 1024
  1321. #else /* disabled alternative */
  1322. #define SGEMM_DEFAULT_Q 512
  1323. #define DGEMM_DEFAULT_Q 256
  1324. #define CGEMM_DEFAULT_Q 256
  1325. #define ZGEMM_DEFAULT_Q 128
  1326. #endif
  1327. #define SYMV_P 4
  1328. #endif /* PPC440FP2 */
  /*
   * Tuning parameters shared by POWER3, POWER4 and POWER5.
   * Offsets, alignment, unroll factors and SYMV_P are common; P/Q/R are
   * set per variant, and for POWER4/POWER5 P depends on ALLOC_HUGETLB.
   * NOTE(review): the sub-blocks are not symmetric — POWER3 is the only one
   * defining SNUMOPT/DNUMOPT and R, and it has no CGEMM_* entries; POWER4
   * defines P only; POWER5 defines P and Q. The missing macros are
   * presumably defaulted elsewhere in the file; confirm.
   */
  1329. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1330. #define GEMM_DEFAULT_OFFSET_A 0
  1331. #define GEMM_DEFAULT_OFFSET_B 2048
  1332. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1333. #define SGEMM_DEFAULT_UNROLL_M 4
  1334. #define SGEMM_DEFAULT_UNROLL_N 4
  1335. #define DGEMM_DEFAULT_UNROLL_M 4
  1336. #define DGEMM_DEFAULT_UNROLL_N 4
  1337. #define CGEMM_DEFAULT_UNROLL_M 2
  1338. #define CGEMM_DEFAULT_UNROLL_N 2
  1339. #define ZGEMM_DEFAULT_UNROLL_M 2
  1340. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* POWER3: literal P/Q/R for S, D and Z only. */
  1341. #ifdef POWER3
  1342. #define SNUMOPT 4
  1343. #define DNUMOPT 4
  1344. #define SGEMM_DEFAULT_P 256
  1345. #define SGEMM_DEFAULT_Q 432
  1346. #define SGEMM_DEFAULT_R 1012
  1347. #define DGEMM_DEFAULT_P 256
  1348. #define DGEMM_DEFAULT_Q 216
  1349. #define DGEMM_DEFAULT_R 1012
  1350. #define ZGEMM_DEFAULT_P 256
  1351. #define ZGEMM_DEFAULT_Q 104
  1352. #define ZGEMM_DEFAULT_R 1012
  1353. #endif
  /* POWER4: P only; larger when ALLOC_HUGETLB is defined. */
  1354. #if defined(POWER4)
  1355. #ifdef ALLOC_HUGETLB
  1356. #define SGEMM_DEFAULT_P 184
  1357. #define DGEMM_DEFAULT_P 184
  1358. #define CGEMM_DEFAULT_P 184
  1359. #define ZGEMM_DEFAULT_P 184
  1360. #else
  1361. #define SGEMM_DEFAULT_P 144
  1362. #define DGEMM_DEFAULT_P 144
  1363. #define CGEMM_DEFAULT_P 144
  1364. #define ZGEMM_DEFAULT_P 144
  1365. #endif
  1366. #endif
  /* POWER5: P depends on ALLOC_HUGETLB; Q is 256 for every prefix. */
  1367. #if defined(POWER5)
  1368. #ifdef ALLOC_HUGETLB
  1369. #define SGEMM_DEFAULT_P 512
  1370. #define DGEMM_DEFAULT_P 256
  1371. #define CGEMM_DEFAULT_P 256
  1372. #define ZGEMM_DEFAULT_P 128
  1373. #else
  1374. #define SGEMM_DEFAULT_P 320
  1375. #define DGEMM_DEFAULT_P 160
  1376. #define CGEMM_DEFAULT_P 160
  1377. #define ZGEMM_DEFAULT_P 80
  1378. #endif
  1379. #define SGEMM_DEFAULT_Q 256
  1380. #define CGEMM_DEFAULT_Q 256
  1381. #define DGEMM_DEFAULT_Q 256
  1382. #define ZGEMM_DEFAULT_Q 256
  1383. #endif
  1384. #define SYMV_P 8
  1385. #endif /* POWER3 || POWER4 || POWER5 */
  /*
   * Tuning parameters selected when POWER6 is defined.
   * Only the S/D/C/Z prefixes are configured, all with literal values and
   * no inner conditionals. No *GEMM_DEFAULT_R is defined in this block.
   */
  1386. #if defined(POWER6)
  1387. #define SNUMOPT 4
  1388. #define DNUMOPT 4
  1389. #define GEMM_DEFAULT_OFFSET_A 384
  1390. #define GEMM_DEFAULT_OFFSET_B 1024
  1391. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1392. #define SGEMM_DEFAULT_UNROLL_M 4
  1393. #define SGEMM_DEFAULT_UNROLL_N 4
  1394. #define DGEMM_DEFAULT_UNROLL_M 4
  1395. #define DGEMM_DEFAULT_UNROLL_N 4
  1396. #define CGEMM_DEFAULT_UNROLL_M 2
  1397. #define CGEMM_DEFAULT_UNROLL_N 4
  1398. #define ZGEMM_DEFAULT_UNROLL_M 2
  1399. #define ZGEMM_DEFAULT_UNROLL_N 4
  1400. #define SGEMM_DEFAULT_P 992
  1401. #define DGEMM_DEFAULT_P 480
  1402. #define CGEMM_DEFAULT_P 488
  1403. #define ZGEMM_DEFAULT_P 248
  1404. #define SGEMM_DEFAULT_Q 504
  1405. #define DGEMM_DEFAULT_Q 504
  1406. #define CGEMM_DEFAULT_Q 400
  1407. #define ZGEMM_DEFAULT_Q 400
  1408. #define SYMV_P 8
  1409. #endif /* POWER6 */
  /*
   * Tuning parameters selected when both SPARC and V7 are defined.
   * Only the S/D/C/Z prefixes are configured, all with literal values.
   * No *GEMM_DEFAULT_R is defined here; GEMM_THREAD is mapped to
   * gemm_thread_mn.
   */
  1410. #if defined(SPARC) && defined(V7)
  1411. #define SNUMOPT 4
  1412. #define DNUMOPT 4
  1413. #define GEMM_DEFAULT_OFFSET_A 0
  1414. #define GEMM_DEFAULT_OFFSET_B 2048
  1415. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1416. #define SGEMM_DEFAULT_UNROLL_M 2
  1417. #define SGEMM_DEFAULT_UNROLL_N 8
  1418. #define DGEMM_DEFAULT_UNROLL_M 2
  1419. #define DGEMM_DEFAULT_UNROLL_N 8
  1420. #define CGEMM_DEFAULT_UNROLL_M 1
  1421. #define CGEMM_DEFAULT_UNROLL_N 4
  1422. #define ZGEMM_DEFAULT_UNROLL_M 1
  1423. #define ZGEMM_DEFAULT_UNROLL_N 4
  1424. #define SGEMM_DEFAULT_P 256
  1425. #define DGEMM_DEFAULT_P 256
  1426. #define CGEMM_DEFAULT_P 256
  1427. #define ZGEMM_DEFAULT_P 256
  1428. #define SGEMM_DEFAULT_Q 512
  1429. #define DGEMM_DEFAULT_Q 256
  1430. #define CGEMM_DEFAULT_Q 256
  1431. #define ZGEMM_DEFAULT_Q 128
  1432. #define SYMV_P 8
  1433. #define GEMM_THREAD gemm_thread_mn
  1434. #endif /* SPARC && V7 */
  /*
   * Tuning parameters selected when SPARC and V9 are both defined, or when
   * __sparc_v9__ is defined on its own.
   * Only the S/D/C/Z prefixes are configured, all with literal values.
   * No *GEMM_DEFAULT_R is defined in this block.
   */
  1435. #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
  1436. #define SNUMOPT 2
  1437. #define DNUMOPT 2
  1438. #define GEMM_DEFAULT_OFFSET_A 0
  1439. #define GEMM_DEFAULT_OFFSET_B 2048
  1440. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1441. #define SGEMM_DEFAULT_UNROLL_M 4
  1442. #define SGEMM_DEFAULT_UNROLL_N 4
  1443. #define DGEMM_DEFAULT_UNROLL_M 4
  1444. #define DGEMM_DEFAULT_UNROLL_N 4
  1445. #define CGEMM_DEFAULT_UNROLL_M 2
  1446. #define CGEMM_DEFAULT_UNROLL_N 2
  1447. #define ZGEMM_DEFAULT_UNROLL_M 2
  1448. #define ZGEMM_DEFAULT_UNROLL_N 2
  1449. #define SGEMM_DEFAULT_P 512
  1450. #define DGEMM_DEFAULT_P 512
  1451. #define CGEMM_DEFAULT_P 512
  1452. #define ZGEMM_DEFAULT_P 512
  1453. #define SGEMM_DEFAULT_Q 1024
  1454. #define DGEMM_DEFAULT_Q 512
  1455. #define CGEMM_DEFAULT_Q 512
  1456. #define ZGEMM_DEFAULT_Q 256
  1457. #define SYMV_P 8
  1458. #endif /* (SPARC && V9) || __sparc_v9__ */
  /*
   * Tuning parameters selected when SICORTEX is defined.
   * Only the S/D/C/Z prefixes are configured. P, Q and R are all literal
   * constants; R is 2000 for every prefix.
   */
  1459. #ifdef SICORTEX
  1460. #define SNUMOPT 2
  1461. #define DNUMOPT 2
  1462. #define GEMM_DEFAULT_OFFSET_A 0
  1463. #define GEMM_DEFAULT_OFFSET_B 0
  1464. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1465. #define SGEMM_DEFAULT_UNROLL_M 2
  1466. #define SGEMM_DEFAULT_UNROLL_N 8
  1467. #define DGEMM_DEFAULT_UNROLL_M 2
  1468. #define DGEMM_DEFAULT_UNROLL_N 8
  1469. #define CGEMM_DEFAULT_UNROLL_M 1
  1470. #define CGEMM_DEFAULT_UNROLL_N 4
  1471. #define ZGEMM_DEFAULT_UNROLL_M 1
  1472. #define ZGEMM_DEFAULT_UNROLL_N 4
  1473. #define SGEMM_DEFAULT_P 108
  1474. #define DGEMM_DEFAULT_P 112
  1475. #define CGEMM_DEFAULT_P 108
  1476. #define ZGEMM_DEFAULT_P 112
  1477. #define SGEMM_DEFAULT_Q 288
  1478. #define DGEMM_DEFAULT_Q 144
  1479. #define CGEMM_DEFAULT_Q 144
  1480. #define ZGEMM_DEFAULT_Q 72
  1481. #define SGEMM_DEFAULT_R 2000
  1482. #define DGEMM_DEFAULT_R 2000
  1483. #define CGEMM_DEFAULT_R 2000
  1484. #define ZGEMM_DEFAULT_R 2000
  1485. #define SYMV_P 16
  1486. #endif /* SICORTEX */
  /*
   * Tuning parameters selected when LOONGSON3A is defined.
   * Only the S/D/C/Z prefixes are configured. R is the literal 640 for S, C
   * and Z but is forwarded to the identifier dgemm_r for D. The block also
   * defines GEMM_OFFSET_A1 / GEMM_OFFSET_B1.
   */
  1487. #ifdef LOONGSON3A
  1488. // Originally copied from the SICORTEX block; the unroll, P, Q and R values below now differ from it.
  1489. #define SNUMOPT 2
  1490. #define DNUMOPT 2
  1491. #define GEMM_DEFAULT_OFFSET_A 0
  1492. #define GEMM_DEFAULT_OFFSET_B 0
  1493. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1494. #define SGEMM_DEFAULT_UNROLL_M 8
  1495. #define SGEMM_DEFAULT_UNROLL_N 4
  1496. #define DGEMM_DEFAULT_UNROLL_M 4
  1497. #define DGEMM_DEFAULT_UNROLL_N 4
  1498. #define CGEMM_DEFAULT_UNROLL_M 4
  1499. #define CGEMM_DEFAULT_UNROLL_N 2
  1500. #define ZGEMM_DEFAULT_UNROLL_M 2
  1501. #define ZGEMM_DEFAULT_UNROLL_N 2
  1502. #define SGEMM_DEFAULT_P 64
  1503. #define DGEMM_DEFAULT_P 44
  1504. #define CGEMM_DEFAULT_P 64
  1505. #define ZGEMM_DEFAULT_P 32
  1506. #define SGEMM_DEFAULT_Q 192
  1507. #define DGEMM_DEFAULT_Q 92
  1508. #define CGEMM_DEFAULT_Q 128
  1509. #define ZGEMM_DEFAULT_Q 80
  1510. #define SGEMM_DEFAULT_R 640
  /* NOTE(review): D alone takes R from an identifier instead of a literal — confirm intentional. */
  1511. #define DGEMM_DEFAULT_R dgemm_r
  1512. #define CGEMM_DEFAULT_R 640
  1513. #define ZGEMM_DEFAULT_R 640
  1514. #define GEMM_OFFSET_A1 0x10000
  1515. #define GEMM_OFFSET_B1 0x100000
  1516. #define SYMV_P 16
  1517. #endif /* LOONGSON3A */
  /*
   * Tuning parameters selected when LOONGSON3B is defined.
   * Only the S/D/C/Z prefixes are configured, all with literal values:
   * every unroll factor is 2 and R is 512 for every prefix. The block also
   * defines GEMM_OFFSET_A1 / GEMM_OFFSET_B1.
   */
  1518. #ifdef LOONGSON3B
  1519. #define SNUMOPT 2
  1520. #define DNUMOPT 2
  1521. #define GEMM_DEFAULT_OFFSET_A 0
  1522. #define GEMM_DEFAULT_OFFSET_B 0
  1523. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1524. #define SGEMM_DEFAULT_UNROLL_M 2
  1525. #define SGEMM_DEFAULT_UNROLL_N 2
  1526. #define DGEMM_DEFAULT_UNROLL_M 2
  1527. #define DGEMM_DEFAULT_UNROLL_N 2
  1528. #define CGEMM_DEFAULT_UNROLL_M 2
  1529. #define CGEMM_DEFAULT_UNROLL_N 2
  1530. #define ZGEMM_DEFAULT_UNROLL_M 2
  1531. #define ZGEMM_DEFAULT_UNROLL_N 2
  1532. #define SGEMM_DEFAULT_P 64
  1533. #define DGEMM_DEFAULT_P 24
  1534. #define CGEMM_DEFAULT_P 24
  1535. #define ZGEMM_DEFAULT_P 20
  1536. #define SGEMM_DEFAULT_Q 192
  1537. #define DGEMM_DEFAULT_Q 128
  1538. #define CGEMM_DEFAULT_Q 128
  1539. #define ZGEMM_DEFAULT_Q 64
  1540. #define SGEMM_DEFAULT_R 512
  1541. #define DGEMM_DEFAULT_R 512
  1542. #define CGEMM_DEFAULT_R 512
  1543. #define ZGEMM_DEFAULT_R 512
  1544. #define GEMM_OFFSET_A1 0x10000
  1545. #define GEMM_OFFSET_B1 0x100000
  1546. #define SYMV_P 16
  1547. #endif /* LOONGSON3B */
  /* GEMM tuning macros that take effect only when ARMV7 is defined.
     The P/Q/R values match the ARMV5, ARMV6 and ARMV8 blocks in this file;
     only the *_UNROLL_M/N values differ between the ARM targets
     (here: 4x4 for S and D, 2x2 for C and Z). */
  /* NOTE(review): P/Q/R are presumably blocking sizes consumed by code outside
     this section -- confirm before retuning. */
  1548. #ifdef ARMV7
  1549. #define SNUMOPT 2
  1550. #define DNUMOPT 2
  1551. #define GEMM_DEFAULT_OFFSET_A 0
  1552. #define GEMM_DEFAULT_OFFSET_B 0
  1553. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1554. #define SGEMM_DEFAULT_UNROLL_M 4
  1555. #define SGEMM_DEFAULT_UNROLL_N 4
  1556. #define DGEMM_DEFAULT_UNROLL_M 4
  1557. #define DGEMM_DEFAULT_UNROLL_N 4
  1558. #define CGEMM_DEFAULT_UNROLL_M 2
  1559. #define CGEMM_DEFAULT_UNROLL_N 2
  1560. #define ZGEMM_DEFAULT_UNROLL_M 2
  1561. #define ZGEMM_DEFAULT_UNROLL_N 2
  1562. #define SGEMM_DEFAULT_P 128
  1563. #define DGEMM_DEFAULT_P 128
  1564. #define CGEMM_DEFAULT_P 96
  1565. #define ZGEMM_DEFAULT_P 64
  1566. #define SGEMM_DEFAULT_Q 240
  1567. #define DGEMM_DEFAULT_Q 120
  1568. #define CGEMM_DEFAULT_Q 120
  1569. #define ZGEMM_DEFAULT_Q 120
  1570. #define SGEMM_DEFAULT_R 12288
  1571. #define DGEMM_DEFAULT_R 8192
  1572. #define CGEMM_DEFAULT_R 4096
  1573. #define ZGEMM_DEFAULT_R 4096
  1574. #define SYMV_P 16
  1575. #endif
  /* GEMM tuning macros that take effect only when ARMV6 is defined.
     The P/Q/R values match the ARMV5, ARMV7 and ARMV8 blocks in this file;
     only the *_UNROLL_M/N values differ between the ARM targets
     (here: 4x2 for S and D, 2x2 for C and Z). */
  /* NOTE(review): P/Q/R are presumably blocking sizes consumed by code outside
     this section -- confirm before retuning. */
  1576. #if defined(ARMV6)
  1577. #define SNUMOPT 2
  1578. #define DNUMOPT 2
  1579. #define GEMM_DEFAULT_OFFSET_A 0
  1580. #define GEMM_DEFAULT_OFFSET_B 0
  1581. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1582. #define SGEMM_DEFAULT_UNROLL_M 4
  1583. #define SGEMM_DEFAULT_UNROLL_N 2
  1584. #define DGEMM_DEFAULT_UNROLL_M 4
  1585. #define DGEMM_DEFAULT_UNROLL_N 2
  1586. #define CGEMM_DEFAULT_UNROLL_M 2
  1587. #define CGEMM_DEFAULT_UNROLL_N 2
  1588. #define ZGEMM_DEFAULT_UNROLL_M 2
  1589. #define ZGEMM_DEFAULT_UNROLL_N 2
  1590. #define SGEMM_DEFAULT_P 128
  1591. #define DGEMM_DEFAULT_P 128
  1592. #define CGEMM_DEFAULT_P 96
  1593. #define ZGEMM_DEFAULT_P 64
  1594. #define SGEMM_DEFAULT_Q 240
  1595. #define DGEMM_DEFAULT_Q 120
  1596. #define CGEMM_DEFAULT_Q 120
  1597. #define ZGEMM_DEFAULT_Q 120
  1598. #define SGEMM_DEFAULT_R 12288
  1599. #define DGEMM_DEFAULT_R 8192
  1600. #define CGEMM_DEFAULT_R 4096
  1601. #define ZGEMM_DEFAULT_R 4096
  1602. #define SYMV_P 16
  1603. #endif
  /* GEMM tuning macros that take effect only when ARMV8 is defined.
     The P/Q/R values match the ARMV5, ARMV6 and ARMV7 blocks in this file;
     only the *_UNROLL_M/N values differ between the ARM targets
     (here: 4x4 for S, 2x2 for D, C and Z). */
  /* NOTE(review): P/Q/R are presumably blocking sizes consumed by code outside
     this section -- confirm before retuning. */
  1604. #if defined(ARMV8)
  1605. #define SNUMOPT 2
  1606. #define DNUMOPT 2
  1607. #define GEMM_DEFAULT_OFFSET_A 0
  1608. #define GEMM_DEFAULT_OFFSET_B 0
  1609. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1610. #define SGEMM_DEFAULT_UNROLL_M 4
  1611. #define SGEMM_DEFAULT_UNROLL_N 4
  1612. #define DGEMM_DEFAULT_UNROLL_M 2
  1613. #define DGEMM_DEFAULT_UNROLL_N 2
  1614. #define CGEMM_DEFAULT_UNROLL_M 2
  1615. #define CGEMM_DEFAULT_UNROLL_N 2
  1616. #define ZGEMM_DEFAULT_UNROLL_M 2
  1617. #define ZGEMM_DEFAULT_UNROLL_N 2
  1618. #define SGEMM_DEFAULT_P 128
  1619. #define DGEMM_DEFAULT_P 128
  1620. #define CGEMM_DEFAULT_P 96
  1621. #define ZGEMM_DEFAULT_P 64
  1622. #define SGEMM_DEFAULT_Q 240
  1623. #define DGEMM_DEFAULT_Q 120
  1624. #define CGEMM_DEFAULT_Q 120
  1625. #define ZGEMM_DEFAULT_Q 120
  1626. #define SGEMM_DEFAULT_R 12288
  1627. #define DGEMM_DEFAULT_R 8192
  1628. #define CGEMM_DEFAULT_R 4096
  1629. #define ZGEMM_DEFAULT_R 4096
  1630. #define SYMV_P 16
  1631. #endif
  /* GEMM tuning macros that take effect only when ARMV5 is defined.
     The P/Q/R values match the ARMV6, ARMV7 and ARMV8 blocks in this file;
     only the *_UNROLL_M/N values differ between the ARM targets
     (here: 2x2 for all four precisions). */
  /* NOTE(review): P/Q/R are presumably blocking sizes consumed by code outside
     this section -- confirm before retuning. */
  1632. #if defined(ARMV5)
  1633. #define SNUMOPT 2
  1634. #define DNUMOPT 2
  1635. #define GEMM_DEFAULT_OFFSET_A 0
  1636. #define GEMM_DEFAULT_OFFSET_B 0
  1637. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1638. #define SGEMM_DEFAULT_UNROLL_M 2
  1639. #define SGEMM_DEFAULT_UNROLL_N 2
  1640. #define DGEMM_DEFAULT_UNROLL_M 2
  1641. #define DGEMM_DEFAULT_UNROLL_N 2
  1642. #define CGEMM_DEFAULT_UNROLL_M 2
  1643. #define CGEMM_DEFAULT_UNROLL_N 2
  1644. #define ZGEMM_DEFAULT_UNROLL_M 2
  1645. #define ZGEMM_DEFAULT_UNROLL_N 2
  1646. #define SGEMM_DEFAULT_P 128
  1647. #define DGEMM_DEFAULT_P 128
  1648. #define CGEMM_DEFAULT_P 96
  1649. #define ZGEMM_DEFAULT_P 64
  1650. #define SGEMM_DEFAULT_Q 240
  1651. #define DGEMM_DEFAULT_Q 120
  1652. #define CGEMM_DEFAULT_Q 120
  1653. #define ZGEMM_DEFAULT_Q 120
  1654. #define SGEMM_DEFAULT_R 12288
  1655. #define DGEMM_DEFAULT_R 8192
  1656. #define CGEMM_DEFAULT_R 4096
  1657. #define ZGEMM_DEFAULT_R 4096
  1658. #define SYMV_P 16
  1659. #endif
  /* GEMM tuning macros that take effect only when GENERIC is defined.
     Unlike the per-CPU blocks, this one also sets the Q (QGEMM) and X (XGEMM)
     variants, and its *_P / *_R macros expand to identifiers (sgemm_p,
     dgemm_r, ...) rather than integer literals; those identifiers are not
     defined in this section and must be visible wherever the macros are used. */
  1660. #ifdef GENERIC
  1661. #define SNUMOPT 2
  1662. #define DNUMOPT 2
  1663. #define GEMM_DEFAULT_OFFSET_A 0
  1664. #define GEMM_DEFAULT_OFFSET_B 0
  1665. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1666. #define SGEMM_DEFAULT_UNROLL_N 2
  1667. #define DGEMM_DEFAULT_UNROLL_N 2
  1668. #define QGEMM_DEFAULT_UNROLL_N 2
  1669. #define CGEMM_DEFAULT_UNROLL_N 2
  1670. #define ZGEMM_DEFAULT_UNROLL_N 2
  1671. #define XGEMM_DEFAULT_UNROLL_N 1
  /* NOTE(review): both branches of this ARCH_X86 conditional currently define
     exactly the same UNROLL_M values, so the test has no effect as written. */
  1672. #ifdef ARCH_X86
  1673. #define SGEMM_DEFAULT_UNROLL_M 2
  1674. #define DGEMM_DEFAULT_UNROLL_M 2
  1675. #define QGEMM_DEFAULT_UNROLL_M 2
  1676. #define CGEMM_DEFAULT_UNROLL_M 2
  1677. #define ZGEMM_DEFAULT_UNROLL_M 2
  1678. #define XGEMM_DEFAULT_UNROLL_M 1
  1679. #else
  1680. #define SGEMM_DEFAULT_UNROLL_M 2
  1681. #define DGEMM_DEFAULT_UNROLL_M 2
  1682. #define QGEMM_DEFAULT_UNROLL_M 2
  1683. #define CGEMM_DEFAULT_UNROLL_M 2
  1684. #define ZGEMM_DEFAULT_UNROLL_M 2
  1685. #define XGEMM_DEFAULT_UNROLL_M 1
  1686. #endif
  /* P and R are forwarded to same-named lowercase identifiers. */
  1687. #define SGEMM_DEFAULT_P sgemm_p
  1688. #define DGEMM_DEFAULT_P dgemm_p
  1689. #define QGEMM_DEFAULT_P qgemm_p
  1690. #define CGEMM_DEFAULT_P cgemm_p
  1691. #define ZGEMM_DEFAULT_P zgemm_p
  1692. #define XGEMM_DEFAULT_P xgemm_p
  1693. #define SGEMM_DEFAULT_R sgemm_r
  1694. #define DGEMM_DEFAULT_R dgemm_r
  1695. #define QGEMM_DEFAULT_R qgemm_r
  1696. #define CGEMM_DEFAULT_R cgemm_r
  1697. #define ZGEMM_DEFAULT_R zgemm_r
  1698. #define XGEMM_DEFAULT_R xgemm_r
  /* Q is a fixed literal (128) for every precision. */
  1699. #define SGEMM_DEFAULT_Q 128
  1700. #define DGEMM_DEFAULT_Q 128
  1701. #define QGEMM_DEFAULT_Q 128
  1702. #define CGEMM_DEFAULT_Q 128
  1703. #define ZGEMM_DEFAULT_Q 128
  1704. #define XGEMM_DEFAULT_Q 128
  1705. #define SYMV_P 16
  1706. #endif
  /* Fallbacks: any target block that did not define the QGEMM/XGEMM unroll
     macros gets 2 for each of them here. Blocks that already defined them
     (e.g. GENERIC, which sets the XGEMM unrolls to 1) are left untouched
     because each definition is guarded by #ifndef. */
  1707. #ifndef QGEMM_DEFAULT_UNROLL_M
  1708. #define QGEMM_DEFAULT_UNROLL_M 2
  1709. #endif
  1710. #ifndef QGEMM_DEFAULT_UNROLL_N
  1711. #define QGEMM_DEFAULT_UNROLL_N 2
  1712. #endif
  1713. #ifndef XGEMM_DEFAULT_UNROLL_M
  1714. #define XGEMM_DEFAULT_UNROLL_M 2
  1715. #endif
  1716. #ifndef XGEMM_DEFAULT_UNROLL_N
  1717. #define XGEMM_DEFAULT_UNROLL_N 2
  1718. #endif
  /* Instruction-prefix macros for x86 shuffles. Each expands to a mnemonic,
     an AT&T-style immediate and a trailing comma, so the register operands
     are written after the macro at the point of use. */
  /* Without HAVE_SSE2, SHUFPD_n is emulated with shufps: each immediate moves
     pairs of 32-bit lanes so that the same 64-bit halves are selected as
     shufpd $n would select (0x44 -> $0, 0x4e -> $1, 0xe4 -> $2, 0xee -> $3). */
  1719. #ifndef HAVE_SSE2
  1720. #define SHUFPD_0 shufps $0x44,
  1721. #define SHUFPD_1 shufps $0x4e,
  1722. #define SHUFPD_2 shufps $0xe4,
  1723. #define SHUFPD_3 shufps $0xee,
  1724. #endif
  /* Otherwise (macro still undefined) use the native shufpd encodings. */
  1725. #ifndef SHUFPD_0
  1726. #define SHUFPD_0 shufpd $0,
  1727. #endif
  1728. #ifndef SHUFPD_1
  1729. #define SHUFPD_1 shufpd $1,
  1730. #endif
  1731. #ifndef SHUFPD_2
  1732. #define SHUFPD_2 shufpd $2,
  1733. #endif
  1734. #ifndef SHUFPD_3
  1735. #define SHUFPD_3 shufpd $3,
  1736. #endif
  /* Default for SHUFPS_39 when no earlier definition supplied one. */
  1737. #ifndef SHUFPS_39
  1738. #define SHUFPS_39 shufps $0x39,
  1739. #endif
  1740. #endif