You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

testutil.cpp 49 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "testutil.h"
  15. #include "cpu.h"
  16. #include "layer.h"
  17. #include "mat.h"
  18. #include "prng.h"
  19. #include <limits.h>
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #if NCNN_VULKAN
  23. #include "command.h"
  24. #include "gpu.h"
  25. #endif // NCNN_VULKAN
  26. static struct prng_rand_t g_prng_rand_state;
  27. void SRAND(int seed)
  28. {
  29. prng_srand(seed, &g_prng_rand_state);
  30. }
  31. uint64_t RAND()
  32. {
  33. return prng_rand(&g_prng_rand_state);
  34. }
  35. float RandomFloat(float a, float b)
  36. {
  37. float random = ((float)RAND()) / (float)uint64_t(-1); //RAND_MAX;
  38. float diff = b - a;
  39. float r = random * diff;
  40. float v = a + r;
  41. // generate denormal as zero
  42. if (v < 0.0001 && v > -0.0001)
  43. v = 0.f;
  44. return v;
  45. }
  46. int RandomInt(int a, int b)
  47. {
  48. float random = ((float)RAND()) / (float)uint64_t(-1); //RAND_MAX;
  49. int diff = b - a;
  50. float r = random * diff;
  51. return a + (int)r;
  52. }
  53. signed char RandomS8()
  54. {
  55. return (signed char)RandomInt(-127, 127);
  56. }
  57. void Randomize(ncnn::Mat& m, float a, float b)
  58. {
  59. for (size_t i = 0; i < m.total(); i++)
  60. {
  61. m[i] = RandomFloat(a, b);
  62. }
  63. }
  64. void RandomizeInt(ncnn::Mat& m, int a, int b)
  65. {
  66. for (size_t i = 0; i < m.total(); i++)
  67. {
  68. ((int*)m)[i] = RandomInt(a, b);
  69. }
  70. }
  71. void RandomizeS8(ncnn::Mat& m)
  72. {
  73. for (size_t i = 0; i < m.total(); i++)
  74. {
  75. ((signed char*)m)[i] = RandomS8();
  76. }
  77. }
  78. ncnn::Mat RandomMat(int w, float a, float b)
  79. {
  80. ncnn::Mat m(w);
  81. Randomize(m, a, b);
  82. return m;
  83. }
  84. ncnn::Mat RandomMat(int w, int h, float a, float b)
  85. {
  86. ncnn::Mat m(w, h);
  87. Randomize(m, a, b);
  88. return m;
  89. }
  90. ncnn::Mat RandomMat(int w, int h, int c, float a, float b)
  91. {
  92. ncnn::Mat m(w, h, c);
  93. Randomize(m, a, b);
  94. return m;
  95. }
  96. ncnn::Mat RandomMat(int w, int h, int d, int c, float a, float b)
  97. {
  98. ncnn::Mat m(w, h, d, c);
  99. Randomize(m, a, b);
  100. return m;
  101. }
  102. ncnn::Mat RandomIntMat(int w)
  103. {
  104. ncnn::Mat m(w);
  105. RandomizeInt(m);
  106. return m;
  107. }
  108. ncnn::Mat RandomIntMat(int w, int h)
  109. {
  110. ncnn::Mat m(w, h);
  111. RandomizeInt(m);
  112. return m;
  113. }
  114. ncnn::Mat RandomIntMat(int w, int h, int c)
  115. {
  116. ncnn::Mat m(w, h, c);
  117. RandomizeInt(m);
  118. return m;
  119. }
  120. ncnn::Mat RandomIntMat(int w, int h, int d, int c)
  121. {
  122. ncnn::Mat m(w, h, d, c);
  123. RandomizeInt(m);
  124. return m;
  125. }
  126. ncnn::Mat RandomS8Mat(int w)
  127. {
  128. ncnn::Mat m(w, (size_t)1u);
  129. RandomizeS8(m);
  130. return m;
  131. }
  132. ncnn::Mat RandomS8Mat(int w, int h)
  133. {
  134. ncnn::Mat m(w, h, (size_t)1u);
  135. RandomizeS8(m);
  136. return m;
  137. }
  138. ncnn::Mat RandomS8Mat(int w, int h, int c)
  139. {
  140. ncnn::Mat m(w, h, c, (size_t)1u);
  141. RandomizeS8(m);
  142. return m;
  143. }
  144. ncnn::Mat RandomS8Mat(int w, int h, int d, int c)
  145. {
  146. ncnn::Mat m(w, h, d, c, (size_t)1u);
  147. RandomizeS8(m);
  148. return m;
  149. }
  150. ncnn::Mat scales_mat(const ncnn::Mat& mat, int m, int k, int ldx)
  151. {
  152. ncnn::Mat weight_scales(m);
  153. for (int i = 0; i < m; ++i)
  154. {
  155. float min = mat[0], _max = mat[0];
  156. const float* ptr = (const float*)(mat.data) + i * ldx;
  157. for (int j = 0; j < k; ++j)
  158. {
  159. if (min > ptr[j])
  160. {
  161. min = ptr[j];
  162. }
  163. if (_max < ptr[j])
  164. {
  165. _max = ptr[j];
  166. }
  167. }
  168. const float abs_min = abs(min), abs_max = abs(_max);
  169. weight_scales[i] = 127.f / (abs_min > abs_max ? abs_min : abs_max);
  170. }
  171. return weight_scales;
  172. }
  // Tolerant float comparison.
  //
  // Returns true when a and b are exactly equal, when |a - b| <= epsilon, or
  // when |a - b| is strictly below epsilon times the larger of |a| and |b|.
  // Any comparison involving NaN returns false.
  bool NearlyEqual(float a, float b, float epsilon)
  {
      if (a != b)
      {
          const float gap = (float)fabs(a - b);

          // written as !(gap <= epsilon) so a NaN gap falls through to the
          // relative test, which then rejects it
          if (!(gap <= epsilon))
          {
              // relative error
              return gap < epsilon * std::max(fabs(a), fabs(b));
          }
      }

      return true;
  }
  183. int Compare(const ncnn::Mat& a, const ncnn::Mat& b, float epsilon)
  184. {
  185. #define CHECK_MEMBER(m) \
  186. if (a.m != b.m) \
  187. { \
  188. fprintf(stderr, #m " not match expect %d but got %d\n", (int)a.m, (int)b.m); \
  189. return -1; \
  190. }
  191. CHECK_MEMBER(dims)
  192. CHECK_MEMBER(w)
  193. CHECK_MEMBER(h)
  194. CHECK_MEMBER(d)
  195. CHECK_MEMBER(c)
  196. CHECK_MEMBER(elemsize)
  197. CHECK_MEMBER(elempack)
  198. #undef CHECK_MEMBER
  199. for (int q = 0; q < a.c; q++)
  200. {
  201. const ncnn::Mat ma = a.channel(q);
  202. const ncnn::Mat mb = b.channel(q);
  203. for (int z = 0; z < a.d; z++)
  204. {
  205. const ncnn::Mat da = ma.depth(z);
  206. const ncnn::Mat db = mb.depth(z);
  207. for (int i = 0; i < a.h; i++)
  208. {
  209. const float* pa = da.row(i);
  210. const float* pb = db.row(i);
  211. for (int j = 0; j < a.w; j++)
  212. {
  213. if (!NearlyEqual(pa[j], pb[j], epsilon))
  214. {
  215. fprintf(stderr, "value not match at c:%d d:%d h:%d w:%d expect %f but got %f\n", q, z, i, j, pa[j], pb[j]);
  216. return -1;
  217. }
  218. }
  219. }
  220. }
  221. }
  222. return 0;
  223. }
  224. int CompareMat(const ncnn::Mat& a, const ncnn::Mat& b, float epsilon)
  225. {
  226. ncnn::Option opt;
  227. opt.num_threads = 1;
  228. if (a.elempack != 1)
  229. {
  230. ncnn::Mat a1;
  231. ncnn::convert_packing(a, a1, 1, opt);
  232. return CompareMat(a1, b, epsilon);
  233. }
  234. if (b.elempack != 1)
  235. {
  236. ncnn::Mat b1;
  237. ncnn::convert_packing(b, b1, 1, opt);
  238. return CompareMat(a, b1, epsilon);
  239. }
  240. if (a.elemsize == 2u)
  241. {
  242. ncnn::Mat a32;
  243. cast_float16_to_float32(a, a32, opt);
  244. return CompareMat(a32, b, epsilon);
  245. }
  246. if (a.elemsize == 1u)
  247. {
  248. ncnn::Mat a32;
  249. cast_int8_to_float32(a, a32, opt);
  250. return CompareMat(a32, b, epsilon);
  251. }
  252. if (b.elemsize == 2u)
  253. {
  254. ncnn::Mat b32;
  255. cast_float16_to_float32(b, b32, opt);
  256. return CompareMat(a, b32, epsilon);
  257. }
  258. if (b.elemsize == 1u)
  259. {
  260. ncnn::Mat b32;
  261. cast_int8_to_float32(b, b32, opt);
  262. return CompareMat(a, b32, epsilon);
  263. }
  264. return Compare(a, b, epsilon);
  265. }
  266. int CompareMat(const std::vector<ncnn::Mat>& a, const std::vector<ncnn::Mat>& b, float epsilon)
  267. {
  268. if (a.size() != b.size())
  269. {
  270. fprintf(stderr, "output blob count not match %zu %zu\n", a.size(), b.size());
  271. return -1;
  272. }
  273. for (size_t i = 0; i < a.size(); i++)
  274. {
  275. if (CompareMat(a[i], b[i], epsilon))
  276. {
  277. fprintf(stderr, "output blob %zu not match\n", i);
  278. return -1;
  279. }
  280. }
  281. return 0;
  282. }
  283. static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const ncnn::Option& opt, const ncnn::Layer* op, int flag)
  284. {
  285. // clang-format off
  286. // *INDENT-OFF*
  287. #if NCNN_ARM82
  288. if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  289. {
  290. ncnn::cast_float32_to_float16(a, a4, opt);
  291. }
  292. else
  293. #endif // NCNN_ARM82
  294. #if NCNN_VFPV4
  295. if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  296. {
  297. ncnn::cast_float32_to_float16(a, a4, opt);
  298. }
  299. else
  300. #endif // NCNN_VFPV4
  301. #if NCNN_RVV
  302. if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  303. {
  304. ncnn::cast_float32_to_float16(a, a4, opt);
  305. }
  306. else
  307. #endif // NCNN_RVV
  308. #if NCNN_BF16
  309. if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  310. {
  311. ncnn::cast_float32_to_bfloat16(a, a4, opt);
  312. }
  313. else
  314. #endif // NCNN_BF16
  315. if (opt.use_fp16_storage && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  316. {
  317. ncnn::cast_float32_to_float16(a, a4, opt);
  318. }
  319. else
  320. {
  321. a4 = a;
  322. }
  323. // *INDENT-ON*
  324. // clang-format on
  325. if (opt.use_packing_layout && op->support_packing && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_PACKING))
  326. {
  327. // resolve dst_elempack
  328. int dims = a4.dims;
  329. int elemcount = 0;
  330. if (dims == 1) elemcount = a4.elempack * a4.w;
  331. if (dims == 2) elemcount = a4.elempack * a4.h;
  332. if (dims == 3 || dims == 4) elemcount = a4.elempack * a4.c;
  333. int elembits = a4.elembits();
  334. int dst_elempack = 1;
  335. if (elembits == 32)
  336. {
  337. #if NCNN_AVX512
  338. if (elemcount % 16 == 0 && ncnn::cpu_support_x86_avx512())
  339. dst_elempack = 16;
  340. else if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
  341. dst_elempack = 8;
  342. else if (elemcount % 4 == 0)
  343. dst_elempack = 4;
  344. #elif NCNN_AVX
  345. if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
  346. dst_elempack = 8;
  347. else if (elemcount % 4 == 0)
  348. dst_elempack = 4;
  349. #elif NCNN_RVV
  350. const int packn = ncnn::cpu_riscv_vlenb() / (elembits / 8);
  351. if (elemcount % packn == 0)
  352. dst_elempack = packn;
  353. #else
  354. if (elemcount % 4 == 0)
  355. dst_elempack = 4;
  356. #endif
  357. }
  358. if (elembits == 16)
  359. {
  360. #if NCNN_ARM82
  361. if (elemcount % 8 == 0 && ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic && op->support_fp16_storage)
  362. dst_elempack = 8;
  363. else if (elemcount % 4 == 0)
  364. dst_elempack = 4;
  365. #elif NCNN_RVV
  366. const int packn = ncnn::cpu_riscv_vlenb() / 2;
  367. if (elemcount % packn == 0)
  368. dst_elempack = packn;
  369. #else
  370. if (elemcount % 4 == 0)
  371. dst_elempack = 4;
  372. #endif
  373. }
  374. if (elembits == 8)
  375. {
  376. #if NCNN_RVV
  377. const int packn = ncnn::cpu_riscv_vlenb() / 1;
  378. if (elemcount % packn == 0)
  379. dst_elempack = packn;
  380. #else
  381. if (elemcount % 8 == 0)
  382. dst_elempack = 8;
  383. #endif
  384. }
  385. if (flag & TEST_LAYER_ENABLE_FORCE_INPUT_PACK8)
  386. dst_elempack = 8;
  387. ncnn::Mat a4_packed;
  388. ncnn::convert_packing(a4, a4_packed, dst_elempack, opt);
  389. a4 = a4_packed;
  390. }
  391. return 0;
  392. }
  393. static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const ncnn::Option& opt, const ncnn::Layer* op, int flag)
  394. {
  395. ncnn::Mat c4_unpacked;
  396. if (c4.elempack != 1)
  397. {
  398. ncnn::convert_packing(c4, c4_unpacked, 1, opt);
  399. }
  400. else
  401. {
  402. c4_unpacked = c4;
  403. }
  404. // clang-format off
  405. // *INDENT-OFF*
  406. #if NCNN_ARM82
  407. if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
  408. {
  409. ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
  410. }
  411. else
  412. #endif // NCNN_ARM82
  413. #if NCNN_VFPV4
  414. if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
  415. {
  416. ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
  417. }
  418. else
  419. #endif // NCNN_VFPV4
  420. #if NCNN_RVV
  421. if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
  422. {
  423. ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
  424. }
  425. else
  426. #endif // NCNN_RVV
  427. #if NCNN_BF16
  428. if (opt.use_bf16_storage && op->support_bf16_storage && c4_unpacked.elembits() == 16)
  429. {
  430. ncnn::cast_bfloat16_to_float32(c4_unpacked, c, opt);
  431. }
  432. else
  433. #endif // NCNN_BF16
  434. if (opt.use_fp16_storage && op->support_fp16_storage && c4_unpacked.elembits() == 16)
  435. {
  436. ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
  437. }
  438. else
  439. {
  440. c = c4_unpacked;
  441. }
  442. // *INDENT-ON*
  443. // clang-format on
  444. return 0;
  445. }
  446. int test_layer_naive(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& b, void (*func)(ncnn::Layer*), int flag)
  447. {
  448. ncnn::Layer* op = ncnn::create_layer_naive(typeindex);
  449. if (func)
  450. {
  451. (*func)((ncnn::Layer*)op);
  452. }
  453. op->load_param(pd);
  454. if (op->one_blob_only && a.size() != 1)
  455. {
  456. fprintf(stderr, "layer with one_blob_only but consume multiple inputs\n");
  457. delete op;
  458. return -1;
  459. }
  460. ncnn::ModelBinFromMatArray mb(weights.data());
  461. op->load_model(mb);
  462. ncnn::Option opt;
  463. opt.num_threads = 1;
  464. opt.lightmode = false;
  465. opt.use_packing_layout = false;
  466. opt.use_fp16_packed = false;
  467. opt.use_fp16_storage = false;
  468. opt.use_fp16_arithmetic = false;
  469. opt.use_shader_pack8 = false;
  470. opt.use_image_storage = false;
  471. opt.use_bf16_storage = false;
  472. opt.use_vulkan_compute = false;
  473. op->create_pipeline(opt);
  474. b.resize(top_blob_count);
  475. if (op->support_inplace)
  476. {
  477. for (size_t i = 0; i < a.size(); i++)
  478. {
  479. b[i] = a[i].clone();
  480. }
  481. op->forward_inplace(b, opt);
  482. }
  483. else
  484. {
  485. op->forward(a, b, opt);
  486. }
  487. op->destroy_pipeline(opt);
  488. delete op;
  489. return 0;
  490. }
  491. int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& c, const std::vector<ncnn::Mat>& top_shapes, void (*func)(ncnn::Layer*), int flag)
  492. {
  493. ncnn::Layer* op = ncnn::create_layer_cpu(typeindex);
  494. if (!op->support_packing && _opt.use_packing_layout)
  495. {
  496. delete op;
  497. return 233;
  498. }
  499. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  500. {
  501. delete op;
  502. return 233;
  503. }
  504. if (func)
  505. {
  506. (*func)((ncnn::Layer*)op);
  507. }
  508. if (!top_shapes.empty())
  509. {
  510. op->bottom_shapes = a;
  511. op->top_shapes = top_shapes;
  512. }
  513. op->load_param(pd);
  514. if (op->one_blob_only && a.size() != 1)
  515. {
  516. fprintf(stderr, "layer with one_blob_only but consume multiple inputs\n");
  517. delete op;
  518. return -1;
  519. }
  520. ncnn::ModelBinFromMatArray mb(weights.data());
  521. op->load_model(mb);
  522. ncnn::Option opt = _opt;
  523. opt.num_threads = 1;
  524. opt.use_vulkan_compute = false;
  525. op->create_pipeline(opt);
  526. if (!op->support_packing && _opt.use_packing_layout)
  527. {
  528. op->destroy_pipeline(opt);
  529. delete op;
  530. return 233;
  531. }
  532. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  533. {
  534. op->destroy_pipeline(opt);
  535. delete op;
  536. return 233;
  537. }
  538. std::vector<ncnn::Mat> a4(a.size());
  539. for (size_t i = 0; i < a4.size(); i++)
  540. {
  541. convert_to_optimal_layout(a[i], a4[i], opt, op, flag);
  542. }
  543. c.resize(top_blob_count);
  544. if (op->support_inplace)
  545. {
  546. for (size_t i = 0; i < a4.size(); i++)
  547. {
  548. c[i] = a4[i].clone();
  549. }
  550. op->forward_inplace(c, opt);
  551. }
  552. else
  553. {
  554. op->forward(a4, c, opt);
  555. }
  556. for (size_t i = 0; i < c.size(); i++)
  557. {
  558. convert_to_vanilla_layout(c[i], c[i], opt, op, flag);
  559. }
  560. op->destroy_pipeline(opt);
  561. delete op;
  562. return 0;
  563. }
  564. #if NCNN_VULKAN
  565. int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& d, const std::vector<ncnn::Mat>& top_shapes, void (*func)(ncnn::Layer*), int flag)
  566. {
  567. if (!_opt.use_packing_layout)
  568. {
  569. // pack1 test is useless for gpu
  570. return 233;
  571. }
  572. ncnn::Layer* op = ncnn::create_layer_vulkan(typeindex);
  573. if (!op)
  574. {
  575. return 233;
  576. }
  577. op->load_param(pd);
  578. if (!op->support_vulkan)
  579. {
  580. delete op;
  581. return 233;
  582. }
  583. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  584. op->vkdev = vkdev;
  585. if (func)
  586. {
  587. (*func)((ncnn::Layer*)op);
  588. }
  589. if (!top_shapes.empty())
  590. {
  591. op->bottom_shapes = a;
  592. op->top_shapes = top_shapes;
  593. }
  594. if (op->one_blob_only && a.size() != 1)
  595. {
  596. fprintf(stderr, "layer with one_blob_only but consume multiple inputs\n");
  597. delete op;
  598. return -1;
  599. }
  600. ncnn::ModelBinFromMatArray mb(weights.data());
  601. op->load_model(mb);
  602. ncnn::VkWeightAllocator g_weight_vkallocator(vkdev);
  603. ncnn::VkWeightStagingAllocator g_weight_staging_vkallocator(vkdev);
  604. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  605. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  606. ncnn::Option opt = _opt;
  607. opt.num_threads = 1;
  608. opt.use_vulkan_compute = true;
  609. #if __APPLE__
  610. opt.use_image_storage = false;
  611. #endif
  612. opt.blob_vkallocator = blob_vkallocator;
  613. opt.workspace_vkallocator = blob_vkallocator;
  614. opt.staging_vkallocator = staging_vkallocator;
  615. if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
  616. if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
  617. if (!vkdev->info.support_fp16_uniform()) opt.use_fp16_uniform = false;
  618. if (!vkdev->info.support_fp16_arithmetic()) opt.use_fp16_arithmetic = false;
  619. if (!vkdev->info.support_int8_packed()) opt.use_int8_packed = false;
  620. if (!vkdev->info.support_int8_storage()) opt.use_int8_storage = false;
  621. if (!vkdev->info.support_int8_uniform()) opt.use_int8_uniform = false;
  622. if (!vkdev->info.support_int8_arithmetic()) opt.use_int8_arithmetic = false;
  623. if (!vkdev->info.support_cooperative_matrix()) opt.use_cooperative_matrix = false;
  624. // FIXME fp16a may produce large error
  625. opt.use_fp16_arithmetic = false;
  626. op->create_pipeline(opt);
  627. if (!op->support_vulkan)
  628. {
  629. op->destroy_pipeline(opt);
  630. delete op;
  631. return 233;
  632. }
  633. {
  634. ncnn::VkTransfer cmd(vkdev);
  635. ncnn::Option opt_upload = opt;
  636. opt_upload.blob_vkallocator = &g_weight_vkallocator;
  637. opt_upload.workspace_vkallocator = &g_weight_vkallocator;
  638. opt_upload.staging_vkallocator = &g_weight_staging_vkallocator;
  639. op->upload_model(cmd, opt_upload);
  640. cmd.submit_and_wait();
  641. }
  642. d.resize(top_blob_count);
  643. {
  644. // forward
  645. ncnn::VkCompute cmd(vkdev);
  646. if (op->support_image_storage && opt.use_image_storage)
  647. {
  648. // upload
  649. std::vector<ncnn::VkImageMat> a_gpu(a.size());
  650. for (size_t i = 0; i < a_gpu.size(); i++)
  651. {
  652. cmd.record_upload(a[i], a_gpu[i], opt);
  653. }
  654. std::vector<ncnn::VkImageMat> d_gpu(top_blob_count);
  655. if (op->support_inplace)
  656. {
  657. op->forward_inplace(a_gpu, cmd, opt);
  658. d_gpu = a_gpu;
  659. }
  660. else
  661. {
  662. op->forward(a_gpu, d_gpu, cmd, opt);
  663. }
  664. // download
  665. for (size_t i = 0; i < d_gpu.size(); i++)
  666. {
  667. cmd.record_download(d_gpu[i], d[i], opt);
  668. }
  669. }
  670. else
  671. {
  672. // upload
  673. std::vector<ncnn::VkMat> a_gpu(a.size());
  674. for (size_t i = 0; i < a_gpu.size(); i++)
  675. {
  676. cmd.record_upload(a[i], a_gpu[i], opt);
  677. }
  678. std::vector<ncnn::VkMat> d_gpu(top_blob_count);
  679. if (op->support_inplace)
  680. {
  681. op->forward_inplace(a_gpu, cmd, opt);
  682. d_gpu = a_gpu;
  683. }
  684. else
  685. {
  686. op->forward(a_gpu, d_gpu, cmd, opt);
  687. }
  688. // download
  689. for (size_t i = 0; i < d_gpu.size(); i++)
  690. {
  691. cmd.record_download(d_gpu[i], d[i], opt);
  692. }
  693. }
  694. cmd.submit_and_wait();
  695. }
  696. op->destroy_pipeline(opt);
  697. delete op;
  698. vkdev->reclaim_blob_allocator(blob_vkallocator);
  699. vkdev->reclaim_staging_allocator(staging_vkallocator);
  700. g_weight_vkallocator.clear();
  701. g_weight_staging_vkallocator.clear();
  702. return 0;
  703. }
  704. #endif // NCNN_VULKAN
  705. int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, const std::vector<ncnn::Mat>& top_shapes, float epsilon, void (*func)(ncnn::Layer*), int flag)
  706. {
  707. // naive
  708. std::vector<ncnn::Mat> b;
  709. {
  710. int ret = test_layer_naive(typeindex, pd, weights, a, top_blob_count, b, func, flag);
  711. if (ret != 233 && ret != 0)
  712. {
  713. fprintf(stderr, "test_layer_naive failed\n");
  714. return -1;
  715. }
  716. }
  717. // cpu
  718. {
  719. std::vector<ncnn::Mat> c;
  720. int ret = test_layer_cpu(typeindex, pd, weights, _opt, a, top_blob_count, c, std::vector<ncnn::Mat>(), func, flag);
  721. if (ret != 233 && (ret != 0 || CompareMat(b, c, epsilon) != 0))
  722. {
  723. fprintf(stderr, "test_layer_cpu failed\n");
  724. return -1;
  725. }
  726. }
  727. // cpu shape hint
  728. {
  729. std::vector<ncnn::Mat> c;
  730. int ret = test_layer_cpu(typeindex, pd, weights, _opt, a, top_blob_count, c, b, func, flag);
  731. if (ret != 233 && (ret != 0 || CompareMat(b, c, epsilon) != 0))
  732. {
  733. fprintf(stderr, "test_layer_cpu failed with shape hint\n");
  734. return -1;
  735. }
  736. }
  737. #if NCNN_VULKAN
  738. // gpu
  739. if (!(flag & TEST_LAYER_DISABLE_GPU_TESTING))
  740. {
  741. std::vector<ncnn::Mat> d;
  742. int ret = test_layer_gpu(typeindex, pd, weights, _opt, a, top_blob_count, d, std::vector<ncnn::Mat>(), func, flag);
  743. if (ret != 233 && (ret != 0 || CompareMat(b, d, epsilon) != 0))
  744. {
  745. fprintf(stderr, "test_layer_gpu failed\n");
  746. return -1;
  747. }
  748. }
  749. // gpu shape hint
  750. if (!(flag & TEST_LAYER_DISABLE_GPU_TESTING))
  751. {
  752. std::vector<ncnn::Mat> d;
  753. int ret = test_layer_gpu(typeindex, pd, weights, _opt, a, top_blob_count, d, b, func, flag);
  754. if (ret != 233 && (ret != 0 || CompareMat(b, d, epsilon) != 0))
  755. {
  756. fprintf(stderr, "test_layer_gpu failed with shape hint\n");
  757. return -1;
  758. }
  759. }
  760. #endif // NCNN_VULKAN
  761. return 0;
  762. }
  763. int test_layer_naive(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, ncnn::Mat& b, void (*func)(ncnn::Layer*), int flag)
  764. {
  765. ncnn::Layer* op = ncnn::create_layer_naive(typeindex);
  766. if (func)
  767. {
  768. (*func)((ncnn::Layer*)op);
  769. }
  770. op->load_param(pd);
  771. ncnn::ModelBinFromMatArray mb(weights.data());
  772. op->load_model(mb);
  773. ncnn::Option opt;
  774. opt.num_threads = 1;
  775. opt.lightmode = false;
  776. opt.use_packing_layout = false;
  777. opt.use_fp16_packed = false;
  778. opt.use_fp16_storage = false;
  779. opt.use_fp16_arithmetic = false;
  780. opt.use_shader_pack8 = false;
  781. opt.use_image_storage = false;
  782. opt.use_bf16_storage = false;
  783. opt.use_vulkan_compute = false;
  784. op->create_pipeline(opt);
  785. if (op->support_inplace)
  786. {
  787. b = a.clone();
  788. op->forward_inplace(b, opt);
  789. }
  790. else
  791. {
  792. op->forward(a, b, opt);
  793. }
  794. op->destroy_pipeline(opt);
  795. delete op;
  796. return 0;
  797. }
  798. int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, ncnn::Mat& c, const ncnn::Mat& top_shape, void (*func)(ncnn::Layer*), int flag)
  799. {
  800. ncnn::Layer* op = ncnn::create_layer_cpu(typeindex);
  801. if (!op->support_packing && _opt.use_packing_layout)
  802. {
  803. delete op;
  804. return 233;
  805. }
  806. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  807. {
  808. delete op;
  809. return 233;
  810. }
  811. if (func)
  812. {
  813. (*func)((ncnn::Layer*)op);
  814. }
  815. if (top_shape.dims)
  816. {
  817. op->bottom_shapes.resize(1);
  818. op->top_shapes.resize(1);
  819. op->bottom_shapes[0] = a;
  820. op->top_shapes[0] = top_shape;
  821. }
  822. op->load_param(pd);
  823. ncnn::ModelBinFromMatArray mb(weights.data());
  824. op->load_model(mb);
  825. ncnn::Option opt = _opt;
  826. opt.num_threads = 1;
  827. opt.use_vulkan_compute = false;
  828. op->create_pipeline(opt);
  829. if (!op->support_packing && _opt.use_packing_layout)
  830. {
  831. op->destroy_pipeline(opt);
  832. delete op;
  833. return 233;
  834. }
  835. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  836. {
  837. op->destroy_pipeline(opt);
  838. delete op;
  839. return 233;
  840. }
  841. ncnn::Mat a4;
  842. convert_to_optimal_layout(a, a4, opt, op, flag);
  843. if (op->support_inplace)
  844. {
  845. c = a4.clone();
  846. op->forward_inplace(c, opt);
  847. }
  848. else
  849. {
  850. op->forward(a4, c, opt);
  851. }
  852. convert_to_vanilla_layout(c, c, opt, op, flag);
  853. op->destroy_pipeline(opt);
  854. delete op;
  855. return 0;
  856. }
  #if NCNN_VULKAN
  // Runs the vulkan implementation of a single-input layer and downloads the
  // result into `d`.
  //
  // Returns 233 (treated by callers as "skipped") when:
  //   - `_opt.use_packing_layout` is off,
  //   - ncnn::create_layer_vulkan yields no layer,
  //   - the layer reports no vulkan support after load_param or after
  //     create_pipeline.
  // Returns 0 otherwise; the status codes of the individual layer and command
  // calls are not inspected.
  //
  // When `top_shape` is non-empty (dims != 0), `a` and `top_shape` are installed
  // as the layer's bottom/top shape hints before load_model. `func`, when
  // non-null, is invoked on the layer after its vkdev has been assigned.
  // `flag` is not read by this function.
  //
  // The blob and staging allocators are borrowed from the device and handed back
  // on every exit path taken after they were acquired.
  int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, ncnn::Mat& d, const ncnn::Mat& top_shape, void (*func)(ncnn::Layer*), int flag)
  {
      if (!_opt.use_packing_layout)
      {
          // pack1 test is useless for gpu
          return 233;
      }

      ncnn::Layer* op = ncnn::create_layer_vulkan(typeindex);
      if (!op)
      {
          return 233;
      }

      op->load_param(pd);

      if (!op->support_vulkan)
      {
          delete op;
          return 233;
      }

      ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

      op->vkdev = vkdev;

      if (func)
      {
          (*func)((ncnn::Layer*)op);
      }

      if (top_shape.dims)
      {
          op->bottom_shapes.resize(1);
          op->top_shapes.resize(1);
          op->bottom_shapes[0] = a;
          op->top_shapes[0] = top_shape;
      }

      ncnn::ModelBinFromMatArray mb(weights.data());

      op->load_model(mb);

      // weight allocators live only for this call
      ncnn::VkWeightAllocator g_weight_vkallocator(vkdev);
      ncnn::VkWeightStagingAllocator g_weight_staging_vkallocator(vkdev);

      // borrowed from the device; must be reclaimed before returning
      ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
      ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

      ncnn::Option opt = _opt;
      opt.num_threads = 1;
      opt.use_vulkan_compute = true;

  #if __APPLE__
      opt.use_image_storage = false;
  #endif

      opt.blob_vkallocator = blob_vkallocator;
      opt.workspace_vkallocator = blob_vkallocator;
      opt.staging_vkallocator = staging_vkallocator;

      // drop every feature the device does not report support for
      if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
      if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
      if (!vkdev->info.support_fp16_uniform()) opt.use_fp16_uniform = false;
      if (!vkdev->info.support_fp16_arithmetic()) opt.use_fp16_arithmetic = false;
      if (!vkdev->info.support_int8_packed()) opt.use_int8_packed = false;
      if (!vkdev->info.support_int8_storage()) opt.use_int8_storage = false;
      if (!vkdev->info.support_int8_uniform()) opt.use_int8_uniform = false;
      if (!vkdev->info.support_int8_arithmetic()) opt.use_int8_arithmetic = false;
      if (!vkdev->info.support_cooperative_matrix()) opt.use_cooperative_matrix = false;

      // FIXME fp16a may produce large error
      opt.use_fp16_arithmetic = false;

      op->create_pipeline(opt);

      if (!op->support_vulkan)
      {
          op->destroy_pipeline(opt);
          delete op;

          // this exit happens after the allocators were acquired, so give them
          // back exactly like the normal exit path does
          vkdev->reclaim_blob_allocator(blob_vkallocator);
          vkdev->reclaim_staging_allocator(staging_vkallocator);
          g_weight_vkallocator.clear();
          g_weight_staging_vkallocator.clear();

          return 233;
      }

      // upload weights with the dedicated weight allocators
      {
          ncnn::VkTransfer cmd(vkdev);

          ncnn::Option opt_upload = opt;
          opt_upload.blob_vkallocator = &g_weight_vkallocator;
          opt_upload.workspace_vkallocator = &g_weight_vkallocator;
          opt_upload.staging_vkallocator = &g_weight_staging_vkallocator;

          op->upload_model(cmd, opt_upload);

          cmd.submit_and_wait();
      }

      {
          // forward
          ncnn::VkCompute cmd(vkdev);

          if (op->support_image_storage && opt.use_image_storage)
          {
              // upload
              ncnn::VkImageMat a_gpu;
              cmd.record_upload(a, a_gpu, opt);

              ncnn::VkImageMat d_gpu;
              if (op->support_inplace)
              {
                  op->forward_inplace(a_gpu, cmd, opt);

                  d_gpu = a_gpu;
              }
              else
              {
                  op->forward(a_gpu, d_gpu, cmd, opt);
              }

              // download
              cmd.record_download(d_gpu, d, opt);
          }
          else
          {
              // upload
              ncnn::VkMat a_gpu;
              cmd.record_upload(a, a_gpu, opt);

              ncnn::VkMat d_gpu;
              if (op->support_inplace)
              {
                  op->forward_inplace(a_gpu, cmd, opt);

                  d_gpu = a_gpu;
              }
              else
              {
                  op->forward(a_gpu, d_gpu, cmd, opt);
              }

              // download
              cmd.record_download(d_gpu, d, opt);
          }

          cmd.submit_and_wait();
      }

      op->destroy_pipeline(opt);

      delete op;

      vkdev->reclaim_blob_allocator(blob_vkallocator);
      vkdev->reclaim_staging_allocator(staging_vkallocator);
      g_weight_vkallocator.clear();
      g_weight_staging_vkallocator.clear();

      return 0;
  }
  #endif // NCNN_VULKAN
  981. int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, const ncnn::Mat& top_shape, float epsilon, void (*func)(ncnn::Layer*), int flag)
  982. {
  983. // naive
  984. ncnn::Mat b;
  985. {
  986. int ret = test_layer_naive(typeindex, pd, weights, a, b, func, flag);
  987. if (ret != 233 && ret != 0)
  988. {
  989. fprintf(stderr, "test_layer_naive failed\n");
  990. return -1;
  991. }
  992. }
  993. // cpu
  994. {
  995. ncnn::Mat c;
  996. int ret = test_layer_cpu(typeindex, pd, weights, _opt, a, c, ncnn::Mat(), func, flag);
  997. if (ret != 233 && (ret != 0 || CompareMat(b, c, epsilon) != 0))
  998. {
  999. fprintf(stderr, "test_layer_cpu failed\n");
  1000. return -1;
  1001. }
  1002. }
  1003. // cpu shape hint
  1004. {
  1005. ncnn::Mat c;
  1006. int ret = test_layer_cpu(typeindex, pd, weights, _opt, a, c, b, func, flag);
  1007. if (ret != 233 && (ret != 0 || CompareMat(b, c, epsilon) != 0))
  1008. {
  1009. fprintf(stderr, "test_layer_cpu failed with shape hint\n");
  1010. return -1;
  1011. }
  1012. }
  1013. #if NCNN_VULKAN
  1014. // gpu
  1015. if (!(flag & TEST_LAYER_DISABLE_GPU_TESTING))
  1016. {
  1017. ncnn::Mat d;
  1018. int ret = test_layer_gpu(typeindex, pd, weights, _opt, a, d, ncnn::Mat(), func, flag);
  1019. if (ret != 233 && (ret != 0 || CompareMat(b, d, epsilon) != 0))
  1020. {
  1021. fprintf(stderr, "test_layer_gpu failed\n");
  1022. return -1;
  1023. }
  1024. }
  1025. // gpu shape hint
  1026. if (!(flag & TEST_LAYER_DISABLE_GPU_TESTING))
  1027. {
  1028. ncnn::Mat d;
  1029. int ret = test_layer_gpu(typeindex, pd, weights, _opt, a, d, b, func, flag);
  1030. if (ret != 233 && (ret != 0 || CompareMat(b, d, epsilon) != 0))
  1031. {
  1032. fprintf(stderr, "test_layer_gpu failed with shape hint\n");
  1033. return -1;
  1034. }
  1035. }
  1036. #endif // NCNN_VULKAN
  1037. return 0;
  1038. }
  1039. int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& opt, const std::vector<ncnn::Mat>& a, int top_blob_count, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1040. {
  1041. // fp16 representation
  1042. std::vector<ncnn::Mat> a_fp16;
  1043. if (opt.use_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  1044. {
  1045. a_fp16.resize(a.size());
  1046. for (size_t j = 0; j < a.size(); j++)
  1047. {
  1048. ncnn::Mat tmp;
  1049. ncnn::cast_float32_to_bfloat16(a[j], tmp, opt);
  1050. ncnn::cast_bfloat16_to_float32(tmp, a_fp16[j], opt);
  1051. }
  1052. }
  1053. else if ((opt.use_fp16_packed || opt.use_fp16_storage) && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  1054. {
  1055. a_fp16.resize(a.size());
  1056. for (size_t j = 0; j < a.size(); j++)
  1057. {
  1058. ncnn::Mat tmp;
  1059. ncnn::cast_float32_to_float16(a[j], tmp, opt);
  1060. ncnn::cast_float16_to_float32(tmp, a_fp16[j], opt);
  1061. }
  1062. }
  1063. else
  1064. {
  1065. a_fp16 = a;
  1066. }
  1067. std::vector<ncnn::Mat> weights_fp16;
  1068. float epsilon_fp16;
  1069. if (opt.use_bf16_storage)
  1070. {
  1071. weights_fp16.resize(weights.size());
  1072. for (size_t j = 0; j < weights.size(); j++)
  1073. {
  1074. if (weights[j].elembits() != 32)
  1075. {
  1076. weights_fp16[j] = weights[j];
  1077. continue;
  1078. }
  1079. ncnn::Mat tmp;
  1080. ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
  1081. ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
  1082. }
  1083. epsilon_fp16 = epsilon * 100; // 0.1
  1084. }
  1085. else if (opt.use_fp16_packed || opt.use_fp16_storage)
  1086. {
  1087. weights_fp16.resize(weights.size());
  1088. for (size_t j = 0; j < weights.size(); j++)
  1089. {
  1090. if (weights[j].elembits() != 32)
  1091. {
  1092. weights_fp16[j] = weights[j];
  1093. continue;
  1094. }
  1095. ncnn::Mat tmp;
  1096. ncnn::cast_float32_to_float16(weights[j], tmp, opt);
  1097. ncnn::cast_float16_to_float32(tmp, weights_fp16[j], opt);
  1098. }
  1099. epsilon_fp16 = epsilon * 100; // 0.1
  1100. }
  1101. else
  1102. {
  1103. weights_fp16 = weights;
  1104. epsilon_fp16 = epsilon;
  1105. }
  1106. if (opt.use_fp16_arithmetic)
  1107. {
  1108. epsilon_fp16 = epsilon * 1000; // 1.0
  1109. }
  1110. std::vector<ncnn::Mat> top_shapes;
  1111. int ret = test_layer(ncnn::layer_to_index(layer_type), pd, weights_fp16, opt, a_fp16, top_blob_count, top_shapes, epsilon_fp16, func, flag);
  1112. if (ret != 0)
  1113. {
  1114. fprintf(stderr, "test_layer %s failed use_packing_layout=%d use_fp16_packed=%d use_fp16_storage=%d use_fp16_arithmetic=%d use_shader_pack8=%d use_bf16_storage=%d use_image_storage=%d use_sgemm_convolution=%d use_winograd_convolution=%d\n", layer_type, opt.use_packing_layout, opt.use_fp16_packed, opt.use_fp16_storage, opt.use_fp16_arithmetic, opt.use_shader_pack8, opt.use_bf16_storage, opt.use_image_storage, opt.use_sgemm_convolution, opt.use_winograd_convolution);
  1115. return ret;
  1116. }
  1117. return 0;
  1118. }
  1119. int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& opt, const ncnn::Mat& a, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1120. {
  1121. // fp16 representation
  1122. ncnn::Mat a_fp16;
  1123. if (opt.use_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  1124. {
  1125. ncnn::Mat tmp;
  1126. ncnn::cast_float32_to_bfloat16(a, tmp, opt);
  1127. ncnn::cast_bfloat16_to_float32(tmp, a_fp16, opt);
  1128. }
  1129. else if ((opt.use_fp16_packed || opt.use_fp16_storage) && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  1130. {
  1131. ncnn::Mat tmp;
  1132. ncnn::cast_float32_to_float16(a, tmp, opt);
  1133. ncnn::cast_float16_to_float32(tmp, a_fp16, opt);
  1134. }
  1135. else
  1136. {
  1137. a_fp16 = a;
  1138. }
  1139. std::vector<ncnn::Mat> weights_fp16;
  1140. float epsilon_fp16;
  1141. if (opt.use_bf16_storage)
  1142. {
  1143. weights_fp16.resize(weights.size());
  1144. for (size_t j = 0; j < weights.size(); j++)
  1145. {
  1146. if (weights[j].elembits() != 32)
  1147. {
  1148. weights_fp16[j] = weights[j];
  1149. continue;
  1150. }
  1151. ncnn::Mat tmp;
  1152. ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
  1153. ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
  1154. }
  1155. epsilon_fp16 = epsilon * 100; // 0.1
  1156. }
  1157. else if (opt.use_fp16_packed || opt.use_fp16_storage)
  1158. {
  1159. weights_fp16.resize(weights.size());
  1160. for (size_t j = 0; j < weights.size(); j++)
  1161. {
  1162. if (weights[j].elembits() != 32)
  1163. {
  1164. weights_fp16[j] = weights[j];
  1165. continue;
  1166. }
  1167. ncnn::Mat tmp;
  1168. ncnn::cast_float32_to_float16(weights[j], tmp, opt);
  1169. ncnn::cast_float16_to_float32(tmp, weights_fp16[j], opt);
  1170. }
  1171. epsilon_fp16 = epsilon * 100; // 0.1
  1172. }
  1173. else
  1174. {
  1175. weights_fp16 = weights;
  1176. epsilon_fp16 = epsilon;
  1177. }
  1178. if (opt.use_fp16_arithmetic)
  1179. {
  1180. epsilon_fp16 = epsilon * 1000; // 1.0
  1181. }
  1182. ncnn::Mat top_shape;
  1183. int ret = test_layer(ncnn::layer_to_index(layer_type), pd, weights_fp16, opt, a_fp16, top_shape, epsilon_fp16, func, flag);
  1184. if (ret != 0)
  1185. {
  1186. fprintf(stderr, "test_layer %s failed use_packing_layout=%d use_fp16_packed=%d use_fp16_storage=%d use_fp16_arithmetic=%d use_shader_pack8=%d use_bf16_storage=%d use_image_storage=%d use_sgemm_convolution=%d use_winograd_convolution=%d\n", layer_type, opt.use_packing_layout, opt.use_fp16_packed, opt.use_fp16_storage, opt.use_fp16_arithmetic, opt.use_shader_pack8, opt.use_bf16_storage, opt.use_image_storage, opt.use_sgemm_convolution, opt.use_winograd_convolution);
  1187. return ret;
  1188. }
  1189. return 0;
  1190. }
  1191. int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1192. {
  1193. // pack fp16p fp16s fp16a bf16s shader8 image
  1194. const int options[][7] = {
  1195. {0, 0, 0, 0, 0, 0, 0},
  1196. {0, 0, 1, 0, 0, 0, 0},
  1197. {0, 0, 1, 1, 1, 0, 0},
  1198. {1, 0, 0, 0, 0, 0, 0},
  1199. {1, 1, 0, 0, 1, 0, 0},
  1200. {1, 0, 1, 0, 0, 1, 0},
  1201. {1, 1, 1, 1, 0, 0, 0},
  1202. {1, 1, 1, 1, 1, 1, 1},
  1203. };
  1204. const int opt_count = sizeof(options) / sizeof(options[0]);
  1205. for (int i = 0; i < opt_count; i++)
  1206. {
  1207. ncnn::Option opt;
  1208. opt.num_threads = 1;
  1209. opt.use_packing_layout = options[i][0];
  1210. opt.use_fp16_packed = options[i][1];
  1211. opt.use_fp16_storage = options[i][2];
  1212. opt.use_fp16_arithmetic = options[i][3];
  1213. opt.use_bf16_storage = options[i][4];
  1214. opt.use_shader_pack8 = options[i][5];
  1215. opt.use_image_storage = options[i][6];
  1216. int ret = test_layer_opt(layer_type, pd, weights, opt, a, top_blob_count, epsilon, func, flag);
  1217. if (ret != 0)
  1218. return ret;
  1219. }
  1220. return 0;
  1221. }
  1222. int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1223. {
  1224. // pack fp16p fp16s fp16a bf16s shader8 image
  1225. const int options[][7] = {
  1226. {0, 0, 0, 0, 0, 0, 0},
  1227. {0, 0, 1, 0, 0, 0, 0},
  1228. {0, 0, 1, 1, 1, 0, 0},
  1229. {1, 0, 0, 0, 0, 0, 0},
  1230. {1, 1, 0, 0, 1, 0, 0},
  1231. {1, 0, 1, 0, 0, 1, 0},
  1232. {1, 1, 1, 1, 0, 0, 0},
  1233. {1, 1, 1, 1, 1, 1, 1},
  1234. };
  1235. const int opt_count = sizeof(options) / sizeof(options[0]);
  1236. for (int i = 0; i < opt_count; i++)
  1237. {
  1238. ncnn::Option opt;
  1239. opt.num_threads = 1;
  1240. opt.use_packing_layout = options[i][0];
  1241. opt.use_fp16_packed = options[i][1];
  1242. opt.use_fp16_storage = options[i][2];
  1243. opt.use_fp16_arithmetic = options[i][3];
  1244. opt.use_bf16_storage = options[i][4];
  1245. opt.use_shader_pack8 = options[i][5];
  1246. opt.use_image_storage = options[i][6];
  1247. int ret = test_layer_opt(layer_type, pd, weights, opt, a, epsilon, func, flag);
  1248. if (ret != 0)
  1249. return ret;
  1250. }
  1251. return 0;
  1252. }
  1253. class TestOOMAllocator : public ncnn::UnlockedPoolAllocator
  1254. {
  1255. public:
  1256. TestOOMAllocator();
  1257. virtual void* fastMalloc(size_t size);
  1258. virtual void fastFree(void* ptr);
  1259. ncnn::Mutex lock;
  1260. int counter;
  1261. int failid;
  1262. };
  1263. TestOOMAllocator::TestOOMAllocator()
  1264. {
  1265. counter = 0;
  1266. failid = INT_MAX;
  1267. }
  1268. void* TestOOMAllocator::fastMalloc(size_t size)
  1269. {
  1270. lock.lock();
  1271. void* ptr;
  1272. if (counter == failid)
  1273. {
  1274. ptr = 0;
  1275. }
  1276. else
  1277. {
  1278. ptr = ncnn::UnlockedPoolAllocator::fastMalloc(size);
  1279. }
  1280. counter++;
  1281. lock.unlock();
  1282. return ptr;
  1283. }
  1284. void TestOOMAllocator::fastFree(void* ptr)
  1285. {
  1286. lock.lock();
  1287. ncnn::UnlockedPoolAllocator::fastFree(ptr);
  1288. lock.unlock();
  1289. }
  1290. int test_layer_oom_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, int flag)
  1291. {
  1292. int typeindex = ncnn::layer_to_index(layer_type);
  1293. if (typeindex == -1)
  1294. return -1;
  1295. ncnn::Layer* op = ncnn::create_layer_cpu(typeindex);
  1296. if (!op->support_packing && _opt.use_packing_layout)
  1297. {
  1298. delete op;
  1299. return 233;
  1300. }
  1301. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  1302. {
  1303. delete op;
  1304. return 233;
  1305. }
  1306. op->load_param(pd);
  1307. if (op->one_blob_only && a.size() != 1)
  1308. {
  1309. fprintf(stderr, "layer with one_blob_only but consume multiple inputs\n");
  1310. delete op;
  1311. return -1;
  1312. }
  1313. ncnn::ModelBinFromMatArray mb(weights.data());
  1314. op->load_model(mb);
  1315. ncnn::Option opt = _opt;
  1316. opt.num_threads = 1;
  1317. opt.use_vulkan_compute = false;
  1318. op->create_pipeline(opt);
  1319. if (!op->support_packing && _opt.use_packing_layout)
  1320. {
  1321. op->destroy_pipeline(opt);
  1322. delete op;
  1323. return 233;
  1324. }
  1325. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  1326. {
  1327. op->destroy_pipeline(opt);
  1328. delete op;
  1329. return 233;
  1330. }
  1331. std::vector<ncnn::Mat> a4(a.size());
  1332. for (size_t i = 0; i < a4.size(); i++)
  1333. {
  1334. convert_to_optimal_layout(a[i], a4[i], opt, op, flag);
  1335. }
  1336. TestOOMAllocator test_oom_allocator;
  1337. opt.blob_allocator = &test_oom_allocator;
  1338. opt.workspace_allocator = &test_oom_allocator;
  1339. std::vector<ncnn::Mat> c;
  1340. c.resize(top_blob_count);
  1341. if (op->support_inplace)
  1342. {
  1343. for (size_t i = 0; i < a4.size(); i++)
  1344. {
  1345. c[i] = a4[i].clone();
  1346. }
  1347. op->forward_inplace(c, opt);
  1348. }
  1349. else
  1350. {
  1351. op->forward(a4, c, opt);
  1352. }
  1353. for (int i = 0; i < top_blob_count; i++)
  1354. {
  1355. c[i].release();
  1356. }
  1357. const int alloc_count = test_oom_allocator.counter;
  1358. for (int i = 0; i < alloc_count; i++)
  1359. {
  1360. test_oom_allocator.counter = 0;
  1361. test_oom_allocator.failid = i;
  1362. int ret = 0;
  1363. if (op->support_inplace)
  1364. {
  1365. for (size_t i = 0; i < a4.size(); i++)
  1366. {
  1367. c[i] = a4[i].clone();
  1368. }
  1369. ret = op->forward_inplace(c, opt);
  1370. }
  1371. else
  1372. {
  1373. ret = op->forward(a4, c, opt);
  1374. }
  1375. for (int i = 0; i < top_blob_count; i++)
  1376. {
  1377. c[i].release();
  1378. }
  1379. if (ret != -100)
  1380. {
  1381. fprintf(stderr, "oom not catched %d/%d\n", i, alloc_count);
  1382. op->destroy_pipeline(opt);
  1383. delete op;
  1384. return -1;
  1385. }
  1386. }
  1387. op->destroy_pipeline(opt);
  1388. delete op;
  1389. return 0;
  1390. }
  1391. int test_layer_oom_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, int flag)
  1392. {
  1393. int typeindex = ncnn::layer_to_index(layer_type);
  1394. if (typeindex == -1)
  1395. return -1;
  1396. ncnn::Layer* op = ncnn::create_layer_cpu(typeindex);
  1397. if (!op->support_packing && _opt.use_packing_layout)
  1398. {
  1399. delete op;
  1400. return 233;
  1401. }
  1402. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  1403. {
  1404. delete op;
  1405. return 233;
  1406. }
  1407. op->load_param(pd);
  1408. ncnn::ModelBinFromMatArray mb(weights.data());
  1409. op->load_model(mb);
  1410. ncnn::Option opt = _opt;
  1411. opt.num_threads = 1;
  1412. opt.use_vulkan_compute = false;
  1413. op->create_pipeline(opt);
  1414. if (!op->support_packing && _opt.use_packing_layout)
  1415. {
  1416. op->destroy_pipeline(opt);
  1417. delete op;
  1418. return 233;
  1419. }
  1420. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  1421. {
  1422. op->destroy_pipeline(opt);
  1423. delete op;
  1424. return 233;
  1425. }
  1426. ncnn::Mat a4;
  1427. convert_to_optimal_layout(a, a4, opt, op, flag);
  1428. TestOOMAllocator test_oom_allocator;
  1429. opt.blob_allocator = &test_oom_allocator;
  1430. opt.workspace_allocator = &test_oom_allocator;
  1431. ncnn::Mat c;
  1432. if (op->support_inplace)
  1433. {
  1434. c = a4.clone();
  1435. op->forward_inplace(c, opt);
  1436. }
  1437. else
  1438. {
  1439. op->forward(a4, c, opt);
  1440. }
  1441. c.release();
  1442. const int alloc_count = test_oom_allocator.counter;
  1443. for (int i = 0; i < alloc_count; i++)
  1444. {
  1445. test_oom_allocator.counter = 0;
  1446. test_oom_allocator.failid = i;
  1447. int ret = 0;
  1448. if (op->support_inplace)
  1449. {
  1450. c = a4.clone();
  1451. ret = op->forward_inplace(c, opt);
  1452. }
  1453. else
  1454. {
  1455. ret = op->forward(a4, c, opt);
  1456. }
  1457. c.release();
  1458. if (ret != -100)
  1459. {
  1460. fprintf(stderr, "oom not catched %d/%d\n", i, alloc_count);
  1461. op->destroy_pipeline(opt);
  1462. delete op;
  1463. return -1;
  1464. }
  1465. }
  1466. op->destroy_pipeline(opt);
  1467. delete op;
  1468. return 0;
  1469. }
  1470. int test_layer_oom(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count, int flag)
  1471. {
  1472. // pack fp16p fp16s fp16a bf16s shader8 image
  1473. const int options[][7] = {
  1474. {0, 0, 0, 0, 0, 0, 0},
  1475. {0, 0, 1, 0, 0, 0, 0},
  1476. {0, 0, 1, 1, 1, 0, 0},
  1477. {1, 0, 0, 0, 0, 0, 0},
  1478. {1, 1, 0, 0, 1, 0, 0},
  1479. {1, 0, 1, 0, 0, 1, 0},
  1480. {1, 1, 1, 1, 0, 0, 0},
  1481. {1, 1, 1, 1, 1, 1, 1},
  1482. };
  1483. const int opt_count = sizeof(options) / sizeof(options[0]);
  1484. for (int i = 0; i < opt_count; i++)
  1485. {
  1486. ncnn::Option opt;
  1487. opt.num_threads = 1;
  1488. opt.use_packing_layout = options[i][0];
  1489. opt.use_fp16_packed = options[i][1];
  1490. opt.use_fp16_storage = options[i][2];
  1491. opt.use_fp16_arithmetic = options[i][3];
  1492. opt.use_bf16_storage = options[i][4];
  1493. opt.use_shader_pack8 = options[i][5];
  1494. opt.use_image_storage = options[i][6];
  1495. int ret = test_layer_oom_opt(layer_type, pd, weights, opt, a, top_blob_count, flag);
  1496. if (ret != 233 && ret != 0)
  1497. return ret;
  1498. }
  1499. return 0;
  1500. }
  1501. int test_layer_oom(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, int flag)
  1502. {
  1503. // pack fp16p fp16s fp16a bf16s shader8 image
  1504. const int options[][7] = {
  1505. {0, 0, 0, 0, 0, 0, 0},
  1506. {0, 0, 1, 0, 0, 0, 0},
  1507. {0, 0, 1, 1, 1, 0, 0},
  1508. {1, 0, 0, 0, 0, 0, 0},
  1509. {1, 1, 0, 0, 1, 0, 0},
  1510. {1, 0, 1, 0, 0, 1, 0},
  1511. {1, 1, 1, 1, 0, 0, 0},
  1512. {1, 1, 1, 1, 1, 1, 1},
  1513. };
  1514. const int opt_count = sizeof(options) / sizeof(options[0]);
  1515. for (int i = 0; i < opt_count; i++)
  1516. {
  1517. ncnn::Option opt;
  1518. opt.num_threads = 1;
  1519. opt.use_packing_layout = options[i][0];
  1520. opt.use_fp16_packed = options[i][1];
  1521. opt.use_fp16_storage = options[i][2];
  1522. opt.use_fp16_arithmetic = options[i][3];
  1523. opt.use_bf16_storage = options[i][4];
  1524. opt.use_shader_pack8 = options[i][5];
  1525. opt.use_image_storage = options[i][6];
  1526. int ret = test_layer_oom_opt(layer_type, pd, weights, opt, a, flag);
  1527. if (ret != 233 && ret != 0)
  1528. return ret;
  1529. }
  1530. return 0;
  1531. }