You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

testutil.cpp 43 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "testutil.h"
  15. #include "cpu.h"
  16. #include "layer.h"
  17. #include "mat.h"
  18. #include "prng.h"
  19. #include <stdio.h>
  20. #include <stdlib.h>
  21. #if NCNN_VULKAN
  22. #include "command.h"
  23. #include "gpu.h"
  24. #endif // NCNN_VULKAN
  25. static struct prng_rand_t g_prng_rand_state;
  26. void SRAND(int seed)
  27. {
  28. prng_srand(seed, &g_prng_rand_state);
  29. }
  30. uint64_t RAND()
  31. {
  32. return prng_rand(&g_prng_rand_state);
  33. }
  34. float RandomFloat(float a, float b)
  35. {
  36. float random = ((float)RAND()) / (float)uint64_t(-1); //RAND_MAX;
  37. float diff = b - a;
  38. float r = random * diff;
  39. float v = a + r;
  40. // generate denormal as zero
  41. if (v < 0.0001 && v > -0.0001)
  42. v = 0.f;
  43. return v;
  44. }
  45. int RandomInt(int a, int b)
  46. {
  47. float random = ((float)RAND()) / (float)uint64_t(-1); //RAND_MAX;
  48. int diff = b - a;
  49. float r = random * diff;
  50. return a + (int)r;
  51. }
  52. signed char RandomS8()
  53. {
  54. return (signed char)RandomInt(-127, 127);
  55. }
  56. void Randomize(ncnn::Mat& m, float a, float b)
  57. {
  58. for (size_t i = 0; i < m.total(); i++)
  59. {
  60. m[i] = RandomFloat(a, b);
  61. }
  62. }
  63. void RandomizeInt(ncnn::Mat& m, int a, int b)
  64. {
  65. for (size_t i = 0; i < m.total(); i++)
  66. {
  67. ((int*)m)[i] = RandomInt(a, b);
  68. }
  69. }
  70. void RandomizeS8(ncnn::Mat& m)
  71. {
  72. for (size_t i = 0; i < m.total(); i++)
  73. {
  74. ((signed char*)m)[i] = RandomS8();
  75. }
  76. }
  77. ncnn::Mat RandomMat(int w, float a, float b)
  78. {
  79. ncnn::Mat m(w);
  80. Randomize(m, a, b);
  81. return m;
  82. }
  83. ncnn::Mat RandomMat(int w, int h, float a, float b)
  84. {
  85. ncnn::Mat m(w, h);
  86. Randomize(m, a, b);
  87. return m;
  88. }
  89. ncnn::Mat RandomMat(int w, int h, int c, float a, float b)
  90. {
  91. ncnn::Mat m(w, h, c);
  92. Randomize(m, a, b);
  93. return m;
  94. }
  95. ncnn::Mat RandomMat(int w, int h, int d, int c, float a, float b)
  96. {
  97. ncnn::Mat m(w, h, d, c);
  98. Randomize(m, a, b);
  99. return m;
  100. }
  101. ncnn::Mat RandomIntMat(int w)
  102. {
  103. ncnn::Mat m(w);
  104. RandomizeInt(m);
  105. return m;
  106. }
  107. ncnn::Mat RandomIntMat(int w, int h)
  108. {
  109. ncnn::Mat m(w, h);
  110. RandomizeInt(m);
  111. return m;
  112. }
  113. ncnn::Mat RandomIntMat(int w, int h, int c)
  114. {
  115. ncnn::Mat m(w, h, c);
  116. RandomizeInt(m);
  117. return m;
  118. }
  119. ncnn::Mat RandomIntMat(int w, int h, int d, int c)
  120. {
  121. ncnn::Mat m(w, h, d, c);
  122. RandomizeInt(m);
  123. return m;
  124. }
  125. ncnn::Mat RandomS8Mat(int w)
  126. {
  127. ncnn::Mat m(w, (size_t)1u);
  128. RandomizeS8(m);
  129. return m;
  130. }
  131. ncnn::Mat RandomS8Mat(int w, int h)
  132. {
  133. ncnn::Mat m(w, h, (size_t)1u);
  134. RandomizeS8(m);
  135. return m;
  136. }
  137. ncnn::Mat RandomS8Mat(int w, int h, int c)
  138. {
  139. ncnn::Mat m(w, h, c, (size_t)1u);
  140. RandomizeS8(m);
  141. return m;
  142. }
  143. ncnn::Mat RandomS8Mat(int w, int h, int d, int c)
  144. {
  145. ncnn::Mat m(w, h, d, c, (size_t)1u);
  146. RandomizeS8(m);
  147. return m;
  148. }
  149. ncnn::Mat scales_mat(const ncnn::Mat& mat, int m, int k, int ldx)
  150. {
  151. ncnn::Mat weight_scales(m);
  152. for (int i = 0; i < m; ++i)
  153. {
  154. float min = mat[0], _max = mat[0];
  155. const float* ptr = (const float*)(mat.data) + i * ldx;
  156. for (int j = 0; j < k; ++j)
  157. {
  158. if (min > ptr[j])
  159. {
  160. min = ptr[j];
  161. }
  162. if (_max < ptr[j])
  163. {
  164. _max = ptr[j];
  165. }
  166. }
  167. const float abs_min = abs(min), abs_max = abs(_max);
  168. weight_scales[i] = 127.f / (abs_min > abs_max ? abs_min : abs_max);
  169. }
  170. return weight_scales;
  171. }
  // Tolerant float comparison used by the mat comparison helpers.
  // Returns true when a and b are exactly equal, when |a - b| <= epsilon, or
  // when |a - b| < epsilon * max(|a|, |b|) (relative error). Any comparison
  // involving NaN is false, so NaN never matches anything.
  bool NearlyEqual(float a, float b, float epsilon)
  {
      // also covers equal infinities, where a - b would be NaN
      if (a == b)
          return true;

      float diff = (float)fabs(a - b);
      if (diff <= epsilon)
          return true;

      // relative error
      return diff < epsilon * std::max(fabs(a), fabs(b));
  }
  182. int Compare(const ncnn::Mat& a, const ncnn::Mat& b, float epsilon)
  183. {
  184. #define CHECK_MEMBER(m) \
  185. if (a.m != b.m) \
  186. { \
  187. fprintf(stderr, #m " not match expect %d but got %d\n", (int)a.m, (int)b.m); \
  188. return -1; \
  189. }
  190. CHECK_MEMBER(dims)
  191. CHECK_MEMBER(w)
  192. CHECK_MEMBER(h)
  193. CHECK_MEMBER(d)
  194. CHECK_MEMBER(c)
  195. CHECK_MEMBER(elemsize)
  196. CHECK_MEMBER(elempack)
  197. #undef CHECK_MEMBER
  198. for (int q = 0; q < a.c; q++)
  199. {
  200. const ncnn::Mat ma = a.channel(q);
  201. const ncnn::Mat mb = b.channel(q);
  202. for (int z = 0; z < a.d; z++)
  203. {
  204. const ncnn::Mat da = ma.depth(z);
  205. const ncnn::Mat db = mb.depth(z);
  206. for (int i = 0; i < a.h; i++)
  207. {
  208. const float* pa = da.row(i);
  209. const float* pb = db.row(i);
  210. for (int j = 0; j < a.w; j++)
  211. {
  212. if (!NearlyEqual(pa[j], pb[j], epsilon))
  213. {
  214. fprintf(stderr, "value not match at c:%d d:%d h:%d w:%d expect %f but got %f\n", q, z, i, j, pa[j], pb[j]);
  215. return -1;
  216. }
  217. }
  218. }
  219. }
  220. }
  221. return 0;
  222. }
  223. int CompareMat(const ncnn::Mat& a, const ncnn::Mat& b, float epsilon)
  224. {
  225. ncnn::Option opt;
  226. opt.num_threads = 1;
  227. if (a.elempack != 1)
  228. {
  229. ncnn::Mat a1;
  230. ncnn::convert_packing(a, a1, 1, opt);
  231. return CompareMat(a1, b, epsilon);
  232. }
  233. if (b.elempack != 1)
  234. {
  235. ncnn::Mat b1;
  236. ncnn::convert_packing(b, b1, 1, opt);
  237. return CompareMat(a, b1, epsilon);
  238. }
  239. if (a.elemsize == 2u)
  240. {
  241. ncnn::Mat a32;
  242. cast_float16_to_float32(a, a32, opt);
  243. return CompareMat(a32, b, epsilon);
  244. }
  245. if (a.elemsize == 1u)
  246. {
  247. ncnn::Mat a32;
  248. cast_int8_to_float32(a, a32, opt);
  249. return CompareMat(a32, b, epsilon);
  250. }
  251. if (b.elemsize == 2u)
  252. {
  253. ncnn::Mat b32;
  254. cast_float16_to_float32(b, b32, opt);
  255. return CompareMat(a, b32, epsilon);
  256. }
  257. if (b.elemsize == 1u)
  258. {
  259. ncnn::Mat b32;
  260. cast_int8_to_float32(b, b32, opt);
  261. return CompareMat(a, b32, epsilon);
  262. }
  263. return Compare(a, b, epsilon);
  264. }
  265. int CompareMat(const std::vector<ncnn::Mat>& a, const std::vector<ncnn::Mat>& b, float epsilon)
  266. {
  267. if (a.size() != b.size())
  268. {
  269. fprintf(stderr, "output blob count not match %zu %zu\n", a.size(), b.size());
  270. return -1;
  271. }
  272. for (size_t i = 0; i < a.size(); i++)
  273. {
  274. if (CompareMat(a[i], b[i], epsilon))
  275. {
  276. fprintf(stderr, "output blob %zu not match\n", i);
  277. return -1;
  278. }
  279. }
  280. return 0;
  281. }
  282. int test_layer_naive(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& b, void (*func)(ncnn::Layer*), int flag)
  283. {
  284. ncnn::Layer* op = ncnn::create_layer_naive(typeindex);
  285. if (func)
  286. {
  287. (*func)((ncnn::Layer*)op);
  288. }
  289. op->load_param(pd);
  290. if (op->one_blob_only && a.size() != 1)
  291. {
  292. fprintf(stderr, "layer with one_blob_only but consume multiple inputs\n");
  293. delete op;
  294. return -1;
  295. }
  296. ncnn::ModelBinFromMatArray mb(weights.data());
  297. op->load_model(mb);
  298. ncnn::Option opt;
  299. opt.num_threads = 1;
  300. opt.lightmode = false;
  301. opt.use_packing_layout = false;
  302. opt.use_fp16_packed = false;
  303. opt.use_fp16_storage = false;
  304. opt.use_fp16_arithmetic = false;
  305. opt.use_shader_pack8 = false;
  306. opt.use_image_storage = false;
  307. opt.use_bf16_storage = false;
  308. opt.use_vulkan_compute = false;
  309. op->create_pipeline(opt);
  310. b.resize(top_blob_count);
  311. if (op->support_inplace)
  312. {
  313. for (size_t i = 0; i < a.size(); i++)
  314. {
  315. b[i] = a[i].clone();
  316. }
  317. op->forward_inplace(b, opt);
  318. }
  319. else
  320. {
  321. op->forward(a, b, opt);
  322. }
  323. op->destroy_pipeline(opt);
  324. delete op;
  325. return 0;
  326. }
  327. int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& c, const std::vector<ncnn::Mat>& top_shapes, void (*func)(ncnn::Layer*), int flag)
  328. {
  329. ncnn::Layer* op = ncnn::create_layer_cpu(typeindex);
  330. if (!op->support_packing && _opt.use_packing_layout)
  331. {
  332. delete op;
  333. return 233;
  334. }
  335. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  336. {
  337. delete op;
  338. return 233;
  339. }
  340. if (func)
  341. {
  342. (*func)((ncnn::Layer*)op);
  343. }
  344. if (!top_shapes.empty())
  345. {
  346. op->bottom_shapes = a;
  347. op->top_shapes = top_shapes;
  348. }
  349. op->load_param(pd);
  350. if (op->one_blob_only && a.size() != 1)
  351. {
  352. fprintf(stderr, "layer with one_blob_only but consume multiple inputs\n");
  353. delete op;
  354. return -1;
  355. }
  356. ncnn::ModelBinFromMatArray mb(weights.data());
  357. op->load_model(mb);
  358. ncnn::Option opt = _opt;
  359. opt.num_threads = 1;
  360. opt.use_vulkan_compute = false;
  361. op->create_pipeline(opt);
  362. if (!op->support_packing && _opt.use_packing_layout)
  363. {
  364. op->destroy_pipeline(opt);
  365. delete op;
  366. return 233;
  367. }
  368. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  369. {
  370. op->destroy_pipeline(opt);
  371. delete op;
  372. return 233;
  373. }
  374. std::vector<ncnn::Mat> a4(a.size());
  375. for (size_t i = 0; i < a4.size(); i++)
  376. {
  377. // clang-format off
  378. // *INDENT-OFF*
  379. #if NCNN_ARM82
  380. if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  381. {
  382. ncnn::cast_float32_to_float16(a[i], a4[i], opt);
  383. }
  384. else
  385. #endif // NCNN_ARM82
  386. #if NCNN_RVV
  387. if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  388. {
  389. ncnn::cast_float32_to_float16(a[i], a4[i], opt);
  390. }
  391. else
  392. #endif // NCNN_RVV
  393. #if NCNN_BF16
  394. if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  395. {
  396. ncnn::cast_float32_to_bfloat16(a[i], a4[i], opt);
  397. }
  398. else
  399. #endif // NCNN_BF16
  400. if (opt.use_fp16_storage && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  401. {
  402. ncnn::cast_float32_to_float16(a[i], a4[i], opt);
  403. }
  404. else
  405. {
  406. a4[i] = a[i];
  407. }
  408. // *INDENT-ON*
  409. // clang-format on
  410. if (opt.use_packing_layout && op->support_packing && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_PACKING))
  411. {
  412. // resolve dst_elempack
  413. int dims = a4[i].dims;
  414. int elemcount = 0;
  415. if (dims == 1) elemcount = a4[i].elempack * a4[i].w;
  416. if (dims == 2) elemcount = a4[i].elempack * a4[i].h;
  417. if (dims == 3 || dims == 4) elemcount = a4[i].elempack * a4[i].c;
  418. int elembits = a4[i].elembits();
  419. int dst_elempack = 1;
  420. if (elembits == 32)
  421. {
  422. #if NCNN_AVX512
  423. if (elemcount % 16 == 0 && ncnn::cpu_support_x86_avx512())
  424. dst_elempack = 16;
  425. else if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
  426. dst_elempack = 8;
  427. else if (elemcount % 4 == 0)
  428. dst_elempack = 4;
  429. #elif NCNN_AVX
  430. if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
  431. dst_elempack = 8;
  432. else if (elemcount % 4 == 0)
  433. dst_elempack = 4;
  434. #elif NCNN_RVV
  435. const int packn = ncnn::cpu_riscv_vlenb() / (elembits / 8);
  436. if (elemcount % packn == 0)
  437. dst_elempack = packn;
  438. #else
  439. if (elemcount % 4 == 0)
  440. dst_elempack = 4;
  441. #endif
  442. }
  443. if (elembits == 16)
  444. {
  445. #if NCNN_ARM82
  446. if (elemcount % 8 == 0 && ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
  447. dst_elempack = 8;
  448. else if (elemcount % 4 == 0)
  449. dst_elempack = 4;
  450. #elif NCNN_RVV
  451. const int packn = ncnn::cpu_riscv_vlenb() / 2;
  452. if (elemcount % packn == 0)
  453. dst_elempack = packn;
  454. #else
  455. if (elemcount % 4 == 0)
  456. dst_elempack = 4;
  457. #endif
  458. }
  459. if (elembits == 8)
  460. {
  461. #if NCNN_RVV
  462. const int packn = ncnn::cpu_riscv_vlenb() / 1;
  463. if (elemcount % packn == 0)
  464. dst_elempack = packn;
  465. #else
  466. if (elemcount % 8 == 0)
  467. dst_elempack = 8;
  468. #endif
  469. }
  470. if (flag & TEST_LAYER_ENABLE_FORCE_INPUT_PACK8)
  471. dst_elempack = 8;
  472. ncnn::Mat a4_packed;
  473. ncnn::convert_packing(a4[i], a4_packed, dst_elempack, opt);
  474. a4[i] = a4_packed;
  475. }
  476. }
  477. c.resize(top_blob_count);
  478. if (op->support_inplace)
  479. {
  480. for (size_t i = 0; i < a4.size(); i++)
  481. {
  482. c[i] = a4[i].clone();
  483. }
  484. op->forward_inplace(c, opt);
  485. }
  486. else
  487. {
  488. op->forward(a4, c, opt);
  489. }
  490. for (size_t i = 0; i < c.size(); i++)
  491. {
  492. // clang-format off
  493. // *INDENT-OFF*
  494. #if NCNN_ARM82
  495. if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c[i].elembits() == 16)
  496. {
  497. ncnn::Mat c_fp32;
  498. ncnn::cast_float16_to_float32(c[i], c_fp32, opt);
  499. c[i] = c_fp32;
  500. }
  501. else
  502. #endif // NCNN_ARM82
  503. #if NCNN_RVV
  504. if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c[i].elembits() == 16)
  505. {
  506. ncnn::Mat c_fp32;
  507. ncnn::cast_float16_to_float32(c[i], c_fp32, opt);
  508. c[i] = c_fp32;
  509. }
  510. else
  511. #endif // NCNN_RVV
  512. #if NCNN_BF16
  513. if (opt.use_bf16_storage && op->support_bf16_storage && c[i].elembits() == 16)
  514. {
  515. ncnn::Mat c_fp32;
  516. ncnn::cast_bfloat16_to_float32(c[i], c_fp32, opt);
  517. c[i] = c_fp32;
  518. }
  519. else
  520. #endif // NCNN_BF16
  521. if (opt.use_fp16_storage && op->support_fp16_storage && c[i].elembits() == 16)
  522. {
  523. ncnn::Mat c_fp32;
  524. ncnn::cast_float16_to_float32(c[i], c_fp32, opt);
  525. c[i] = c_fp32;
  526. }
  527. // *INDENT-ON*
  528. // clang-format on
  529. }
  530. op->destroy_pipeline(opt);
  531. delete op;
  532. return 0;
  533. }
  #if NCNN_VULKAN
  // Run the Vulkan implementation of a layer on several inputs.
  //
  //   typeindex       layer type passed to ncnn::create_layer_vulkan
  //   pd, weights     layer parameters and weight blobs
  //   _opt            requested options; a copy with num_threads = 1, vulkan
  //                   enabled and fp16 arithmetic disabled is used for the run
  //   a               input blobs, uploaded as-is
  //   top_blob_count  number of output blobs; d is resized to this
  //   d               receives the downloaded outputs
  //   top_shapes      optional shape hints; when non-empty the layer gets
  //                   bottom_shapes = a and top_shapes = top_shapes
  //   func            optional hook invoked on the layer after load_param
  //   flag            accepted for signature parity; not used here
  //
  // Returns 0 on success, 233 when the test is skipped (packing layout not
  // requested, layer not creatable, or layer without vulkan support) and -1
  // when a one_blob_only layer is given several inputs.
  int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& d, const std::vector<ncnn::Mat>& top_shapes, void (*func)(ncnn::Layer*), int flag)
  {
      if (!_opt.use_packing_layout)
      {
          // pack1 test is useless for gpu
          return 233;
      }

      ncnn::Layer* op = ncnn::create_layer_vulkan(typeindex);
      if (!op)
      {
          return 233;
      }

      op->load_param(pd);

      if (!op->support_vulkan)
      {
          delete op;
          return 233;
      }

      ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

      op->vkdev = vkdev;

      if (func)
      {
          (*func)((ncnn::Layer*)op);
      }

      if (!top_shapes.empty())
      {
          op->bottom_shapes = a;
          op->top_shapes = top_shapes;
      }

      if (op->one_blob_only && a.size() != 1)
      {
          fprintf(stderr, "layer with one_blob_only but consume multiple inputs\n");
          delete op;
          return -1;
      }

      ncnn::ModelBinFromMatArray mb(weights.data());

      op->load_model(mb);

      ncnn::VkWeightAllocator g_weight_vkallocator(vkdev);
      ncnn::VkWeightStagingAllocator g_weight_staging_vkallocator(vkdev);

      // borrowed from the device; must be handed back on every exit path below
      ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
      ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

      ncnn::Option opt = _opt;
      opt.num_threads = 1;
      opt.use_vulkan_compute = true;

  #if __APPLE__
      opt.use_image_storage = false;
  #endif

      opt.blob_vkallocator = blob_vkallocator;
      opt.workspace_vkallocator = blob_vkallocator;
      opt.staging_vkallocator = staging_vkallocator;

      // drop options the device cannot honour
      if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
      if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
      if (!vkdev->info.support_fp16_arithmetic()) opt.use_fp16_arithmetic = false;
      if (!vkdev->info.support_cooperative_matrix()) opt.use_cooperative_matrix = false;

      // FIXME fp16a may produce large error
      opt.use_fp16_arithmetic = false;

      op->create_pipeline(opt);

      if (!op->support_vulkan)
      {
          op->destroy_pipeline(opt);
          delete op;

          // give the borrowed allocators back; this path used to leak them
          vkdev->reclaim_blob_allocator(blob_vkallocator);
          vkdev->reclaim_staging_allocator(staging_vkallocator);
          return 233;
      }

      {
          // upload weights using the dedicated weight allocators
          ncnn::VkTransfer cmd(vkdev);

          ncnn::Option opt_upload = opt;
          opt_upload.blob_vkallocator = &g_weight_vkallocator;
          opt_upload.workspace_vkallocator = &g_weight_vkallocator;
          opt_upload.staging_vkallocator = &g_weight_staging_vkallocator;

          op->upload_model(cmd, opt_upload);

          cmd.submit_and_wait();
      }

      d.resize(top_blob_count);

      {
          // forward
          ncnn::VkCompute cmd(vkdev);

          if (op->support_image_storage && opt.use_image_storage)
          {
              // upload
              std::vector<ncnn::VkImageMat> a_gpu(a.size());
              for (size_t i = 0; i < a_gpu.size(); i++)
              {
                  cmd.record_upload(a[i], a_gpu[i], opt);
              }

              std::vector<ncnn::VkImageMat> d_gpu(top_blob_count);
              if (op->support_inplace)
              {
                  op->forward_inplace(a_gpu, cmd, opt);

                  d_gpu = a_gpu;
              }
              else
              {
                  op->forward(a_gpu, d_gpu, cmd, opt);
              }

              // download
              for (size_t i = 0; i < d_gpu.size(); i++)
              {
                  cmd.record_download(d_gpu[i], d[i], opt);
              }
          }
          else
          {
              // upload
              std::vector<ncnn::VkMat> a_gpu(a.size());
              for (size_t i = 0; i < a_gpu.size(); i++)
              {
                  cmd.record_upload(a[i], a_gpu[i], opt);
              }

              std::vector<ncnn::VkMat> d_gpu(top_blob_count);
              if (op->support_inplace)
              {
                  op->forward_inplace(a_gpu, cmd, opt);

                  d_gpu = a_gpu;
              }
              else
              {
                  op->forward(a_gpu, d_gpu, cmd, opt);
              }

              // download
              for (size_t i = 0; i < d_gpu.size(); i++)
              {
                  cmd.record_download(d_gpu[i], d[i], opt);
              }
          }

          cmd.submit_and_wait();
      }

      op->destroy_pipeline(opt);

      delete op;

      vkdev->reclaim_blob_allocator(blob_vkallocator);
      vkdev->reclaim_staging_allocator(staging_vkallocator);
      g_weight_vkallocator.clear();
      g_weight_staging_vkallocator.clear();

      return 0;
  }
  #endif // NCNN_VULKAN
  670. int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, const std::vector<ncnn::Mat>& top_shapes, float epsilon, void (*func)(ncnn::Layer*), int flag)
  671. {
  672. // naive
  673. std::vector<ncnn::Mat> b;
  674. {
  675. int ret = test_layer_naive(typeindex, pd, weights, a, top_blob_count, b, func, flag);
  676. if (ret != 233 && ret != 0)
  677. {
  678. fprintf(stderr, "test_layer_naive failed\n");
  679. return -1;
  680. }
  681. }
  682. // cpu
  683. {
  684. std::vector<ncnn::Mat> c;
  685. int ret = test_layer_cpu(typeindex, pd, weights, _opt, a, top_blob_count, c, std::vector<ncnn::Mat>(), func, flag);
  686. if (ret != 233 && (ret != 0 || CompareMat(b, c, epsilon) != 0))
  687. {
  688. fprintf(stderr, "test_layer_cpu failed\n");
  689. return -1;
  690. }
  691. }
  692. // cpu shape hint
  693. {
  694. std::vector<ncnn::Mat> c;
  695. int ret = test_layer_cpu(typeindex, pd, weights, _opt, a, top_blob_count, c, b, func, flag);
  696. if (ret != 233 && (ret != 0 || CompareMat(b, c, epsilon) != 0))
  697. {
  698. fprintf(stderr, "test_layer_cpu failed with shape hint\n");
  699. return -1;
  700. }
  701. }
  702. #if NCNN_VULKAN
  703. // gpu
  704. if (!(flag & TEST_LAYER_DISABLE_GPU_TESTING))
  705. {
  706. std::vector<ncnn::Mat> d;
  707. int ret = test_layer_gpu(typeindex, pd, weights, _opt, a, top_blob_count, d, std::vector<ncnn::Mat>(), func, flag);
  708. if (ret != 233 && (ret != 0 || CompareMat(b, d, epsilon) != 0))
  709. {
  710. fprintf(stderr, "test_layer_gpu failed\n");
  711. return -1;
  712. }
  713. }
  714. // gpu shape hint
  715. if (!(flag & TEST_LAYER_DISABLE_GPU_TESTING))
  716. {
  717. std::vector<ncnn::Mat> d;
  718. int ret = test_layer_gpu(typeindex, pd, weights, _opt, a, top_blob_count, d, b, func, flag);
  719. if (ret != 233 && (ret != 0 || CompareMat(b, d, epsilon) != 0))
  720. {
  721. fprintf(stderr, "test_layer_gpu failed with shape hint\n");
  722. return -1;
  723. }
  724. }
  725. #endif // NCNN_VULKAN
  726. return 0;
  727. }
  728. int test_layer_naive(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, ncnn::Mat& b, void (*func)(ncnn::Layer*), int flag)
  729. {
  730. ncnn::Layer* op = ncnn::create_layer_naive(typeindex);
  731. if (func)
  732. {
  733. (*func)((ncnn::Layer*)op);
  734. }
  735. op->load_param(pd);
  736. ncnn::ModelBinFromMatArray mb(weights.data());
  737. op->load_model(mb);
  738. ncnn::Option opt;
  739. opt.num_threads = 1;
  740. opt.lightmode = false;
  741. opt.use_packing_layout = false;
  742. opt.use_fp16_packed = false;
  743. opt.use_fp16_storage = false;
  744. opt.use_fp16_arithmetic = false;
  745. opt.use_shader_pack8 = false;
  746. opt.use_image_storage = false;
  747. opt.use_bf16_storage = false;
  748. opt.use_vulkan_compute = false;
  749. op->create_pipeline(opt);
  750. if (op->support_inplace)
  751. {
  752. b = a.clone();
  753. op->forward_inplace(b, opt);
  754. }
  755. else
  756. {
  757. op->forward(a, b, opt);
  758. }
  759. op->destroy_pipeline(opt);
  760. delete op;
  761. return 0;
  762. }
  763. int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, ncnn::Mat& c, const ncnn::Mat& top_shape, void (*func)(ncnn::Layer*), int flag)
  764. {
  765. ncnn::Layer* op = ncnn::create_layer_cpu(typeindex);
  766. if (!op->support_packing && _opt.use_packing_layout)
  767. {
  768. delete op;
  769. return 233;
  770. }
  771. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  772. {
  773. delete op;
  774. return 233;
  775. }
  776. if (func)
  777. {
  778. (*func)((ncnn::Layer*)op);
  779. }
  780. if (top_shape.dims)
  781. {
  782. op->bottom_shapes.resize(1);
  783. op->top_shapes.resize(1);
  784. op->bottom_shapes[0] = a;
  785. op->top_shapes[0] = top_shape;
  786. }
  787. op->load_param(pd);
  788. ncnn::ModelBinFromMatArray mb(weights.data());
  789. op->load_model(mb);
  790. ncnn::Option opt = _opt;
  791. opt.num_threads = 1;
  792. opt.use_vulkan_compute = false;
  793. op->create_pipeline(opt);
  794. if (!op->support_packing && _opt.use_packing_layout)
  795. {
  796. op->destroy_pipeline(opt);
  797. delete op;
  798. return 233;
  799. }
  800. if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
  801. {
  802. op->destroy_pipeline(opt);
  803. delete op;
  804. return 233;
  805. }
  806. ncnn::Mat a4;
  807. // clang-format off
  808. // *INDENT-OFF*
  809. #if NCNN_ARM82
  810. if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  811. {
  812. ncnn::cast_float32_to_float16(a, a4, opt);
  813. }
  814. else
  815. #endif // NCNN_ARM82
  816. #if NCNN_RVV
  817. if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  818. {
  819. ncnn::cast_float32_to_float16(a, a4, opt);
  820. }
  821. else
  822. #endif // NCNN_RVV
  823. #if NCNN_BF16
  824. if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  825. {
  826. ncnn::cast_float32_to_bfloat16(a, a4, opt);
  827. }
  828. else
  829. #endif // NCNN_BF16
  830. if (opt.use_fp16_storage && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  831. {
  832. ncnn::cast_float32_to_float16(a, a4, opt);
  833. }
  834. else
  835. {
  836. a4 = a;
  837. }
  838. // *INDENT-ON*
  839. // clang-format on
  840. if (opt.use_packing_layout && op->support_packing && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_PACKING))
  841. {
  842. // resolve dst_elempack
  843. int dims = a4.dims;
  844. int elemcount = 0;
  845. if (dims == 1) elemcount = a4.elempack * a4.w;
  846. if (dims == 2) elemcount = a4.elempack * a4.h;
  847. if (dims == 3 || dims == 4) elemcount = a4.elempack * a4.c;
  848. int elembits = a4.elembits();
  849. int dst_elempack = 1;
  850. if (elembits == 32)
  851. {
  852. #if NCNN_AVX512
  853. if (elemcount % 16 == 0 && ncnn::cpu_support_x86_avx512())
  854. dst_elempack = 16;
  855. else if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
  856. dst_elempack = 8;
  857. else if (elemcount % 4 == 0)
  858. dst_elempack = 4;
  859. #elif NCNN_AVX
  860. if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
  861. dst_elempack = 8;
  862. else if (elemcount % 4 == 0)
  863. dst_elempack = 4;
  864. #elif NCNN_RVV
  865. const int packn = ncnn::cpu_riscv_vlenb() / (elembits / 8);
  866. if (elemcount % packn == 0)
  867. dst_elempack = packn;
  868. #else
  869. if (elemcount % 4 == 0)
  870. dst_elempack = 4;
  871. #endif
  872. }
  873. if (elembits == 16)
  874. {
  875. #if NCNN_ARM82
  876. if (elemcount % 8 == 0 && ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
  877. dst_elempack = 8;
  878. else if (elemcount % 4 == 0)
  879. dst_elempack = 4;
  880. #elif NCNN_RVV
  881. const int packn = ncnn::cpu_riscv_vlenb() / 2;
  882. if (elemcount % packn == 0)
  883. dst_elempack = packn;
  884. #else
  885. if (elemcount % 4 == 0)
  886. dst_elempack = 4;
  887. #endif
  888. }
  889. if (elembits == 8)
  890. {
  891. #if NCNN_RVV
  892. const int packn = ncnn::cpu_riscv_vlenb() / 1;
  893. if (elemcount % packn == 0)
  894. dst_elempack = packn;
  895. #else
  896. if (elemcount % 8 == 0)
  897. dst_elempack = 8;
  898. #endif
  899. }
  900. if (flag & TEST_LAYER_ENABLE_FORCE_INPUT_PACK8)
  901. dst_elempack = 8;
  902. ncnn::Mat a4_packed;
  903. ncnn::convert_packing(a4, a4_packed, dst_elempack, opt);
  904. a4 = a4_packed;
  905. }
  906. if (op->support_inplace)
  907. {
  908. c = a4.clone();
  909. op->forward_inplace(c, opt);
  910. }
  911. else
  912. {
  913. op->forward(a4, c, opt);
  914. }
  915. // clang-format off
  916. // *INDENT-OFF*
  917. #if NCNN_ARM82
  918. if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c.elembits() == 16)
  919. {
  920. ncnn::Mat c_fp32;
  921. ncnn::cast_float16_to_float32(c, c_fp32, opt);
  922. c = c_fp32;
  923. }
  924. else
  925. #endif // NCNN_ARM82
  926. #if NCNN_RVV
  927. if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c.elembits() == 16)
  928. {
  929. ncnn::Mat c_fp32;
  930. ncnn::cast_float16_to_float32(c, c_fp32, opt);
  931. c = c_fp32;
  932. }
  933. else
  934. #endif // NCNN_RVV
  935. #if NCNN_BF16
  936. if (opt.use_bf16_storage && op->support_bf16_storage && c.elembits() == 16)
  937. {
  938. ncnn::Mat c_fp32;
  939. ncnn::cast_bfloat16_to_float32(c, c_fp32, opt);
  940. c = c_fp32;
  941. }
  942. else
  943. #endif // NCNN_BF16
  944. if (opt.use_fp16_storage && op->support_fp16_storage && c.elembits() == 16)
  945. {
  946. ncnn::Mat c_fp32;
  947. ncnn::cast_float16_to_float32(c, c_fp32, opt);
  948. c = c_fp32;
  949. }
  950. // *INDENT-ON*
  951. // clang-format on
  952. op->destroy_pipeline(opt);
  953. delete op;
  954. return 0;
  955. }
  956. #if NCNN_VULKAN
  957. int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, ncnn::Mat& d, const ncnn::Mat& top_shape, void (*func)(ncnn::Layer*), int flag)
  958. {
  959. if (!_opt.use_packing_layout)
  960. {
  961. // pack1 test is useless for gpu
  962. return 233;
  963. }
  964. ncnn::Layer* op = ncnn::create_layer_vulkan(typeindex);
  965. if (!op)
  966. {
  967. return 233;
  968. }
  969. op->load_param(pd);
  970. if (!op->support_vulkan)
  971. {
  972. delete op;
  973. return 233;
  974. }
  975. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  976. op->vkdev = vkdev;
  977. if (func)
  978. {
  979. (*func)((ncnn::Layer*)op);
  980. }
  981. if (top_shape.dims)
  982. {
  983. op->bottom_shapes.resize(1);
  984. op->top_shapes.resize(1);
  985. op->bottom_shapes[0] = a;
  986. op->top_shapes[0] = top_shape;
  987. }
  988. ncnn::ModelBinFromMatArray mb(weights.data());
  989. op->load_model(mb);
  990. ncnn::VkWeightAllocator g_weight_vkallocator(vkdev);
  991. ncnn::VkWeightStagingAllocator g_weight_staging_vkallocator(vkdev);
  992. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  993. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  994. ncnn::Option opt = _opt;
  995. opt.num_threads = 1;
  996. opt.use_vulkan_compute = true;
  997. #if __APPLE__
  998. opt.use_image_storage = false;
  999. #endif
  1000. opt.blob_vkallocator = blob_vkallocator;
  1001. opt.workspace_vkallocator = blob_vkallocator;
  1002. opt.staging_vkallocator = staging_vkallocator;
  1003. if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
  1004. if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
  1005. if (!vkdev->info.support_fp16_arithmetic()) opt.use_fp16_arithmetic = false;
  1006. if (!vkdev->info.support_cooperative_matrix()) opt.use_cooperative_matrix = false;
  1007. // FIXME fp16a may produce large error
  1008. opt.use_fp16_arithmetic = false;
  1009. op->create_pipeline(opt);
  1010. if (!op->support_vulkan)
  1011. {
  1012. op->destroy_pipeline(opt);
  1013. delete op;
  1014. return 233;
  1015. }
  1016. {
  1017. ncnn::VkTransfer cmd(vkdev);
  1018. ncnn::Option opt_upload = opt;
  1019. opt_upload.blob_vkallocator = &g_weight_vkallocator;
  1020. opt_upload.workspace_vkallocator = &g_weight_vkallocator;
  1021. opt_upload.staging_vkallocator = &g_weight_staging_vkallocator;
  1022. op->upload_model(cmd, opt_upload);
  1023. cmd.submit_and_wait();
  1024. }
  1025. {
  1026. // forward
  1027. ncnn::VkCompute cmd(vkdev);
  1028. if (op->support_image_storage && opt.use_image_storage)
  1029. {
  1030. // upload
  1031. ncnn::VkImageMat a_gpu;
  1032. cmd.record_upload(a, a_gpu, opt);
  1033. ncnn::VkImageMat d_gpu;
  1034. if (op->support_inplace)
  1035. {
  1036. op->forward_inplace(a_gpu, cmd, opt);
  1037. d_gpu = a_gpu;
  1038. }
  1039. else
  1040. {
  1041. op->forward(a_gpu, d_gpu, cmd, opt);
  1042. }
  1043. // download
  1044. cmd.record_download(d_gpu, d, opt);
  1045. }
  1046. else
  1047. {
  1048. // upload
  1049. ncnn::VkMat a_gpu;
  1050. cmd.record_upload(a, a_gpu, opt);
  1051. ncnn::VkMat d_gpu;
  1052. if (op->support_inplace)
  1053. {
  1054. op->forward_inplace(a_gpu, cmd, opt);
  1055. d_gpu = a_gpu;
  1056. }
  1057. else
  1058. {
  1059. op->forward(a_gpu, d_gpu, cmd, opt);
  1060. }
  1061. // download
  1062. cmd.record_download(d_gpu, d, opt);
  1063. }
  1064. cmd.submit_and_wait();
  1065. }
  1066. op->destroy_pipeline(opt);
  1067. delete op;
  1068. vkdev->reclaim_blob_allocator(blob_vkallocator);
  1069. vkdev->reclaim_staging_allocator(staging_vkallocator);
  1070. g_weight_vkallocator.clear();
  1071. g_weight_staging_vkallocator.clear();
  1072. return 0;
  1073. }
  1074. #endif // NCNN_VULKAN
  1075. int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, const ncnn::Mat& top_shape, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1076. {
  1077. // naive
  1078. ncnn::Mat b;
  1079. {
  1080. int ret = test_layer_naive(typeindex, pd, weights, a, b, func, flag);
  1081. if (ret != 233 && ret != 0)
  1082. {
  1083. fprintf(stderr, "test_layer_naive failed\n");
  1084. return -1;
  1085. }
  1086. }
  1087. // cpu
  1088. {
  1089. ncnn::Mat c;
  1090. int ret = test_layer_cpu(typeindex, pd, weights, _opt, a, c, ncnn::Mat(), func, flag);
  1091. if (ret != 233 && (ret != 0 || CompareMat(b, c, epsilon) != 0))
  1092. {
  1093. fprintf(stderr, "test_layer_cpu failed\n");
  1094. return -1;
  1095. }
  1096. }
  1097. // cpu shape hint
  1098. {
  1099. ncnn::Mat c;
  1100. int ret = test_layer_cpu(typeindex, pd, weights, _opt, a, c, b, func, flag);
  1101. if (ret != 233 && (ret != 0 || CompareMat(b, c, epsilon) != 0))
  1102. {
  1103. fprintf(stderr, "test_layer_cpu failed with shape hint\n");
  1104. return -1;
  1105. }
  1106. }
  1107. #if NCNN_VULKAN
  1108. // gpu
  1109. if (!(flag & TEST_LAYER_DISABLE_GPU_TESTING))
  1110. {
  1111. ncnn::Mat d;
  1112. int ret = test_layer_gpu(typeindex, pd, weights, _opt, a, d, ncnn::Mat(), func, flag);
  1113. if (ret != 233 && (ret != 0 || CompareMat(b, d, epsilon) != 0))
  1114. {
  1115. fprintf(stderr, "test_layer_gpu failed\n");
  1116. return -1;
  1117. }
  1118. }
  1119. // gpu shape hint
  1120. if (!(flag & TEST_LAYER_DISABLE_GPU_TESTING))
  1121. {
  1122. ncnn::Mat d;
  1123. int ret = test_layer_gpu(typeindex, pd, weights, _opt, a, d, b, func, flag);
  1124. if (ret != 233 && (ret != 0 || CompareMat(b, d, epsilon) != 0))
  1125. {
  1126. fprintf(stderr, "test_layer_gpu failed with shape hint\n");
  1127. return -1;
  1128. }
  1129. }
  1130. #endif // NCNN_VULKAN
  1131. return 0;
  1132. }
  1133. int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& opt, const std::vector<ncnn::Mat>& a, int top_blob_count, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1134. {
  1135. // fp16 representation
  1136. std::vector<ncnn::Mat> a_fp16;
  1137. if (opt.use_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  1138. {
  1139. a_fp16.resize(a.size());
  1140. for (size_t j = 0; j < a.size(); j++)
  1141. {
  1142. ncnn::Mat tmp;
  1143. ncnn::cast_float32_to_bfloat16(a[j], tmp, opt);
  1144. ncnn::cast_bfloat16_to_float32(tmp, a_fp16[j], opt);
  1145. }
  1146. }
  1147. else if ((opt.use_fp16_packed || opt.use_fp16_storage) && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  1148. {
  1149. a_fp16.resize(a.size());
  1150. for (size_t j = 0; j < a.size(); j++)
  1151. {
  1152. ncnn::Mat tmp;
  1153. ncnn::cast_float32_to_float16(a[j], tmp, opt);
  1154. ncnn::cast_float16_to_float32(tmp, a_fp16[j], opt);
  1155. }
  1156. }
  1157. else
  1158. {
  1159. a_fp16 = a;
  1160. }
  1161. std::vector<ncnn::Mat> weights_fp16;
  1162. float epsilon_fp16;
  1163. if (opt.use_bf16_storage)
  1164. {
  1165. weights_fp16.resize(weights.size());
  1166. for (size_t j = 0; j < weights.size(); j++)
  1167. {
  1168. ncnn::Mat tmp;
  1169. ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
  1170. ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
  1171. }
  1172. epsilon_fp16 = epsilon * 100; // 0.1
  1173. }
  1174. else if (opt.use_fp16_packed || opt.use_fp16_storage)
  1175. {
  1176. weights_fp16.resize(weights.size());
  1177. for (size_t j = 0; j < weights.size(); j++)
  1178. {
  1179. ncnn::Mat tmp;
  1180. ncnn::cast_float32_to_float16(weights[j], tmp, opt);
  1181. ncnn::cast_float16_to_float32(tmp, weights_fp16[j], opt);
  1182. }
  1183. epsilon_fp16 = epsilon * 100; // 0.1
  1184. }
  1185. else
  1186. {
  1187. weights_fp16 = weights;
  1188. epsilon_fp16 = epsilon;
  1189. }
  1190. if (opt.use_fp16_arithmetic)
  1191. {
  1192. epsilon_fp16 = epsilon * 1000; // 1.0
  1193. }
  1194. std::vector<ncnn::Mat> top_shapes;
  1195. int ret = test_layer(ncnn::layer_to_index(layer_type), pd, weights_fp16, opt, a_fp16, top_blob_count, top_shapes, epsilon_fp16, func, flag);
  1196. if (ret != 0)
  1197. {
  1198. fprintf(stderr, "test_layer %s failed use_packing_layout=%d use_fp16_packed=%d use_fp16_storage=%d use_fp16_arithmetic=%d use_shader_pack8=%d use_bf16_storage=%d use_image_storage=%d use_sgemm_convolution=%d use_winograd_convolution=%d\n", layer_type, opt.use_packing_layout, opt.use_fp16_packed, opt.use_fp16_storage, opt.use_fp16_arithmetic, opt.use_shader_pack8, opt.use_bf16_storage, opt.use_image_storage, opt.use_sgemm_convolution, opt.use_winograd_convolution);
  1199. return ret;
  1200. }
  1201. return 0;
  1202. }
  1203. int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& opt, const ncnn::Mat& a, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1204. {
  1205. // fp16 representation
  1206. ncnn::Mat a_fp16;
  1207. if (opt.use_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  1208. {
  1209. ncnn::Mat tmp;
  1210. ncnn::cast_float32_to_bfloat16(a, tmp, opt);
  1211. ncnn::cast_bfloat16_to_float32(tmp, a_fp16, opt);
  1212. }
  1213. else if ((opt.use_fp16_packed || opt.use_fp16_storage) && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
  1214. {
  1215. ncnn::Mat tmp;
  1216. ncnn::cast_float32_to_float16(a, tmp, opt);
  1217. ncnn::cast_float16_to_float32(tmp, a_fp16, opt);
  1218. }
  1219. else
  1220. {
  1221. a_fp16 = a;
  1222. }
  1223. std::vector<ncnn::Mat> weights_fp16;
  1224. float epsilon_fp16;
  1225. if (opt.use_bf16_storage)
  1226. {
  1227. weights_fp16.resize(weights.size());
  1228. for (size_t j = 0; j < weights.size(); j++)
  1229. {
  1230. ncnn::Mat tmp;
  1231. ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
  1232. ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
  1233. }
  1234. epsilon_fp16 = epsilon * 100; // 0.1
  1235. }
  1236. else if (opt.use_fp16_packed || opt.use_fp16_storage)
  1237. {
  1238. weights_fp16.resize(weights.size());
  1239. for (size_t j = 0; j < weights.size(); j++)
  1240. {
  1241. ncnn::Mat tmp;
  1242. ncnn::cast_float32_to_float16(weights[j], tmp, opt);
  1243. ncnn::cast_float16_to_float32(tmp, weights_fp16[j], opt);
  1244. }
  1245. epsilon_fp16 = epsilon * 100; // 0.1
  1246. }
  1247. else
  1248. {
  1249. weights_fp16 = weights;
  1250. epsilon_fp16 = epsilon;
  1251. }
  1252. if (opt.use_fp16_arithmetic)
  1253. {
  1254. epsilon_fp16 = epsilon * 1000; // 1.0
  1255. }
  1256. ncnn::Mat top_shape;
  1257. int ret = test_layer(ncnn::layer_to_index(layer_type), pd, weights_fp16, opt, a_fp16, top_shape, epsilon_fp16, func, flag);
  1258. if (ret != 0)
  1259. {
  1260. fprintf(stderr, "test_layer %s failed use_packing_layout=%d use_fp16_packed=%d use_fp16_storage=%d use_fp16_arithmetic=%d use_shader_pack8=%d use_bf16_storage=%d use_image_storage=%d use_sgemm_convolution=%d use_winograd_convolution=%d\n", layer_type, opt.use_packing_layout, opt.use_fp16_packed, opt.use_fp16_storage, opt.use_fp16_arithmetic, opt.use_shader_pack8, opt.use_bf16_storage, opt.use_image_storage, opt.use_sgemm_convolution, opt.use_winograd_convolution);
  1261. return ret;
  1262. }
  1263. return 0;
  1264. }
  1265. int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1266. {
  1267. // pack fp16p fp16s fp16a bf16s shader8 image
  1268. const int options[][7] = {
  1269. {0, 0, 0, 0, 0, 0, 0},
  1270. {0, 0, 1, 0, 0, 0, 0},
  1271. {0, 0, 1, 1, 1, 0, 0},
  1272. {1, 0, 0, 0, 0, 0, 0},
  1273. {1, 1, 0, 0, 1, 0, 0},
  1274. {1, 0, 1, 0, 0, 1, 0},
  1275. {1, 1, 1, 1, 0, 0, 0},
  1276. {1, 1, 1, 1, 1, 1, 1},
  1277. };
  1278. const int opt_count = sizeof(options) / sizeof(options[0]);
  1279. for (int i = 0; i < opt_count; i++)
  1280. {
  1281. ncnn::Option opt;
  1282. opt.num_threads = 1;
  1283. opt.use_packing_layout = options[i][0];
  1284. opt.use_fp16_packed = options[i][1];
  1285. opt.use_fp16_storage = options[i][2];
  1286. opt.use_fp16_arithmetic = options[i][3];
  1287. opt.use_bf16_storage = options[i][4];
  1288. opt.use_shader_pack8 = options[i][5];
  1289. opt.use_image_storage = options[i][6];
  1290. int ret = test_layer_opt(layer_type, pd, weights, opt, a, top_blob_count, epsilon, func, flag);
  1291. if (ret != 0)
  1292. return ret;
  1293. }
  1294. return 0;
  1295. }
  1296. int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, float epsilon, void (*func)(ncnn::Layer*), int flag)
  1297. {
  1298. // pack fp16p fp16s fp16a bf16s shader8 image
  1299. const int options[][7] = {
  1300. {0, 0, 0, 0, 0, 0, 0},
  1301. {0, 0, 1, 0, 0, 0, 0},
  1302. {0, 0, 1, 1, 1, 0, 0},
  1303. {1, 0, 0, 0, 0, 0, 0},
  1304. {1, 1, 0, 0, 1, 0, 0},
  1305. {1, 0, 1, 0, 0, 1, 0},
  1306. {1, 1, 1, 1, 0, 0, 0},
  1307. {1, 1, 1, 1, 1, 1, 1},
  1308. };
  1309. const int opt_count = sizeof(options) / sizeof(options[0]);
  1310. for (int i = 0; i < opt_count; i++)
  1311. {
  1312. ncnn::Option opt;
  1313. opt.num_threads = 1;
  1314. opt.use_packing_layout = options[i][0];
  1315. opt.use_fp16_packed = options[i][1];
  1316. opt.use_fp16_storage = options[i][2];
  1317. opt.use_fp16_arithmetic = options[i][3];
  1318. opt.use_bf16_storage = options[i][4];
  1319. opt.use_shader_pack8 = options[i][5];
  1320. opt.use_image_storage = options[i][6];
  1321. int ret = test_layer_opt(layer_type, pd, weights, opt, a, epsilon, func, flag);
  1322. if (ret != 0)
  1323. return ret;
  1324. }
  1325. return 0;
  1326. }