You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

crop_arm.cpp 20 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "crop_arm.h"
  15. #if __ARM_NEON
  16. #include <arm_neon.h>
  17. #endif // __ARM_NEON
  18. namespace ncnn {
  19. Crop_arm::Crop_arm()
  20. {
  21. #if __ARM_NEON
  22. support_packing = true;
  23. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  24. support_fp16_storage = true;
  25. #endif
  26. #endif // __ARM_NEON
  27. #if NCNN_BF16
  28. support_bf16_storage = true;
  29. #endif
  30. }
  31. #if __ARM_NEON
  32. static void crop_pack8_neon(const Mat& src, Mat& dst, int top, int left)
  33. {
  34. int w = dst.w;
  35. int h = dst.h;
  36. int right = src.w - dst.w - left;
  37. const float* ptr = src.row(top) + left * 8;
  38. float* outptr = dst;
  39. for (int y = 0; y < h; y++)
  40. {
  41. for (int x = 0; x < w; x++)
  42. {
  43. float32x4_t _p0 = vld1q_f32(ptr);
  44. float32x4_t _p1 = vld1q_f32(ptr + 4);
  45. vst1q_f32(outptr, _p0);
  46. vst1q_f32(outptr + 4, _p1);
  47. ptr += 8;
  48. outptr += 8;
  49. }
  50. ptr += (left + right) * 8;
  51. }
  52. }
  53. static void crop_pack8_bf16_fp16s_neon(const Mat& src, Mat& dst, int top, int left)
  54. {
  55. int w = dst.w;
  56. int h = dst.h;
  57. int right = src.w - dst.w - left;
  58. const unsigned short* ptr = src.row<unsigned short>(top) + left * 8;
  59. unsigned short* outptr = dst;
  60. for (int y = 0; y < h; y++)
  61. {
  62. for (int x = 0; x < w; x++)
  63. {
  64. uint16x8_t _p = vld1q_u16(ptr);
  65. vst1q_u16(outptr, _p);
  66. ptr += 8;
  67. outptr += 8;
  68. }
  69. ptr += (left + right) * 8;
  70. }
  71. }
  72. static void crop_pack4_neon(const Mat& src, Mat& dst, int top, int left)
  73. {
  74. int w = dst.w;
  75. int h = dst.h;
  76. int right = src.w - dst.w - left;
  77. const float* ptr = src.row(top) + left * 4;
  78. float* outptr = dst;
  79. for (int y = 0; y < h; y++)
  80. {
  81. for (int x = 0; x < w; x++)
  82. {
  83. float32x4_t _p = vld1q_f32(ptr);
  84. vst1q_f32(outptr, _p);
  85. ptr += 4;
  86. outptr += 4;
  87. }
  88. ptr += (left + right) * 4;
  89. }
  90. }
  91. static void crop_pack4_bf16_fp16s_neon(const Mat& src, Mat& dst, int top, int left)
  92. {
  93. int w = dst.w;
  94. int h = dst.h;
  95. int right = src.w - dst.w - left;
  96. const unsigned short* ptr = src.row<unsigned short>(top) + left * 4;
  97. unsigned short* outptr = dst;
  98. for (int y = 0; y < h; y++)
  99. {
  100. for (int x = 0; x < w; x++)
  101. {
  102. uint16x4_t _p = vld1_u16(ptr);
  103. vst1_u16(outptr, _p);
  104. ptr += 4;
  105. outptr += 4;
  106. }
  107. ptr += (left + right) * 4;
  108. }
  109. }
  110. #endif // __ARM_NEON
  111. int Crop_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
  112. {
  113. int w = bottom_blob.w;
  114. int h = bottom_blob.h;
  115. int channels = bottom_blob.c;
  116. int dims = bottom_blob.dims;
  117. size_t elemsize = bottom_blob.elemsize;
  118. int elempack = bottom_blob.elempack;
  119. #if __ARM_NEON
  120. if (elempack == 8)
  121. {
  122. int _woffset, _hoffset, _coffset;
  123. int _outw, _outh, _outc;
  124. resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _coffset, _outw, _outh, _outc);
  125. if (dims == 1)
  126. {
  127. int out_elempack = _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
  128. size_t out_elemsize = elemsize / elempack * out_elempack;
  129. if (_outw / out_elempack == w)
  130. {
  131. top_blob = bottom_blob;
  132. return 0;
  133. }
  134. top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  135. if (top_blob.empty())
  136. return -100;
  137. if (_woffset % 8 == 0 && out_elempack == 8)
  138. {
  139. if (elemsize == 16u)
  140. crop_pack8_bf16_fp16s_neon(bottom_blob, top_blob, 0, _woffset / elempack);
  141. else
  142. crop_pack8_neon(bottom_blob, top_blob, 0, _woffset / elempack);
  143. return 0;
  144. }
  145. }
  146. if (dims == 2)
  147. {
  148. int out_elempack = _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
  149. size_t out_elemsize = elemsize / elempack * out_elempack;
  150. if (_outw == w && _outh / out_elempack == h)
  151. {
  152. top_blob = bottom_blob;
  153. return 0;
  154. }
  155. top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  156. if (top_blob.empty())
  157. return -100;
  158. if (_hoffset % 8 == 0 && out_elempack == 8)
  159. {
  160. if (elemsize == 16u)
  161. crop_pack8_bf16_fp16s_neon(bottom_blob, top_blob, _hoffset / elempack, _woffset);
  162. else
  163. crop_pack8_neon(bottom_blob, top_blob, _hoffset / elempack, _woffset);
  164. return 0;
  165. }
  166. }
  167. if (dims == 3)
  168. {
  169. int out_elempack = _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
  170. size_t out_elemsize = elemsize / elempack * out_elempack;
  171. if (_coffset % 8 == 0 && out_elempack == 8)
  172. {
  173. const Mat bottom_blob_sliced = bottom_blob.channel_range(_coffset / out_elempack, _outc / out_elempack);
  174. if (_outw == w && _outh == h)
  175. {
  176. top_blob = bottom_blob_sliced.clone();
  177. if (top_blob.empty())
  178. return -100;
  179. }
  180. if (_outw == w && _outh == h && _outc / out_elempack == channels)
  181. {
  182. top_blob = bottom_blob;
  183. return 0;
  184. }
  185. top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  186. if (top_blob.empty())
  187. return -100;
  188. #pragma omp parallel for num_threads(opt.num_threads)
  189. for (int q = 0; q < top_blob.c; q++)
  190. {
  191. const Mat m = bottom_blob_sliced.channel(q);
  192. Mat borderm = top_blob.channel(q);
  193. if (elemsize == 16u)
  194. crop_pack8_bf16_fp16s_neon(m, borderm, _hoffset, _woffset);
  195. else
  196. crop_pack8_neon(m, borderm, _hoffset, _woffset);
  197. }
  198. return 0;
  199. }
  200. }
  201. }
  202. if (elempack == 4)
  203. {
  204. int _woffset, _hoffset, _coffset;
  205. int _outw, _outh, _outc;
  206. resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _coffset, _outw, _outh, _outc);
  207. if (dims == 1)
  208. {
  209. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  210. int out_elempack = opt.use_fp16_arithmetic && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
  211. #else
  212. int out_elempack = _outw % 4 == 0 ? 4 : 1;
  213. #endif
  214. size_t out_elemsize = elemsize / elempack * out_elempack;
  215. if (_outw / out_elempack == w)
  216. {
  217. top_blob = bottom_blob;
  218. return 0;
  219. }
  220. top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  221. if (top_blob.empty())
  222. return -100;
  223. if (_woffset % 4 == 0 && out_elempack == 4)
  224. {
  225. if (elemsize == 8u)
  226. crop_pack4_bf16_fp16s_neon(bottom_blob, top_blob, 0, _woffset / elempack);
  227. else
  228. crop_pack4_neon(bottom_blob, top_blob, 0, _woffset / elempack);
  229. return 0;
  230. }
  231. }
  232. if (dims == 2)
  233. {
  234. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  235. int out_elempack = opt.use_fp16_arithmetic && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
  236. #else
  237. int out_elempack = _outh % 4 == 0 ? 4 : 1;
  238. #endif
  239. size_t out_elemsize = elemsize / elempack * out_elempack;
  240. if (_outw == w && _outh / out_elempack == h)
  241. {
  242. top_blob = bottom_blob;
  243. return 0;
  244. }
  245. top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  246. if (top_blob.empty())
  247. return -100;
  248. if (_hoffset % 4 == 0 && out_elempack == 4)
  249. {
  250. if (elemsize == 8u)
  251. crop_pack4_bf16_fp16s_neon(bottom_blob, top_blob, _hoffset / elempack, _woffset);
  252. else
  253. crop_pack4_neon(bottom_blob, top_blob, _hoffset / elempack, _woffset);
  254. return 0;
  255. }
  256. }
  257. if (dims == 3)
  258. {
  259. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  260. int out_elempack = opt.use_fp16_arithmetic && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
  261. #else
  262. int out_elempack = _outc % 4 == 0 ? 4 : 1;
  263. #endif
  264. size_t out_elemsize = elemsize / elempack * out_elempack;
  265. if (_coffset % 4 == 0 && out_elempack == 4)
  266. {
  267. const Mat bottom_blob_sliced = bottom_blob.channel_range(_coffset / out_elempack, _outc / out_elempack);
  268. if (_outw == w && _outh == h)
  269. {
  270. top_blob = bottom_blob_sliced.clone();
  271. if (top_blob.empty())
  272. return -100;
  273. }
  274. if (_outw == w && _outh == h && _outc / out_elempack == channels)
  275. {
  276. top_blob = bottom_blob;
  277. return 0;
  278. }
  279. top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  280. if (top_blob.empty())
  281. return -100;
  282. #pragma omp parallel for num_threads(opt.num_threads)
  283. for (int q = 0; q < top_blob.c; q++)
  284. {
  285. const Mat m = bottom_blob_sliced.channel(q);
  286. Mat borderm = top_blob.channel(q);
  287. if (elemsize == 8u)
  288. crop_pack4_bf16_fp16s_neon(m, borderm, _hoffset, _woffset);
  289. else
  290. crop_pack4_neon(m, borderm, _hoffset, _woffset);
  291. }
  292. return 0;
  293. }
  294. }
  295. }
  296. #endif // __ARM_NEON
  297. Mat bottom_blob_unpacked = bottom_blob;
  298. if (elempack != 1)
  299. {
  300. Option opt_pack1 = opt;
  301. opt_pack1.blob_allocator = opt.workspace_allocator;
  302. convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_pack1);
  303. }
  304. return Crop::forward(bottom_blob_unpacked, top_blob, opt);
  305. }
  306. int Crop_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
  307. {
  308. const Mat& bottom_blob = bottom_blobs[0];
  309. const Mat& reference_blob = bottom_blobs[1];
  310. int w = bottom_blob.w;
  311. int h = bottom_blob.h;
  312. int channels = bottom_blob.c;
  313. int dims = bottom_blob.dims;
  314. size_t elemsize = bottom_blob.elemsize;
  315. int elempack = bottom_blob.elempack;
  316. int ref_elempack = reference_blob.elempack;
  317. Mat& top_blob = top_blobs[0];
  318. #if __ARM_NEON
  319. if (elempack == 8)
  320. {
  321. int _woffset, _hoffset, _coffset;
  322. int _outw, _outh, _outc;
  323. if (woffset == -233)
  324. {
  325. resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob, _woffset, _hoffset, _coffset, _outw, _outh, _outc);
  326. }
  327. else
  328. {
  329. resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _coffset, _outw, _outh, _outc);
  330. }
  331. if (dims == 1)
  332. {
  333. int out_elempack = _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
  334. size_t out_elemsize = elemsize / elempack * out_elempack;
  335. if (_outw / out_elempack == w)
  336. {
  337. top_blob = bottom_blob;
  338. return 0;
  339. }
  340. top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  341. if (top_blob.empty())
  342. return -100;
  343. if (_woffset % 8 == 0 && out_elempack == 8)
  344. {
  345. if (elemsize == 16u)
  346. crop_pack8_bf16_fp16s_neon(bottom_blob, top_blob, 0, _woffset / elempack);
  347. else
  348. crop_pack8_neon(bottom_blob, top_blob, 0, _woffset / elempack);
  349. return 0;
  350. }
  351. }
  352. if (dims == 2)
  353. {
  354. int out_elempack = _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
  355. size_t out_elemsize = elemsize / elempack * out_elempack;
  356. if (_outw == w && _outh / out_elempack == h)
  357. {
  358. top_blob = bottom_blob;
  359. return 0;
  360. }
  361. top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  362. if (top_blob.empty())
  363. return -100;
  364. if (_hoffset % 8 == 0 && out_elempack == 8)
  365. {
  366. if (elemsize == 16u)
  367. crop_pack8_bf16_fp16s_neon(bottom_blob, top_blob, _hoffset / elempack, _woffset);
  368. else
  369. crop_pack8_neon(bottom_blob, top_blob, _hoffset / elempack, _woffset);
  370. return 0;
  371. }
  372. }
  373. if (dims == 3)
  374. {
  375. int out_elempack = _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
  376. size_t out_elemsize = elemsize / elempack * out_elempack;
  377. if (_coffset % 8 == 0 && out_elempack == 8)
  378. {
  379. const Mat bottom_blob_sliced = bottom_blob.channel_range(_coffset / out_elempack, _outc / out_elempack);
  380. if (_outw == w && _outh == h)
  381. {
  382. top_blob = bottom_blob_sliced.clone();
  383. if (top_blob.empty())
  384. return -100;
  385. }
  386. if (_outw == w && _outh == h && _outc / out_elempack == channels)
  387. {
  388. top_blob = bottom_blob;
  389. return 0;
  390. }
  391. top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  392. if (top_blob.empty())
  393. return -100;
  394. #pragma omp parallel for num_threads(opt.num_threads)
  395. for (int q = 0; q < top_blob.c; q++)
  396. {
  397. const Mat m = bottom_blob_sliced.channel(q);
  398. Mat borderm = top_blob.channel(q);
  399. if (elemsize == 16u)
  400. crop_pack8_bf16_fp16s_neon(m, borderm, _hoffset, _woffset);
  401. else
  402. crop_pack8_neon(m, borderm, _hoffset, _woffset);
  403. }
  404. return 0;
  405. }
  406. }
  407. }
  408. if (elempack == 4)
  409. {
  410. int _woffset, _hoffset, _coffset;
  411. int _outw, _outh, _outc;
  412. if (woffset == -233)
  413. {
  414. resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob, _woffset, _hoffset, _coffset, _outw, _outh, _outc);
  415. }
  416. else
  417. {
  418. resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _coffset, _outw, _outh, _outc);
  419. }
  420. if (dims == 1)
  421. {
  422. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  423. int out_elempack = opt.use_fp16_arithmetic && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
  424. #else
  425. int out_elempack = _outw % 4 == 0 ? 4 : 1;
  426. #endif
  427. size_t out_elemsize = elemsize / elempack * out_elempack;
  428. if (_outw / out_elempack == w)
  429. {
  430. top_blob = bottom_blob;
  431. return 0;
  432. }
  433. top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  434. if (top_blob.empty())
  435. return -100;
  436. if (_woffset % 4 == 0 && out_elempack == 4)
  437. {
  438. if (elemsize == 8u)
  439. crop_pack4_bf16_fp16s_neon(bottom_blob, top_blob, 0, _woffset / elempack);
  440. else
  441. crop_pack4_neon(bottom_blob, top_blob, 0, _woffset / elempack);
  442. return 0;
  443. }
  444. }
  445. if (dims == 2)
  446. {
  447. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  448. int out_elempack = opt.use_fp16_arithmetic && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
  449. #else
  450. int out_elempack = _outh % 4 == 0 ? 4 : 1;
  451. #endif
  452. size_t out_elemsize = elemsize / elempack * out_elempack;
  453. if (_outw == w && _outh / out_elempack == h)
  454. {
  455. top_blob = bottom_blob;
  456. return 0;
  457. }
  458. top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  459. if (top_blob.empty())
  460. return -100;
  461. if (_hoffset % 4 == 0 && out_elempack == 4)
  462. {
  463. if (elemsize == 8u)
  464. crop_pack4_bf16_fp16s_neon(bottom_blob, top_blob, _hoffset / elempack, _woffset);
  465. else
  466. crop_pack4_neon(bottom_blob, top_blob, _hoffset / elempack, _woffset);
  467. return 0;
  468. }
  469. }
  470. if (dims == 3)
  471. {
  472. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  473. int out_elempack = opt.use_fp16_arithmetic && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
  474. #else
  475. int out_elempack = _outc % 4 == 0 ? 4 : 1;
  476. #endif
  477. size_t out_elemsize = elemsize / elempack * out_elempack;
  478. if (_coffset % 4 == 0 && out_elempack == 4)
  479. {
  480. const Mat bottom_blob_sliced = bottom_blob.channel_range(_coffset / out_elempack, _outc / out_elempack);
  481. if (_outw == w && _outh == h)
  482. {
  483. top_blob = bottom_blob_sliced.clone();
  484. if (top_blob.empty())
  485. return -100;
  486. }
  487. if (_outw == w && _outh == h && _outc / out_elempack == channels)
  488. {
  489. top_blob = bottom_blob;
  490. return 0;
  491. }
  492. top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
  493. if (top_blob.empty())
  494. return -100;
  495. #pragma omp parallel for num_threads(opt.num_threads)
  496. for (int q = 0; q < top_blob.c; q++)
  497. {
  498. const Mat m = bottom_blob_sliced.channel(q);
  499. Mat borderm = top_blob.channel(q);
  500. if (elemsize == 8u)
  501. crop_pack4_bf16_fp16s_neon(m, borderm, _hoffset, _woffset);
  502. else
  503. crop_pack4_neon(m, borderm, _hoffset, _woffset);
  504. }
  505. return 0;
  506. }
  507. }
  508. }
  509. #endif // __ARM_NEON
  510. Mat bottom_blob_unpacked = bottom_blob;
  511. if (elempack != 1)
  512. {
  513. Option opt_pack1 = opt;
  514. opt_pack1.blob_allocator = opt.workspace_allocator;
  515. convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_pack1);
  516. }
  517. Mat reference_blob_unpacked = reference_blob;
  518. if (ref_elempack != 1)
  519. {
  520. Option opt_pack1 = opt;
  521. opt_pack1.blob_allocator = opt.workspace_allocator;
  522. convert_packing(reference_blob, reference_blob_unpacked, 1, opt_pack1);
  523. }
  524. std::vector<Mat> bottom_blobs_unpacked(2);
  525. bottom_blobs_unpacked[0] = bottom_blob_unpacked;
  526. bottom_blobs_unpacked[1] = reference_blob_unpacked;
  527. return Crop::forward(bottom_blobs_unpacked, top_blobs, opt);
  528. }
  529. } // namespace ncnn