You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mat.cpp 40 kB

adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "mat.h"
  15. #if __ARM_NEON
  16. #include <arm_neon.h>
  17. #endif // __ARM_NEON
  18. #include "cpu.h"
  19. #include "layer.h"
  20. #include "layer_type.h"
  21. #include <math.h>
  22. #if NCNN_VULKAN
  23. #if NCNN_PLATFORM_API
  24. #if __ANDROID_API__ >= 26
  25. #include <android/hardware_buffer.h>
  26. #endif // __ANDROID_API__ >= 26
  27. #endif // NCNN_PLATFORM_API
  28. #endif // NCNN_VULKAN
  29. namespace ncnn {
  30. Mat Mat::clone(Allocator* _allocator) const
  31. {
  32. if (empty())
  33. return Mat();
  34. Mat m;
  35. if (dims == 1)
  36. m.create(w, elemsize, elempack, _allocator);
  37. else if (dims == 2)
  38. m.create(w, h, elemsize, elempack, _allocator);
  39. else if (dims == 3)
  40. m.create(w, h, c, elemsize, elempack, _allocator);
  41. else if (dims == 4)
  42. m.create(w, h, d, c, elemsize, elempack, _allocator);
  43. if (total() > 0)
  44. {
  45. if (cstep == m.cstep)
  46. memcpy(m.data, data, total() * elemsize);
  47. else
  48. {
  49. // copy by channel for differnet cstep
  50. size_t size = (size_t)w * h * d * elemsize;
  51. for (int i = 0; i < c; i++)
  52. {
  53. memcpy(m.channel(i), channel(i), size);
  54. }
  55. }
  56. }
  57. return m;
  58. }
  59. void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator)
  60. {
  61. *this = mat.clone(allocator);
  62. }
  63. Mat Mat::reshape(int _w, Allocator* _allocator) const
  64. {
  65. if (w * h * d * c != _w)
  66. return Mat();
  67. if (dims >= 3 && cstep != (size_t)w * h * d)
  68. {
  69. Mat m;
  70. m.create(_w, elemsize, elempack, _allocator);
  71. // flatten
  72. for (int i = 0; i < c; i++)
  73. {
  74. const void* ptr = (unsigned char*)data + i * cstep * elemsize;
  75. void* mptr = (unsigned char*)m.data + (size_t)i * w * h * d * elemsize;
  76. memcpy(mptr, ptr, (size_t)w * h * d * elemsize);
  77. }
  78. return m;
  79. }
  80. Mat m = *this;
  81. m.dims = 1;
  82. m.w = _w;
  83. m.h = 1;
  84. m.d = 1;
  85. m.c = 1;
  86. m.cstep = _w;
  87. return m;
  88. }
  89. Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const
  90. {
  91. if (w * h * d * c != _w * _h)
  92. return Mat();
  93. if (dims >= 3 && cstep != (size_t)w * h * d)
  94. {
  95. Mat m;
  96. m.create(_w, _h, elemsize, elempack, _allocator);
  97. // flatten
  98. for (int i = 0; i < c; i++)
  99. {
  100. const void* ptr = (unsigned char*)data + i * cstep * elemsize;
  101. void* mptr = (unsigned char*)m.data + (size_t)i * w * h * d * elemsize;
  102. memcpy(mptr, ptr, (size_t)w * h * d * elemsize);
  103. }
  104. return m;
  105. }
  106. Mat m = *this;
  107. m.dims = 2;
  108. m.w = _w;
  109. m.h = _h;
  110. m.d = 1;
  111. m.c = 1;
  112. m.cstep = (size_t)_w * _h;
  113. return m;
  114. }
  115. Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const
  116. {
  117. if (w * h * d * c != _w * _h * _c)
  118. return Mat();
  119. if (dims < 3)
  120. {
  121. if ((size_t)_w * _h != alignSize((size_t)_w * _h * elemsize, 16) / elemsize)
  122. {
  123. Mat m;
  124. m.create(_w, _h, _c, elemsize, elempack, _allocator);
  125. // align channel
  126. for (int i = 0; i < _c; i++)
  127. {
  128. const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * elemsize;
  129. void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
  130. memcpy(mptr, ptr, (size_t)_w * _h * elemsize);
  131. }
  132. return m;
  133. }
  134. }
  135. else if (c != _c)
  136. {
  137. // flatten and then align
  138. Mat tmp = reshape(_w * _h * _c, _allocator);
  139. return tmp.reshape(_w, _h, _c, _allocator);
  140. }
  141. Mat m = *this;
  142. m.dims = 3;
  143. m.w = _w;
  144. m.h = _h;
  145. m.d = 1;
  146. m.c = _c;
  147. m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize;
  148. return m;
  149. }
  150. Mat Mat::reshape(int _w, int _h, int _d, int _c, Allocator* _allocator) const
  151. {
  152. if (w * h * d * c != _w * _h * _d * _c)
  153. return Mat();
  154. if (dims < 3)
  155. {
  156. if ((size_t)_w * _h * _d != alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize)
  157. {
  158. Mat m;
  159. m.create(_w, _h, _d, _c, elemsize, elempack, _allocator);
  160. // align channel
  161. for (int i = 0; i < _c; i++)
  162. {
  163. const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * _d * elemsize;
  164. void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
  165. memcpy(mptr, ptr, (size_t)_w * _h * _d * elemsize);
  166. }
  167. return m;
  168. }
  169. }
  170. else if (c != _c)
  171. {
  172. // flatten and then align
  173. Mat tmp = reshape(_w * _h * _d * _c, _allocator);
  174. return tmp.reshape(_w, _h, _d, _c, _allocator);
  175. }
  176. Mat m = *this;
  177. m.dims = 4;
  178. m.w = _w;
  179. m.h = _h;
  180. m.d = _d;
  181. m.c = _c;
  182. m.cstep = alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize;
  183. return m;
  184. }
  185. void Mat::create(int _w, size_t _elemsize, Allocator* _allocator)
  186. {
  187. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  188. return;
  189. release();
  190. elemsize = _elemsize;
  191. elempack = 1;
  192. allocator = _allocator;
  193. dims = 1;
  194. w = _w;
  195. h = 1;
  196. d = 1;
  197. c = 1;
  198. cstep = w;
  199. if (total() > 0)
  200. {
  201. size_t totalsize = alignSize(total() * elemsize, 4);
  202. if (allocator)
  203. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  204. else
  205. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  206. refcount = (int*)(((unsigned char*)data) + totalsize);
  207. *refcount = 1;
  208. }
  209. }
  210. void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator)
  211. {
  212. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  213. return;
  214. release();
  215. elemsize = _elemsize;
  216. elempack = 1;
  217. allocator = _allocator;
  218. dims = 2;
  219. w = _w;
  220. h = _h;
  221. d = 1;
  222. c = 1;
  223. cstep = (size_t)w * h;
  224. if (total() > 0)
  225. {
  226. size_t totalsize = alignSize(total() * elemsize, 4);
  227. if (allocator)
  228. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  229. else
  230. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  231. refcount = (int*)(((unsigned char*)data) + totalsize);
  232. *refcount = 1;
  233. }
  234. }
  235. void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
  236. {
  237. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  238. return;
  239. release();
  240. elemsize = _elemsize;
  241. elempack = 1;
  242. allocator = _allocator;
  243. dims = 3;
  244. w = _w;
  245. h = _h;
  246. d = 1;
  247. c = _c;
  248. cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
  249. if (total() > 0)
  250. {
  251. size_t totalsize = alignSize(total() * elemsize, 4);
  252. if (allocator)
  253. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  254. else
  255. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  256. refcount = (int*)(((unsigned char*)data) + totalsize);
  257. *refcount = 1;
  258. }
  259. }
  260. void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
  261. {
  262. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  263. return;
  264. release();
  265. elemsize = _elemsize;
  266. elempack = 1;
  267. allocator = _allocator;
  268. dims = 4;
  269. w = _w;
  270. h = _h;
  271. d = _d;
  272. c = _c;
  273. cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
  274. if (total() > 0)
  275. {
  276. size_t totalsize = alignSize(total() * elemsize, 4);
  277. if (allocator)
  278. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  279. else
  280. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  281. refcount = (int*)(((unsigned char*)data) + totalsize);
  282. *refcount = 1;
  283. }
  284. }
  285. void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
  286. {
  287. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  288. return;
  289. release();
  290. elemsize = _elemsize;
  291. elempack = _elempack;
  292. allocator = _allocator;
  293. dims = 1;
  294. w = _w;
  295. h = 1;
  296. d = 1;
  297. c = 1;
  298. cstep = w;
  299. if (total() > 0)
  300. {
  301. size_t totalsize = alignSize(total() * elemsize, 4);
  302. if (allocator)
  303. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  304. else
  305. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  306. refcount = (int*)(((unsigned char*)data) + totalsize);
  307. *refcount = 1;
  308. }
  309. }
  310. void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
  311. {
  312. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  313. return;
  314. release();
  315. elemsize = _elemsize;
  316. elempack = _elempack;
  317. allocator = _allocator;
  318. dims = 2;
  319. w = _w;
  320. h = _h;
  321. d = 1;
  322. c = 1;
  323. cstep = (size_t)w * h;
  324. if (total() > 0)
  325. {
  326. size_t totalsize = alignSize(total() * elemsize, 4);
  327. if (allocator)
  328. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  329. else
  330. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  331. refcount = (int*)(((unsigned char*)data) + totalsize);
  332. *refcount = 1;
  333. }
  334. }
  335. void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
  336. {
  337. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  338. return;
  339. release();
  340. elemsize = _elemsize;
  341. elempack = _elempack;
  342. allocator = _allocator;
  343. dims = 3;
  344. w = _w;
  345. h = _h;
  346. d = 1;
  347. c = _c;
  348. cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
  349. if (total() > 0)
  350. {
  351. size_t totalsize = alignSize(total() * elemsize, 4);
  352. if (allocator)
  353. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  354. else
  355. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  356. refcount = (int*)(((unsigned char*)data) + totalsize);
  357. *refcount = 1;
  358. }
  359. }
  360. void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
  361. {
  362. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  363. return;
  364. release();
  365. elemsize = _elemsize;
  366. elempack = _elempack;
  367. allocator = _allocator;
  368. dims = 4;
  369. w = _w;
  370. h = _h;
  371. d = _d;
  372. c = _c;
  373. cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
  374. if (total() > 0)
  375. {
  376. size_t totalsize = alignSize(total() * elemsize, 4);
  377. if (allocator)
  378. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  379. else
  380. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  381. refcount = (int*)(((unsigned char*)data) + totalsize);
  382. *refcount = 1;
  383. }
  384. }
  385. void Mat::create_like(const Mat& m, Allocator* _allocator)
  386. {
  387. int _dims = m.dims;
  388. if (_dims == 1)
  389. create(m.w, m.elemsize, m.elempack, _allocator);
  390. if (_dims == 2)
  391. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  392. if (_dims == 3)
  393. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  394. if (_dims == 4)
  395. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  396. }
  397. #if NCNN_VULKAN
  398. void Mat::create_like(const VkMat& m, Allocator* _allocator)
  399. {
  400. int _dims = m.dims;
  401. if (_dims == 1)
  402. create(m.w, m.elemsize, m.elempack, _allocator);
  403. if (_dims == 2)
  404. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  405. if (_dims == 3)
  406. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  407. if (_dims == 4)
  408. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  409. }
  410. void Mat::create_like(const VkImageMat& im, Allocator* _allocator)
  411. {
  412. int _dims = im.dims;
  413. if (_dims == 1)
  414. create(im.w, im.elemsize, im.elempack, _allocator);
  415. if (_dims == 2)
  416. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  417. if (_dims == 3)
  418. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  419. if (_dims == 4)
  420. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  421. }
  422. #endif // NCNN_VULKAN
  423. #if NCNN_VULKAN
  424. void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
  425. {
  426. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  427. return;
  428. release();
  429. elemsize = _elemsize;
  430. elempack = 1;
  431. allocator = _allocator;
  432. dims = 1;
  433. w = _w;
  434. h = 1;
  435. d = 1;
  436. c = 1;
  437. cstep = w;
  438. if (total() > 0)
  439. {
  440. size_t totalsize = alignSize(total() * elemsize, 4);
  441. data = allocator->fastMalloc(totalsize);
  442. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  443. *refcount = 1;
  444. }
  445. }
  446. void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
  447. {
  448. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  449. return;
  450. release();
  451. elemsize = _elemsize;
  452. elempack = 1;
  453. allocator = _allocator;
  454. dims = 2;
  455. w = _w;
  456. h = _h;
  457. d = 1;
  458. c = 1;
  459. cstep = w * h;
  460. if (total() > 0)
  461. {
  462. size_t totalsize = alignSize(total() * elemsize, 4);
  463. data = allocator->fastMalloc(totalsize);
  464. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  465. *refcount = 1;
  466. }
  467. }
  468. void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
  469. {
  470. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  471. return;
  472. release();
  473. elemsize = _elemsize;
  474. elempack = 1;
  475. allocator = _allocator;
  476. dims = 3;
  477. w = _w;
  478. h = _h;
  479. d = 1;
  480. c = _c;
  481. cstep = alignSize(w * h * elemsize, 16) / elemsize;
  482. if (total() > 0)
  483. {
  484. size_t totalsize = alignSize(total() * elemsize, 4);
  485. data = allocator->fastMalloc(totalsize);
  486. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  487. *refcount = 1;
  488. }
  489. }
  490. void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
  491. {
  492. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  493. return;
  494. release();
  495. elemsize = _elemsize;
  496. elempack = 1;
  497. allocator = _allocator;
  498. dims = 4;
  499. w = _w;
  500. h = _h;
  501. d = _d;
  502. c = _c;
  503. cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
  504. if (total() > 0)
  505. {
  506. size_t totalsize = alignSize(total() * elemsize, 4);
  507. data = allocator->fastMalloc(totalsize);
  508. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  509. *refcount = 1;
  510. }
  511. }
  512. void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  513. {
  514. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  515. return;
  516. release();
  517. elemsize = _elemsize;
  518. elempack = _elempack;
  519. allocator = _allocator;
  520. dims = 1;
  521. w = _w;
  522. h = 1;
  523. d = 1;
  524. c = 1;
  525. cstep = w;
  526. if (total() > 0)
  527. {
  528. size_t totalsize = alignSize(total() * elemsize, 4);
  529. data = allocator->fastMalloc(totalsize);
  530. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  531. *refcount = 1;
  532. }
  533. }
  534. void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  535. {
  536. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  537. return;
  538. release();
  539. elemsize = _elemsize;
  540. elempack = _elempack;
  541. allocator = _allocator;
  542. dims = 2;
  543. w = _w;
  544. h = _h;
  545. d = 1;
  546. c = 1;
  547. cstep = w * h;
  548. if (total() > 0)
  549. {
  550. size_t totalsize = alignSize(total() * elemsize, 4);
  551. data = allocator->fastMalloc(totalsize);
  552. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  553. *refcount = 1;
  554. }
  555. }
  556. void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  557. {
  558. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  559. return;
  560. release();
  561. elemsize = _elemsize;
  562. elempack = _elempack;
  563. allocator = _allocator;
  564. dims = 3;
  565. w = _w;
  566. h = _h;
  567. d = 1;
  568. c = _c;
  569. cstep = alignSize(w * h * elemsize, 16) / elemsize;
  570. if (total() > 0)
  571. {
  572. size_t totalsize = alignSize(total() * elemsize, 4);
  573. data = allocator->fastMalloc(totalsize);
  574. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  575. *refcount = 1;
  576. }
  577. }
  578. void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  579. {
  580. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  581. return;
  582. release();
  583. elemsize = _elemsize;
  584. elempack = _elempack;
  585. allocator = _allocator;
  586. dims = 4;
  587. w = _w;
  588. h = _h;
  589. d = _d;
  590. c = _c;
  591. cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
  592. if (total() > 0)
  593. {
  594. size_t totalsize = alignSize(total() * elemsize, 4);
  595. data = allocator->fastMalloc(totalsize);
  596. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  597. *refcount = 1;
  598. }
  599. }
  600. void VkMat::create_like(const Mat& m, VkAllocator* _allocator)
  601. {
  602. int _dims = m.dims;
  603. if (_dims == 1)
  604. create(m.w, m.elemsize, m.elempack, _allocator);
  605. if (_dims == 2)
  606. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  607. if (_dims == 3)
  608. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  609. if (_dims == 4)
  610. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  611. }
  612. void VkMat::create_like(const VkMat& m, VkAllocator* _allocator)
  613. {
  614. int _dims = m.dims;
  615. if (_dims == 1)
  616. create(m.w, m.elemsize, m.elempack, _allocator);
  617. if (_dims == 2)
  618. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  619. if (_dims == 3)
  620. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  621. if (_dims == 4)
  622. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  623. }
  624. void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
  625. {
  626. int _dims = im.dims;
  627. if (_dims == 1)
  628. create(im.w, im.elemsize, im.elempack, _allocator);
  629. if (_dims == 2)
  630. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  631. if (_dims == 3)
  632. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  633. if (_dims == 4)
  634. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  635. }
  636. void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
  637. {
  638. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  639. return;
  640. release();
  641. elemsize = _elemsize;
  642. elempack = 1;
  643. allocator = _allocator;
  644. dims = 1;
  645. w = _w;
  646. h = 1;
  647. d = 1;
  648. c = 1;
  649. if (total() > 0)
  650. {
  651. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  652. if (!data)
  653. return;
  654. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  655. *refcount = 1;
  656. }
  657. }
  658. void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
  659. {
  660. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  661. return;
  662. release();
  663. elemsize = _elemsize;
  664. elempack = 1;
  665. allocator = _allocator;
  666. dims = 2;
  667. w = _w;
  668. h = _h;
  669. d = 1;
  670. c = 1;
  671. if (total() > 0)
  672. {
  673. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  674. if (!data)
  675. return;
  676. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  677. *refcount = 1;
  678. }
  679. }
  680. void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
  681. {
  682. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  683. return;
  684. release();
  685. elemsize = _elemsize;
  686. elempack = 1;
  687. allocator = _allocator;
  688. dims = 3;
  689. w = _w;
  690. h = _h;
  691. d = 1;
  692. c = _c;
  693. if (total() > 0)
  694. {
  695. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  696. if (!data)
  697. return;
  698. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  699. *refcount = 1;
  700. }
  701. }
  702. void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
  703. {
  704. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  705. return;
  706. release();
  707. elemsize = _elemsize;
  708. elempack = 1;
  709. allocator = _allocator;
  710. dims = 4;
  711. w = _w;
  712. h = _h;
  713. d = _d;
  714. c = _c;
  715. if (total() > 0)
  716. {
  717. // underlying image is 3d
  718. data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
  719. if (!data)
  720. return;
  721. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  722. *refcount = 1;
  723. }
  724. }
  725. void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  726. {
  727. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  728. return;
  729. release();
  730. elemsize = _elemsize;
  731. elempack = _elempack;
  732. allocator = _allocator;
  733. dims = 1;
  734. w = _w;
  735. h = 1;
  736. d = 1;
  737. c = 1;
  738. if (total() > 0)
  739. {
  740. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  741. if (!data)
  742. return;
  743. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  744. *refcount = 1;
  745. }
  746. }
  747. void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  748. {
  749. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  750. return;
  751. release();
  752. elemsize = _elemsize;
  753. elempack = _elempack;
  754. allocator = _allocator;
  755. dims = 2;
  756. w = _w;
  757. h = _h;
  758. d = 1;
  759. c = 1;
  760. if (total() > 0)
  761. {
  762. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  763. if (!data)
  764. return;
  765. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  766. *refcount = 1;
  767. }
  768. }
  769. void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  770. {
  771. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  772. return;
  773. release();
  774. elemsize = _elemsize;
  775. elempack = _elempack;
  776. allocator = _allocator;
  777. dims = 3;
  778. w = _w;
  779. h = _h;
  780. d = 1;
  781. c = _c;
  782. if (total() > 0)
  783. {
  784. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  785. if (!data)
  786. return;
  787. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  788. *refcount = 1;
  789. }
  790. }
  791. void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  792. {
  793. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  794. return;
  795. release();
  796. elemsize = _elemsize;
  797. elempack = _elempack;
  798. allocator = _allocator;
  799. dims = 4;
  800. w = _w;
  801. h = _h;
  802. d = _d;
  803. c = _c;
  804. if (total() > 0)
  805. {
  806. // underlying image is 3d
  807. data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
  808. if (!data)
  809. return;
  810. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  811. *refcount = 1;
  812. }
  813. }
  814. void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator)
  815. {
  816. int _dims = m.dims;
  817. if (_dims == 1)
  818. create(m.w, m.elemsize, m.elempack, _allocator);
  819. if (_dims == 2)
  820. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  821. if (_dims == 3)
  822. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  823. if (_dims == 4)
  824. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  825. }
  826. void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator)
  827. {
  828. int _dims = m.dims;
  829. if (_dims == 1)
  830. create(m.w, m.elemsize, m.elempack, _allocator);
  831. if (_dims == 2)
  832. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  833. if (_dims == 3)
  834. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  835. if (_dims == 4)
  836. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  837. }
  838. void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
  839. {
  840. int _dims = im.dims;
  841. if (_dims == 1)
  842. create(im.w, im.elemsize, im.elempack, _allocator);
  843. if (_dims == 2)
  844. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  845. if (_dims == 3)
  846. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  847. if (_dims == 4)
  848. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  849. }
  850. #endif // NCNN_VULKAN
  851. void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_vals)
  852. {
  853. Layer* op;
  854. if (mean_vals && !norm_vals)
  855. {
  856. // substract mean only
  857. op = create_layer(LayerType::Bias);
  858. ParamDict pd;
  859. pd.set(0, c);
  860. op->load_param(pd);
  861. Mat weights[1];
  862. weights[0] = Mat(c);
  863. for (int q = 0; q < c; q++)
  864. {
  865. weights[0][q] = -mean_vals[q];
  866. }
  867. op->load_model(ModelBinFromMatArray(weights));
  868. }
  869. else if (!mean_vals && norm_vals)
  870. {
  871. // normalize only
  872. op = create_layer(LayerType::Scale);
  873. ParamDict pd;
  874. pd.set(0, c);
  875. op->load_param(pd);
  876. Mat weights[1];
  877. weights[0] = Mat(c);
  878. for (int q = 0; q < c; q++)
  879. {
  880. weights[0][q] = norm_vals[q];
  881. }
  882. op->load_model(ModelBinFromMatArray(weights));
  883. }
  884. else if (mean_vals && norm_vals)
  885. {
  886. // substract mean and normalize
  887. op = create_layer(LayerType::Scale);
  888. ParamDict pd;
  889. pd.set(0, c);
  890. pd.set(1, 1);
  891. op->load_param(pd);
  892. Mat weights[2];
  893. weights[0] = Mat(c);
  894. weights[1] = Mat(c);
  895. for (int q = 0; q < c; q++)
  896. {
  897. weights[0][q] = norm_vals[q];
  898. weights[1][q] = -mean_vals[q] * norm_vals[q];
  899. }
  900. op->load_model(ModelBinFromMatArray(weights));
  901. }
  902. else // if (!mean_vals && !norm_vals)
  903. {
  904. return;
  905. }
  906. Option opt;
  907. opt.num_threads = 1; // TODO
  908. op->create_pipeline(opt);
  909. op->forward_inplace(*this, opt);
  910. op->destroy_pipeline(opt);
  911. delete op;
  912. }
  913. Mat Mat::from_float16(const unsigned short* data, int size)
  914. {
  915. Mat m(size);
  916. if (m.empty())
  917. return m;
  918. float* ptr = m; //.data;
  919. #if __ARM_NEON && (__ARM_FP & 2)
  920. int nn = cpu_support_arm_vfpv4() ? size >> 2 : 0;
  921. int remain = size - (nn << 2);
  922. #else
  923. int remain = size;
  924. #endif // __ARM_NEON
  925. #if __ARM_NEON && (__ARM_FP & 2)
  926. #if __aarch64__
  927. if (nn > 0)
  928. {
  929. asm volatile(
  930. "0: \n"
  931. "ld1 {v0.4h}, [%1], #8 \n"
  932. "fcvtl v1.4s, v0.4h \n"
  933. "subs %w0, %w0, #1 \n"
  934. "st1 {v1.4s}, [%2], #16 \n"
  935. "bne 0b \n"
  936. : "=r"(nn), // %0
  937. "=r"(data), // %1
  938. "=r"(ptr) // %2
  939. : "0"(nn),
  940. "1"(data),
  941. "2"(ptr)
  942. : "cc", "memory", "v0", "v1");
  943. }
  944. #else
  945. if (nn > 0)
  946. {
  947. asm volatile(
  948. "0: \n"
  949. "pld [%1, #64] \n"
  950. "vld1.s16 {d0}, [%1 :64]! \n"
  951. "vcvt.f32.f16 q1, d0 \n"
  952. "subs %0, #1 \n"
  953. "vst1.f32 {d2-d3}, [%2 :128]! \n"
  954. "bne 0b \n"
  955. : "=r"(nn), // %0
  956. "=r"(data), // %1
  957. "=r"(ptr) // %2
  958. : "0"(nn),
  959. "1"(data),
  960. "2"(ptr)
  961. : "cc", "memory", "q0", "q1");
  962. }
  963. #endif // __aarch64__
  964. #endif // __ARM_NEON
  965. for (; remain > 0; remain--)
  966. {
  967. *ptr = float16_to_float32(*data);
  968. data++;
  969. ptr++;
  970. }
  971. return m;
  972. }
  973. #if NCNN_VULKAN
  974. #if NCNN_PLATFORM_API
  975. #if __ANDROID_API__ >= 26
  976. VkImageMat VkImageMat::from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator)
  977. {
  978. int width = allocator->width();
  979. int height = allocator->height();
  980. return VkImageMat(width, height, allocator);
  981. }
  982. #endif // __ANDROID_API__ >= 26
  983. #endif // NCNN_PLATFORM_API
  984. #endif // NCNN_VULKAN
  985. unsigned short float32_to_float16(float value)
  986. {
  987. // 1 : 8 : 23
  988. union
  989. {
  990. unsigned int u;
  991. float f;
  992. } tmp;
  993. tmp.f = value;
  994. // 1 : 8 : 23
  995. unsigned short sign = (tmp.u & 0x80000000) >> 31;
  996. unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
  997. unsigned int significand = tmp.u & 0x7FFFFF;
  998. // NCNN_LOGE("%d %d %d", sign, exponent, significand);
  999. // 1 : 5 : 10
  1000. unsigned short fp16;
  1001. if (exponent == 0)
  1002. {
  1003. // zero or denormal, always underflow
  1004. fp16 = (sign << 15) | (0x00 << 10) | 0x00;
  1005. }
  1006. else if (exponent == 0xFF)
  1007. {
  1008. // infinity or NaN
  1009. fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
  1010. }
  1011. else
  1012. {
  1013. // normalized
  1014. short newexp = exponent + (-127 + 15);
  1015. if (newexp >= 31)
  1016. {
  1017. // overflow, return infinity
  1018. fp16 = (sign << 15) | (0x1F << 10) | 0x00;
  1019. }
  1020. else if (newexp <= 0)
  1021. {
  1022. // Some normal fp32 cannot be expressed as normal fp16
  1023. fp16 = (sign << 15) | (0x00 << 10) | 0x00;
  1024. }
  1025. else
  1026. {
  1027. // normal fp16
  1028. fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
  1029. }
  1030. }
  1031. return fp16;
  1032. }
  1033. float float16_to_float32(unsigned short value)
  1034. {
  1035. // 1 : 5 : 10
  1036. unsigned short sign = (value & 0x8000) >> 15;
  1037. unsigned short exponent = (value & 0x7c00) >> 10;
  1038. unsigned short significand = value & 0x03FF;
  1039. // NCNN_LOGE("%d %d %d", sign, exponent, significand);
  1040. // 1 : 8 : 23
  1041. union
  1042. {
  1043. unsigned int u;
  1044. float f;
  1045. } tmp;
  1046. if (exponent == 0)
  1047. {
  1048. if (significand == 0)
  1049. {
  1050. // zero
  1051. tmp.u = (sign << 31);
  1052. }
  1053. else
  1054. {
  1055. // denormal
  1056. exponent = 0;
  1057. // find non-zero bit
  1058. while ((significand & 0x200) == 0)
  1059. {
  1060. significand <<= 1;
  1061. exponent++;
  1062. }
  1063. significand <<= 1;
  1064. significand &= 0x3FF;
  1065. tmp.u = (sign << 31) | ((-exponent + (-15 + 127)) << 23) | (significand << 13);
  1066. }
  1067. }
  1068. else if (exponent == 0x1F)
  1069. {
  1070. // infinity or NaN
  1071. tmp.u = (sign << 31) | (0xFF << 23) | (significand << 13);
  1072. }
  1073. else
  1074. {
  1075. // normalized
  1076. tmp.u = (sign << 31) | ((exponent + (-15 + 127)) << 23) | (significand << 13);
  1077. }
  1078. return tmp.f;
  1079. }
  1080. void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt)
  1081. {
  1082. Layer* padding = create_layer(LayerType::Padding);
  1083. ParamDict pd;
  1084. pd.set(0, top);
  1085. pd.set(1, bottom);
  1086. pd.set(2, left);
  1087. pd.set(3, right);
  1088. pd.set(4, type);
  1089. pd.set(5, v);
  1090. padding->load_param(pd);
  1091. padding->create_pipeline(opt);
  1092. padding->forward(src, dst, opt);
  1093. padding->destroy_pipeline(opt);
  1094. delete padding;
  1095. }
  1096. void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt)
  1097. {
  1098. Layer* padding = create_layer(LayerType::Padding);
  1099. ParamDict pd;
  1100. pd.set(0, top);
  1101. pd.set(1, bottom);
  1102. pd.set(2, left);
  1103. pd.set(3, right);
  1104. pd.set(4, type);
  1105. pd.set(5, v);
  1106. pd.set(7, front);
  1107. pd.set(8, behind);
  1108. padding->load_param(pd);
  1109. padding->create_pipeline(opt);
  1110. padding->forward(src, dst, opt);
  1111. padding->destroy_pipeline(opt);
  1112. delete padding;
  1113. }
  1114. void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt)
  1115. {
  1116. if (left + right > src.w || top + bottom > src.h)
  1117. {
  1118. NCNN_LOGE("copy_cut_border parameter error, top: %d, bottom: %d, left: %d, right: %d, src.w: %d, src.h: %d", top, bottom, left, right, src.w, src.h);
  1119. return;
  1120. }
  1121. Layer* crop = create_layer(LayerType::Crop);
  1122. ParamDict pd;
  1123. pd.set(0, left);
  1124. pd.set(1, top);
  1125. pd.set(2, 0);
  1126. pd.set(3, src.w - left - right);
  1127. pd.set(4, src.h - top - bottom);
  1128. pd.set(5, -233);
  1129. crop->load_param(pd);
  1130. crop->create_pipeline(opt);
  1131. crop->forward(src, dst, opt);
  1132. crop->destroy_pipeline(opt);
  1133. delete crop;
  1134. }
  1135. void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1136. {
  1137. Layer* interp = create_layer(LayerType::Interp);
  1138. ParamDict pd;
  1139. pd.set(0, 1);
  1140. pd.set(3, h);
  1141. pd.set(4, w);
  1142. interp->load_param(pd);
  1143. interp->create_pipeline(opt);
  1144. interp->forward(src, dst, opt);
  1145. interp->destroy_pipeline(opt);
  1146. delete interp;
  1147. }
  1148. void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1149. {
  1150. Layer* interp = create_layer(LayerType::Interp);
  1151. ParamDict pd;
  1152. pd.set(0, 2);
  1153. pd.set(3, h);
  1154. pd.set(4, w);
  1155. interp->load_param(pd);
  1156. interp->create_pipeline(opt);
  1157. interp->forward(src, dst, opt);
  1158. interp->destroy_pipeline(opt);
  1159. delete interp;
  1160. }
  1161. void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1162. {
  1163. Layer* interp = create_layer(LayerType::Interp);
  1164. ParamDict pd;
  1165. pd.set(0, 3);
  1166. pd.set(3, h);
  1167. pd.set(4, w);
  1168. interp->load_param(pd);
  1169. interp->create_pipeline(opt);
  1170. interp->forward(src, dst, opt);
  1171. interp->destroy_pipeline(opt);
  1172. delete interp;
  1173. }
  1174. void convert_packing(const Mat& src, Mat& dst, int _elempack, const Option& opt)
  1175. {
  1176. Layer* packing = create_layer(LayerType::Packing);
  1177. ParamDict pd;
  1178. pd.set(0, _elempack);
  1179. packing->load_param(pd);
  1180. packing->create_pipeline(opt);
  1181. packing->forward(src, dst, opt);
  1182. packing->destroy_pipeline(opt);
  1183. delete packing;
  1184. }
  1185. void flatten(const Mat& src, Mat& dst, const Option& opt)
  1186. {
  1187. Layer* flatten = create_layer(LayerType::Flatten);
  1188. ParamDict pd;
  1189. flatten->load_param(pd);
  1190. flatten->create_pipeline(opt);
  1191. flatten->forward(src, dst, opt);
  1192. flatten->destroy_pipeline(opt);
  1193. delete flatten;
  1194. }
  1195. void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt)
  1196. {
  1197. Layer* cast = create_layer(LayerType::Cast);
  1198. ParamDict pd;
  1199. pd.set(0, 1);
  1200. pd.set(1, 2);
  1201. cast->load_param(pd);
  1202. cast->create_pipeline(opt);
  1203. cast->forward(src, dst, opt);
  1204. cast->destroy_pipeline(opt);
  1205. delete cast;
  1206. }
  1207. void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1208. {
  1209. Layer* cast = create_layer(LayerType::Cast);
  1210. ParamDict pd;
  1211. pd.set(0, 2);
  1212. pd.set(1, 1);
  1213. cast->load_param(pd);
  1214. cast->create_pipeline(opt);
  1215. cast->forward(src, dst, opt);
  1216. cast->destroy_pipeline(opt);
  1217. delete cast;
  1218. }
  1219. void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1220. {
  1221. Layer* cast = create_layer(LayerType::Cast);
  1222. ParamDict pd;
  1223. pd.set(0, 3);
  1224. pd.set(1, 1);
  1225. cast->load_param(pd);
  1226. cast->create_pipeline(opt);
  1227. cast->forward(src, dst, opt);
  1228. cast->destroy_pipeline(opt);
  1229. delete cast;
  1230. }
  1231. void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt)
  1232. {
  1233. Layer* cast = create_layer(LayerType::Cast);
  1234. ParamDict pd;
  1235. pd.set(0, 1);
  1236. pd.set(1, 4);
  1237. cast->load_param(pd);
  1238. cast->create_pipeline(opt);
  1239. cast->forward(src, dst, opt);
  1240. cast->destroy_pipeline(opt);
  1241. delete cast;
  1242. }
  1243. void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1244. {
  1245. Layer* cast = create_layer(LayerType::Cast);
  1246. ParamDict pd;
  1247. pd.set(0, 4);
  1248. pd.set(1, 1);
  1249. cast->load_param(pd);
  1250. cast->create_pipeline(opt);
  1251. cast->forward(src, dst, opt);
  1252. cast->destroy_pipeline(opt);
  1253. delete cast;
  1254. }
  1255. void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt)
  1256. {
  1257. Layer* quantize = create_layer(LayerType::Quantize);
  1258. ParamDict pd;
  1259. pd.set(0, scale_data.w);
  1260. quantize->load_param(pd);
  1261. Mat weights[1];
  1262. weights[0] = scale_data;
  1263. quantize->load_model(ModelBinFromMatArray(weights));
  1264. quantize->create_pipeline(opt);
  1265. quantize->forward(src, dst, opt);
  1266. quantize->destroy_pipeline(opt);
  1267. delete quantize;
  1268. }
  1269. void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt)
  1270. {
  1271. Layer* dequantize = create_layer(LayerType::Dequantize);
  1272. ParamDict pd;
  1273. pd.set(0, scale_data.w);
  1274. pd.set(1, bias_data.w);
  1275. dequantize->load_param(pd);
  1276. Mat weights[2];
  1277. weights[0] = scale_data;
  1278. weights[1] = bias_data;
  1279. dequantize->load_model(ModelBinFromMatArray(weights));
  1280. dequantize->create_pipeline(opt);
  1281. dequantize->forward(src, dst, opt);
  1282. dequantize->destroy_pipeline(opt);
  1283. delete dequantize;
  1284. }
  1285. void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt)
  1286. {
  1287. Layer* requantize = create_layer(LayerType::Requantize);
  1288. ParamDict pd;
  1289. pd.set(0, scale_in_data.w);
  1290. pd.set(1, scale_out_data.w);
  1291. pd.set(2, bias_data.w);
  1292. pd.set(3, activation_type);
  1293. pd.set(4, activation_params);
  1294. requantize->load_param(pd);
  1295. Mat weights[3];
  1296. weights[0] = scale_in_data;
  1297. weights[1] = scale_out_data;
  1298. weights[2] = bias_data;
  1299. requantize->load_model(ModelBinFromMatArray(weights));
  1300. requantize->create_pipeline(opt);
  1301. requantize->forward(src, dst, opt);
  1302. requantize->destroy_pipeline(opt);
  1303. delete requantize;
  1304. }
  1305. } // namespace ncnn