You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mat.cpp 40 kB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "mat.h"
  15. #include "layer.h"
  16. #include "layer_type.h"
  17. #if NCNN_VULKAN
  18. #if NCNN_PLATFORM_API
  19. #if __ANDROID_API__ >= 26
  20. #include <android/hardware_buffer.h>
  21. #endif // __ANDROID_API__ >= 26
  22. #endif // NCNN_PLATFORM_API
  23. #endif // NCNN_VULKAN
  24. namespace ncnn {
  25. Mat Mat::clone(Allocator* _allocator) const
  26. {
  27. if (empty())
  28. return Mat();
  29. Mat m;
  30. if (dims == 1)
  31. m.create(w, elemsize, elempack, _allocator);
  32. else if (dims == 2)
  33. m.create(w, h, elemsize, elempack, _allocator);
  34. else if (dims == 3)
  35. m.create(w, h, c, elemsize, elempack, _allocator);
  36. else if (dims == 4)
  37. m.create(w, h, d, c, elemsize, elempack, _allocator);
  38. if (m.empty())
  39. return m;
  40. if (total() > 0)
  41. {
  42. if (cstep == m.cstep)
  43. memcpy(m.data, data, total() * elemsize);
  44. else
  45. {
  46. // copy by channel for differnet cstep
  47. size_t size = (size_t)w * h * d * elemsize;
  48. for (int i = 0; i < c; i++)
  49. {
  50. memcpy(m.channel(i), channel(i), size);
  51. }
  52. }
  53. }
  54. return m;
  55. }
  56. void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator)
  57. {
  58. *this = mat.clone(allocator);
  59. }
  60. Mat Mat::reshape(int _w, Allocator* _allocator) const
  61. {
  62. if (w * h * d * c != _w)
  63. return Mat();
  64. if (dims >= 3 && cstep != (size_t)w * h * d)
  65. {
  66. Mat m;
  67. m.create(_w, elemsize, elempack, _allocator);
  68. if (m.empty())
  69. return m;
  70. // flatten
  71. for (int i = 0; i < c; i++)
  72. {
  73. const void* ptr = (unsigned char*)data + i * cstep * elemsize;
  74. void* mptr = (unsigned char*)m.data + (size_t)i * w * h * d * elemsize;
  75. memcpy(mptr, ptr, (size_t)w * h * d * elemsize);
  76. }
  77. return m;
  78. }
  79. Mat m = *this;
  80. m.dims = 1;
  81. m.w = _w;
  82. m.h = 1;
  83. m.d = 1;
  84. m.c = 1;
  85. m.cstep = _w;
  86. return m;
  87. }
  88. Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const
  89. {
  90. if (w * h * d * c != _w * _h)
  91. return Mat();
  92. if (dims >= 3 && cstep != (size_t)w * h * d)
  93. {
  94. Mat m;
  95. m.create(_w, _h, elemsize, elempack, _allocator);
  96. if (m.empty())
  97. return m;
  98. // flatten
  99. for (int i = 0; i < c; i++)
  100. {
  101. const void* ptr = (unsigned char*)data + i * cstep * elemsize;
  102. void* mptr = (unsigned char*)m.data + (size_t)i * w * h * d * elemsize;
  103. memcpy(mptr, ptr, (size_t)w * h * d * elemsize);
  104. }
  105. return m;
  106. }
  107. Mat m = *this;
  108. m.dims = 2;
  109. m.w = _w;
  110. m.h = _h;
  111. m.d = 1;
  112. m.c = 1;
  113. m.cstep = (size_t)_w * _h;
  114. return m;
  115. }
  116. Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const
  117. {
  118. if (w * h * d * c != _w * _h * _c)
  119. return Mat();
  120. if (dims < 3)
  121. {
  122. if ((size_t)_w * _h != alignSize((size_t)_w * _h * elemsize, 16) / elemsize)
  123. {
  124. Mat m;
  125. m.create(_w, _h, _c, elemsize, elempack, _allocator);
  126. if (m.empty())
  127. return m;
  128. // align channel
  129. for (int i = 0; i < _c; i++)
  130. {
  131. const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * elemsize;
  132. void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
  133. memcpy(mptr, ptr, (size_t)_w * _h * elemsize);
  134. }
  135. return m;
  136. }
  137. }
  138. else if (c != _c)
  139. {
  140. // flatten and then align
  141. Mat tmp = reshape(_w * _h * _c, _allocator);
  142. return tmp.reshape(_w, _h, _c, _allocator);
  143. }
  144. Mat m = *this;
  145. m.dims = 3;
  146. m.w = _w;
  147. m.h = _h;
  148. m.d = 1;
  149. m.c = _c;
  150. m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize;
  151. return m;
  152. }
  153. Mat Mat::reshape(int _w, int _h, int _d, int _c, Allocator* _allocator) const
  154. {
  155. if (w * h * d * c != _w * _h * _d * _c)
  156. return Mat();
  157. if (dims < 3)
  158. {
  159. if ((size_t)_w * _h * _d != alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize)
  160. {
  161. Mat m;
  162. m.create(_w, _h, _d, _c, elemsize, elempack, _allocator);
  163. if (m.empty())
  164. return m;
  165. // align channel
  166. for (int i = 0; i < _c; i++)
  167. {
  168. const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * _d * elemsize;
  169. void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
  170. memcpy(mptr, ptr, (size_t)_w * _h * _d * elemsize);
  171. }
  172. return m;
  173. }
  174. }
  175. else if (c != _c)
  176. {
  177. // flatten and then align
  178. Mat tmp = reshape(_w * _h * _d * _c, _allocator);
  179. return tmp.reshape(_w, _h, _d, _c, _allocator);
  180. }
  181. Mat m = *this;
  182. m.dims = 4;
  183. m.w = _w;
  184. m.h = _h;
  185. m.d = _d;
  186. m.c = _c;
  187. m.cstep = alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize;
  188. return m;
  189. }
  190. void Mat::create(int _w, size_t _elemsize, Allocator* _allocator)
  191. {
  192. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  193. return;
  194. release();
  195. elemsize = _elemsize;
  196. elempack = 1;
  197. allocator = _allocator;
  198. dims = 1;
  199. w = _w;
  200. h = 1;
  201. d = 1;
  202. c = 1;
  203. cstep = w;
  204. size_t totalsize = alignSize(total() * elemsize, 4);
  205. if (totalsize > 0)
  206. {
  207. if (allocator)
  208. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  209. else
  210. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  211. }
  212. if (data)
  213. {
  214. refcount = (int*)(((unsigned char*)data) + totalsize);
  215. *refcount = 1;
  216. }
  217. }
  218. void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator)
  219. {
  220. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  221. return;
  222. release();
  223. elemsize = _elemsize;
  224. elempack = 1;
  225. allocator = _allocator;
  226. dims = 2;
  227. w = _w;
  228. h = _h;
  229. d = 1;
  230. c = 1;
  231. cstep = (size_t)w * h;
  232. size_t totalsize = alignSize(total() * elemsize, 4);
  233. if (totalsize > 0)
  234. {
  235. if (allocator)
  236. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  237. else
  238. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  239. }
  240. if (data)
  241. {
  242. refcount = (int*)(((unsigned char*)data) + totalsize);
  243. *refcount = 1;
  244. }
  245. }
  246. void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
  247. {
  248. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  249. return;
  250. release();
  251. elemsize = _elemsize;
  252. elempack = 1;
  253. allocator = _allocator;
  254. dims = 3;
  255. w = _w;
  256. h = _h;
  257. d = 1;
  258. c = _c;
  259. cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
  260. size_t totalsize = alignSize(total() * elemsize, 4);
  261. if (totalsize > 0)
  262. {
  263. if (allocator)
  264. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  265. else
  266. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  267. }
  268. if (data)
  269. {
  270. refcount = (int*)(((unsigned char*)data) + totalsize);
  271. *refcount = 1;
  272. }
  273. }
  274. void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
  275. {
  276. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  277. return;
  278. release();
  279. elemsize = _elemsize;
  280. elempack = 1;
  281. allocator = _allocator;
  282. dims = 4;
  283. w = _w;
  284. h = _h;
  285. d = _d;
  286. c = _c;
  287. cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
  288. size_t totalsize = alignSize(total() * elemsize, 4);
  289. if (totalsize > 0)
  290. {
  291. if (allocator)
  292. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  293. else
  294. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  295. }
  296. if (data)
  297. {
  298. refcount = (int*)(((unsigned char*)data) + totalsize);
  299. *refcount = 1;
  300. }
  301. }
  302. void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
  303. {
  304. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  305. return;
  306. release();
  307. elemsize = _elemsize;
  308. elempack = _elempack;
  309. allocator = _allocator;
  310. dims = 1;
  311. w = _w;
  312. h = 1;
  313. d = 1;
  314. c = 1;
  315. cstep = w;
  316. size_t totalsize = alignSize(total() * elemsize, 4);
  317. if (totalsize > 0)
  318. {
  319. if (allocator)
  320. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  321. else
  322. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  323. }
  324. if (data)
  325. {
  326. refcount = (int*)(((unsigned char*)data) + totalsize);
  327. *refcount = 1;
  328. }
  329. }
  330. void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
  331. {
  332. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  333. return;
  334. release();
  335. elemsize = _elemsize;
  336. elempack = _elempack;
  337. allocator = _allocator;
  338. dims = 2;
  339. w = _w;
  340. h = _h;
  341. d = 1;
  342. c = 1;
  343. cstep = (size_t)w * h;
  344. size_t totalsize = alignSize(total() * elemsize, 4);
  345. if (totalsize > 0)
  346. {
  347. if (allocator)
  348. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  349. else
  350. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  351. }
  352. if (data)
  353. {
  354. refcount = (int*)(((unsigned char*)data) + totalsize);
  355. *refcount = 1;
  356. }
  357. }
  358. void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
  359. {
  360. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  361. return;
  362. release();
  363. elemsize = _elemsize;
  364. elempack = _elempack;
  365. allocator = _allocator;
  366. dims = 3;
  367. w = _w;
  368. h = _h;
  369. d = 1;
  370. c = _c;
  371. cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
  372. size_t totalsize = alignSize(total() * elemsize, 4);
  373. if (totalsize > 0)
  374. {
  375. if (allocator)
  376. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  377. else
  378. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  379. }
  380. if (data)
  381. {
  382. refcount = (int*)(((unsigned char*)data) + totalsize);
  383. *refcount = 1;
  384. }
  385. }
  386. void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
  387. {
  388. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  389. return;
  390. release();
  391. elemsize = _elemsize;
  392. elempack = _elempack;
  393. allocator = _allocator;
  394. dims = 4;
  395. w = _w;
  396. h = _h;
  397. d = _d;
  398. c = _c;
  399. cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
  400. size_t totalsize = alignSize(total() * elemsize, 4);
  401. if (totalsize > 0)
  402. {
  403. if (allocator)
  404. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  405. else
  406. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  407. }
  408. if (data)
  409. {
  410. refcount = (int*)(((unsigned char*)data) + totalsize);
  411. *refcount = 1;
  412. }
  413. }
  414. void Mat::create_like(const Mat& m, Allocator* _allocator)
  415. {
  416. int _dims = m.dims;
  417. if (_dims == 1)
  418. create(m.w, m.elemsize, m.elempack, _allocator);
  419. if (_dims == 2)
  420. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  421. if (_dims == 3)
  422. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  423. if (_dims == 4)
  424. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  425. }
  426. #if NCNN_VULKAN
  427. void Mat::create_like(const VkMat& m, Allocator* _allocator)
  428. {
  429. int _dims = m.dims;
  430. if (_dims == 1)
  431. create(m.w, m.elemsize, m.elempack, _allocator);
  432. if (_dims == 2)
  433. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  434. if (_dims == 3)
  435. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  436. if (_dims == 4)
  437. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  438. }
  439. void Mat::create_like(const VkImageMat& im, Allocator* _allocator)
  440. {
  441. int _dims = im.dims;
  442. if (_dims == 1)
  443. create(im.w, im.elemsize, im.elempack, _allocator);
  444. if (_dims == 2)
  445. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  446. if (_dims == 3)
  447. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  448. if (_dims == 4)
  449. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  450. }
  451. #endif // NCNN_VULKAN
  452. #if NCNN_VULKAN
  453. void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
  454. {
  455. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  456. return;
  457. release();
  458. elemsize = _elemsize;
  459. elempack = 1;
  460. allocator = _allocator;
  461. dims = 1;
  462. w = _w;
  463. h = 1;
  464. d = 1;
  465. c = 1;
  466. cstep = w;
  467. if (total() > 0)
  468. {
  469. size_t totalsize = alignSize(total() * elemsize, 4);
  470. data = allocator->fastMalloc(totalsize);
  471. }
  472. if (data)
  473. {
  474. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  475. *refcount = 1;
  476. }
  477. }
  478. void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
  479. {
  480. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  481. return;
  482. release();
  483. elemsize = _elemsize;
  484. elempack = 1;
  485. allocator = _allocator;
  486. dims = 2;
  487. w = _w;
  488. h = _h;
  489. d = 1;
  490. c = 1;
  491. cstep = w * h;
  492. if (total() > 0)
  493. {
  494. size_t totalsize = alignSize(total() * elemsize, 4);
  495. data = allocator->fastMalloc(totalsize);
  496. }
  497. if (data)
  498. {
  499. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  500. *refcount = 1;
  501. }
  502. }
  503. void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
  504. {
  505. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  506. return;
  507. release();
  508. elemsize = _elemsize;
  509. elempack = 1;
  510. allocator = _allocator;
  511. dims = 3;
  512. w = _w;
  513. h = _h;
  514. d = 1;
  515. c = _c;
  516. cstep = alignSize(w * h * elemsize, 16) / elemsize;
  517. if (total() > 0)
  518. {
  519. size_t totalsize = alignSize(total() * elemsize, 4);
  520. data = allocator->fastMalloc(totalsize);
  521. }
  522. if (data)
  523. {
  524. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  525. *refcount = 1;
  526. }
  527. }
  528. void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
  529. {
  530. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  531. return;
  532. release();
  533. elemsize = _elemsize;
  534. elempack = 1;
  535. allocator = _allocator;
  536. dims = 4;
  537. w = _w;
  538. h = _h;
  539. d = _d;
  540. c = _c;
  541. cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
  542. if (total() > 0)
  543. {
  544. size_t totalsize = alignSize(total() * elemsize, 4);
  545. data = allocator->fastMalloc(totalsize);
  546. }
  547. if (data)
  548. {
  549. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  550. *refcount = 1;
  551. }
  552. }
  553. void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  554. {
  555. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  556. return;
  557. release();
  558. elemsize = _elemsize;
  559. elempack = _elempack;
  560. allocator = _allocator;
  561. dims = 1;
  562. w = _w;
  563. h = 1;
  564. d = 1;
  565. c = 1;
  566. cstep = w;
  567. if (total() > 0)
  568. {
  569. size_t totalsize = alignSize(total() * elemsize, 4);
  570. data = allocator->fastMalloc(totalsize);
  571. }
  572. if (data)
  573. {
  574. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  575. *refcount = 1;
  576. }
  577. }
  578. void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  579. {
  580. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  581. return;
  582. release();
  583. elemsize = _elemsize;
  584. elempack = _elempack;
  585. allocator = _allocator;
  586. dims = 2;
  587. w = _w;
  588. h = _h;
  589. d = 1;
  590. c = 1;
  591. cstep = w * h;
  592. if (total() > 0)
  593. {
  594. size_t totalsize = alignSize(total() * elemsize, 4);
  595. data = allocator->fastMalloc(totalsize);
  596. }
  597. if (data)
  598. {
  599. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  600. *refcount = 1;
  601. }
  602. }
  603. void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  604. {
  605. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  606. return;
  607. release();
  608. elemsize = _elemsize;
  609. elempack = _elempack;
  610. allocator = _allocator;
  611. dims = 3;
  612. w = _w;
  613. h = _h;
  614. d = 1;
  615. c = _c;
  616. cstep = alignSize(w * h * elemsize, 16) / elemsize;
  617. if (total() > 0)
  618. {
  619. size_t totalsize = alignSize(total() * elemsize, 4);
  620. data = allocator->fastMalloc(totalsize);
  621. }
  622. if (data)
  623. {
  624. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  625. *refcount = 1;
  626. }
  627. }
  628. void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  629. {
  630. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  631. return;
  632. release();
  633. elemsize = _elemsize;
  634. elempack = _elempack;
  635. allocator = _allocator;
  636. dims = 4;
  637. w = _w;
  638. h = _h;
  639. d = _d;
  640. c = _c;
  641. cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
  642. if (total() > 0)
  643. {
  644. size_t totalsize = alignSize(total() * elemsize, 4);
  645. data = allocator->fastMalloc(totalsize);
  646. }
  647. if (data)
  648. {
  649. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  650. *refcount = 1;
  651. }
  652. }
  653. void VkMat::create_like(const Mat& m, VkAllocator* _allocator)
  654. {
  655. int _dims = m.dims;
  656. if (_dims == 1)
  657. create(m.w, m.elemsize, m.elempack, _allocator);
  658. if (_dims == 2)
  659. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  660. if (_dims == 3)
  661. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  662. if (_dims == 4)
  663. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  664. }
  665. void VkMat::create_like(const VkMat& m, VkAllocator* _allocator)
  666. {
  667. int _dims = m.dims;
  668. if (_dims == 1)
  669. create(m.w, m.elemsize, m.elempack, _allocator);
  670. if (_dims == 2)
  671. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  672. if (_dims == 3)
  673. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  674. if (_dims == 4)
  675. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  676. }
  677. void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
  678. {
  679. int _dims = im.dims;
  680. if (_dims == 1)
  681. create(im.w, im.elemsize, im.elempack, _allocator);
  682. if (_dims == 2)
  683. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  684. if (_dims == 3)
  685. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  686. if (_dims == 4)
  687. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  688. }
  689. void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
  690. {
  691. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  692. return;
  693. release();
  694. elemsize = _elemsize;
  695. elempack = 1;
  696. allocator = _allocator;
  697. dims = 1;
  698. w = _w;
  699. h = 1;
  700. d = 1;
  701. c = 1;
  702. if (total() > 0)
  703. {
  704. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  705. }
  706. if (data)
  707. {
  708. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  709. *refcount = 1;
  710. }
  711. }
  712. void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
  713. {
  714. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  715. return;
  716. release();
  717. elemsize = _elemsize;
  718. elempack = 1;
  719. allocator = _allocator;
  720. dims = 2;
  721. w = _w;
  722. h = _h;
  723. d = 1;
  724. c = 1;
  725. if (total() > 0)
  726. {
  727. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  728. }
  729. if (data)
  730. {
  731. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  732. *refcount = 1;
  733. }
  734. }
  735. void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
  736. {
  737. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  738. return;
  739. release();
  740. elemsize = _elemsize;
  741. elempack = 1;
  742. allocator = _allocator;
  743. dims = 3;
  744. w = _w;
  745. h = _h;
  746. d = 1;
  747. c = _c;
  748. if (total() > 0)
  749. {
  750. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  751. }
  752. if (data)
  753. {
  754. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  755. *refcount = 1;
  756. }
  757. }
  758. void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
  759. {
  760. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  761. return;
  762. release();
  763. elemsize = _elemsize;
  764. elempack = 1;
  765. allocator = _allocator;
  766. dims = 4;
  767. w = _w;
  768. h = _h;
  769. d = _d;
  770. c = _c;
  771. if (total() > 0)
  772. {
  773. // underlying image is 3d
  774. data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
  775. }
  776. if (data)
  777. {
  778. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  779. *refcount = 1;
  780. }
  781. }
  782. void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  783. {
  784. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  785. return;
  786. release();
  787. elemsize = _elemsize;
  788. elempack = _elempack;
  789. allocator = _allocator;
  790. dims = 1;
  791. w = _w;
  792. h = 1;
  793. d = 1;
  794. c = 1;
  795. if (total() > 0)
  796. {
  797. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  798. }
  799. if (data)
  800. {
  801. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  802. *refcount = 1;
  803. }
  804. }
  805. void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  806. {
  807. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  808. return;
  809. release();
  810. elemsize = _elemsize;
  811. elempack = _elempack;
  812. allocator = _allocator;
  813. dims = 2;
  814. w = _w;
  815. h = _h;
  816. d = 1;
  817. c = 1;
  818. if (total() > 0)
  819. {
  820. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  821. }
  822. if (data)
  823. {
  824. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  825. *refcount = 1;
  826. }
  827. }
  828. void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  829. {
  830. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  831. return;
  832. release();
  833. elemsize = _elemsize;
  834. elempack = _elempack;
  835. allocator = _allocator;
  836. dims = 3;
  837. w = _w;
  838. h = _h;
  839. d = 1;
  840. c = _c;
  841. if (total() > 0)
  842. {
  843. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  844. }
  845. if (data)
  846. {
  847. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  848. *refcount = 1;
  849. }
  850. }
  851. void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  852. {
  853. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  854. return;
  855. release();
  856. elemsize = _elemsize;
  857. elempack = _elempack;
  858. allocator = _allocator;
  859. dims = 4;
  860. w = _w;
  861. h = _h;
  862. d = _d;
  863. c = _c;
  864. if (total() > 0)
  865. {
  866. // underlying image is 3d
  867. data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
  868. }
  869. if (data)
  870. {
  871. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  872. *refcount = 1;
  873. }
  874. }
  875. void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator)
  876. {
  877. int _dims = m.dims;
  878. if (_dims == 1)
  879. create(m.w, m.elemsize, m.elempack, _allocator);
  880. if (_dims == 2)
  881. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  882. if (_dims == 3)
  883. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  884. if (_dims == 4)
  885. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  886. }
  887. void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator)
  888. {
  889. int _dims = m.dims;
  890. if (_dims == 1)
  891. create(m.w, m.elemsize, m.elempack, _allocator);
  892. if (_dims == 2)
  893. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  894. if (_dims == 3)
  895. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  896. if (_dims == 4)
  897. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  898. }
  899. void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
  900. {
  901. int _dims = im.dims;
  902. if (_dims == 1)
  903. create(im.w, im.elemsize, im.elempack, _allocator);
  904. if (_dims == 2)
  905. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  906. if (_dims == 3)
  907. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  908. if (_dims == 4)
  909. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  910. }
  911. #endif // NCNN_VULKAN
  912. void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_vals)
  913. {
  914. Layer* op;
  915. if (mean_vals && !norm_vals)
  916. {
  917. // substract mean only
  918. op = create_layer(LayerType::Bias);
  919. ParamDict pd;
  920. pd.set(0, c);
  921. op->load_param(pd);
  922. Mat weights[1];
  923. weights[0] = Mat(c);
  924. for (int q = 0; q < c; q++)
  925. {
  926. weights[0][q] = -mean_vals[q];
  927. }
  928. op->load_model(ModelBinFromMatArray(weights));
  929. }
  930. else if (!mean_vals && norm_vals)
  931. {
  932. // normalize only
  933. op = create_layer(LayerType::Scale);
  934. ParamDict pd;
  935. pd.set(0, c);
  936. op->load_param(pd);
  937. Mat weights[1];
  938. weights[0] = Mat(c);
  939. for (int q = 0; q < c; q++)
  940. {
  941. weights[0][q] = norm_vals[q];
  942. }
  943. op->load_model(ModelBinFromMatArray(weights));
  944. }
  945. else if (mean_vals && norm_vals)
  946. {
  947. // substract mean and normalize
  948. op = create_layer(LayerType::Scale);
  949. ParamDict pd;
  950. pd.set(0, c);
  951. pd.set(1, 1);
  952. op->load_param(pd);
  953. Mat weights[2];
  954. weights[0] = Mat(c);
  955. weights[1] = Mat(c);
  956. for (int q = 0; q < c; q++)
  957. {
  958. weights[0][q] = norm_vals[q];
  959. weights[1][q] = -mean_vals[q] * norm_vals[q];
  960. }
  961. op->load_model(ModelBinFromMatArray(weights));
  962. }
  963. else // if (!mean_vals && !norm_vals)
  964. {
  965. return;
  966. }
  967. Option opt;
  968. opt.num_threads = 1; // TODO
  969. op->create_pipeline(opt);
  970. op->forward_inplace(*this, opt);
  971. op->destroy_pipeline(opt);
  972. delete op;
  973. }
  974. Mat Mat::from_float16(const unsigned short* data, int size)
  975. {
  976. Mat src(size, (void*)data, (size_t)2u);
  977. Mat dst;
  978. Option opt;
  979. opt.num_threads = 1; // TODO
  980. cast_float16_to_float32(src, dst, opt);
  981. return dst;
  982. }
  983. #if NCNN_VULKAN
  984. #if NCNN_PLATFORM_API
  985. #if __ANDROID_API__ >= 26
  986. VkImageMat VkImageMat::from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator)
  987. {
  988. int width = allocator->width();
  989. int height = allocator->height();
  990. size_t elemsize = 4u; // elemsize for ahb is actually just a placeholder
  991. return VkImageMat(width, height, elemsize, allocator);
  992. }
  993. #endif // __ANDROID_API__ >= 26
  994. #endif // NCNN_PLATFORM_API
  995. #endif // NCNN_VULKAN
  // Convert an IEEE-754 binary32 value to binary16 bits (sign : exp : frac = 1 : 5 : 10).
  //
  // The mantissa is truncated, never rounded. Mapping of the input classes:
  //   - fp32 zero or fp32 denormal                  -> signed zero
  //   - infinity                                    -> signed infinity
  //   - NaN                                         -> NaN with mantissa 0x200
  //   - magnitude too large for fp16                -> signed infinity
  //   - magnitude inside the fp16 subnormal range   -> fp16 subnormal
  //   - magnitude below the smallest fp16 subnormal -> signed zero
  //
  // Encoding subnormals keeps this function the inverse of
  // float16_to_float32, which decodes them: every finite half value survives
  // a half -> float -> half round trip.
  unsigned short float32_to_float16(float value)
  {
      // 1 : 8 : 23
      union
      {
          unsigned int u;
          float f;
      } tmp;

      tmp.f = value;

      // 1 : 8 : 23
      unsigned short sign = (tmp.u & 0x80000000) >> 31;
      unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
      unsigned int significand = tmp.u & 0x7FFFFF;

      // NCNN_LOGE("%d %d %d", sign, exponent, significand);

      // 1 : 5 : 10
      unsigned short fp16;
      if (exponent == 0)
      {
          // zero or fp32 denormal, far below anything fp16 can hold
          fp16 = (sign << 15) | (0x00 << 10) | 0x00;
      }
      else if (exponent == 0xFF)
      {
          // infinity or NaN
          fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
      }
      else
      {
          // normalized fp32: rebias the exponent from 127 to 15
          short newexp = exponent + (-127 + 15);
          if (newexp >= 31)
          {
              // overflow, return infinity
              fp16 = (sign << 15) | (0x1F << 10) | 0x00;
          }
          else if (newexp < -10)
          {
              // smaller than the smallest fp16 subnormal (2^-24), underflow
              fp16 = (sign << 15) | (0x00 << 10) | 0x00;
          }
          else if (newexp <= 0)
          {
              // fp16 subnormal: value = frac * 2^-24 with a zero exponent field.
              // Restore the implicit leading one, then shift the 24-bit
              // mantissa down so that it is expressed in units of 2^-24.
              // The shift amount is in [14, 24].
              const unsigned int mantissa = significand | 0x800000;
              fp16 = (sign << 15) | (mantissa >> (14 - newexp));
          }
          else
          {
              // normal fp16
              fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
          }
      }

      return fp16;
  }
  // Expand IEEE-754 binary16 bits (1 : 5 : 10) into a binary32 value (1 : 8 : 23).
  // Zero, subnormal, normal, infinity and NaN inputs are all handled; the
  // conversion is exact because every half value is representable as a float.
  float float16_to_float32(unsigned short value)
  {
      // split the half into its three fields
      const unsigned int sign_bit = (value >> 15) & 0x1;
      const unsigned int exp5 = (value >> 10) & 0x1F;
      unsigned int frac10 = value & 0x03FF;

      // NCNN_LOGE("%d %d %d", sign, exponent, significand);

      union
      {
          unsigned int u;
          float f;
      } bits;

      const unsigned int sign32 = sign_bit << 31;

      if (exp5 == 0x1F)
      {
          // infinity or NaN: all-ones exponent, mantissa carried over
          bits.u = sign32 | (0xFFu << 23) | (frac10 << 13);
          return bits.f;
      }

      if (exp5 != 0)
      {
          // normalized: rebias the exponent from 15 to 127
          bits.u = sign32 | ((exp5 + (127 - 15)) << 23) | (frac10 << 13);
          return bits.f;
      }

      if (frac10 == 0)
      {
          // signed zero
          bits.u = sign32;
          return bits.f;
      }

      // subnormal: shift the mantissa up until its top bit (bit 9) is set,
      // counting how many positions that takes
      unsigned int shift = 0;
      for (; (frac10 & 0x200) == 0; frac10 <<= 1)
      {
          shift++;
      }

      // drop that top bit; it becomes the implicit leading one of the float
      frac10 = (frac10 << 1) & 0x3FF;

      bits.u = sign32 | (((127 - 15) - shift) << 23) | (frac10 << 13);
      return bits.f;
  }
  1091. void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt)
  1092. {
  1093. Layer* padding = create_layer(LayerType::Padding);
  1094. ParamDict pd;
  1095. pd.set(0, top);
  1096. pd.set(1, bottom);
  1097. pd.set(2, left);
  1098. pd.set(3, right);
  1099. pd.set(4, type);
  1100. pd.set(5, v);
  1101. padding->load_param(pd);
  1102. padding->create_pipeline(opt);
  1103. padding->forward(src, dst, opt);
  1104. padding->destroy_pipeline(opt);
  1105. delete padding;
  1106. }
  1107. void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt)
  1108. {
  1109. Layer* padding = create_layer(LayerType::Padding);
  1110. ParamDict pd;
  1111. pd.set(0, top);
  1112. pd.set(1, bottom);
  1113. pd.set(2, left);
  1114. pd.set(3, right);
  1115. pd.set(4, type);
  1116. pd.set(5, v);
  1117. pd.set(7, front);
  1118. pd.set(8, behind);
  1119. padding->load_param(pd);
  1120. padding->create_pipeline(opt);
  1121. padding->forward(src, dst, opt);
  1122. padding->destroy_pipeline(opt);
  1123. delete padding;
  1124. }
  1125. void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt)
  1126. {
  1127. if (left + right > src.w || top + bottom > src.h)
  1128. {
  1129. NCNN_LOGE("copy_cut_border parameter error, top: %d, bottom: %d, left: %d, right: %d, src.w: %d, src.h: %d", top, bottom, left, right, src.w, src.h);
  1130. return;
  1131. }
  1132. Layer* crop = create_layer(LayerType::Crop);
  1133. ParamDict pd;
  1134. pd.set(0, left);
  1135. pd.set(1, top);
  1136. pd.set(2, 0);
  1137. pd.set(3, src.w - left - right);
  1138. pd.set(4, src.h - top - bottom);
  1139. pd.set(5, -233);
  1140. crop->load_param(pd);
  1141. crop->create_pipeline(opt);
  1142. crop->forward(src, dst, opt);
  1143. crop->destroy_pipeline(opt);
  1144. delete crop;
  1145. }
  1146. void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt)
  1147. {
  1148. if (left + right > src.w || top + bottom > src.h || front + behind > src.d)
  1149. {
  1150. NCNN_LOGE("copy_cut_border_3d parameter error, top: %d, bottom: %d, left: %d, right: %d, front: %d, behind: %d, src.w: %d, src.h: %d, src.d: %d", top, bottom, left, right, front, behind, src.w, src.h, src.d);
  1151. return;
  1152. }
  1153. Layer* crop = create_layer(LayerType::Crop);
  1154. ParamDict pd;
  1155. pd.set(0, left);
  1156. pd.set(1, top);
  1157. pd.set(13, front);
  1158. pd.set(2, 0);
  1159. pd.set(3, src.w - left - right);
  1160. pd.set(4, src.h - top - bottom);
  1161. pd.set(14, src.d - front - behind);
  1162. pd.set(5, -233);
  1163. crop->load_param(pd);
  1164. crop->create_pipeline(opt);
  1165. crop->forward(src, dst, opt);
  1166. crop->destroy_pipeline(opt);
  1167. delete crop;
  1168. }
  1169. void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1170. {
  1171. Layer* interp = create_layer(LayerType::Interp);
  1172. ParamDict pd;
  1173. pd.set(0, 1);
  1174. pd.set(3, h);
  1175. pd.set(4, w);
  1176. interp->load_param(pd);
  1177. interp->create_pipeline(opt);
  1178. interp->forward(src, dst, opt);
  1179. interp->destroy_pipeline(opt);
  1180. delete interp;
  1181. }
  1182. void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1183. {
  1184. Layer* interp = create_layer(LayerType::Interp);
  1185. ParamDict pd;
  1186. pd.set(0, 2);
  1187. pd.set(3, h);
  1188. pd.set(4, w);
  1189. interp->load_param(pd);
  1190. interp->create_pipeline(opt);
  1191. interp->forward(src, dst, opt);
  1192. interp->destroy_pipeline(opt);
  1193. delete interp;
  1194. }
  1195. void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1196. {
  1197. Layer* interp = create_layer(LayerType::Interp);
  1198. ParamDict pd;
  1199. pd.set(0, 3);
  1200. pd.set(3, h);
  1201. pd.set(4, w);
  1202. interp->load_param(pd);
  1203. interp->create_pipeline(opt);
  1204. interp->forward(src, dst, opt);
  1205. interp->destroy_pipeline(opt);
  1206. delete interp;
  1207. }
  1208. void convert_packing(const Mat& src, Mat& dst, int _elempack, const Option& opt)
  1209. {
  1210. Layer* packing = create_layer(LayerType::Packing);
  1211. ParamDict pd;
  1212. pd.set(0, _elempack);
  1213. packing->load_param(pd);
  1214. packing->create_pipeline(opt);
  1215. packing->forward(src, dst, opt);
  1216. packing->destroy_pipeline(opt);
  1217. delete packing;
  1218. }
  1219. void flatten(const Mat& src, Mat& dst, const Option& opt)
  1220. {
  1221. Layer* flatten = create_layer(LayerType::Flatten);
  1222. ParamDict pd;
  1223. flatten->load_param(pd);
  1224. flatten->create_pipeline(opt);
  1225. flatten->forward(src, dst, opt);
  1226. flatten->destroy_pipeline(opt);
  1227. delete flatten;
  1228. }
  1229. void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt)
  1230. {
  1231. Layer* cast = create_layer(LayerType::Cast);
  1232. ParamDict pd;
  1233. pd.set(0, 1);
  1234. pd.set(1, 2);
  1235. cast->load_param(pd);
  1236. cast->create_pipeline(opt);
  1237. cast->forward(src, dst, opt);
  1238. cast->destroy_pipeline(opt);
  1239. delete cast;
  1240. }
  1241. void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1242. {
  1243. Layer* cast = create_layer(LayerType::Cast);
  1244. ParamDict pd;
  1245. pd.set(0, 2);
  1246. pd.set(1, 1);
  1247. cast->load_param(pd);
  1248. cast->create_pipeline(opt);
  1249. cast->forward(src, dst, opt);
  1250. cast->destroy_pipeline(opt);
  1251. delete cast;
  1252. }
  1253. void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1254. {
  1255. Layer* cast = create_layer(LayerType::Cast);
  1256. ParamDict pd;
  1257. pd.set(0, 3);
  1258. pd.set(1, 1);
  1259. cast->load_param(pd);
  1260. cast->create_pipeline(opt);
  1261. cast->forward(src, dst, opt);
  1262. cast->destroy_pipeline(opt);
  1263. delete cast;
  1264. }
  1265. void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt)
  1266. {
  1267. Layer* cast = create_layer(LayerType::Cast);
  1268. ParamDict pd;
  1269. pd.set(0, 1);
  1270. pd.set(1, 4);
  1271. cast->load_param(pd);
  1272. cast->create_pipeline(opt);
  1273. cast->forward(src, dst, opt);
  1274. cast->destroy_pipeline(opt);
  1275. delete cast;
  1276. }
  1277. void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1278. {
  1279. Layer* cast = create_layer(LayerType::Cast);
  1280. ParamDict pd;
  1281. pd.set(0, 4);
  1282. pd.set(1, 1);
  1283. cast->load_param(pd);
  1284. cast->create_pipeline(opt);
  1285. cast->forward(src, dst, opt);
  1286. cast->destroy_pipeline(opt);
  1287. delete cast;
  1288. }
  1289. void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt)
  1290. {
  1291. Layer* quantize = create_layer(LayerType::Quantize);
  1292. ParamDict pd;
  1293. pd.set(0, scale_data.w);
  1294. quantize->load_param(pd);
  1295. Mat weights[1];
  1296. weights[0] = scale_data;
  1297. quantize->load_model(ModelBinFromMatArray(weights));
  1298. quantize->create_pipeline(opt);
  1299. quantize->forward(src, dst, opt);
  1300. quantize->destroy_pipeline(opt);
  1301. delete quantize;
  1302. }
  1303. void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt)
  1304. {
  1305. Layer* dequantize = create_layer(LayerType::Dequantize);
  1306. ParamDict pd;
  1307. pd.set(0, scale_data.w);
  1308. pd.set(1, bias_data.w);
  1309. dequantize->load_param(pd);
  1310. Mat weights[2];
  1311. weights[0] = scale_data;
  1312. weights[1] = bias_data;
  1313. dequantize->load_model(ModelBinFromMatArray(weights));
  1314. dequantize->create_pipeline(opt);
  1315. dequantize->forward(src, dst, opt);
  1316. dequantize->destroy_pipeline(opt);
  1317. delete dequantize;
  1318. }
  1319. void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt)
  1320. {
  1321. Layer* requantize = create_layer(LayerType::Requantize);
  1322. ParamDict pd;
  1323. pd.set(0, scale_in_data.w);
  1324. pd.set(1, scale_out_data.w);
  1325. pd.set(2, bias_data.w);
  1326. pd.set(3, activation_type);
  1327. pd.set(4, activation_params);
  1328. requantize->load_param(pd);
  1329. Mat weights[3];
  1330. weights[0] = scale_in_data;
  1331. weights[1] = scale_out_data;
  1332. weights[2] = bias_data;
  1333. requantize->load_model(ModelBinFromMatArray(weights));
  1334. requantize->create_pipeline(opt);
  1335. requantize->forward(src, dst, opt);
  1336. requantize->destroy_pipeline(opt);
  1337. delete requantize;
  1338. }
  1339. } // namespace ncnn