You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mat.cpp 41 kB

adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "mat.h"
  15. #if __ARM_NEON
  16. #include <arm_neon.h>
  17. #endif // __ARM_NEON
  18. #include "cpu.h"
  19. #include "layer.h"
  20. #include "layer_type.h"
  21. #include <math.h>
  22. #if NCNN_VULKAN
  23. #if NCNN_PLATFORM_API
  24. #if __ANDROID_API__ >= 26
  25. #include <android/hardware_buffer.h>
  26. #endif // __ANDROID_API__ >= 26
  27. #endif // NCNN_PLATFORM_API
  28. #endif // NCNN_VULKAN
  29. namespace ncnn {
  30. Mat Mat::clone(Allocator* _allocator) const
  31. {
  32. if (empty())
  33. return Mat();
  34. Mat m;
  35. if (dims == 1)
  36. m.create(w, elemsize, elempack, _allocator);
  37. else if (dims == 2)
  38. m.create(w, h, elemsize, elempack, _allocator);
  39. else if (dims == 3)
  40. m.create(w, h, c, elemsize, elempack, _allocator);
  41. else if (dims == 4)
  42. m.create(w, h, d, c, elemsize, elempack, _allocator);
  43. if (total() > 0)
  44. {
  45. if (cstep == m.cstep)
  46. memcpy(m.data, data, total() * elemsize);
  47. else
  48. {
  49. // copy by channel for differnet cstep
  50. size_t size = (size_t)w * h * d * elemsize;
  51. for (int i = 0; i < c; i++)
  52. {
  53. memcpy(m.channel(i), channel(i), size);
  54. }
  55. }
  56. }
  57. return m;
  58. }
  59. void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator)
  60. {
  61. *this = mat.clone(allocator);
  62. }
  63. Mat Mat::reshape(int _w, Allocator* _allocator) const
  64. {
  65. if (w * h * d * c != _w)
  66. return Mat();
  67. if (dims >= 3 && cstep != (size_t)w * h * d)
  68. {
  69. Mat m;
  70. m.create(_w, elemsize, elempack, _allocator);
  71. // flatten
  72. for (int i = 0; i < c; i++)
  73. {
  74. const void* ptr = (unsigned char*)data + i * cstep * elemsize;
  75. void* mptr = (unsigned char*)m.data + (size_t)i * w * h * d * elemsize;
  76. memcpy(mptr, ptr, (size_t)w * h * d * elemsize);
  77. }
  78. return m;
  79. }
  80. Mat m = *this;
  81. m.dims = 1;
  82. m.w = _w;
  83. m.h = 1;
  84. m.d = 1;
  85. m.c = 1;
  86. m.cstep = _w;
  87. return m;
  88. }
  89. Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const
  90. {
  91. if (w * h * d * c != _w * _h)
  92. return Mat();
  93. if (dims >= 3 && cstep != (size_t)w * h * d)
  94. {
  95. Mat m;
  96. m.create(_w, _h, elemsize, elempack, _allocator);
  97. // flatten
  98. for (int i = 0; i < c; i++)
  99. {
  100. const void* ptr = (unsigned char*)data + i * cstep * elemsize;
  101. void* mptr = (unsigned char*)m.data + (size_t)i * w * h * d * elemsize;
  102. memcpy(mptr, ptr, (size_t)w * h * d * elemsize);
  103. }
  104. return m;
  105. }
  106. Mat m = *this;
  107. m.dims = 2;
  108. m.w = _w;
  109. m.h = _h;
  110. m.d = 1;
  111. m.c = 1;
  112. m.cstep = (size_t)_w * _h;
  113. return m;
  114. }
  115. Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const
  116. {
  117. if (w * h * d * c != _w * _h * _c)
  118. return Mat();
  119. if (dims < 3)
  120. {
  121. if ((size_t)_w * _h != alignSize((size_t)_w * _h * elemsize, 16) / elemsize)
  122. {
  123. Mat m;
  124. m.create(_w, _h, _c, elemsize, elempack, _allocator);
  125. // align channel
  126. for (int i = 0; i < _c; i++)
  127. {
  128. const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * elemsize;
  129. void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
  130. memcpy(mptr, ptr, (size_t)_w * _h * elemsize);
  131. }
  132. return m;
  133. }
  134. }
  135. else if (c != _c)
  136. {
  137. // flatten and then align
  138. Mat tmp = reshape(_w * _h * _c, _allocator);
  139. return tmp.reshape(_w, _h, _c, _allocator);
  140. }
  141. Mat m = *this;
  142. m.dims = 3;
  143. m.w = _w;
  144. m.h = _h;
  145. m.d = 1;
  146. m.c = _c;
  147. m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize;
  148. return m;
  149. }
  150. Mat Mat::reshape(int _w, int _h, int _d, int _c, Allocator* _allocator) const
  151. {
  152. if (w * h * d * c != _w * _h * _d * _c)
  153. return Mat();
  154. if (dims < 3)
  155. {
  156. if ((size_t)_w * _h * _d != alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize)
  157. {
  158. Mat m;
  159. m.create(_w, _h, _d, _c, elemsize, elempack, _allocator);
  160. // align channel
  161. for (int i = 0; i < _c; i++)
  162. {
  163. const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * _d * elemsize;
  164. void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
  165. memcpy(mptr, ptr, (size_t)_w * _h * _d * elemsize);
  166. }
  167. return m;
  168. }
  169. }
  170. else if (c != _c)
  171. {
  172. // flatten and then align
  173. Mat tmp = reshape(_w * _h * _d * _c, _allocator);
  174. return tmp.reshape(_w, _h, _d, _c, _allocator);
  175. }
  176. Mat m = *this;
  177. m.dims = 4;
  178. m.w = _w;
  179. m.h = _h;
  180. m.d = _d;
  181. m.c = _c;
  182. m.cstep = alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize;
  183. return m;
  184. }
  185. void Mat::create(int _w, size_t _elemsize, Allocator* _allocator)
  186. {
  187. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  188. return;
  189. release();
  190. elemsize = _elemsize;
  191. elempack = 1;
  192. allocator = _allocator;
  193. dims = 1;
  194. w = _w;
  195. h = 1;
  196. d = 1;
  197. c = 1;
  198. cstep = w;
  199. size_t totalsize = alignSize(total() * elemsize, 4);
  200. if (totalsize > 0)
  201. {
  202. if (allocator)
  203. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  204. else
  205. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  206. }
  207. if (data)
  208. {
  209. refcount = (int*)(((unsigned char*)data) + totalsize);
  210. *refcount = 1;
  211. }
  212. }
  213. void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator)
  214. {
  215. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  216. return;
  217. release();
  218. elemsize = _elemsize;
  219. elempack = 1;
  220. allocator = _allocator;
  221. dims = 2;
  222. w = _w;
  223. h = _h;
  224. d = 1;
  225. c = 1;
  226. cstep = (size_t)w * h;
  227. size_t totalsize = alignSize(total() * elemsize, 4);
  228. if (totalsize > 0)
  229. {
  230. if (allocator)
  231. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  232. else
  233. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  234. }
  235. if (data)
  236. {
  237. refcount = (int*)(((unsigned char*)data) + totalsize);
  238. *refcount = 1;
  239. }
  240. }
  241. void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
  242. {
  243. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  244. return;
  245. release();
  246. elemsize = _elemsize;
  247. elempack = 1;
  248. allocator = _allocator;
  249. dims = 3;
  250. w = _w;
  251. h = _h;
  252. d = 1;
  253. c = _c;
  254. cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
  255. size_t totalsize = alignSize(total() * elemsize, 4);
  256. if (totalsize > 0)
  257. {
  258. if (allocator)
  259. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  260. else
  261. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  262. }
  263. if (data)
  264. {
  265. refcount = (int*)(((unsigned char*)data) + totalsize);
  266. *refcount = 1;
  267. }
  268. }
  269. void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
  270. {
  271. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  272. return;
  273. release();
  274. elemsize = _elemsize;
  275. elempack = 1;
  276. allocator = _allocator;
  277. dims = 4;
  278. w = _w;
  279. h = _h;
  280. d = _d;
  281. c = _c;
  282. cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
  283. size_t totalsize = alignSize(total() * elemsize, 4);
  284. if (totalsize > 0)
  285. {
  286. if (allocator)
  287. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  288. else
  289. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  290. }
  291. if (data)
  292. {
  293. refcount = (int*)(((unsigned char*)data) + totalsize);
  294. *refcount = 1;
  295. }
  296. }
  297. void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
  298. {
  299. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  300. return;
  301. release();
  302. elemsize = _elemsize;
  303. elempack = _elempack;
  304. allocator = _allocator;
  305. dims = 1;
  306. w = _w;
  307. h = 1;
  308. d = 1;
  309. c = 1;
  310. cstep = w;
  311. size_t totalsize = alignSize(total() * elemsize, 4);
  312. if (totalsize > 0)
  313. {
  314. if (allocator)
  315. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  316. else
  317. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  318. }
  319. if (data)
  320. {
  321. refcount = (int*)(((unsigned char*)data) + totalsize);
  322. *refcount = 1;
  323. }
  324. }
  325. void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
  326. {
  327. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  328. return;
  329. release();
  330. elemsize = _elemsize;
  331. elempack = _elempack;
  332. allocator = _allocator;
  333. dims = 2;
  334. w = _w;
  335. h = _h;
  336. d = 1;
  337. c = 1;
  338. cstep = (size_t)w * h;
  339. size_t totalsize = alignSize(total() * elemsize, 4);
  340. if (totalsize > 0)
  341. {
  342. if (allocator)
  343. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  344. else
  345. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  346. }
  347. if (data)
  348. {
  349. refcount = (int*)(((unsigned char*)data) + totalsize);
  350. *refcount = 1;
  351. }
  352. }
  353. void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
  354. {
  355. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  356. return;
  357. release();
  358. elemsize = _elemsize;
  359. elempack = _elempack;
  360. allocator = _allocator;
  361. dims = 3;
  362. w = _w;
  363. h = _h;
  364. d = 1;
  365. c = _c;
  366. cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
  367. size_t totalsize = alignSize(total() * elemsize, 4);
  368. if (totalsize > 0)
  369. {
  370. if (allocator)
  371. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  372. else
  373. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  374. }
  375. if (data)
  376. {
  377. refcount = (int*)(((unsigned char*)data) + totalsize);
  378. *refcount = 1;
  379. }
  380. }
  381. void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
  382. {
  383. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  384. return;
  385. release();
  386. elemsize = _elemsize;
  387. elempack = _elempack;
  388. allocator = _allocator;
  389. dims = 4;
  390. w = _w;
  391. h = _h;
  392. d = _d;
  393. c = _c;
  394. cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
  395. size_t totalsize = alignSize(total() * elemsize, 4);
  396. if (totalsize > 0)
  397. {
  398. if (allocator)
  399. data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
  400. else
  401. data = fastMalloc(totalsize + (int)sizeof(*refcount));
  402. }
  403. if (data)
  404. {
  405. refcount = (int*)(((unsigned char*)data) + totalsize);
  406. *refcount = 1;
  407. }
  408. }
  409. void Mat::create_like(const Mat& m, Allocator* _allocator)
  410. {
  411. int _dims = m.dims;
  412. if (_dims == 1)
  413. create(m.w, m.elemsize, m.elempack, _allocator);
  414. if (_dims == 2)
  415. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  416. if (_dims == 3)
  417. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  418. if (_dims == 4)
  419. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  420. }
  421. #if NCNN_VULKAN
  422. void Mat::create_like(const VkMat& m, Allocator* _allocator)
  423. {
  424. int _dims = m.dims;
  425. if (_dims == 1)
  426. create(m.w, m.elemsize, m.elempack, _allocator);
  427. if (_dims == 2)
  428. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  429. if (_dims == 3)
  430. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  431. if (_dims == 4)
  432. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  433. }
  434. void Mat::create_like(const VkImageMat& im, Allocator* _allocator)
  435. {
  436. int _dims = im.dims;
  437. if (_dims == 1)
  438. create(im.w, im.elemsize, im.elempack, _allocator);
  439. if (_dims == 2)
  440. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  441. if (_dims == 3)
  442. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  443. if (_dims == 4)
  444. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  445. }
  446. #endif // NCNN_VULKAN
  447. #if NCNN_VULKAN
  448. void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
  449. {
  450. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  451. return;
  452. release();
  453. elemsize = _elemsize;
  454. elempack = 1;
  455. allocator = _allocator;
  456. dims = 1;
  457. w = _w;
  458. h = 1;
  459. d = 1;
  460. c = 1;
  461. cstep = w;
  462. if (total() > 0)
  463. {
  464. size_t totalsize = alignSize(total() * elemsize, 4);
  465. data = allocator->fastMalloc(totalsize);
  466. }
  467. if (data)
  468. {
  469. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  470. *refcount = 1;
  471. }
  472. }
  473. void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
  474. {
  475. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  476. return;
  477. release();
  478. elemsize = _elemsize;
  479. elempack = 1;
  480. allocator = _allocator;
  481. dims = 2;
  482. w = _w;
  483. h = _h;
  484. d = 1;
  485. c = 1;
  486. cstep = w * h;
  487. if (total() > 0)
  488. {
  489. size_t totalsize = alignSize(total() * elemsize, 4);
  490. data = allocator->fastMalloc(totalsize);
  491. }
  492. if (data)
  493. {
  494. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  495. *refcount = 1;
  496. }
  497. }
  498. void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
  499. {
  500. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  501. return;
  502. release();
  503. elemsize = _elemsize;
  504. elempack = 1;
  505. allocator = _allocator;
  506. dims = 3;
  507. w = _w;
  508. h = _h;
  509. d = 1;
  510. c = _c;
  511. cstep = alignSize(w * h * elemsize, 16) / elemsize;
  512. if (total() > 0)
  513. {
  514. size_t totalsize = alignSize(total() * elemsize, 4);
  515. data = allocator->fastMalloc(totalsize);
  516. }
  517. if (data)
  518. {
  519. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  520. *refcount = 1;
  521. }
  522. }
  523. void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
  524. {
  525. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  526. return;
  527. release();
  528. elemsize = _elemsize;
  529. elempack = 1;
  530. allocator = _allocator;
  531. dims = 4;
  532. w = _w;
  533. h = _h;
  534. d = _d;
  535. c = _c;
  536. cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
  537. if (total() > 0)
  538. {
  539. size_t totalsize = alignSize(total() * elemsize, 4);
  540. data = allocator->fastMalloc(totalsize);
  541. }
  542. if (data)
  543. {
  544. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  545. *refcount = 1;
  546. }
  547. }
  548. void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  549. {
  550. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  551. return;
  552. release();
  553. elemsize = _elemsize;
  554. elempack = _elempack;
  555. allocator = _allocator;
  556. dims = 1;
  557. w = _w;
  558. h = 1;
  559. d = 1;
  560. c = 1;
  561. cstep = w;
  562. if (total() > 0)
  563. {
  564. size_t totalsize = alignSize(total() * elemsize, 4);
  565. data = allocator->fastMalloc(totalsize);
  566. }
  567. if (data)
  568. {
  569. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  570. *refcount = 1;
  571. }
  572. }
  573. void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  574. {
  575. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  576. return;
  577. release();
  578. elemsize = _elemsize;
  579. elempack = _elempack;
  580. allocator = _allocator;
  581. dims = 2;
  582. w = _w;
  583. h = _h;
  584. d = 1;
  585. c = 1;
  586. cstep = w * h;
  587. if (total() > 0)
  588. {
  589. size_t totalsize = alignSize(total() * elemsize, 4);
  590. data = allocator->fastMalloc(totalsize);
  591. }
  592. if (data)
  593. {
  594. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  595. *refcount = 1;
  596. }
  597. }
  598. void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  599. {
  600. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  601. return;
  602. release();
  603. elemsize = _elemsize;
  604. elempack = _elempack;
  605. allocator = _allocator;
  606. dims = 3;
  607. w = _w;
  608. h = _h;
  609. d = 1;
  610. c = _c;
  611. cstep = alignSize(w * h * elemsize, 16) / elemsize;
  612. if (total() > 0)
  613. {
  614. size_t totalsize = alignSize(total() * elemsize, 4);
  615. data = allocator->fastMalloc(totalsize);
  616. }
  617. if (data)
  618. {
  619. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  620. *refcount = 1;
  621. }
  622. }
  623. void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  624. {
  625. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  626. return;
  627. release();
  628. elemsize = _elemsize;
  629. elempack = _elempack;
  630. allocator = _allocator;
  631. dims = 4;
  632. w = _w;
  633. h = _h;
  634. d = _d;
  635. c = _c;
  636. cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
  637. if (total() > 0)
  638. {
  639. size_t totalsize = alignSize(total() * elemsize, 4);
  640. data = allocator->fastMalloc(totalsize);
  641. }
  642. if (data)
  643. {
  644. refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
  645. *refcount = 1;
  646. }
  647. }
  648. void VkMat::create_like(const Mat& m, VkAllocator* _allocator)
  649. {
  650. int _dims = m.dims;
  651. if (_dims == 1)
  652. create(m.w, m.elemsize, m.elempack, _allocator);
  653. if (_dims == 2)
  654. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  655. if (_dims == 3)
  656. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  657. if (_dims == 4)
  658. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  659. }
  660. void VkMat::create_like(const VkMat& m, VkAllocator* _allocator)
  661. {
  662. int _dims = m.dims;
  663. if (_dims == 1)
  664. create(m.w, m.elemsize, m.elempack, _allocator);
  665. if (_dims == 2)
  666. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  667. if (_dims == 3)
  668. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  669. if (_dims == 4)
  670. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  671. }
  672. void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
  673. {
  674. int _dims = im.dims;
  675. if (_dims == 1)
  676. create(im.w, im.elemsize, im.elempack, _allocator);
  677. if (_dims == 2)
  678. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  679. if (_dims == 3)
  680. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  681. if (_dims == 4)
  682. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  683. }
  684. void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
  685. {
  686. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  687. return;
  688. release();
  689. elemsize = _elemsize;
  690. elempack = 1;
  691. allocator = _allocator;
  692. dims = 1;
  693. w = _w;
  694. h = 1;
  695. d = 1;
  696. c = 1;
  697. if (total() > 0)
  698. {
  699. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  700. }
  701. if (data)
  702. {
  703. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  704. *refcount = 1;
  705. }
  706. }
  707. void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
  708. {
  709. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  710. return;
  711. release();
  712. elemsize = _elemsize;
  713. elempack = 1;
  714. allocator = _allocator;
  715. dims = 2;
  716. w = _w;
  717. h = _h;
  718. d = 1;
  719. c = 1;
  720. if (total() > 0)
  721. {
  722. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  723. }
  724. if (data)
  725. {
  726. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  727. *refcount = 1;
  728. }
  729. }
  730. void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
  731. {
  732. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  733. return;
  734. release();
  735. elemsize = _elemsize;
  736. elempack = 1;
  737. allocator = _allocator;
  738. dims = 3;
  739. w = _w;
  740. h = _h;
  741. d = 1;
  742. c = _c;
  743. if (total() > 0)
  744. {
  745. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  746. }
  747. if (data)
  748. {
  749. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  750. *refcount = 1;
  751. }
  752. }
  753. void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
  754. {
  755. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
  756. return;
  757. release();
  758. elemsize = _elemsize;
  759. elempack = 1;
  760. allocator = _allocator;
  761. dims = 4;
  762. w = _w;
  763. h = _h;
  764. d = _d;
  765. c = _c;
  766. if (total() > 0)
  767. {
  768. // underlying image is 3d
  769. data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
  770. }
  771. if (data)
  772. {
  773. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  774. *refcount = 1;
  775. }
  776. }
  777. void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  778. {
  779. if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  780. return;
  781. release();
  782. elemsize = _elemsize;
  783. elempack = _elempack;
  784. allocator = _allocator;
  785. dims = 1;
  786. w = _w;
  787. h = 1;
  788. d = 1;
  789. c = 1;
  790. if (total() > 0)
  791. {
  792. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  793. }
  794. if (data)
  795. {
  796. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  797. *refcount = 1;
  798. }
  799. }
  800. void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  801. {
  802. if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  803. return;
  804. release();
  805. elemsize = _elemsize;
  806. elempack = _elempack;
  807. allocator = _allocator;
  808. dims = 2;
  809. w = _w;
  810. h = _h;
  811. d = 1;
  812. c = 1;
  813. if (total() > 0)
  814. {
  815. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  816. }
  817. if (data)
  818. {
  819. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  820. *refcount = 1;
  821. }
  822. }
  823. void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  824. {
  825. if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  826. return;
  827. release();
  828. elemsize = _elemsize;
  829. elempack = _elempack;
  830. allocator = _allocator;
  831. dims = 3;
  832. w = _w;
  833. h = _h;
  834. d = 1;
  835. c = _c;
  836. if (total() > 0)
  837. {
  838. data = allocator->fastMalloc(w, h, c, elemsize, elempack);
  839. }
  840. if (data)
  841. {
  842. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  843. *refcount = 1;
  844. }
  845. }
  846. void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
  847. {
  848. if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
  849. return;
  850. release();
  851. elemsize = _elemsize;
  852. elempack = _elempack;
  853. allocator = _allocator;
  854. dims = 4;
  855. w = _w;
  856. h = _h;
  857. d = _d;
  858. c = _c;
  859. if (total() > 0)
  860. {
  861. // underlying image is 3d
  862. data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
  863. }
  864. if (data)
  865. {
  866. refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
  867. *refcount = 1;
  868. }
  869. }
  870. void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator)
  871. {
  872. int _dims = m.dims;
  873. if (_dims == 1)
  874. create(m.w, m.elemsize, m.elempack, _allocator);
  875. if (_dims == 2)
  876. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  877. if (_dims == 3)
  878. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  879. if (_dims == 4)
  880. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  881. }
  882. void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator)
  883. {
  884. int _dims = m.dims;
  885. if (_dims == 1)
  886. create(m.w, m.elemsize, m.elempack, _allocator);
  887. if (_dims == 2)
  888. create(m.w, m.h, m.elemsize, m.elempack, _allocator);
  889. if (_dims == 3)
  890. create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
  891. if (_dims == 4)
  892. create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
  893. }
  894. void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
  895. {
  896. int _dims = im.dims;
  897. if (_dims == 1)
  898. create(im.w, im.elemsize, im.elempack, _allocator);
  899. if (_dims == 2)
  900. create(im.w, im.h, im.elemsize, im.elempack, _allocator);
  901. if (_dims == 3)
  902. create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
  903. if (_dims == 4)
  904. create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
  905. }
  906. #endif // NCNN_VULKAN
  907. void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_vals)
  908. {
  909. Layer* op;
  910. if (mean_vals && !norm_vals)
  911. {
  912. // substract mean only
  913. op = create_layer(LayerType::Bias);
  914. ParamDict pd;
  915. pd.set(0, c);
  916. op->load_param(pd);
  917. Mat weights[1];
  918. weights[0] = Mat(c);
  919. for (int q = 0; q < c; q++)
  920. {
  921. weights[0][q] = -mean_vals[q];
  922. }
  923. op->load_model(ModelBinFromMatArray(weights));
  924. }
  925. else if (!mean_vals && norm_vals)
  926. {
  927. // normalize only
  928. op = create_layer(LayerType::Scale);
  929. ParamDict pd;
  930. pd.set(0, c);
  931. op->load_param(pd);
  932. Mat weights[1];
  933. weights[0] = Mat(c);
  934. for (int q = 0; q < c; q++)
  935. {
  936. weights[0][q] = norm_vals[q];
  937. }
  938. op->load_model(ModelBinFromMatArray(weights));
  939. }
  940. else if (mean_vals && norm_vals)
  941. {
  942. // substract mean and normalize
  943. op = create_layer(LayerType::Scale);
  944. ParamDict pd;
  945. pd.set(0, c);
  946. pd.set(1, 1);
  947. op->load_param(pd);
  948. Mat weights[2];
  949. weights[0] = Mat(c);
  950. weights[1] = Mat(c);
  951. for (int q = 0; q < c; q++)
  952. {
  953. weights[0][q] = norm_vals[q];
  954. weights[1][q] = -mean_vals[q] * norm_vals[q];
  955. }
  956. op->load_model(ModelBinFromMatArray(weights));
  957. }
  958. else // if (!mean_vals && !norm_vals)
  959. {
  960. return;
  961. }
  962. Option opt;
  963. opt.num_threads = 1; // TODO
  964. op->create_pipeline(opt);
  965. op->forward_inplace(*this, opt);
  966. op->destroy_pipeline(opt);
  967. delete op;
  968. }
  969. Mat Mat::from_float16(const unsigned short* data, int size)
  970. {
  971. Mat m(size);
  972. if (m.empty())
  973. return m;
  974. float* ptr = m; //.data;
  975. #if __ARM_NEON && (__ARM_FP & 2)
  976. int nn = cpu_support_arm_vfpv4() ? size >> 2 : 0;
  977. int remain = size - (nn << 2);
  978. #else
  979. int remain = size;
  980. #endif // __ARM_NEON
  981. #if __ARM_NEON && (__ARM_FP & 2)
  982. #if __aarch64__
  983. if (nn > 0)
  984. {
  985. asm volatile(
  986. "0: \n"
  987. "ld1 {v0.4h}, [%1], #8 \n"
  988. "fcvtl v1.4s, v0.4h \n"
  989. "subs %w0, %w0, #1 \n"
  990. "st1 {v1.4s}, [%2], #16 \n"
  991. "bne 0b \n"
  992. : "=r"(nn), // %0
  993. "=r"(data), // %1
  994. "=r"(ptr) // %2
  995. : "0"(nn),
  996. "1"(data),
  997. "2"(ptr)
  998. : "cc", "memory", "v0", "v1");
  999. }
  1000. #else
  1001. if (nn > 0)
  1002. {
  1003. asm volatile(
  1004. "0: \n"
  1005. "pld [%1, #64] \n"
  1006. "vld1.s16 {d0}, [%1]! \n"
  1007. "vcvt.f32.f16 q1, d0 \n"
  1008. "subs %0, #1 \n"
  1009. "vst1.f32 {d2-d3}, [%2 :128]! \n"
  1010. "bne 0b \n"
  1011. : "=r"(nn), // %0
  1012. "=r"(data), // %1
  1013. "=r"(ptr) // %2
  1014. : "0"(nn),
  1015. "1"(data),
  1016. "2"(ptr)
  1017. : "cc", "memory", "q0", "q1");
  1018. }
  1019. #endif // __aarch64__
  1020. #endif // __ARM_NEON
  1021. for (; remain > 0; remain--)
  1022. {
  1023. *ptr = float16_to_float32(*data);
  1024. data++;
  1025. ptr++;
  1026. }
  1027. return m;
  1028. }
  1029. #if NCNN_VULKAN
  1030. #if NCNN_PLATFORM_API
  1031. #if __ANDROID_API__ >= 26
  1032. VkImageMat VkImageMat::from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator)
  1033. {
  1034. int width = allocator->width();
  1035. int height = allocator->height();
  1036. return VkImageMat(width, height, allocator);
  1037. }
  1038. #endif // __ANDROID_API__ >= 26
  1039. #endif // NCNN_PLATFORM_API
  1040. #endif // NCNN_VULKAN
  1041. unsigned short float32_to_float16(float value)
  1042. {
  1043. // 1 : 8 : 23
  1044. union
  1045. {
  1046. unsigned int u;
  1047. float f;
  1048. } tmp;
  1049. tmp.f = value;
  1050. // 1 : 8 : 23
  1051. unsigned short sign = (tmp.u & 0x80000000) >> 31;
  1052. unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
  1053. unsigned int significand = tmp.u & 0x7FFFFF;
  1054. // NCNN_LOGE("%d %d %d", sign, exponent, significand);
  1055. // 1 : 5 : 10
  1056. unsigned short fp16;
  1057. if (exponent == 0)
  1058. {
  1059. // zero or denormal, always underflow
  1060. fp16 = (sign << 15) | (0x00 << 10) | 0x00;
  1061. }
  1062. else if (exponent == 0xFF)
  1063. {
  1064. // infinity or NaN
  1065. fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
  1066. }
  1067. else
  1068. {
  1069. // normalized
  1070. short newexp = exponent + (-127 + 15);
  1071. if (newexp >= 31)
  1072. {
  1073. // overflow, return infinity
  1074. fp16 = (sign << 15) | (0x1F << 10) | 0x00;
  1075. }
  1076. else if (newexp <= 0)
  1077. {
  1078. // Some normal fp32 cannot be expressed as normal fp16
  1079. fp16 = (sign << 15) | (0x00 << 10) | 0x00;
  1080. }
  1081. else
  1082. {
  1083. // normal fp16
  1084. fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
  1085. }
  1086. }
  1087. return fp16;
  1088. }
  1089. float float16_to_float32(unsigned short value)
  1090. {
  1091. // 1 : 5 : 10
  1092. unsigned short sign = (value & 0x8000) >> 15;
  1093. unsigned short exponent = (value & 0x7c00) >> 10;
  1094. unsigned short significand = value & 0x03FF;
  1095. // NCNN_LOGE("%d %d %d", sign, exponent, significand);
  1096. // 1 : 8 : 23
  1097. union
  1098. {
  1099. unsigned int u;
  1100. float f;
  1101. } tmp;
  1102. if (exponent == 0)
  1103. {
  1104. if (significand == 0)
  1105. {
  1106. // zero
  1107. tmp.u = (sign << 31);
  1108. }
  1109. else
  1110. {
  1111. // denormal
  1112. exponent = 0;
  1113. // find non-zero bit
  1114. while ((significand & 0x200) == 0)
  1115. {
  1116. significand <<= 1;
  1117. exponent++;
  1118. }
  1119. significand <<= 1;
  1120. significand &= 0x3FF;
  1121. tmp.u = (sign << 31) | ((-exponent + (-15 + 127)) << 23) | (significand << 13);
  1122. }
  1123. }
  1124. else if (exponent == 0x1F)
  1125. {
  1126. // infinity or NaN
  1127. tmp.u = (sign << 31) | (0xFF << 23) | (significand << 13);
  1128. }
  1129. else
  1130. {
  1131. // normalized
  1132. tmp.u = (sign << 31) | ((exponent + (-15 + 127)) << 23) | (significand << 13);
  1133. }
  1134. return tmp.f;
  1135. }
  1136. void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt)
  1137. {
  1138. Layer* padding = create_layer(LayerType::Padding);
  1139. ParamDict pd;
  1140. pd.set(0, top);
  1141. pd.set(1, bottom);
  1142. pd.set(2, left);
  1143. pd.set(3, right);
  1144. pd.set(4, type);
  1145. pd.set(5, v);
  1146. padding->load_param(pd);
  1147. padding->create_pipeline(opt);
  1148. padding->forward(src, dst, opt);
  1149. padding->destroy_pipeline(opt);
  1150. delete padding;
  1151. }
  1152. void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt)
  1153. {
  1154. Layer* padding = create_layer(LayerType::Padding);
  1155. ParamDict pd;
  1156. pd.set(0, top);
  1157. pd.set(1, bottom);
  1158. pd.set(2, left);
  1159. pd.set(3, right);
  1160. pd.set(4, type);
  1161. pd.set(5, v);
  1162. pd.set(7, front);
  1163. pd.set(8, behind);
  1164. padding->load_param(pd);
  1165. padding->create_pipeline(opt);
  1166. padding->forward(src, dst, opt);
  1167. padding->destroy_pipeline(opt);
  1168. delete padding;
  1169. }
  1170. void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt)
  1171. {
  1172. if (left + right > src.w || top + bottom > src.h)
  1173. {
  1174. NCNN_LOGE("copy_cut_border parameter error, top: %d, bottom: %d, left: %d, right: %d, src.w: %d, src.h: %d", top, bottom, left, right, src.w, src.h);
  1175. return;
  1176. }
  1177. Layer* crop = create_layer(LayerType::Crop);
  1178. ParamDict pd;
  1179. pd.set(0, left);
  1180. pd.set(1, top);
  1181. pd.set(2, 0);
  1182. pd.set(3, src.w - left - right);
  1183. pd.set(4, src.h - top - bottom);
  1184. pd.set(5, -233);
  1185. crop->load_param(pd);
  1186. crop->create_pipeline(opt);
  1187. crop->forward(src, dst, opt);
  1188. crop->destroy_pipeline(opt);
  1189. delete crop;
  1190. }
  1191. void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt)
  1192. {
  1193. if (left + right > src.w || top + bottom > src.h || front + behind > src.d)
  1194. {
  1195. NCNN_LOGE("copy_cut_border_3d parameter error, top: %d, bottom: %d, left: %d, right: %d, front: %d, behind: %d, src.w: %d, src.h: %d, src.d: %d", top, bottom, left, right, front, behind, src.w, src.h, src.d);
  1196. return;
  1197. }
  1198. Layer* crop = create_layer(LayerType::Crop);
  1199. ParamDict pd;
  1200. pd.set(0, left);
  1201. pd.set(1, top);
  1202. pd.set(13, front);
  1203. pd.set(2, 0);
  1204. pd.set(3, src.w - left - right);
  1205. pd.set(4, src.h - top - bottom);
  1206. pd.set(14, src.d - front - behind);
  1207. pd.set(5, -233);
  1208. crop->load_param(pd);
  1209. crop->create_pipeline(opt);
  1210. crop->forward(src, dst, opt);
  1211. crop->destroy_pipeline(opt);
  1212. delete crop;
  1213. }
  1214. void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1215. {
  1216. Layer* interp = create_layer(LayerType::Interp);
  1217. ParamDict pd;
  1218. pd.set(0, 1);
  1219. pd.set(3, h);
  1220. pd.set(4, w);
  1221. interp->load_param(pd);
  1222. interp->create_pipeline(opt);
  1223. interp->forward(src, dst, opt);
  1224. interp->destroy_pipeline(opt);
  1225. delete interp;
  1226. }
  1227. void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1228. {
  1229. Layer* interp = create_layer(LayerType::Interp);
  1230. ParamDict pd;
  1231. pd.set(0, 2);
  1232. pd.set(3, h);
  1233. pd.set(4, w);
  1234. interp->load_param(pd);
  1235. interp->create_pipeline(opt);
  1236. interp->forward(src, dst, opt);
  1237. interp->destroy_pipeline(opt);
  1238. delete interp;
  1239. }
  1240. void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt)
  1241. {
  1242. Layer* interp = create_layer(LayerType::Interp);
  1243. ParamDict pd;
  1244. pd.set(0, 3);
  1245. pd.set(3, h);
  1246. pd.set(4, w);
  1247. interp->load_param(pd);
  1248. interp->create_pipeline(opt);
  1249. interp->forward(src, dst, opt);
  1250. interp->destroy_pipeline(opt);
  1251. delete interp;
  1252. }
  1253. void convert_packing(const Mat& src, Mat& dst, int _elempack, const Option& opt)
  1254. {
  1255. Layer* packing = create_layer(LayerType::Packing);
  1256. ParamDict pd;
  1257. pd.set(0, _elempack);
  1258. packing->load_param(pd);
  1259. packing->create_pipeline(opt);
  1260. packing->forward(src, dst, opt);
  1261. packing->destroy_pipeline(opt);
  1262. delete packing;
  1263. }
  1264. void flatten(const Mat& src, Mat& dst, const Option& opt)
  1265. {
  1266. Layer* flatten = create_layer(LayerType::Flatten);
  1267. ParamDict pd;
  1268. flatten->load_param(pd);
  1269. flatten->create_pipeline(opt);
  1270. flatten->forward(src, dst, opt);
  1271. flatten->destroy_pipeline(opt);
  1272. delete flatten;
  1273. }
  1274. void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt)
  1275. {
  1276. Layer* cast = create_layer(LayerType::Cast);
  1277. ParamDict pd;
  1278. pd.set(0, 1);
  1279. pd.set(1, 2);
  1280. cast->load_param(pd);
  1281. cast->create_pipeline(opt);
  1282. cast->forward(src, dst, opt);
  1283. cast->destroy_pipeline(opt);
  1284. delete cast;
  1285. }
  1286. void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1287. {
  1288. Layer* cast = create_layer(LayerType::Cast);
  1289. ParamDict pd;
  1290. pd.set(0, 2);
  1291. pd.set(1, 1);
  1292. cast->load_param(pd);
  1293. cast->create_pipeline(opt);
  1294. cast->forward(src, dst, opt);
  1295. cast->destroy_pipeline(opt);
  1296. delete cast;
  1297. }
  1298. void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1299. {
  1300. Layer* cast = create_layer(LayerType::Cast);
  1301. ParamDict pd;
  1302. pd.set(0, 3);
  1303. pd.set(1, 1);
  1304. cast->load_param(pd);
  1305. cast->create_pipeline(opt);
  1306. cast->forward(src, dst, opt);
  1307. cast->destroy_pipeline(opt);
  1308. delete cast;
  1309. }
  1310. void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt)
  1311. {
  1312. Layer* cast = create_layer(LayerType::Cast);
  1313. ParamDict pd;
  1314. pd.set(0, 1);
  1315. pd.set(1, 4);
  1316. cast->load_param(pd);
  1317. cast->create_pipeline(opt);
  1318. cast->forward(src, dst, opt);
  1319. cast->destroy_pipeline(opt);
  1320. delete cast;
  1321. }
  1322. void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt)
  1323. {
  1324. Layer* cast = create_layer(LayerType::Cast);
  1325. ParamDict pd;
  1326. pd.set(0, 4);
  1327. pd.set(1, 1);
  1328. cast->load_param(pd);
  1329. cast->create_pipeline(opt);
  1330. cast->forward(src, dst, opt);
  1331. cast->destroy_pipeline(opt);
  1332. delete cast;
  1333. }
  1334. void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt)
  1335. {
  1336. Layer* quantize = create_layer(LayerType::Quantize);
  1337. ParamDict pd;
  1338. pd.set(0, scale_data.w);
  1339. quantize->load_param(pd);
  1340. Mat weights[1];
  1341. weights[0] = scale_data;
  1342. quantize->load_model(ModelBinFromMatArray(weights));
  1343. quantize->create_pipeline(opt);
  1344. quantize->forward(src, dst, opt);
  1345. quantize->destroy_pipeline(opt);
  1346. delete quantize;
  1347. }
  1348. void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt)
  1349. {
  1350. Layer* dequantize = create_layer(LayerType::Dequantize);
  1351. ParamDict pd;
  1352. pd.set(0, scale_data.w);
  1353. pd.set(1, bias_data.w);
  1354. dequantize->load_param(pd);
  1355. Mat weights[2];
  1356. weights[0] = scale_data;
  1357. weights[1] = bias_data;
  1358. dequantize->load_model(ModelBinFromMatArray(weights));
  1359. dequantize->create_pipeline(opt);
  1360. dequantize->forward(src, dst, opt);
  1361. dequantize->destroy_pipeline(opt);
  1362. delete dequantize;
  1363. }
  1364. void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt)
  1365. {
  1366. Layer* requantize = create_layer(LayerType::Requantize);
  1367. ParamDict pd;
  1368. pd.set(0, scale_in_data.w);
  1369. pd.set(1, scale_out_data.w);
  1370. pd.set(2, bias_data.w);
  1371. pd.set(3, activation_type);
  1372. pd.set(4, activation_params);
  1373. requantize->load_param(pd);
  1374. Mat weights[3];
  1375. weights[0] = scale_in_data;
  1376. weights[1] = scale_out_data;
  1377. weights[2] = bias_data;
  1378. requantize->load_model(ModelBinFromMatArray(weights));
  1379. requantize->create_pipeline(opt);
  1380. requantize->forward(src, dst, opt);
  1381. requantize->destroy_pipeline(opt);
  1382. delete requantize;
  1383. }
  1384. } // namespace ncnn