You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

low-level-operation-api.md 7.4 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. # implement elementwise addition with/without broadcast using BinaryOp operation
  2. * input must be fp32 storage without packing
  3. * output is expected to be fp32 storage without packing
  4. ```cpp
  5. void binary_add(const ncnn::Mat& a, const ncnn::Mat& b, ncnn::Mat& c)
  6. {
  7. ncnn::Option opt;
  8. opt.num_threads = 2;
  9. opt.use_fp16_storage = false;
  10. opt.use_packing_layout = false;
  11. ncnn::Layer* op = ncnn::create_layer("BinaryOp");
  12. // set param
  13. ncnn::ParamDict pd;
  14. pd.set(0, 0);// op_type
  15. op->load_param(pd);
  16. op->create_pipeline(opt);
  17. // forward
  18. std::vector<ncnn::Mat> bottoms(2);
  19. bottoms[0] = a;
  20. bottoms[1] = b;
  21. std::vector<ncnn::Mat> tops(1);
  22. op->forward(bottoms, tops, opt);
  23. c = tops[0];
  24. op->destroy_pipeline(opt);
  25. delete op;
  26. }
  27. ```
  28. # implement a 3x3 box blur on a three-channel image using the ConvolutionDepthWise operation
  29. * input must be fp32 storage without packing
  30. * output is expected to be fp32 storage without packing
  31. ```cpp
  32. void convolution_3x3_boxblur_RGB(const ncnn::Mat& rgb, ncnn::Mat& out)
  33. {
  34. ncnn::Option opt;
  35. opt.num_threads = 2;
  36. opt.use_fp16_storage = false;
  37. opt.use_packing_layout = false;
  38. ncnn::Layer* op = ncnn::create_layer("ConvolutionDepthWise");
  39. // set param
  40. ncnn::ParamDict pd;
  41. pd.set(0, 3);// num_output
  42. pd.set(1, 3);// kernel_w
  43. pd.set(5, 0);// bias_term
  44. pd.set(6, 3*3*3);// weight_data_size
  45. pd.set(7, 3);// group
  46. op->load_param(pd);
  47. // set weights
  48. ncnn::Mat weights[1];
  49. weights[0].create(3*3*3);// weight_data
  50. for (int i=0; i<3*3*3; i++)
  51. {
  52. weights[0][i] = 1.f / 9;
  53. }
  54. op->load_model(ncnn::ModelBinFromMatArray(weights));
  55. op->create_pipeline(opt);
  56. // forward
  57. op->forward(rgb, out, opt);
  58. op->destroy_pipeline(opt);
  59. delete op;
  60. }
  61. ```
  62. # transpose Mat, chw to cwh
  63. * input must be fp32 storage with/without packing
  64. * output is expected to be fp32 storage packed
  65. ```cpp
  66. void transpose(const ncnn::Mat& in, ncnn::Mat& out)
  67. {
  68. ncnn::Option opt;
  69. opt.num_threads = 2;
  70. opt.use_fp16_storage = false;
  71. opt.use_packing_layout = true;
  72. ncnn::Layer* op = ncnn::create_layer("Permute");
  73. // set param
  74. ncnn::ParamDict pd;
  75. pd.set(0, 1);// order_type
  76. op->load_param(pd);
  77. op->create_pipeline(opt);
  78. ncnn::Mat in_packed = in;
  79. {
  80. // resolve dst_elempack
  81. int dims = in.dims;
  82. int elemcount = 0;
  83. if (dims == 1) elemcount = in.elempack * in.w;
  84. if (dims == 2) elemcount = in.elempack * in.h;
  85. if (dims == 3) elemcount = in.elempack * in.c;
  86. int dst_elempack = 1;
  87. if (op->support_packing)
  88. {
  89. if (elemcount % 8 == 0 && (ncnn::cpu_support_x86_avx2() || ncnn::cpu_support_x86_avx()))
  90. dst_elempack = 8;
  91. else if (elemcount % 4 == 0)
  92. dst_elempack = 4;
  93. }
  94. if (in.elempack != dst_elempack)
  95. {
  96. convert_packing(in, in_packed, dst_elempack, opt);
  97. }
  98. }
  99. // forward
  100. op->forward(in_packed, out, opt);
  101. op->destroy_pipeline(opt);
  102. delete op;
  103. }
  104. ```
  105. # apply instance normalization
  106. // x = (x - mean) / sqrt(var)
  107. * input can be fp32/fp16 storage with/without packing
  108. * output is expected to be fp16 storage packed when supported, or fp32 storage packed otherwise
  109. ```cpp
  110. void normalize(const ncnn::Mat& in, ncnn::Mat& out)
  111. {
  112. ncnn::Option opt;
  113. opt.num_threads = 2;
  114. opt.use_fp16_storage = true;
  115. opt.use_packing_layout = true;
  116. ncnn::Layer* op = ncnn::create_layer("InstanceNorm");
  117. // set param
  118. ncnn::ParamDict pd;
  119. pd.set(0, in.c);// channels
  120. pd.set(1, 0.f);// eps
  121. op->load_param(pd);
  122. // set weights
  123. ncnn::Mat weights[2];
  124. weights[0].create(in.c);// gamma_data
  125. weights[1].create(in.c);// beta_data
  126. weights[0].fill(1.f);
  127. weights[1].fill(0.f);
  128. op->load_model(ncnn::ModelBinFromMatArray(weights));
  129. op->create_pipeline(opt);
  130. ncnn::Mat in_fp16 = in;
  131. if (in.elembits() == 32 && op->support_fp16_storage)
  132. {
  133. cast_float32_to_float16(in, in_fp16, opt);
  134. }
  135. if (in.elembits() == 16 && !op->support_fp16_storage)
  136. {
  137. cast_float16_to_float32(in, in_fp16, opt);
  138. }
  139. ncnn::Mat in_fp16_packed = in_fp16;
  140. {
  141. // resolve dst_elempack
  142. int dims = in_fp16.dims;
  143. int elemcount = 0;
  144. if (dims == 1) elemcount = in_fp16.elempack * in_fp16.w;
  145. if (dims == 2) elemcount = in_fp16.elempack * in_fp16.h;
  146. if (dims == 3) elemcount = in_fp16.elempack * in_fp16.c;
  147. int dst_elempack = 1;
  148. if (op->support_packing)
  149. {
  150. if (elemcount % 8 == 0 && (ncnn::cpu_support_x86_avx2() || ncnn::cpu_support_x86_avx()))
  151. dst_elempack = 8;
  152. else if (elemcount % 4 == 0)
  153. dst_elempack = 4;
  154. }
  155. if (in_fp16.elempack != dst_elempack)
  156. {
  157. convert_packing(in_fp16, in_fp16_packed, dst_elempack, opt);
  158. }
  159. }
  160. // forward
  161. op->forward(in_fp16_packed, out, opt);
  162. op->destroy_pipeline(opt);
  163. delete op;
  164. }
  165. ```
  166. # cpu -> gpu -> forward -> gpu -> cpu
  167. ```cpp
  168. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  169. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  170. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  171. ncnn::VkWeightAllocator* weight_vkallocator = new ncnn::VkWeightAllocator(vkdev);
  172. ncnn::VkWeightStagingAllocator* weight_staging_vkallocator = new ncnn::VkWeightStagingAllocator(vkdev);
  173. // create layer
  174. ncnn::Layer* convolution = ncnn::create_layer("Convolution");
  175. convolution->vkdev = vkdev;
  176. // set option
  177. ncnn::Option opt;
  178. opt.num_threads = 4;
  179. opt.use_vulkan_compute = true;
  180. opt.blob_vkallocator = blob_vkallocator;
  181. opt.workspace_vkallocator = blob_vkallocator;
  182. opt.staging_vkallocator = staging_vkallocator;
  183. // load param
  184. {
  185. ncnn::ParamDict pd;
  186. pd.set(0, outch);
  187. pd.set(1, ksize);
  188. pd.set(6, outch*inch*ksize*ksize);
  189. pd.use_vulkan_compute = 1;
  190. convolution->load_param(pd);
  191. }
  192. // load model
  193. {
  194. ncnn::Mat weights[2];
  195. weights[0] = random_mat(outch*inch*ksize*ksize);
  196. weights[1] = random_mat(outch);
  197. ncnn::ModelBinFromMatArray mb(weights);
  198. convolution->load_model(mb);
  199. }
  200. // create pipeline
  201. convolution->create_pipeline(opt);
  202. // upload model
  203. {
  204. ncnn::VkTransfer cmd(vkdev);
  205. ncnn::Option opt_upload = opt;
  206. opt_upload.blob_vkallocator = weight_vkallocator;
  207. opt_upload.workspace_vkallocator = weight_vkallocator;
  208. opt_upload.staging_vkallocator = weight_staging_vkallocator;
  209. convolution->upload_model(cmd, opt_upload);
  210. cmd.submit_and_wait();
  211. }
  212. ncnn::Mat bottom = random_mat(w, h, inch);
  213. ncnn::Mat top;
  214. // forward
  215. {
  216. ncnn::VkCompute cmd(vkdev);
  217. ncnn::VkMat bottom_gpu;
  218. cmd.record_upload(bottom, bottom_gpu, opt);
  219. ncnn::VkMat top_gpu;
  220. convolution->forward(bottom_gpu, top_gpu, cmd, opt);
  221. cmd.record_download(top_gpu, top, opt);
  222. cmd.submit_and_wait();
  223. }
  224. convolution->destroy_pipeline(opt);
  225. delete convolution;
  226. vkdev->reclaim_blob_allocator(blob_vkallocator);
  227. vkdev->reclaim_staging_allocator(staging_vkallocator);
  228. weight_vkallocator->clear();
  229. weight_staging_vkallocator->clear();
  230. delete weight_vkallocator;
  231. delete weight_staging_vkallocator;
  232. ```