You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

low-level-operation-api.md 4.9 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. implement elementwise addition with/without broadcast using BinaryOp operation
  2. ```
  3. void binary_add(const ncnn::Mat& a, const ncnn::Mat& b, ncnn::Mat& c)
  4. {
  5. ncnn::Option opt;
  6. opt.num_threads = 2;
  7. ncnn::Layer* op = ncnn::create_layer("BinaryOp");
  8. // set param
  9. ncnn::ParamDict pd;
  10. pd.set(0, 0);// op_type
  11. op->load_param(pd);
  12. op->create_pipeline(opt);
  13. // forward
  14. std::vector<ncnn::Mat> bottoms(2);
  15. bottoms[0] = a;
  16. bottoms[1] = b;
  17. std::vector<ncnn::Mat> tops(1);
  18. op->forward(bottoms, tops, opt);
  19. c = tops[0];
  20. op->destroy_pipeline(opt);
  21. delete op;
  22. }
  23. ```
  24. implement 3x3 box blur on three channel image using ConvolutionDepthWise operation
  25. ```
  26. void convolution_3x3_boxblur_RGB(const ncnn::Mat& rgb, ncnn::Mat& out)
  27. {
  28. ncnn::Option opt;
  29. opt.num_threads = 2;
  30. ncnn::Layer* op = ncnn::create_layer("ConvolutionDepthWise");
  31. // set param
  32. ncnn::ParamDict pd;
  33. pd.set(0, 3);// num_output
  34. pd.set(1, 3);// kernel_w
  35. pd.set(5, 0);// bias_term
  36. pd.set(6, 3*3*3);// weight_data_size
  37. pd.set(7, 3);// group
  38. op->load_param(pd);
  39. // set weights
  40. ncnn::Mat weights[1];
  41. weights[0].create(3*3*3);// weight_data
  42. for (int i=0; i<3*3*3; i++)
  43. {
  44. weights[0][i] = 1.f / 9;
  45. }
  46. op->load_model(ncnn::ModelBinFromMatArray(weights));
  47. op->create_pipeline(opt);
  48. // forward
  49. op->forward(rgb, out, opt);
  50. op->destroy_pipeline(opt);
  51. delete op;
  52. }
  53. ```
  54. transpose Mat, chw to cwh
  55. ```
  56. void transpose(const ncnn::Mat& in, ncnn::Mat& out)
  57. {
  58. ncnn::Option opt;
  59. opt.num_threads = 2;
  60. ncnn::Layer* op = ncnn::create_layer("Permute");
  61. // set param
  62. ncnn::ParamDict pd;
  63. pd.set(0, 1);// order_type
  64. op->load_param(pd);
  65. op->create_pipeline(opt);
  66. // forward
  67. op->forward(in, out, opt);
  68. op->destroy_pipeline(opt);
  69. delete op;
  70. }
  71. ```
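  72. apply instance normalization
  73. // x = (x - mean) / sqrt(var + eps) * gamma + beta; with eps = 0, gamma = 1 and beta = 0 as set below, this is x = (x - mean) / sqrt(var)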
  74. ```
  75. void normalize(const ncnn::Mat& in, ncnn::Mat& out)
  76. {
  77. ncnn::Option opt;
  78. opt.num_threads = 2;
  79. ncnn::Layer* op = ncnn::create_layer("InstanceNorm");
  80. // set param
  81. ncnn::ParamDict pd;
  82. pd.set(0, in.c);// channels
  83. pd.set(1, 0.f);// eps
  84. op->load_param(pd);
  85. // set weights
  86. ncnn::Mat weights[2];
  87. weights[0].create(in.c);// gamma_data
  88. weights[1].create(in.c);// beta_data
  89. weights[0].fill(1.f);
  90. weights[1].fill(0.f);
  91. op->load_model(ncnn::ModelBinFromMatArray(weights));
  92. op->create_pipeline(opt);
  93. // forward
  94. op->forward(in, out, opt);
  95. op->destroy_pipeline(opt);
  96. delete op;
  97. }
  98. ```
  99. # cpu -> gpu -> forward -> gpu -> cpu
  100. ```
  101. ncnn::create_gpu_instance();
  102. {
  103. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  104. ncnn::VkWeightBufferAllocator g_weight_vkallocator(vkdev);
  105. ncnn::VkBlobBufferAllocator g_blob_vkallocator(vkdev);
  106. ncnn::VkStagingBufferAllocator g_staging_vkallocator(vkdev);
  107. ncnn::VkWeightStagingBufferAllocator g_weight_staging_vkallocator(vkdev);
  108. // create layer
  109. ncnn::Layer* convolution = ncnn::create_layer("Convolution");
  110. convolution->vkdev = vkdev;
  111. // load param
  112. {
  113. ncnn::ParamDict pd;
  114. pd.set(0, outch);
  115. pd.set(1, ksize);
  116. pd.set(6, outch*inch*ksize*ksize);
  117. pd.use_vulkan_compute = 1;
  118. convolution->load_param(pd);
  119. }
  120. // load model
  121. {
  122. ncnn::Mat weights[2];
  123. weights[0] = random_mat(outch*inch*ksize*ksize);
  124. weights[1] = random_mat(outch);
  125. ncnn::ModelBinFromMatArray mb(weights);
  126. convolution->load_model(mb);
  127. }
  128. // upload model
  129. {
  130. ncnn::VkTransfer cmd(vkdev);
  131. cmd.weight_vkallocator = &g_weight_vkallocator;
  132. cmd.staging_vkallocator = &g_weight_staging_vkallocator;
  133. convolution->upload_model(cmd);
  134. cmd.submit();
  135. cmd.wait();
  136. g_weight_staging_vkallocator.clear();
  137. }
  138. // create pipeline
  139. convolution->create_pipeline(opt);
  140. // set default option
  141. {
  142. ncnn::Option opt = ncnn::get_default_option();
  143. opt.lightmode = true;
  144. opt.num_threads = 4;
  145. opt.blob_allocator = 0;
  146. opt.workspace_allocator = 0;
  147. opt.vulkan_compute = true;
  148. opt.blob_vkallocator = &g_blob_vkallocator;
  149. opt.workspace_vkallocator = &g_blob_vkallocator;
  150. opt.staging_vkallocator = &g_staging_vkallocator;
  151. ncnn::set_default_option(opt);
  152. }
  153. ncnn::Mat bottom = random_mat(w, h, inch);
  154. ncnn::VkMat bottom_gpu;
  155. // copy bottom to bottom_gpu
  156. {
  157. bottom_gpu.create_like(bottom, &g_blob_vkallocator, &g_staging_vkallocator);
  158. bottom_gpu.prepare_staging_buffer();
  159. bottom_gpu.upload(bottom);
  160. }
  161. ncnn::VkMat top_gpu;
  162. // forward
  163. {
  164. ncnn::VkCompute cmd(vkdev);
  165. cmd.record_upload(bottom_gpu);
  166. convolution->forward(bottom_gpu, top_gpu, cmd, opt);
  167. top_gpu.prepare_staging_buffer();
  168. cmd.record_download(top_gpu);
  169. cmd.submit_and_wait();
  170. }
  171. ncnn::Mat top;
  172. // copy top_gpu to top
  173. {
  174. top.create_like(top_gpu);
  175. top_gpu.download(top);
  176. }
  177. convolution->destroy_pipeline(opt);
  178. delete convolution;
  179. g_weight_vkallocator.clear();
  180. g_blob_vkallocator.clear();
  181. g_staging_vkallocator.clear();
  182. g_weight_staging_vkallocator.clear();
  183. }
  184. ncnn::destroy_gpu_instance();
  185. ```