You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

low-level-operation-api.md 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. Implement element-wise addition (with or without broadcasting) using the BinaryOp operation
  2. ```
  3. void binary_add(const ncnn::Mat& a, const ncnn::Mat& b, ncnn::Mat& c)
  4. {
  5. ncnn::Layer* op = ncnn::create_layer("BinaryOp");
  6. // set param
  7. ncnn::ParamDict pd;
  8. pd.set(0, 0);// op_type
  9. op->load_param(pd);
  10. // forward
  11. std::vector<ncnn::Mat> bottoms(2);
  12. bottoms[0] = a;
  13. bottoms[1] = b;
  14. std::vector<ncnn::Mat> tops(1);
  15. op->forward(bottoms, tops);
  16. c = tops[0];
  17. delete op;
  18. }
  19. ```
  20. Implement a 3x3 box blur on a three-channel image using the ConvolutionDepthWise operation
  21. ```
  22. void convolution_3x3_boxblur_RGB(const ncnn::Mat& rgb, ncnn::Mat& out)
  23. {
  24. ncnn::Layer* op = ncnn::create_layer("ConvolutionDepthWise");
  25. // set param
  26. ncnn::ParamDict pd;
  27. pd.set(0, 3);// num_output
  28. pd.set(1, 3);// kernel_w
  29. pd.set(5, 0);// bias_term
  30. pd.set(6, 3*3*3);// weight_data_size
  31. pd.set(7, 3);// group
  32. op->load_param(pd);
  33. // set weights
  34. ncnn::Mat weights[1];
  35. weights[0].create(3*3*3);// weight_data
  36. for (int i=0; i<3*3*3; i++)
  37. {
  38. weights[0][i] = 1.f / 9;
  39. }
  40. op->load_model(ncnn::ModelBinFromMatArray(weights));
  41. // forward
  42. op->forward(rgb, out);
  43. delete op;
  44. }
  45. ```
  46. Transpose a Mat from chw to cwh layout using the Permute operation
  47. ```
  48. void transpose(const ncnn::Mat& in, ncnn::Mat& out)
  49. {
  50. ncnn::Layer* op = ncnn::create_layer("Permute");
  51. // set param
  52. ncnn::ParamDict pd;
  53. pd.set(0, 1);// order_type
  54. op->load_param(pd);
  55. // forward
  56. op->forward(in, out);
  57. delete op;
  58. }
  59. ```
  60. Apply instance normalization using the InstanceNorm operation
  61. // per channel: x = (x - mean) / sqrt(var)
  62. ```
  63. void normalize(const ncnn::Mat& in, ncnn::Mat& out)
  64. {
  65. ncnn::Layer* op = ncnn::create_layer("InstanceNorm");
  66. // set param
  67. ncnn::ParamDict pd;
  68. pd.set(0, in.c);// channels
  69. pd.set(1, 0.f);// eps
  70. op->load_param(pd);
  71. // set weights
  72. ncnn::Mat weights[2];
  73. weights[0].create(in.c);// gamma_data
  74. weights[1].create(in.c);// beta_data
  75. weights[0].fill(1.f);
  76. weights[1].fill(0.f);
  77. op->load_model(ncnn::ModelBinFromMatArray(weights));
  78. // forward
  79. op->forward(in, out);
  80. delete op;
  81. }
  82. ```
  83. # Vulkan example: upload from cpu to gpu -> forward -> download from gpu to cpu
  84. ```
  85. ncnn::create_gpu_instance(); // runs one Convolution layer through the Vulkan path; paired with destroy_gpu_instance() on the last line
  86. { // this scope closes before destroy_gpu_instance(), so vkdev and the allocators below are destroyed first
  87. ncnn::VulkanDevice vkdev; // handed to every allocator, command object and the layer below
  88. ncnn::VkWeightBufferAllocator g_weight_vkallocator(&vkdev); // used for the weights in the "upload model" step
  89. ncnn::VkBlobBufferAllocator g_blob_vkallocator(&vkdev); // used for blobs and, via the option below, for workspace
  90. ncnn::VkStagingBufferAllocator g_staging_vkallocator(&vkdev); // staging allocator for bottom_gpu and the default option
  91. ncnn::VkWeightStagingBufferAllocator g_weight_staging_vkallocator(&vkdev); // staging allocator used only by the "upload model" step
  92. // create layer and attach the Vulkan device to it
  93. ncnn::Layer* convolution = ncnn::create_layer("Convolution");
  94. convolution->vkdev = &vkdev; // NOTE(review): the create_layer result is dereferenced without a null check
  95. // load param (outch, inch and ksize are not defined in this snippet; they come from the surrounding code)
  96. {
  97. ncnn::ParamDict pd;
  98. pd.set(0, outch); // presumably num_output - confirm against the Convolution param ids
  99. pd.set(1, ksize); // presumably kernel_w - confirm against the Convolution param ids
  100. pd.set(6, outch*inch*ksize*ksize); // same element count as weights[0] created in "load model" below
  101. pd.use_vulkan_compute = 1; // set before load_param so the layer is configured with the Vulkan flag on
  102. convolution->load_param(pd); // return value is not checked
  103. }
  104. // load model: weights[0] has outch*inch*ksize*ksize random values, weights[1] has outch random values
  105. {
  106. ncnn::Mat weights[2];
  107. weights[0] = random_mat(outch*inch*ksize*ksize);
  108. weights[1] = random_mat(outch); // NOTE(review): presumably the bias, but no bias_term param is set above - confirm it is consumed
  109. ncnn::ModelBinFromMatArray mb(weights);
  110. convolution->load_model(mb); // return value is not checked
  111. }
  112. // upload model: record the weight transfer, submit it, and block until it has finished
  113. {
  114. ncnn::VkTransfer cmd(&vkdev);
  115. cmd.weight_vkallocator = &g_weight_vkallocator;
  116. cmd.staging_vkallocator = &g_weight_staging_vkallocator;
  117. convolution->upload_model(cmd);
  118. cmd.submit();
  119. cmd.wait();
  120. g_weight_staging_vkallocator.clear(); // cleared only after wait() has returned
  121. }
  122. // create pipeline (done after the weights have been uploaded)
  123. convolution->create_pipeline();
  124. // set default option: null out blob_allocator/workspace_allocator and point the *_vkallocator fields at the allocators created above
  125. {
  126. ncnn::Option opt = ncnn::get_default_option();
  127. opt.lightmode = true;
  128. opt.num_threads = 4;
  129. opt.blob_allocator = 0;
  130. opt.workspace_allocator = 0;
  131. opt.vulkan_compute = true;
  132. opt.blob_vkallocator = &g_blob_vkallocator;
  133. opt.workspace_vkallocator = &g_blob_vkallocator; // workspace shares the blob allocator
  134. opt.staging_vkallocator = &g_staging_vkallocator;
  135. ncnn::set_default_option(opt);
  136. }
  137. ncnn::Mat bottom = random_mat(w, h, inch); // input blob; w, h, inch and random_mat come from the surrounding code
  138. ncnn::VkMat bottom_gpu;
  139. // copy bottom to bottom_gpu (the matching record_upload is issued in the forward step below)
  140. {
  141. bottom_gpu.create_like(bottom, &g_blob_vkallocator, &g_staging_vkallocator);
  142. bottom_gpu.prepare_staging_buffer();
  143. bottom_gpu.upload(bottom);
  144. }
  145. ncnn::VkMat top_gpu;
  146. // forward: record upload, layer forward and download on one command object, then submit and wait
  147. {
  148. ncnn::VkCompute cmd(&vkdev);
  149. cmd.record_upload(bottom_gpu);
  150. convolution->forward(bottom_gpu, top_gpu, cmd); // return value is not checked
  151. top_gpu.prepare_staging_buffer(); // called after forward - presumably because forward is what creates top_gpu; confirm
  152. cmd.record_download(top_gpu);
  153. cmd.submit();
  154. cmd.wait();
  155. }
  156. ncnn::Mat top;
  157. // copy top_gpu to top (only after the command above has been waited on)
  158. {
  159. top.create_like(top_gpu);
  160. top_gpu.download(top);
  161. }
  162. delete convolution; // the layer came from create_layer and is owned by this snippet
  163. g_weight_vkallocator.clear(); // clear every allocator before the scope closes
  164. g_blob_vkallocator.clear();
  165. g_staging_vkallocator.clear();
  166. g_weight_staging_vkallocator.clear();
  167. }
  168. ncnn::destroy_gpu_instance(); // pairs with create_gpu_instance() at the top
  169. ```