You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

low-level-operation-api.md 4.7 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. implement elementwise addition with/without broadcast using BinaryOp operation
  2. ```cpp
  3. void binary_add(const ncnn::Mat& a, const ncnn::Mat& b, ncnn::Mat& c)
  4. {
  5. ncnn::Option opt;
  6. opt.num_threads = 2;
  7. ncnn::Layer* op = ncnn::create_layer("BinaryOp");
  8. // set param
  9. ncnn::ParamDict pd;
  10. pd.set(0, 0);// op_type
  11. op->load_param(pd);
  12. op->create_pipeline(opt);
  13. // forward
  14. std::vector<ncnn::Mat> bottoms(2);
  15. bottoms[0] = a;
  16. bottoms[1] = b;
  17. std::vector<ncnn::Mat> tops(1);
  18. op->forward(bottoms, tops, opt);
  19. c = tops[0];
  20. op->destroy_pipeline(opt);
  21. delete op;
  22. }
  23. ```
  24. implement 3x3 box blur on three channel image using ConvolutionDepthWise operation
  25. ```cpp
  26. void convolution_3x3_boxblur_RGB(const ncnn::Mat& rgb, ncnn::Mat& out)
  27. {
  28. ncnn::Option opt;
  29. opt.num_threads = 2;
  30. ncnn::Layer* op = ncnn::create_layer("ConvolutionDepthWise");
  31. // set param
  32. ncnn::ParamDict pd;
  33. pd.set(0, 3);// num_output
  34. pd.set(1, 3);// kernel_w
  35. pd.set(5, 0);// bias_term
  36. pd.set(6, 3*3*3);// weight_data_size
  37. pd.set(7, 3);// group
  38. op->load_param(pd);
  39. // set weights
  40. ncnn::Mat weights[1];
  41. weights[0].create(3*3*3);// weight_data
  42. for (int i=0; i<3*3*3; i++)
  43. {
  44. weights[0][i] = 1.f / 9;
  45. }
  46. op->load_model(ncnn::ModelBinFromMatArray(weights));
  47. op->create_pipeline(opt);
  48. // forward
  49. op->forward(rgb, out, opt);
  50. op->destroy_pipeline(opt);
  51. delete op;
  52. }
  53. ```
  54. transpose Mat, chw to cwh
  55. ```cpp
  56. void transpose(const ncnn::Mat& in, ncnn::Mat& out)
  57. {
  58. ncnn::Option opt;
  59. opt.num_threads = 2;
  60. ncnn::Layer* op = ncnn::create_layer("Permute");
  61. // set param
  62. ncnn::ParamDict pd;
  63. pd.set(0, 1);// order_type
  64. op->load_param(pd);
  65. op->create_pipeline(opt);
  66. // forward
  67. op->forward(in, out, opt);
  68. op->destroy_pipeline(opt);
  69. delete op;
  70. }
  71. ```
  72. apply instance normalization
  73. // x = (x - mean) / sqrt(var)
  74. ```cpp
  75. void normalize(const ncnn::Mat& in, ncnn::Mat& out)
  76. {
  77. ncnn::Option opt;
  78. opt.num_threads = 2;
  79. ncnn::Layer* op = ncnn::create_layer("InstanceNorm");
  80. // set param
  81. ncnn::ParamDict pd;
  82. pd.set(0, in.c);// channels
  83. pd.set(1, 0.f);// eps
  84. op->load_param(pd);
  85. // set weights
  86. ncnn::Mat weights[2];
  87. weights[0].create(in.c);// gamma_data
  88. weights[1].create(in.c);// beta_data
  89. weights[0].fill(1.f);
  90. weights[1].fill(0.f);
  91. op->load_model(ncnn::ModelBinFromMatArray(weights));
  92. op->create_pipeline(opt);
  93. // forward
  94. op->forward(in, out, opt);
  95. op->destroy_pipeline(opt);
  96. delete op;
  97. }
  98. ```
  99. # cpu -> gpu -> forward -> gpu -> cpu
  100. ```cpp
  101. ncnn::create_gpu_instance();
  102. {
  103. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  104. ncnn::VkWeightAllocator g_weight_vkallocator(vkdev);
  105. ncnn::VkBlobAllocator g_blob_vkallocator(vkdev);
  106. ncnn::VkStagingAllocator g_staging_vkallocator(vkdev);
  107. ncnn::VkWeightStagingAllocator g_weight_staging_vkallocator(vkdev);
  108. // create layer
  109. ncnn::Layer* convolution = ncnn::create_layer("Convolution");
  110. convolution->vkdev = vkdev;
  111. // set option
  112. ncnn::Option opt;
  113. opt.lightmode = true;
  114. opt.num_threads = 4;
  115. opt.blob_allocator = 0;
  116. opt.workspace_allocator = 0;
  117. opt.vulkan_compute = true;
  118. opt.blob_vkallocator = &g_blob_vkallocator;
  119. opt.workspace_vkallocator = &g_blob_vkallocator;
  120. opt.staging_vkallocator = &g_staging_vkallocator;
  121. // load param
  122. {
  123. ncnn::ParamDict pd;
  124. pd.set(0, outch);
  125. pd.set(1, ksize);
  126. pd.set(6, outch*inch*ksize*ksize);
  127. pd.use_vulkan_compute = 1;
  128. convolution->load_param(pd);
  129. }
  130. // load model
  131. {
  132. ncnn::Mat weights[2];
  133. weights[0] = random_mat(outch*inch*ksize*ksize);
  134. weights[1] = random_mat(outch);
  135. ncnn::ModelBinFromMatArray mb(weights);
  136. convolution->load_model(mb);
  137. }
  138. // create pipeline
  139. convolution->create_pipeline(opt);
  140. // upload model
  141. {
  142. ncnn::VkTransfer cmd(vkdev);
  143. ncnn::Option opt_upload = opt;
  144. opt_upload.blob_vkallocator = &g_weight_vkallocator;
  145. opt_upload.workspace_vkallocator = &g_weight_vkallocator;
  146. opt_upload.staging_vkallocator = &g_weight_staging_vkallocator;
  147. convolution->upload_model(cmd, opt_upload);
  148. cmd.submit_and_wait();
  149. }
  150. ncnn::Mat bottom = random_mat(w, h, inch);
  151. ncnn::Mat top;
  152. // forward
  153. {
  154. ncnn::VkCompute cmd(vkdev);
  155. ncnn::VkMat bottom_gpu;
  156. cmd.record_upload(bottom, bottom_gpu, opt);
  157. ncnn::VkMat top_gpu;
  158. convolution->forward(bottom_gpu, top_gpu, cmd, opt);
  159. cmd.record_download(top_gpu, top, opt);
  160. cmd.submit_and_wait();
  161. }
  162. convolution->destroy_pipeline(opt);
  163. delete convolution;
  164. g_weight_vkallocator.clear();
  165. g_blob_vkallocator.clear();
  166. g_staging_vkallocator.clear();
  167. g_weight_staging_vkallocator.clear();
  168. }
  169. ncnn::destroy_gpu_instance();
  170. ```