You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

softmax_vulkan.cpp 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "softmax_vulkan.h"
  15. #include <float.h>
  16. #include <math.h>
  17. #include <algorithm>
  18. namespace ncnn {
  19. DEFINE_LAYER_CREATOR(Softmax_vulkan)
  20. Softmax_vulkan::Softmax_vulkan()
  21. {
  22. support_vulkan = true;
  23. pipeline_softmax_reduce_max = 0;
  24. pipeline_softmax_exp_sub_max = 0;
  25. pipeline_softmax_reduce_sum = 0;
  26. pipeline_softmax_div_sum = 0;
  27. pipeline_softmax_reduce_max_pack4 = 0;
  28. pipeline_softmax_exp_sub_max_pack4 = 0;
  29. pipeline_softmax_reduce_sum_pack4 = 0;
  30. pipeline_softmax_div_sum_pack4 = 0;
  31. pipeline_softmax_reduce_max_pack8 = 0;
  32. pipeline_softmax_exp_sub_max_pack8 = 0;
  33. pipeline_softmax_reduce_sum_pack8 = 0;
  34. pipeline_softmax_div_sum_pack8 = 0;
  35. }
  36. int Softmax_vulkan::create_pipeline(const Option& opt)
  37. {
  38. std::vector<vk_specialization_type> specializations(1);
  39. specializations[0].i = axis;
  40. // pack1
  41. {
  42. pipeline_softmax_reduce_max = new Pipeline(vkdev);
  43. pipeline_softmax_exp_sub_max = new Pipeline(vkdev);
  44. pipeline_softmax_reduce_sum = new Pipeline(vkdev);
  45. pipeline_softmax_div_sum = new Pipeline(vkdev);
  46. pipeline_softmax_reduce_max->set_optimal_local_size_xyz();
  47. pipeline_softmax_exp_sub_max->set_optimal_local_size_xyz();
  48. pipeline_softmax_reduce_sum->set_optimal_local_size_xyz();
  49. pipeline_softmax_div_sum->set_optimal_local_size_xyz();
  50. pipeline_softmax_reduce_max->create("softmax_reduce_max", opt, specializations, 2, 10);
  51. pipeline_softmax_exp_sub_max->create("softmax_exp_sub_max", opt, specializations, 2, 10);
  52. pipeline_softmax_reduce_sum->create("softmax_reduce_sum", opt, specializations, 2, 10);
  53. pipeline_softmax_div_sum->create("softmax_div_sum", opt, specializations, 2, 10);
  54. }
  55. // pack4
  56. {
  57. pipeline_softmax_reduce_max_pack4 = new Pipeline(vkdev);
  58. pipeline_softmax_exp_sub_max_pack4 = new Pipeline(vkdev);
  59. pipeline_softmax_reduce_sum_pack4 = new Pipeline(vkdev);
  60. pipeline_softmax_div_sum_pack4 = new Pipeline(vkdev);
  61. pipeline_softmax_reduce_max_pack4->set_optimal_local_size_xyz();
  62. pipeline_softmax_exp_sub_max_pack4->set_optimal_local_size_xyz();
  63. pipeline_softmax_reduce_sum_pack4->set_optimal_local_size_xyz();
  64. pipeline_softmax_div_sum_pack4->set_optimal_local_size_xyz();
  65. pipeline_softmax_reduce_max_pack4->create("softmax_reduce_max_pack4", opt, specializations, 2, 10);
  66. pipeline_softmax_exp_sub_max_pack4->create("softmax_exp_sub_max_pack4", opt, specializations, 2, 10);
  67. pipeline_softmax_reduce_sum_pack4->create("softmax_reduce_sum_pack4", opt, specializations, 2, 10);
  68. pipeline_softmax_div_sum_pack4->create("softmax_div_sum_pack4", opt, specializations, 2, 10);
  69. }
  70. // pack8
  71. {
  72. pipeline_softmax_reduce_max_pack8 = new Pipeline(vkdev);
  73. pipeline_softmax_exp_sub_max_pack8 = new Pipeline(vkdev);
  74. pipeline_softmax_reduce_sum_pack8 = new Pipeline(vkdev);
  75. pipeline_softmax_div_sum_pack8 = new Pipeline(vkdev);
  76. pipeline_softmax_reduce_max_pack8->set_optimal_local_size_xyz();
  77. pipeline_softmax_exp_sub_max_pack8->set_optimal_local_size_xyz();
  78. pipeline_softmax_reduce_sum_pack8->set_optimal_local_size_xyz();
  79. pipeline_softmax_div_sum_pack8->set_optimal_local_size_xyz();
  80. pipeline_softmax_reduce_max_pack8->create("softmax_reduce_max_pack8", opt, specializations, 2, 10);
  81. pipeline_softmax_exp_sub_max_pack8->create("softmax_exp_sub_max_pack8", opt, specializations, 2, 10);
  82. pipeline_softmax_reduce_sum_pack8->create("softmax_reduce_sum_pack8", opt, specializations, 2, 10);
  83. pipeline_softmax_div_sum_pack8->create("softmax_div_sum_pack8", opt, specializations, 2, 10);
  84. }
  85. return 0;
  86. }
  87. int Softmax_vulkan::destroy_pipeline(const Option& /*opt*/)
  88. {
  89. delete pipeline_softmax_reduce_max;
  90. pipeline_softmax_reduce_max = 0;
  91. delete pipeline_softmax_exp_sub_max;
  92. pipeline_softmax_exp_sub_max = 0;
  93. delete pipeline_softmax_reduce_sum;
  94. pipeline_softmax_reduce_sum = 0;
  95. delete pipeline_softmax_div_sum;
  96. pipeline_softmax_div_sum = 0;
  97. delete pipeline_softmax_reduce_max_pack4;
  98. pipeline_softmax_reduce_max_pack4 = 0;
  99. delete pipeline_softmax_exp_sub_max_pack4;
  100. pipeline_softmax_exp_sub_max_pack4 = 0;
  101. delete pipeline_softmax_reduce_sum_pack4;
  102. pipeline_softmax_reduce_sum_pack4 = 0;
  103. delete pipeline_softmax_div_sum_pack4;
  104. pipeline_softmax_div_sum_pack4 = 0;
  105. delete pipeline_softmax_reduce_max_pack8;
  106. pipeline_softmax_reduce_max_pack8 = 0;
  107. delete pipeline_softmax_exp_sub_max_pack8;
  108. pipeline_softmax_exp_sub_max_pack8 = 0;
  109. delete pipeline_softmax_reduce_sum_pack8;
  110. pipeline_softmax_reduce_sum_pack8 = 0;
  111. delete pipeline_softmax_div_sum_pack8;
  112. pipeline_softmax_div_sum_pack8 = 0;
  113. return 0;
  114. }
  115. int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const
  116. {
  117. int dims = bottom_top_blob.dims;
  118. int w = bottom_top_blob.w;
  119. int h = bottom_top_blob.h;
  120. int channels = bottom_top_blob.c;
  121. size_t elemsize = bottom_top_blob.elemsize;
  122. int elempack = bottom_top_blob.elempack;
  123. VkMat max_workspace;
  124. VkMat sum_workspace;
  125. if (dims == 1) // axis == 0
  126. {
  127. max_workspace.create(1, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  128. sum_workspace.create(1, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  129. }
  130. else if (dims == 2 && axis == 0)
  131. {
  132. max_workspace.create(w, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  133. sum_workspace.create(w, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  134. }
  135. else if (dims == 2 && axis == 1)
  136. {
  137. max_workspace.create(h, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  138. sum_workspace.create(h, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  139. }
  140. else if (dims == 3 && axis == 0)
  141. {
  142. max_workspace.create(w, h, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  143. sum_workspace.create(w, h, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  144. }
  145. else if (dims == 3 && axis == 1)
  146. {
  147. max_workspace.create(w, channels, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  148. sum_workspace.create(w, channels, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  149. }
  150. else if (dims == 3 && axis == 2)
  151. {
  152. max_workspace.create(h, channels, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  153. sum_workspace.create(h, channels, elemsize, elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
  154. }
  155. // reduce max
  156. {
  157. std::vector<VkMat> bindings(2);
  158. bindings[0] = bottom_top_blob;
  159. bindings[1] = max_workspace;
  160. std::vector<vk_constant_type> constants(10);
  161. constants[0].i = bottom_top_blob.dims;
  162. constants[1].i = bottom_top_blob.w;
  163. constants[2].i = bottom_top_blob.h;
  164. constants[3].i = bottom_top_blob.c;
  165. constants[4].i = bottom_top_blob.cstep;
  166. constants[5].i = max_workspace.dims;
  167. constants[6].i = max_workspace.w;
  168. constants[7].i = max_workspace.h;
  169. constants[8].i = max_workspace.c;
  170. constants[9].i = max_workspace.cstep;
  171. const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_reduce_max_pack8
  172. : elempack == 4 ? pipeline_softmax_reduce_max_pack4
  173. : pipeline_softmax_reduce_max;
  174. cmd.record_pipeline(pipeline, bindings, constants, max_workspace);
  175. }
  176. // exp( v - max )
  177. {
  178. std::vector<VkMat> bindings(2);
  179. bindings[0] = bottom_top_blob;
  180. bindings[1] = max_workspace;
  181. std::vector<vk_constant_type> constants(10);
  182. constants[0].i = bottom_top_blob.dims;
  183. constants[1].i = bottom_top_blob.w;
  184. constants[2].i = bottom_top_blob.h;
  185. constants[3].i = bottom_top_blob.c;
  186. constants[4].i = bottom_top_blob.cstep;
  187. constants[5].i = max_workspace.dims;
  188. constants[6].i = max_workspace.w;
  189. constants[7].i = max_workspace.h;
  190. constants[8].i = max_workspace.c;
  191. constants[9].i = max_workspace.cstep;
  192. const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_exp_sub_max_pack8
  193. : elempack == 4 ? pipeline_softmax_exp_sub_max_pack4
  194. : pipeline_softmax_exp_sub_max;
  195. cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
  196. }
  197. // reduce sum
  198. {
  199. std::vector<VkMat> bindings(2);
  200. bindings[0] = bottom_top_blob;
  201. bindings[1] = sum_workspace;
  202. std::vector<vk_constant_type> constants(10);
  203. constants[0].i = bottom_top_blob.dims;
  204. constants[1].i = bottom_top_blob.w;
  205. constants[2].i = bottom_top_blob.h;
  206. constants[3].i = bottom_top_blob.c;
  207. constants[4].i = bottom_top_blob.cstep;
  208. constants[5].i = sum_workspace.dims;
  209. constants[6].i = sum_workspace.w;
  210. constants[7].i = sum_workspace.h;
  211. constants[8].i = sum_workspace.c;
  212. constants[9].i = sum_workspace.cstep;
  213. const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_reduce_sum_pack8
  214. : elempack == 4 ? pipeline_softmax_reduce_sum_pack4
  215. : pipeline_softmax_reduce_sum;
  216. cmd.record_pipeline(pipeline, bindings, constants, sum_workspace);
  217. }
  218. // div sum
  219. {
  220. std::vector<VkMat> bindings(2);
  221. bindings[0] = bottom_top_blob;
  222. bindings[1] = sum_workspace;
  223. std::vector<vk_constant_type> constants(10);
  224. constants[0].i = bottom_top_blob.dims;
  225. constants[1].i = bottom_top_blob.w;
  226. constants[2].i = bottom_top_blob.h;
  227. constants[3].i = bottom_top_blob.c;
  228. constants[4].i = bottom_top_blob.cstep;
  229. constants[5].i = sum_workspace.dims;
  230. constants[6].i = sum_workspace.w;
  231. constants[7].i = sum_workspace.h;
  232. constants[8].i = sum_workspace.c;
  233. constants[9].i = sum_workspace.cstep;
  234. const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_div_sum_pack8
  235. : elempack == 4 ? pipeline_softmax_div_sum_pack4
  236. : pipeline_softmax_div_sum;
  237. cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
  238. }
  239. return 0;
  240. }
  241. } // namespace ncnn