
algos.cpp

/**
 * \file dnn/src/fallback/conv_bias/conv1x1/algos.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "src/common/opr_delegate.h"
#include "src/fallback/conv_bias/common.h"
#include "src/fallback/conv_bias/conv1x1/algos.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"
#include "src/fallback/conv_bias/opr_impl.h"

#include "megdnn/opr_param_defs.h"
#include "src/naive/convolution/helper.h"

#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#elif (MEGDNN_ARMV7 || MEGDNN_AARCH64)
#include "src/arm_common/conv_bias/postprocess_helper.h"
#else
#include "src/common/postprocess_helper.h"
#endif

#include "midout.h"
MIDOUT_DECL(megdnn_fallback_conv1x1)

using namespace megdnn;
using namespace fallback;
#if MEGDNN_X86
using namespace x86;
#endif
using namespace conv1x1;

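//! Pick the output-channel tile size: for large spatial outputs
//! (>= 56x56) or many output channels (>= 64) use the configured
//! m_oc_block_size; otherwise split OC evenly across threads and round
//! the per-thread block up to a multiple of 24.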
size_t ConvBiasImpl::AlgoConv1x1::get_oc_tile_size_heuristic(
        const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t OC = param.filter_meta.ocpg;
    if (OH * OW >= 56 * 56 || OC >= 64)
        return m_oc_block_size;
    size_t oc_block_size_one_thread = div_ceil(OC, param.nr_threads);
    return round_up<size_t>(oc_block_size_one_thread, 24);
}

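//! Build the workspace bundle according to the pack mode reported by the
//! selected matmul algorithm (DEFAULT, ONLY_PACKA or NO_PACK).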
WorkspaceBundle ConvBiasImpl::AlgoConv1x1::get_bundle_according_packmode(
        const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    auto matmul_param =
            utils::get_matmul_kern_param(param, OH * OW, compt_oc_block_size);

    auto pack_mode = m_matmul_algo->packmode();
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1,
                     midout_iv("get_bundle_default"_hash)) {
            return Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::DEFAULT>()
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size);
        }
        MIDOUT_END();
    } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1,
                     midout_iv("get_bundle_only_packa"_hash)) {
            return Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>()
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size);
        }
        MIDOUT_END();
    } else {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1,
                     midout_iv("get_bundle_no_pack"_hash)) {
            return Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK>()
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size);
        }
        MIDOUT_END();
    }
    return {nullptr, {}};
}

size_t ConvBiasImpl::AlgoConv1x1::get_workspace(
        const NCBKernSizeParam& param) const {
    return get_bundle_according_packmode(param).total_size_in_bytes();
}

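//! Assemble the kernel sequence for the chosen pack mode; when
//! weight_preprocess is true, only the weight-preprocessing kernels are
//! returned.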
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoConv1x1::get_kerns_according_packmode(
        const NCBKernSizeParam& param, bool weight_preprocess) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    auto pack_mode = m_matmul_algo->packmode();

    Conv1x1StrategyBase* conv1x1_strategy =
            Conv1x1Factory::make_conv1x1_strategy(param, pack_mode,
                                                  param.filter_meta.format);
    auto matmul_param =
            utils::get_matmul_kern_param(param, OH * OW, compt_oc_block_size);
    WorkspaceBundle whole_bundle = get_bundle_according_packmode(param);

    //! NO_PACK does not implement get_bundle
    WorkspaceBundle matmul_bundle = {nullptr, {}};
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::NO_PACK) {
        matmul_bundle = {nullptr,
                         {0, 0, m_matmul_algo->get_workspace(matmul_param)}};
    } else {
        matmul_bundle = m_matmul_algo->get_bundle(matmul_param);
    }
    WorkspaceBundle thread_bundle = utils::get_thread_bundle(
            param, matmul_bundle.get_size(2), compt_oc_block_size);

    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1,
                     midout_iv("get_kern_default"_hash)) {
            if (!weight_preprocess) {
                return Conv1x1Kerns<
                               MatrixMulImpl::AlgoBase::PackMode::DEFAULT>()
                        .get_kern(param, whole_bundle, matmul_bundle,
                                  thread_bundle, conv1x1_strategy,
                                  m_matmul_algo, compt_oc_block_size);
            } else {
                return Conv1x1Kerns<
                               MatrixMulImpl::AlgoBase::PackMode::DEFAULT>()
                        .get_kern_preprocess(param, whole_bundle, matmul_bundle,
                                             conv1x1_strategy, m_matmul_algo,
                                             compt_oc_block_size);
            }
        }
        MIDOUT_END();
    } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1,
                     midout_iv("get_kern_only_packa"_hash)) {
            if (!weight_preprocess) {
                return Conv1x1Kerns<
                               MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>()
                        .get_kern(param, whole_bundle, matmul_bundle,
                                  thread_bundle, conv1x1_strategy,
                                  m_matmul_algo, compt_oc_block_size);
            } else {
                return Conv1x1Kerns<
                               MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>()
                        .get_kern_preprocess(param, whole_bundle, matmul_bundle,
                                             conv1x1_strategy, m_matmul_algo,
                                             compt_oc_block_size);
            }
        }
        MIDOUT_END();
    } else {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1,
                     midout_iv("get_kern_no_pack"_hash)) {
            if (!weight_preprocess) {
                return Conv1x1Kerns<
                               MatrixMulImpl::AlgoBase::PackMode::NO_PACK>()
                        .get_kern(param, whole_bundle, matmul_bundle,
                                  thread_bundle, conv1x1_strategy,
                                  m_matmul_algo, compt_oc_block_size);
            } else {
                return Conv1x1Kerns<
                               MatrixMulImpl::AlgoBase::PackMode::NO_PACK>()
                        .get_kern_preprocess(param, whole_bundle, matmul_bundle,
                                             conv1x1_strategy, m_matmul_algo,
                                             compt_oc_block_size);
            }
        }
        MIDOUT_END();
    }
}

SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns(
        const NCBKernSizeParam& param) const {
    return get_kerns_according_packmode(param, false);
}

SmallVector<TensorLayout>
ConvBiasImpl::AlgoConv1x1::deduce_preprocessed_filter_layout(
        const NCBKernSizeParam& param) const {
    MIDOUT_BEGIN(megdnn_fallback_conv1x1,
                 midout_iv("deduce_preprocessed_filter_layout"_hash)) {
        WorkspaceBundle wb = get_bundle_according_packmode(param);

        size_t GROUP = param.filter_meta.group;
        SmallVector<TensorLayout> preprocessed_layouts;
        preprocessed_layouts.push_back(
                {{GROUP, wb.get_size(0)}, dtype::Int8()});
        return preprocessed_layouts;
    }
    MIDOUT_END();
    return {};
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoConv1x1::dispatch_preprocess_kerns(
        const NCBKernSizeParam& param) const {
    return get_kerns_according_packmode(param, true);
}

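//! Usability check: this algorithm only handles true 1x1 convolutions
//! (unit filter and stride, no padding, no dilation) on the formats and
//! dtypes listed below, and only when both the underlying matmul
//! algorithm and the conv1x1 strategy accept the given shapes.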
bool ConvBiasImpl::AlgoConv1x1::usable(const NCBKernSizeParam& param,
                                       AlgoSelectionStrategy) const {
    MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) {
        size_t FH = param.filter_meta.spatial[0],
               FW = param.filter_meta.spatial[1];
        size_t PH = param.filter_meta.padding[0],
               PW = param.filter_meta.padding[1];
        size_t SH = param.filter_meta.stride[0],
               SW = param.filter_meta.stride[1];
        auto format = param.filter_meta.format;
        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
        if (format != param::ConvBias::Format::NCHW &&
            format != param::ConvBias::Format::NCHW44 &&
            format != param::ConvBias::Format::NCHW44_DOT) {
            return false;
        }
        //! hybrid mode is not supported
        if (param.filter_meta.format == param::ConvBias::Format::NCHW44 ||
            param.filter_meta.format == param::ConvBias::Format::NCHW44_DOT) {
            if (param.filter_meta.icpg < 4_z || param.filter_meta.icpg == 1 ||
                param.filter_meta.ocpg == 1) {
                return false;
            }
        }
#else  //! x86 only supports the NCHW format
        if (format != param::ConvBias::Format::NCHW) {
            return false;
        }
#endif
        //! param: 1x1 filter with no padding and unit stride only
        if (FH != 1 || FW != 1 || PH || PW || SH != 1 || SW != 1) {
            return false;
        }
        //! data type
        if (param.src_type.enumv() != param.filter_type.enumv() ||
            (param.src_type.enumv() != DTypeEnum::Int8 &&
             param.src_type.enumv() != DTypeEnum::QuantizedS8 &&
             param.src_type.enumv() != DTypeEnum::Quantized8Asymm &&
#if !MEGDNN_DISABLE_FLOAT16
             param.src_type.enumv() != DTypeEnum::Float16 &&
#endif
             param.src_type.enumv() != DTypeEnum::Float32)) {
            return false;
        }
        //! x86 disables Quantized8Asymm
#if MEGDNN_X86
        if (param.src_type.enumv() == DTypeEnum::Quantized8Asymm) {
            return false;
        }
#endif
        //! 8x8x16 and 8x8x32 kernels do not support PostProcess, so the
        //! nonlinear mode must be IDENTITY; otherwise return false
        if (param.dst_type.enumv() == DTypeEnum::Int16 ||
            param.dst_type.enumv() == DTypeEnum::QuantizedS16 ||
            param.dst_type.enumv() == DTypeEnum::Int32 ||
            param.dst_type.enumv() == DTypeEnum::QuantizedS32) {
            if (param.nonlineMode != megdnn::NonlineMode::IDENTITY) {
                return false;
            }
        }
        MatrixMulImpl::KernSizeParam matmul_param =
                utils::get_matmul_kern_param(param, OH * OW,
                                             get_oc_tile_size_heuristic(param));
        bool matmul_usable = m_matmul_algo->usable(matmul_param);
        auto pack_mode = m_matmul_algo->packmode();
        bool strategy_usable = Conv1x1Factory::can_make_conv1x1_strategy(
                param, pack_mode, param.filter_meta.format);
        return matmul_usable && strategy_usable &&
               (param.filter_meta.dilation[0] ==
                        param.filter_meta.dilation[1] &&
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT;
    }
    MIDOUT_END();
    return false;
}

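//! Prefer this algorithm whenever the output has more than one spatial
//! element; for 1x1 outputs, only the dtype/arch combinations below
//! benefit from the matmul formulation.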
bool ConvBiasImpl::AlgoConv1x1::is_preferred(
        const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    if (OH * OW != 1) {
        return true;
    } else {
#if (MEGDNN_ARMV7 || MEGDNN_AARCH64)
        if (param.src_type.enumv() == DTypeEnum::Int8 &&
            param.filter_type.enumv() == DTypeEnum::Int8 &&
            param.dst_type.enumv() == DTypeEnum::Int16) {
            return true;
        }
#elif MEGDNN_X86
        size_t OC = param.filter_meta.ocpg;
        if (OC > 2 || param.src_type.enumv() == DTypeEnum::Float32)
            return true;
#endif
        return false;
    }
}

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU installed along with a working driver. If you would like to try deep learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.
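As a quick sanity check before running GPU code, the minimal sketch below (assuming the is_cuda_available helper exposed by the MegEngine Python API) verifies that the bundled CUDA runtime can actually see a GPU on the current machine:

import megengine

# Assumed helper from the MegEngine Python API: returns True when the
# CUDA runtime shipped with the wheel detects a usable GPU.
if megengine.is_cuda_available():
    print("GPU detected; CUDA kernels can be used.")
else:
    print("No usable GPU found; MegEngine will run on CPU.")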