
algos.cpp

/**
 * \file dnn/src/fallback/conv_bias/algos.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
  11. #include "src/fallback/conv_bias/algos.h"
  12. #include "src/common/opr_delegate.h"
  13. #include "src/fallback/conv_bias/winograd/strategy.h"
  14. #include "src/naive/convolution/helper.h"
  15. #include "midout.h"
  16. using namespace megdnn;
  17. using namespace fallback;
  18. namespace {
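//! Convert a ConvBias param into the equivalent dense Convolution param; used
//! below to deduce the intermediate convolution output layout.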
param::Convolution get_param_convolution(const param::ConvBias param) {
    param::Convolution ret{param.mode,     param.pad_h,
                           param.pad_w,    param.stride_h,
                           param.stride_w, param.dilate_h,
                           param.dilate_w, param::Convolution::Sparse::DENSE,
                           param.format};
    return ret;
}
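//! Build single-sample (N = 1) NCHW layouts for src/filter/bias/dst from the
//! NCB kernel size param; the bias layout depends on the bias mode.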
TensorLayoutArray get_layouts(const param::ConvBias& param,
                              const ConvBiasImpl::NCBKernSizeParam& p) {
    megdnn_assert(param.format == param::ConvBias::Format::NCHW);
    UNPACK_CONV_NCB_KERN_SIZES(p);
    MEGDNN_MARK_USED_VAR(SH);
    MEGDNN_MARK_USED_VAR(SW);
    MEGDNN_MARK_USED_VAR(PH);
    MEGDNN_MARK_USED_VAR(PW);
    MEGDNN_MARK_USED_VAR(OW);
    MEGDNN_MARK_USED_VAR(OH);
    MEGDNN_MARK_USED_VAR(N);
    TensorLayout src_layout({1, IC, IH, IW}, p.src_type);
    TensorLayout filter_layout({OC, IC, FH, FW}, p.filter_type);
    TensorLayout bias_layout{{}, p.bias_type};
    if (p.bias_mode == BiasMode::BROADCAST_CHANNEL_BIAS) {
        bias_layout = TensorLayout({1, OC, 1, 1}, p.bias_type);
    } else if (p.bias_mode == BiasMode::BIAS) {
        bias_layout = TensorLayout({1, OC, OH, OW}, p.bias_type);
    }
    TensorLayout dst_layout = TensorLayout({1, OC, OH, OW}, p.dst_type);
    return {src_layout, filter_layout, bias_layout, dst_layout};
}
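//! Reference kernel for one (batch, group) slice: dispatch the naive
//! convolution + bias on the dtype combination, staging the result in the
//! workspace when the bias dtype differs from dst, then apply the requested
//! nonlinearity and convert to the dst dtype.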
void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) {
    dt_byte* workspace_ptr = static_cast<dt_byte*>(p.workspace_ptr);
    auto filter_meta_ptr =
            reinterpret_cast<const ConvBiasForward::CanonizedFilterMeta*>(
                    &p.filter_meta);
    auto filter_meta = *filter_meta_ptr;
    auto layouts = get_layouts(param, p);

    TensorND src{reinterpret_cast<dt_byte*>(const_cast<void*>(p.src_ptr)),
                 layouts[0]};
    TensorND filter{const_cast<void*>(p.filter_ptr), layouts[1]};
    auto bias_ptr = reinterpret_cast<dt_byte*>(const_cast<void*>(p.bias_ptr));
    TensorND bias{bias_ptr, layouts[2]};
    TensorND dst{reinterpret_cast<dt_byte*>(const_cast<void*>(p.dst_ptr)),
                 layouts[3]};

    auto sfb = dst;
    if (bias.layout.dtype.enumv() != dst.layout.dtype.enumv()) {
        // intermediate result
        sfb = TensorND{workspace_ptr,
                       TensorLayout{dst.layout, bias.layout.dtype}};
    }
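// Dispatch on the (src/filter, bias, dst) dtype combination and compute mode;
// each branch instantiates the naive forward_bias kernel for the matching
// ctypes.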
#define DISPATCH_RAW(in_dt, bias_dt, out_dt, cmode, func)                      \
    else if (src.layout.dtype.enumv() == DTypeTrait<dtype::in_dt>::enumv &&    \
             filter.layout.dtype.enumv() == DTypeTrait<dtype::in_dt>::enumv && \
             (!bias.layout.dtype.valid() ||                                    \
              bias.layout.dtype.enumv() ==                                     \
                      DTypeTrait<dtype::bias_dt>::enumv) &&                    \
             sfb.layout.dtype.enumv() == DTypeTrait<dtype::out_dt>::enumv &&   \
             param.compute_mode == param::ConvBias::ComputeMode::cmode) {      \
        func(src, filter, bias, sfb, workspace_ptr, filter_meta);              \
    }
#define DISPATCH(in_dt, out_dt)                                 \
    DISPATCH_RAW(in_dt, out_dt, out_dt, DEFAULT,                \
                 (megdnn::naive::convolution::forward_bias<     \
                         DTypeTrait<dtype::in_dt>::ctype,       \
                         DTypeTrait<dtype::in_dt>::ctype,       \
                         DTypeTrait<dtype::out_dt>::ctype,      \
                         DTypeTrait<dtype::out_dt>::ctype>))
    if (0) {
    }
    DISPATCH(Float32, Float32)
    DISPATCH(Int8, Int16)
    DISPATCH(Int8, Int32)
    DISPATCH(QuantizedS8, QuantizedS32)
    DISPATCH(Quantized8Asymm, QuantizedS32)
#if !MEGDNN_DISABLE_FLOAT16
    DISPATCH(Float16, Float16)
    DISPATCH_RAW(
            Float16, Float16, Float16, FLOAT32,
            (megdnn::naive::convolution::forward_bias<dt_float16, dt_float16,
                                                      dt_float16, dt_float32>))
#endif
    else {
        megdnn_throw(
                ssprintf("unsupported naive ConvBias(%s, %s, %s) -> %s",
                         src.layout.dtype.name(), filter.layout.dtype.name(),
                         bias.layout.dtype.name(), dst.layout.dtype.name()));
    }
#undef DISPATCH
#undef DISPATCH_RAW
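    // Apply the activation: non-quantized results go through ElemwiseForward,
    // quantized results through ElemwiseMultiType; SIGMOID and IDENTITY add a
    // TypeCvt when the staged result and dst buffers differ.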
    auto res = sfb;
    using NonlineMode = param::ConvBias::NonlineMode;
    switch (param.nonlineMode) {
#define cb(_mode)                                                             \
    case NonlineMode::_mode: {                                                \
        if (res.layout.dtype.category() != DTypeCategory::QUANTIZED) {        \
            auto nonlinear =                                                  \
                    inplace_cpu_handle()->create_operator<ElemwiseForward>(); \
            nonlinear->param().mode = Elemwise::Param::Mode::_mode;           \
            nonlinear->exec({res}, dst);                                      \
        } else {                                                              \
            auto nonlinear = inplace_cpu_handle()                             \
                                     ->create_operator<ElemwiseMultiType>();  \
            nonlinear->param().mode =                                         \
                    ElemwiseMultiType::Param::Mode::Q##_mode;                 \
            nonlinear->exec({res}, dst);                                      \
        }                                                                     \
        break;                                                                \
    }
        cb(RELU);
        cb(H_SWISH);
#undef cb
        case NonlineMode::SIGMOID: {
            megdnn_assert(res.layout.dtype.category() !=
                          DTypeCategory::QUANTIZED);
            auto nonlinear =
                    inplace_cpu_handle()->create_operator<ElemwiseForward>();
            nonlinear->param().mode = Elemwise::Param::Mode::SIGMOID;
            nonlinear->exec({res}, res);
            if (res.raw_ptr != dst.raw_ptr) {
                inplace_cpu_handle()->create_operator<TypeCvt>()->exec(res,
                                                                       dst);
            }
            break;
        }
        case NonlineMode::IDENTITY: {
            if (res.raw_ptr != dst.raw_ptr) {
                inplace_cpu_handle()->create_operator<TypeCvt>()->exec(res,
                                                                       dst);
            }
            break;
        }
        default:
            megdnn_assert(false);
    }
}
}  // namespace

MIDOUT_DECL(megdnn_fallback_naive)

/* ======================= AlgoNaive ======================== */
bool ConvBiasImpl::AlgoNaive::usable(
        ConvBiasImpl* opr, const NCBKernSizeParam&,
        AlgoSelectionStrategy /*algo_selection_strategy*/) const {
    MIDOUT_BEGIN(megdnn_fallback_naive, 0) {
        return opr->param().format == param::ConvBias::Format::NCHW;
    }
    MIDOUT_END();
    return false;
}
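//! Workspace is only needed for quantized outputs: each worker thread gets a
//! buffer large enough to hold the convolution result before it is converted
//! to the dst dtype.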
size_t ConvBiasImpl::AlgoNaive::get_workspace(ConvBiasImpl* opr,
                                              const NCBKernSizeParam& p) const {
    MIDOUT_BEGIN(megdnn_fallback_naive, 1) {
        auto layouts = get_layouts(opr->param(), p);
        //! When group>1 or n>1, this algo will parallel by group and n
        size_t nr_threads = p.nr_threads;
        auto conv_opr =
                inplace_cpu_handle()->create_operator<ConvolutionForward>();
        conv_opr->param() = get_param_convolution(opr->param());
        if (p.dst_type.enumv() == DTypeEnum::QuantizedS8 ||
            p.dst_type.enumv() == DTypeEnum::Quantized8Asymm) {
            TensorLayout conv_dst_layout;
            conv_opr->deduce_layout(layouts[0], layouts[1], conv_dst_layout);
            WorkspaceBundle bundle(nullptr,
                                   {conv_dst_layout.span().dist_byte()});
            return bundle.total_size_in_bytes() * nr_threads;
        }
        return 0;
    }
    MIDOUT_END();
    return 0;
}
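//! One kernel parallelised over (group, batch): each invocation rebases the
//! src/filter/bias/dst pointers for its (n, g) slice, takes its own chunk of
//! the workspace, and runs kern_default.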
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoNaive::dispatch_kerns(
        ConvBiasImpl* opr, const NCBKernSizeParam& p) const {
    param::ConvBias opr_param = opr->param();
    size_t workspace_size = get_workspace(opr, p);
    //! When group>1 or n>1, this algo will parallel by group and n
    size_t nr_threads = p.nr_threads;
    size_t GROUP = p.filter_meta.group;
    size_t N = p.n;
    size_t workspace_per_thread = workspace_size / nr_threads;
    auto kern = [opr_param, workspace_per_thread](
                        const NCBKernParam& param,
                        const NCBKernIndex& ncb_index) {
        MIDOUT_BEGIN(megdnn_fallback_naive, 2) {
            size_t group_id = ncb_index.ndrange_id[0];
            size_t batch_id = ncb_index.ndrange_id[1];
            size_t thread_id = ncb_index.thread_id;
            auto thread_param = param;
            thread_param.workspace_ptr = reinterpret_cast<void*>(
                    reinterpret_cast<ptrdiff_t>(param.workspace_ptr) +
                    thread_id * workspace_per_thread);
            thread_param.filter_ptr = param.filter<void>(group_id);
            thread_param.dst_ptr = param.dst<void>(batch_id, group_id);
            thread_param.src_ptr = param.src<void>(batch_id, group_id);
            thread_param.bias_ptr = param.bias<void>(batch_id, group_id);
            kern_default(opr_param, thread_param);
        }
        MIDOUT_END();
    };
    return {{kern, {GROUP, N, 1_z}}};
}

MIDOUT_DECL(megdnn_fallback_winograd)

/* ======================= AlgoWinogradF32 ======================== */
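//! Winograd F(2x2, 3x3) with an unpacked (DEFAULT-format) matmul: requires
//! float32, NCHW (or pre-transformed NCHW_WINOGRAD with output_block_size == 2),
//! 3x3 cross-correlation filters, stride 1, no dilation, and a usable matmul
//! algorithm.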
bool ConvBiasImpl::AlgoWinogradF32::usable(
        ConvBiasImpl* opr, const NCBKernSizeParam& param,
        AlgoSelectionStrategy /*algo_selection_strategy*/) const {
    MEGDNN_MARK_USED_VAR(param);
    MEGDNN_MARK_USED_VAR(opr);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) {
        using Strategy = fallback::winograd::winograd_2x3_1x1_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
                                      strategy, UNIT_TILE_SIZE, param)
                                      .get_matmul_kern_param(param);
        return m_matmul_algo->usable(matmul_param) &&
               (opr->param().format == param::ConvBias::Format::NCHW ||
                (opr->param().format ==
                         param::ConvBias::Format::NCHW_WINOGRAD &&
                 opr->param().output_block_size == 2 &&
                 param.winograd_matmul_format ==
                         param::MatrixMul::Format::DEFAULT)) &&
               opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION &&
               (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
                param.filter_meta.spatial[0] == 3) &&
               (param.filter_meta.stride[0] == param.filter_meta.stride[1] &&
                param.filter_meta.stride[0] == 1) &&
               (param.filter_meta.dilation[0] ==
                        param.filter_meta.dilation[1] &&
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT &&
               param.src_type.enumv() == DTypeEnum::Float32;
    }
    MIDOUT_END();
    return false;
}

size_t ConvBiasImpl::AlgoWinogradF32::get_workspace(
        ConvBiasImpl*, const NCBKernSizeParam& p) const {
    MEGDNN_MARK_USED_VAR(p);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 1) {
        fallback::winograd::winograd_2x3_1x1_f strategy(
                p.src_type, p.filter_type, p.dst_type);
        return megdnn::winograd::ConvBias<
                       fallback::winograd::winograd_2x3_1x1_f>(
                       strategy, UNIT_TILE_SIZE, p)
                .get_workspace_size(p, m_matmul_algo);
    }
    MIDOUT_END();
    return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoWinogradF32::dispatch_kerns(
        ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 2) {
        fallback::winograd::winograd_2x3_1x1_f strategy(
                param.src_type, param.filter_type, param.dst_type);
        auto winograd_impl = megdnn::winograd::ConvBias<
                fallback::winograd::winograd_2x3_1x1_f>(strategy,
                                                        UNIT_TILE_SIZE, param);
        return winograd_impl.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    return {};
}

/* ======================= AlgoWinogradF32 4x4 ======================== */
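//! Same as AlgoWinogradF32 but packed for MK4 matmul; IC and OC per group must
//! be multiples of 4.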
bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
        ConvBiasImpl* opr, const NCBKernSizeParam& param,
        AlgoSelectionStrategy /*algo_selection_strategy*/) const {
    MEGDNN_MARK_USED_VAR(param);
    MEGDNN_MARK_USED_VAR(opr);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 0) {
        if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0)
            return false;
        using Strategy = fallback::winograd::winograd_2x3_4x4_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        auto&& matmul_param =
                megdnn::winograd::ConvBias<Strategy,
                                           param::MatrixMul::Format::MK4>(
                        strategy, UNIT_TILE_SIZE, param)
                        .get_matmul_kern_param(param);
        return m_matmul_algo->usable(matmul_param) &&
               (opr->param().format == param::ConvBias::Format::NCHW ||
                (opr->param().format ==
                         param::ConvBias::Format::NCHW_WINOGRAD &&
                 opr->param().output_block_size == 2 &&
                 param.winograd_matmul_format ==
                         param::MatrixMul::Format::MK4)) &&
               opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION &&
               (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
                param.filter_meta.spatial[0] == 3) &&
               (param.filter_meta.stride[0] == param.filter_meta.stride[1] &&
                param.filter_meta.stride[0] == 1) &&
               (param.filter_meta.dilation[0] ==
                        param.filter_meta.dilation[1] &&
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT &&
               param.src_type.enumv() == DTypeEnum::Float32;
    }
    MIDOUT_END();
    return false;
}

size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace(
        ConvBiasImpl*, const NCBKernSizeParam& p) const {
    MEGDNN_MARK_USED_VAR(p);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 1) {
        fallback::winograd::winograd_2x3_4x4_f strategy(
                p.src_type, p.filter_type, p.dst_type);
        return megdnn::winograd::ConvBias<
                       fallback::winograd::winograd_2x3_4x4_f,
                       param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE,
                                                      p)
                .get_workspace_size(p, m_matmul_algo);
    }
    MIDOUT_END();
    return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns(
        ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 2) {
        fallback::winograd::winograd_2x3_4x4_f strategy(
                param.src_type, param.filter_type, param.dst_type);
        auto winograd_impl = megdnn::winograd::ConvBias<
                fallback::winograd::winograd_2x3_4x4_f,
                param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE, param);
        return winograd_impl.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    return {};
}

/* ======================= AlgoWinogradQS8 ======================== */
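//! Quantized int8 counterpart of AlgoWinogradF32: same shape constraints, but
//! the src dtype must be QuantizedS8.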
bool ConvBiasImpl::AlgoWinogradQS8::usable(
        ConvBiasImpl* opr, const NCBKernSizeParam& param,
        AlgoSelectionStrategy /*algo_selection_strategy*/) const {
    MEGDNN_MARK_USED_VAR(param);
    MEGDNN_MARK_USED_VAR(opr);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) {
        using Strategy = fallback::winograd::winograd_2x3_1x1_qs8;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
                                      strategy, UNIT_TILE_SIZE, param)
                                      .get_matmul_kern_param(param);
        return m_matmul_algo->usable(matmul_param) &&
               (opr->param().format == param::ConvBias::Format::NCHW ||
                (opr->param().format ==
                         param::ConvBias::Format::NCHW_WINOGRAD &&
                 opr->param().output_block_size == 2 &&
                 param.winograd_matmul_format ==
                         param::MatrixMul::Format::DEFAULT)) &&
               opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION &&
               (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
                param.filter_meta.spatial[0] == 3) &&
               (param.filter_meta.stride[0] == param.filter_meta.stride[1] &&
                param.filter_meta.stride[0] == 1) &&
               (param.filter_meta.dilation[0] ==
                        param.filter_meta.dilation[1] &&
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT &&
               param.src_type.enumv() == DTypeEnum::QuantizedS8;
    }
    MIDOUT_END();
    return false;
}

size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace(
        ConvBiasImpl*, const NCBKernSizeParam& p) const {
    MEGDNN_MARK_USED_VAR(p);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 1) {
        fallback::winograd::winograd_2x3_1x1_qs8 strategy(
                p.src_type, p.filter_type, p.dst_type);
        return megdnn::winograd::ConvBias<
                       fallback::winograd::winograd_2x3_1x1_qs8>(
                       strategy, UNIT_TILE_SIZE, p)
                .get_workspace_size(p, m_matmul_algo);
    }
    MIDOUT_END();
    return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns(
        ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 2) {
        fallback::winograd::winograd_2x3_1x1_qs8 strategy(
                param.src_type, param.filter_type, param.dst_type);
        auto winograd_impl = megdnn::winograd::ConvBias<
                fallback::winograd::winograd_2x3_1x1_qs8>(
                strategy, UNIT_TILE_SIZE, param);
        return winograd_impl.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    return {};
}

/* ======================= AlgoWinogradQS8 8x8 ======================== */
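//! Int8 counterpart of the blocked variant, using MK8 packed matmul; IC and OC
//! per group must be multiples of 8.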
bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
        ConvBiasImpl* opr, const NCBKernSizeParam& param,
        AlgoSelectionStrategy /*algo_selection_strategy*/) const {
    MEGDNN_MARK_USED_VAR(param);
    MEGDNN_MARK_USED_VAR(opr);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 0) {
        if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0)
            return false;
        using Strategy = fallback::winograd::winograd_2x3_8x8_qs8;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        auto&& matmul_param =
                megdnn::winograd::ConvBias<Strategy,
                                           param::MatrixMul::Format::MK8>(
                        strategy, UNIT_TILE_SIZE, param)
                        .get_matmul_kern_param(param);
        return m_matmul_algo->usable(matmul_param) &&
               (opr->param().format == param::ConvBias::Format::NCHW ||
                (opr->param().format ==
                         param::ConvBias::Format::NCHW_WINOGRAD &&
                 opr->param().output_block_size == 2 &&
                 param.winograd_matmul_format ==
                         param::MatrixMul::Format::MK8)) &&
               opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION &&
               (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
                param.filter_meta.spatial[0] == 3) &&
               (param.filter_meta.stride[0] == param.filter_meta.stride[1] &&
                param.filter_meta.stride[0] == 1) &&
               (param.filter_meta.dilation[0] ==
                        param.filter_meta.dilation[1] &&
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT &&
               param.src_type.enumv() == DTypeEnum::QuantizedS8;
    }
    MIDOUT_END();
    return false;
}

size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace(
        ConvBiasImpl*, const NCBKernSizeParam& p) const {
    MEGDNN_MARK_USED_VAR(p);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 1) {
        fallback::winograd::winograd_2x3_8x8_qs8 strategy(
                p.src_type, p.filter_type, p.dst_type);
        return megdnn::winograd::ConvBias<
                       fallback::winograd::winograd_2x3_8x8_qs8,
                       param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE,
                                                      p)
                .get_workspace_size(p, m_matmul_algo);
    }
    MIDOUT_END();
    return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns(
        ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 2) {
        fallback::winograd::winograd_2x3_8x8_qs8 strategy(
                param.src_type, param.filter_type, param.dst_type);
        auto winograd_impl = megdnn::winograd::ConvBias<
                fallback::winograd::winograd_2x3_8x8_qs8,
                param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE, param);
        return winograd_impl.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    return {};
}

// vim: syntax=cpp.doxygen

The MegEngine package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine itself has a GPU and that the driver is installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.
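A minimal way to confirm the installed package can actually see a GPU is to query it from Python. This is a sketch, assuming the Python package is installed; megengine.is_cuda_available() is the availability query in recent MegEngine releases (treat the exact name as an assumption for older versions):

import megengine

# True only when a GPU and a working driver are visible to the bundled CUDA runtime.
print("CUDA available:", megengine.is_cuda_available())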