You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

group_conv.cpp 3.2 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. /**
  2. * \file dnn/src/cuda/convolution3d/forward/group_conv.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "./algo.h"
  12. using namespace megdnn;
  13. using namespace cuda;
  14. using namespace convolution3d;
  15. void Convolution3DForwardImpl::AlgoGroupConvGeneral::modify_size_args(
  16. Convolution3DForwardImpl::AlgoBase::SizeArgs &args,
  17. TensorLayout &src_pg, TensorLayout &dst_pg) {
  18. src_pg = *args.src_layout;
  19. dst_pg = *args.dst_layout;
  20. auto nr_grp = args.filter_meta.group;
  21. args.filter_meta.group = 1;
  22. size_t c_pos;
  23. if (args.filter_meta.format == Param::Format::NCDHW) {
  24. c_pos = 1;
  25. } else {
  26. megdnn_assert(args.filter_meta.format == Param::Format::NDHWC,
  27. "invalid conv format");
  28. c_pos = 4;
  29. }
  30. src_pg.shape[c_pos] /= nr_grp;
  31. dst_pg.shape[c_pos] /= nr_grp;
  32. args.src_layout = &src_pg;
  33. args.dst_layout = &dst_pg;
  34. }
  35. Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
  36. AlgoBase *impl):
  37. m_impl{impl} {
  38. m_name = "group_conv3d:";
  39. m_name += impl->name();
  40. }
  41. bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available(
  42. const SizeArgs &args) const {
  43. auto sub_args = args;
  44. TensorLayout src_pg, dst_pg;
  45. modify_size_args(sub_args, src_pg, dst_pg);
  46. return m_impl->is_available(sub_args);
  47. }
  48. size_t Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
  49. const SizeArgs &args) const {
  50. auto sub_args = args;
  51. TensorLayout src_pg, dst_pg;
  52. modify_size_args(sub_args, src_pg, dst_pg);
  53. return m_impl->get_workspace_in_bytes(sub_args);
  54. }
  55. void Convolution3DForwardImpl::AlgoGroupConvGeneral::exec(
  56. const ExecArgs &args) const {
  57. auto sub_args = args;
  58. TensorND tsrc{*args.src_tensor}, tdst{*args.dst_tensor},
  59. tflt{*args.filter_tensor};
  60. modify_size_args(sub_args, tsrc.layout, tdst.layout);
  61. sub_args.src_tensor = &tsrc;
  62. sub_args.dst_tensor = &tdst;
  63. sub_args.filter_tensor = &tflt;
  64. size_t c_pos;
  65. if (args.filter_meta.format == Param::Format::NCDHW) {
  66. c_pos = 1;
  67. } else {
  68. megdnn_assert(args.filter_meta.format == Param::Format::NDHWC,
  69. "invalid conv format");
  70. c_pos = 4;
  71. }
  72. auto grp = args.filter_meta.group;
  73. auto &&fm = args.filter_meta;
  74. auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * tsrc.layout.dtype.size(),
  75. strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * tdst.layout.dtype.size(),
  76. strd_flt = fm.icpg * fm.ocpg *
  77. fm.spatial[0] * fm.spatial[1] * fm.spatial[2] *
  78. tflt.layout.dtype.size();
  79. for (uint32_t g = 0; g < grp; ++ g) {
  80. m_impl->exec(sub_args);
  81. incr_voidp(tsrc.raw_ptr, strd_src);
  82. incr_voidp(tdst.raw_ptr, strd_dst);
  83. incr_voidp(tflt.raw_ptr, strd_flt);
  84. }
  85. }
  86. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台