
bfloat16.cpp

/**
 * \file src/cuda/convolution/backward_data/bfloat16.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 */
#include "./algo.h"
#include "src/cuda/convolution/chanwise/kern.cuh"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;
using namespace convolution;

ConvolutionBackwardDataImpl::AlgoBFloat16::AlgoBFloat16(
        ConvolutionBackwardDataImpl::AlgoBase* algorithm)
        : m_algorithm(algorithm) {
    megdnn_assert_internal(algorithm);
    m_name = ssprintf("CONVOLUTION_BACKWARD_DATA_BFLOAT16:%s",
                      m_algorithm->name());
}

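// float_args(): copy the caller's layouts and rewrite any BFloat16 dtype to
// Float32, so the wrapped algorithm is queried with compute-type layouts.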
ConvolutionBackwardDataImpl::AlgoBase::SizeArgs
ConvolutionBackwardDataImpl::AlgoBFloat16::float_args(
        const SizeArgs& args, ConvolutionBackwardDataImpl* opr,
        TensorLayout& ffilter, TensorLayout& fdiff, TensorLayout& fgrad) const {
    ffilter = *args.filter_layout;
    fdiff = *args.diff_layout;
    fgrad = *args.grad_layout;
    auto change_dtype = [](TensorLayout& layout) {
        if (layout.dtype == dtype::BFloat16()) {
            layout.dtype = dtype::Float32();
        }
    };
    change_dtype(ffilter);
    change_dtype(fdiff);
    change_dtype(fgrad);
    opr->param() = args.opr->param();
    opr->param().compute_mode = Param::ComputeMode::DEFAULT;
    opr->execution_policy() = {m_algorithm->info()};
    return SizeArgs(opr, ffilter, fdiff, fgrad);
}

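// This wrapper applies only when both diff and filter are BFloat16, and it is
// usable iff the wrapped float32 algorithm accepts the converted layouts.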
bool ConvolutionBackwardDataImpl::AlgoBFloat16::is_available(
        const SizeArgs& args) const {
    TensorLayout ffilter, fdiff, fgrad;
    auto conv_back_data_opr =
            args.handle->create_operator<ConvolutionBackwardData>();
    SizeArgs fargs = float_args(
            args,
            static_cast<ConvolutionBackwardDataImpl*>(conv_back_data_opr.get()),
            ffilter, fdiff, fgrad);
    return args.diff_layout->dtype == args.filter_layout->dtype &&
           args.diff_layout->dtype == dtype::BFloat16() &&
           m_algorithm->is_available(fargs);
}

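// Workspace layout: one float32 buffer for every tensor whose dtype changes,
// followed by the wrapped algorithm's own workspace.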
WorkspaceBundle ConvolutionBackwardDataImpl::AlgoBFloat16::get_workspace_bundle(
        void* ptr, const SizeArgs& args) const {
    TensorLayout ffilter, fdiff, fgrad;
    auto conv_back_data_opr =
            args.handle->create_operator<ConvolutionBackwardData>();
    SizeArgs fargs = float_args(
            args,
            static_cast<ConvolutionBackwardDataImpl*>(conv_back_data_opr.get()),
            ffilter, fdiff, fgrad);
    SmallVector<size_t> sizes;
    auto get_workspace = [&sizes](const TensorLayout& src,
                                  const TensorLayout& dst) {
        if (src.dtype != dst.dtype) {
            sizes.push_back(dst.span().dist_byte());
        }
    };
    get_workspace(*args.filter_layout, ffilter);
    get_workspace(*args.diff_layout, fdiff);
    get_workspace(*args.grad_layout, fgrad);
    sizes.push_back(m_algorithm->get_workspace_in_bytes(fargs));
    return {ptr, std::move(sizes)};
}

size_t ConvolutionBackwardDataImpl::AlgoBFloat16::get_workspace_in_bytes(
        const SizeArgs& args) const {
    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
}

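// exec(): convert inputs to float32 scratch tensors, run the wrapped
// algorithm, then convert the computed gradient back to bfloat16.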
void ConvolutionBackwardDataImpl::AlgoBFloat16::exec(
        const ExecArgs& args) const {
    TensorND ffilter_tensor = *args.filter_tensor;
    TensorND fdiff_tensor = *args.diff_tensor;
    TensorND fgrad_tensor = *args.grad_tensor;
    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
    CompTypeCvter<dtype::BFloat16, dtype::Float32> cvter(args.handle, &bundle);
    {
        cvter.src_to_comp_type(*args.filter_tensor, ffilter_tensor)
                .src_to_comp_type(*args.diff_tensor, fdiff_tensor)
                .src_to_comp_type(*args.grad_tensor, fgrad_tensor);
    }
    {
        auto conv_back_data_opr =
                args.handle->create_operator<ConvolutionBackwardData>();
        conv_back_data_opr->param() = args.opr->param();
        conv_back_data_opr->param().compute_mode = Param::ComputeMode::DEFAULT;
        conv_back_data_opr->execution_policy() = {m_algorithm->info()};
        conv_back_data_opr->exec(ffilter_tensor, fdiff_tensor, fgrad_tensor,
                                 cvter.workspace());
    }
    { cvter.comp_to_dst_type(fgrad_tensor, *args.grad_tensor); }
}

// vim: syntax=cpp.doxygen
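
Outside the MegEngine codebase, the control flow of AlgoBFloat16 boils down to a reusable mixed-precision pattern: widen the bfloat16 operands into float32 scratch buffers, run the float32 kernel, and narrow the result back. Below is a minimal self-contained sketch of that pattern in plain C++; the bf16 helpers and run_f32_kernel are hypothetical stand-ins, not MegEngine APIs, and the float32-to-bf16 conversion here truncates instead of rounding to nearest-even.

    #include <cstdint>
    #include <cstring>
    #include <cstdio>
    #include <vector>

    // bfloat16 stored as the upper 16 bits of an IEEE-754 float32.
    using bf16 = std::uint16_t;

    static float bf16_to_f32(bf16 v) {
        std::uint32_t bits = static_cast<std::uint32_t>(v) << 16;
        float f;
        std::memcpy(&f, &bits, sizeof(f));
        return f;
    }

    static bf16 f32_to_bf16(float f) {
        // Truncation; production code would round to nearest-even.
        std::uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits));
        return static_cast<bf16>(bits >> 16);
    }

    // Hypothetical stand-in for the wrapped float32 algorithm.
    static void run_f32_kernel(const std::vector<float>& a,
                               const std::vector<float>& b,
                               std::vector<float>& out) {
        for (size_t i = 0; i < out.size(); ++i)
            out[i] = a[i] + b[i];
    }

    // The wrapper pattern used by AlgoBFloat16: widen, compute, narrow.
    static void run_bf16_via_f32(const std::vector<bf16>& a,
                                 const std::vector<bf16>& b,
                                 std::vector<bf16>& out) {
        std::vector<float> fa(a.size()), fb(b.size()), fo(out.size());
        for (size_t i = 0; i < a.size(); ++i) fa[i] = bf16_to_f32(a[i]);
        for (size_t i = 0; i < b.size(); ++i) fb[i] = bf16_to_f32(b[i]);
        run_f32_kernel(fa, fb, fo);  // compute in float32
        for (size_t i = 0; i < out.size(); ++i) out[i] = f32_to_bf16(fo[i]);
    }

    int main() {
        std::vector<bf16> a{f32_to_bf16(1.5f)}, b{f32_to_bf16(2.25f)}, out(1);
        run_bf16_via_f32(a, b, out);
        std::printf("%g\n", bf16_to_f32(out[0]));  // prints 3.75
        return 0;
    }

Running this prints 3.75: both operands and the sum are exactly representable in bfloat16, so nothing is lost in the narrowing step of this particular example.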

The MegEngine package bundles the CUDA environment needed to run code on GPUs, so there is no separate CPU and GPU build. To run GPU programs, make sure the machine has a GPU and that its driver is installed. If you would like to try deep learning development on a cloud GPU computing platform, visit the MegStudio platform.