You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

opr_impl.cpp 3.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. /**
  2. * \file dnn/src/cuda/rotate/opr_impl.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include <cstring>
  12. #include "./opr_impl.h"
  13. #include "./rotate.cuh"
  14. #include "src/cuda/handle.h"
  15. #include "src/common/utils.h"
  16. #include "src/cuda/utils.h"
  17. namespace megdnn {
  18. namespace cuda {
  19. namespace rotate_intl {
  20. template <typename ctype>
  21. void rotate_exec(const ctype* src, ctype* dst, size_t N, size_t IH, size_t IW,
  22. size_t IC, size_t istride0, size_t istride1, size_t istride2,
  23. size_t OH, size_t OW, size_t OC, size_t ostride0,
  24. size_t ostride1, size_t ostride2, bool clockwise,
  25. cudaStream_t stream) {
  26. megdnn_assert(IC == OC);
  27. if (clockwise) {
  28. rotate::rotate<ctype, true>(src, dst, N, IH, IW, IC, istride0, istride1,
  29. istride2, OH, OW, ostride0, ostride1,
  30. ostride2, stream);
  31. } else {
  32. rotate::rotate<ctype, false>(src, dst, N, IH, IW, IC, istride0,
  33. istride1, istride2, OH, OW, ostride0,
  34. ostride1, ostride2, stream);
  35. }
  36. }
  37. } // namespace rotate_intl
  38. void RotateImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
  39. _megdnn_workspace workspace) {
  40. check_exec(src.layout, dst.layout, workspace.size);
  41. auto stream = cuda_stream(handle());
  42. //! src layout is the same as dst layout
  43. size_t N = src.layout.shape[0];
  44. size_t batch_size = 0;
  45. #define cb(DType) \
  46. if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \
  47. using ctype = typename DTypeTrait<DType>::ctype; \
  48. ctype* src_ptr = src.ptr<ctype>() + curr_batch * src.layout.stride[0]; \
  49. ctype* dst_ptr = dst.ptr<ctype>() + curr_batch * dst.layout.stride[0]; \
  50. batch_size = std::min<size_t>(N - curr_batch, max_batch_x_channel); \
  51. rotate_intl::rotate_exec<ctype>( \
  52. src_ptr, dst_ptr, batch_size, src.layout.shape[1], \
  53. src.layout.shape[2], src.layout.shape[3], \
  54. src.layout.stride[0], src.layout.stride[1], \
  55. src.layout.stride[2], dst.layout.shape[1], \
  56. dst.layout.shape[2], dst.layout.shape[3], \
  57. dst.layout.stride[0], dst.layout.stride[1], \
  58. dst.layout.stride[2], param().clockwise, stream); \
  59. }
  60. size_t max_batch_x_channel = max_batch_x_channel_size();
  61. size_t curr_batch = 0;
  62. if (N <= max_batch_x_channel) {
  63. MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
  64. } else {
  65. while (curr_batch < N) {
  66. MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
  67. curr_batch += max_batch_x_channel;
  68. }
  69. }
  70. #undef cb
  71. }
  72. } // namespace cuda
  73. } // namespace megdnn
  74. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台