You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

opr_impl.cpp 2.3 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. /**
  2. * \file dnn/src/cuda/checksum/opr_impl.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "./opr_impl.h"
  12. #include "./kern.cuh"
  13. #include "src/common/utils.h"
  14. #include "src/cuda/reduce_helper.cuh"
  15. #include <algorithm>
  16. using namespace megdnn;
  17. using namespace cuda;
  18. namespace {
  19. WorkspaceBundle get_wbundle(const TensorLayout& data) {
  20. size_t size_all = data.shape[0], size_ints = size_all / sizeof(uint32_t);
  21. size_t part1 = checksum::get_workspace_in_bytes(size_ints);
  22. size_t part2 = sizeof(ChecksumForward::Result::checksum);
  23. return {nullptr, {part1, part2}};
  24. }
  25. } // anonymous namespace
  26. size_t ChecksumForwardImpl::get_workspace_in_bytes(const TensorLayout& data) {
  27. auto wbundle = get_wbundle(data);
  28. return wbundle.total_size_in_bytes();
  29. }
  30. ChecksumForward::Result ChecksumForwardImpl::exec(
  31. _megdnn_tensor_in data, _megdnn_workspace workspace) {
  32. auto wbundle = get_wbundle(data.layout);
  33. wbundle.set(workspace.raw_ptr);
  34. Result result;
  35. memset(&result, 0, sizeof(result));
  36. check_exec(data.layout, workspace.size);
  37. auto stream = cuda_stream(handle());
  38. auto ptr = static_cast<uint8_t*>(data.raw_ptr());
  39. size_t size_all = data.layout.shape[0], size_ints = size_all / sizeof(uint32_t);
  40. auto last_val_size = std::min<size_t>(size_all, 4);
  41. cuda_check(cudaMemcpyAsync(
  42. &result.last_val, ptr + size_all - last_val_size, last_val_size,
  43. cudaMemcpyDeviceToHost, stream));
  44. if (size_ints) {
  45. checksum::calc(
  46. static_cast<uint32_t*>(wbundle.get(1)),
  47. static_cast<uint32_t*>(data.raw_ptr()),
  48. static_cast<uint32_t*>(wbundle.get(0)), size_ints, stream);
  49. cuda_check(cudaMemcpyAsync(
  50. &result.checksum, wbundle.get(1), sizeof(result.checksum),
  51. cudaMemcpyDeviceToHost, stream));
  52. }
  53. cuda_check(cudaStreamSynchronize(stream));
  54. return result;
  55. }
  56. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台