You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

tensor_sanity_check.cpp 4.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. /**
  2. * \file src/core/impl/imperative/tensor_sanity_check.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/imperative/tensor_sanity_check.h"
  12. #include "./op_trait.h"
  13. namespace mgb {
  14. namespace imperative {
  15. TensorChecksumCalc::ChecksumResult TensorChecksumCalc::calc(TensorPtr ptr) {
  16. auto&& dt = ptr->dev_tensor();
  17. if (!dt.layout().total_nr_elems()) {
  18. static ChecksumResult empty_checksum;
  19. return empty_checksum;
  20. }
  21. auto span = dt.layout().span();
  22. megdnn::TensorND tensor;
  23. tensor.raw_ptr = dt.raw_ptr() + span.low_byte;
  24. tensor.layout.init_contiguous_stride({span.dist_byte()});
  25. tensor.layout.dtype = dtype::Byte();
  26. DeviceTensorStorage* workspace;
  27. {
  28. MGB_LOCK_GUARD(m_workspace_mtx);
  29. workspace = &m_workspace[std::this_thread::get_id()]
  30. .storage[ptr->comp_node()];
  31. }
  32. auto comp_node = ptr->comp_node();
  33. comp_node.activate();
  34. auto opr = opr::intl::get_megdnn_global_opr<megdnn::Checksum>(comp_node);
  35. auto workspace_reqsize = opr->get_workspace_in_bytes(tensor.layout);
  36. workspace->comp_node(ptr->comp_node()).ensure_size(workspace_reqsize);
  37. megdnn::Workspace mwk;
  38. if (workspace_reqsize)
  39. mwk = {workspace->ptr(), workspace_reqsize};
  40. return opr->exec(tensor, mwk);
  41. }
  42. class TensorSanityCheckImpl {
  43. public:
  44. std::vector<std::tuple<OpTrait*, std::unique_ptr<ApplyOnPhysicalTensor>>>
  45. hook_list;
  46. std::unordered_map<TensorPtr, TensorChecksumCalc::ChecksumResult>
  47. tensor2chksum; // TODO: may increase device memory overhead
  48. TensorSanityCheckImpl() {
  49. m_calc = std::make_unique<TensorChecksumCalc>();
  50. }
  51. bool check(TensorPtr p);
  52. private:
  53. std::unique_ptr<TensorChecksumCalc> m_calc;
  54. };
  55. bool TensorSanityCheckImpl::check(TensorPtr p) {
  56. auto&& it = tensor2chksum.find(p);
  57. auto&& chksum = m_calc->calc(p);
  58. if (it == tensor2chksum.end()) {
  59. tensor2chksum[p] = chksum;
  60. return true;
  61. }
  62. return it->second == chksum;
  63. }
  64. void TensorSanityCheck::enable() {
  65. CompNode::sync_all();
  66. OpTrait::for_each_trait([this](OpTrait& trait) {
  67. auto backup = std::make_unique<ApplyOnPhysicalTensor>(
  68. std::move(trait.apply_on_physical_tensor));
  69. trait.apply_on_physical_tensor = ApplyOnPhysicalTensor(
  70. [this, backup = backup.get()](
  71. const OpDef& def,
  72. const SmallVector<TensorPtr>& inputs) {
  73. for (auto&& i : inputs) {
  74. if (!m_checker->check(i)) {
  75. mgb_throw(TensorChecksumCalc::Error,
  76. "tensor modified before exec %s",
  77. print_op(def).c_str());
  78. }
  79. }
  80. auto output = (*backup)(def, inputs);
  81. for (auto&& i : output) {
  82. mgb_assert(m_checker->check(i));
  83. }
  84. for (auto&& i : inputs) {
  85. if (!m_checker->check(i)) {
  86. mgb_throw(TensorChecksumCalc::Error,
  87. "tensor modified after exec %s",
  88. print_op(def).c_str());
  89. }
  90. }
  91. return output;
  92. });
  93. m_checker->hook_list.push_back({&trait, std::move(backup)});
  94. });
  95. }
  96. void TensorSanityCheck::disable() {
  97. for (auto&& hook : m_checker->hook_list) {
  98. std::get<0>(hook)->apply_on_physical_tensor =
  99. std::move(*std::get<1>(hook));
  100. }
  101. m_checker->tensor2chksum.clear();
  102. m_checker->hook_list.clear();
  103. }
  104. TensorSanityCheck::TensorSanityCheck() {
  105. m_checker = std::make_unique<TensorSanityCheckImpl>();
  106. }
  107. TensorSanityCheck::~TensorSanityCheck () {
  108. }
  109. std::string TensorSanityCheck::print_op(const OpDef& def){
  110. auto* opr_attr = def.try_cast_final<const OprAttr>();
  111. if(opr_attr){
  112. return std::string("OprAttr:") + opr_attr->type;
  113. }
  114. return def.dyn_typeinfo()->name;
  115. }
  116. } // namespace imperative
  117. } // namespace mgb

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台