
physical_tensor.cpp

/**
 * \file imperative/src/impl/physical_tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "megbrain/imperative.h"
#include "megbrain/imperative/blob_manager.h"
#include "megbrain/imperative/profiler.h"

#include "./async_releaser.h"
#include "./event_pool.h"
#include "./profiler/events.h"

#include <mutex>

namespace mgb {
namespace imperative {
namespace {

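// Maps each Blob to a CompNode::Event so readers can synchronize with the last
// recorded device operation on that blob's storage.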
class CompNodeSyncManager : public CompNodeDepedentObject {
    ThinHashMap<Blob*, std::unique_ptr<CompNode::Event>> m_blob2event;
    std::mutex m_mtx;

public:
#if MGB_CUDA && defined(WIN32)
    //! FIXME: on Windows the CUDA driver may shut down before atexit callbacks
    //! run, even if the callback is registered after CUDA driver init. As a
    //! workaround, let the OS reclaim the resources for now; this may become
    //! unnecessary after a CUDA runtime upgrade.
    static bool is_into_atexit;
#endif
    std::shared_ptr<void> on_comp_node_finalize() override {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.clear();
        return {};
    }

    static CompNodeSyncManager& inst() {
        static CompNodeSyncManager* sl_inst = new CompNodeSyncManager();
#if MGB_CUDA && defined(WIN32)
        //! FIXME: see the comment on is_into_atexit above
        if (!is_into_atexit) {
            auto err = atexit([] { is_into_atexit = true; });
            mgb_assert(!err, "failed to register atexit function");
        }
#endif
        return *sl_inst;
    }
    CompNode::Event* get_or_create_event(Blob* blob) {
        mgb_assert(!is_finalized());
        MGB_LOCK_GUARD(m_mtx);
        auto&& e = m_blob2event[blob];
        if (!e) {
            e = blob->comp_node().create_event();
        }
        return e.get();
    }

    void remove(Blob* blob) {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.erase(blob);
    }
};
#if MGB_CUDA && defined(WIN32)
//! FIXME: see the comment on is_into_atexit above
bool CompNodeSyncManager::is_into_atexit = false;
#endif

}  // namespace

void EventDeleter::operator()(CompNode::Event* event) {
    EventPool::without_timer().free(event);
}

namespace {
std::atomic_uint64_t next_blob_id = 0;
}

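// Every Blob gets a unique id and registers itself with the BlobManager, so
// its storage can participate in allocation and defragmentation.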
Blob::Blob(const DeviceTensorStorage& s)
        : m_comp_node{s.comp_node()},
          m_storage{s.raw_storage()},
          m_size{s.size() + s.offset()} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

Blob::Blob(CompNode cn, size_t sz) : m_comp_node{cn}, m_storage{}, m_size{sz} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

Blob::~Blob() {
    BlobManager::inst()->unregister_blob(this);
#if MGB_CUDA && defined(WIN32)
    //! FIXME: see the comment on is_into_atexit above
    if (CompNodeSyncManager::is_into_atexit)
        return;
#endif
    CompNodeSyncManager::inst().remove(this);
}
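// Storage is allocated lazily: the first access triggers allocation, which may
// defragment other blobs to make room.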
const Blob::RawStorage& Blob::storage() {
    if (!m_storage) {
        BlobManager::inst()->alloc_with_defrag(this, m_size);
    }
    return m_storage;
}
Tensor::Tensor(
        BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv)
        : m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) {}
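// Constructing from a host tensor launches an async host-to-device copy. Small
// values (at most TensorShape::MAX_NDIM elements) are also kept in m_value,
// likely so shape-like data can be read back without a device round-trip; the
// host buffer is handed to AsyncReleaser to stay alive until the copy is done.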
Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) {
    constexpr int size_threshold = TensorShape::MAX_NDIM;
    if (hv.layout().total_nr_elems() <= size_threshold) {
        m_value = hv;
    }
    MGB_RECORD_EVENT(
            profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(),
            dev_tensor().raw_ptr());
    dev_tensor().copy_from_fixlayout(hv);
    // even though hv is saved in m_value, the Tensor itself could be
    // released before the copy completes
    MGB_RECORD_EVENT(
            profiler::HostToDeviceFinishEvent, hv.layout(), hv.comp_node(),
            hv.raw_ptr(), dev_tensor().raw_ptr());
    AsyncReleaser::inst()->add(hv);
}
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) {
    if (!hv.empty()) {
        mgb_assert(dv.comp_node() == hv.comp_node());
        mgb_assert(dv.dtype() == hv.dtype());
        mgb_assert(dv.shape().eq_shape(hv.shape()));
        m_value = hv;
    }
    m_layout = dv.layout();
    m_blob = Blob::make(dv.storage());
    m_offset = dv.storage().offset();
}

Tensor::Tensor(const TensorLayout& layout, const CompNode& cn)
        : m_layout{layout},
          m_blob{Blob::make(cn, layout.span().dist_byte())},
          m_offset{0} {}

Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout)
        : m_layout{layout}, m_blob{blob}, m_offset{offset} {}
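// Host tensors found in the MultiCNConstTensorCache reuse the cached blob
// instead of re-uploading the same value.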
TensorPtr Tensor::make(const HostTensorND& hv) {
    auto&& blob = MultiCNConstTensorCache::inst().lookup(hv);
    if (blob) {
        return make(std::forward<decltype(blob)>(blob), hv.layout(), hv);
    }
    return std::make_shared<Tensor>(hv);
}
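// Builds a DeviceTensorND view over the (possibly lazily allocated) blob
// storage, offset by m_offset.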
DeviceTensorND Tensor::dev_tensor() {
    mgb_assert(m_blob, "uninitialized tensor.");
    DeviceTensorStorage storage;
    storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage());
    storage = storage.sub(m_offset);
    DeviceTensorND ret;
    ret.reset(storage, m_layout);
    return ret;
}
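// Starts an async device-to-host copy and records a completion event:
// get_value() blocks on the event, while try_get_value() only succeeds once
// the copy has finished.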
void Tensor::fetch_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (m_value.empty()) {
        m_value.copy_from(dev_tensor());
        m_value_ready.reset(EventPool::without_timer().alloc(comp_node()));
        m_value_ready->record();
    }
}

bool Tensor::value_fetched() {
    MGB_LOCK_GUARD(m_mtx);
    return m_value.layout().ndim != 0;
}

const HostTensorND& Tensor::get_value() {
    fetch_value();
    if (m_value_ready) {
        m_value_ready->host_wait();
    }
    return m_value;
}

const HostTensorND* Tensor::try_get_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) {
        return &m_value;
    }
    return nullptr;
}
TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) {
    HostTensorND hv{cn, value.dtype()};
    hv.resize({1});
    memcpy(hv.raw_ptr(), value.storage(), value.dtype().size(1));
    return make(hv);
}

TensorPtr Tensor::sub(size_t offset, TensorShape shape) {
    TensorLayout layout(shape, m_layout.dtype);
    return Tensor::make(m_blob, offset + m_offset, layout);
}

void Tensor::add_release_callback(CompNode cn) {
    AsyncReleaser::inst()->add(m_blob, cn);
}

CompNode::Event* Tensor::get_or_create_event() {
    auto e = CompNodeSyncManager::inst().get_or_create_event(m_blob.get());
    e->record();
    return e;
}
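// Eagerly constructs the long-lived singletons, then registers
// CompNode::finalize with atexit so comp-node-dependent objects are cleaned up
// at process exit.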
void Tensor::static_initialize() {
    EventPool::with_timer();
    EventPool::without_timer();
    AsyncReleaser::inst();
    CompNodeSyncManager::inst();
    MultiCNConstTensorCache::inst();
    // clean all CompNodeDepedentObjects
    mgb_assert(!atexit(CompNode::finalize), "atexit register failed");
}

}  // namespace imperative
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}