You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

persistent_cache.cpp 9.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. /**
  2. * \file src/core/impl/utils/persistent_cache.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/utils/persistent_cache.h"
  12. #include "megbrain/comp_node_env.h"
  13. #include <cstdio>
  14. #include <cstring>
  15. #ifdef WIN32
  16. #define snprintf _snprintf
  17. #endif
  18. #if MGB_CUDA
  19. #include <cuda_runtime_api.h>
  20. #endif
  21. using namespace mgb;
  22. // ================= PersistentCache ======================
  23. std::shared_ptr<PersistentCache> PersistentCache::sm_impl =
  24. std::make_shared<InMemoryPersistentCache>();
  25. std::shared_ptr<PersistentCache> PersistentCache::set_impl(
  26. std::shared_ptr<PersistentCache> impl) {
  27. mgb_assert(impl);
  28. sm_impl.swap(impl);
  29. return impl;
  30. }
//! Build a cache-category string that identifies the platform, device name
//! and driver/runtime versions of \p comp_node, so that cached entries are
//! keyed per-environment and never reused across incompatible setups.
//! Throws MegBrainError for device types without a category mapping.
std::string PersistentCache::make_category_from_comp_node(CompNode comp_node) {
    auto&& env = CompNodeEnv::from_comp_node(comp_node);
    switch (env.property().type) {
#if MGB_CUDA
        case CompNode::DeviceType::CUDA: {
            // -1 sentinels are overwritten by the version queries below
            int drv = -1, cuda_rt = -1;
            MGB_CUDA_CHECK(cudaDriverGetVersion(&drv));
            MGB_CUDA_CHECK(cudaRuntimeGetVersion(&cuda_rt));
            auto&& prop = env.cuda_env().device_prop;
            // note: we do not contain library versions such as cudnn here. They
            // are handled by opr impls in MegDNN
            return ssprintf("plat=cuda;dev=%s;cap=%d.%d,drv=%d;runtime=%d",
                    prop.name, prop.major, prop.minor, drv, cuda_rt);
            break;  // unreachable after return; kept for symmetry
        }
#endif
#if MGB_ROCM
        case CompNode::DeviceType::ROCM: {
            int drv = -1, hip_rt = -1;
            MGB_ROCM_CHECK(hipDriverGetVersion(&drv));
            MGB_ROCM_CHECK(hipRuntimeGetVersion(&hip_rt));
            auto&& prop = env.rocm_env().device_prop;
            return ssprintf("plat=rocm;dev=%s;cap=%d.%d,drv=%d;runtime=%d",
                    prop.name, prop.major, prop.minor, drv, hip_rt);
            break;
        }
#endif
        case CompNode::DeviceType::CPU:
            // CPU needs no device/driver discrimination
            return "plat=cpu";
        default:
            mgb_throw(MegBrainError,
                    "unsupported comp node for persistent cache category");
    }
}
  65. // ================= InMemoryPersistentCache ==================
  66. using Blob = PersistentCache::Blob;
  67. InMemoryPersistentCache::BlobStorage&
  68. InMemoryPersistentCache::BlobStorage::init_data_ref(const Blob& b) {
  69. data_refhold = std::make_unique<uint8_t[]>(b.size + 1);
  70. memcpy(data_refhold.get(), b.ptr, b.size);
  71. data_refhold.get()[b.size] = 0; // for C-string safety
  72. ptr = data_refhold.get();
  73. size = b.size;
  74. return *this;
  75. }
  76. InMemoryPersistentCache::BlobStorage&
  77. InMemoryPersistentCache::BlobStorage::init_hash() {
  78. hash = XXHash{}.update(ptr, size).digest();
  79. return *this;
  80. }
  81. bool InMemoryPersistentCache::BlobStorage::operator==(
  82. const BlobStorage& rhs) const {
  83. return size == rhs.size && !memcmp(ptr, rhs.ptr, size);
  84. }
  85. Maybe<Blob> InMemoryPersistentCache::get(const std::string& category,
  86. const Blob& key) {
  87. decltype(m_cache.begin()) iter0;
  88. {
  89. MGB_LOCK_GUARD(m_mtx);
  90. iter0 = m_cache.find(category);
  91. if (iter0 == m_cache.end())
  92. return None;
  93. }
  94. BlobStorage key_storage;
  95. key_storage.Blob::operator=(key);
  96. key_storage.init_hash();
  97. MGB_LOCK_GUARD(m_mtx);
  98. auto iter1 = iter0->second.find(key_storage);
  99. if (iter1 == iter0->second.end())
  100. return None;
  101. return iter1->second;
  102. }
  103. void InMemoryPersistentCache::put(const std::string& category, const Blob& key,
  104. const Blob& value) {
  105. BlobStorage key_storage;
  106. key_storage.init_data_ref(key).init_hash();
  107. MGB_LOCK_GUARD(m_mtx);
  108. auto size0 = m_cache.size();
  109. m_cache[category][std::move(key_storage)].init_data_ref(value);
  110. if (m_cache.size() > size0) {
  111. mgb_log_debug("new cache category: %s", category.c_str());
  112. }
  113. }
  114. // ================= AlgoChooserProfileCache ==================
  115. AlgoChooserProfileCache::AlgoChooserProfileCache(
  116. CompNode cn, const char *opr_type) {
  117. m_category = "profile:";
  118. m_category.append(PersistentCache::make_category_from_comp_node(cn));
  119. m_category.append(":");
  120. m_category.append(opr_type);
  121. }
//! textual layout of one profile entry (attribute, time, workspace);
//! written with snprintf in put() and parsed back with sscanf in get()
#define ENTRY_FMT ":%d;%lg;%zu:"

//! Deserialize a profiling Result previously stored by put().
//! Wire format: [u32 nr_results] then, per result,
//! [u32 algo_name_len][name bytes][u32 entry_len][ENTRY_FMT text incl. '\0'].
//! All u32 fields are raw native-endian bytes.
//! \return None on cache miss; asserts (rather than returning None) on
//!         structurally corrupted data.
Maybe<AlgoChooserProfileCache::Result>
AlgoChooserProfileCache::get(const Key &key) {
    auto raw_buf = PersistentCache::inst().get(m_category, key.build_blob());
    if(!raw_buf.valid())
        return None;
    // sanity bound; 1 MiB is far above any legitimate profile result
    mgb_assert(raw_buf->size <= 1024 * 1024,
            "buf size too large, maybe corrupted data: %p %zu",
            raw_buf->ptr, raw_buf->size);
    auto buf = static_cast<const uint8_t*>(raw_buf->ptr),
            buf_end = buf + raw_buf->size;
    mgb_assert(buf && buf < buf_end,
            "PersistentCache returned invalid value: ptr=%p size=%zu",
            raw_buf->ptr, raw_buf->size);
    // consume one little chunk: read a native-endian u32 and advance buf
    auto read_uint32 = [&]() {
        auto next = buf + sizeof(uint32_t);
        mgb_assert(next <= buf_end);
        auto ret = *reinterpret_cast<const uint32_t*>(buf);
        buf = next;
        return ret;
    };
    auto ret_size = read_uint32();
    // each result needs at least a few bytes, so count must be < remaining
    mgb_assert(static_cast<ptrdiff_t>(ret_size) < buf_end - buf,
            "result size too large (%u), maybe corrupted data",
            ret_size);
    Result ret(ret_size);
    for (auto &&i: ret) {
        // read algo name
        auto size = read_uint32();
        i.algo.resize(size);
        // strict '<': an entry record must still follow the name
        mgb_assert(buf + size < buf_end);
        memcpy(&i.algo[0], buf, size);
        buf += size;
        // entry_len covers the ENTRY_FMT text including its trailing '\0'
        auto entry_len = read_uint32();
        mgb_assert(buf + entry_len <= buf_end);
        auto nr = sscanf(reinterpret_cast<const char*>(buf), ENTRY_FMT,
                &i.attribute, &i.time, &i.workspace);
        mgb_assert(nr == 3);
        buf += entry_len;
    }
    // the whole blob must be consumed exactly
    mgb_assert(buf == buf_end);
    return ret;
}
//! Serialize \p result and store it under this cache's category.
//! Side effect: \p result is sorted by (time, workspace) and entries that are
//! both slower and not workspace-cheaper than their predecessor (with equal
//! attribute) are erased before serialization.
//! Wire format matches get(): [u32 nr] + per-entry
//! [u32 name_len][name][u32 entry_len][ENTRY_FMT text incl. '\0'].
void AlgoChooserProfileCache::put(const Key &key, Result &result) {
    mgb_assert(!result.empty());
    auto result_cmp = [](const ResultEntry &a, const ResultEntry &b) {
        return a.time < b.time ||
                (a.time == b.time && a.workspace < b.workspace);
    };
    small_sort(result.begin(), result.end(), result_cmp);
    // remove algos that run slower but use more workspace
    for (size_t i = 1; i < result.size(); ) {
        auto &&prev = result[i - 1];
        auto &&cur = result[i];
        // prev is no slower (sorted order) and no larger in workspace,
        // so cur is dominated and can be dropped
        if (prev.workspace <= cur.workspace &&
                prev.attribute == cur.attribute) {
            result.erase(result.begin() + i);
        } else {
            ++i;
        }
    }
    std::string val;
    // rough capacity guess: entry payload minus the algo-name string, doubled
    val.reserve((sizeof(ResultEntry) - sizeof(std::string)) * 2 * result.size());
    // append a native-endian u32 as raw bytes
    auto write_uint32 = [&](uint32_t v) {
        val.append(reinterpret_cast<const char*>(&v), sizeof(v));
    };
    write_uint32(result.size());
    // scratch size for one formatted ENTRY_FMT record
    constexpr int SPR_SIZE = 100;
    for (auto &&i: result) {
        // write algo
        write_uint32(i.algo.size());
        auto pos = val.size();
        val.resize(pos + i.algo.size());
        memcpy(&val[pos], i.algo.data(), i.algo.size());
        // write others: length placeholder (0), patched below once known
        write_uint32(0);
        pos = val.size();
        val.resize(pos + SPR_SIZE);
        uint32_t nr = snprintf(&val[pos], SPR_SIZE, ENTRY_FMT, i.attribute,
                i.time, i.workspace);
        //! for memory boundary failed, snprintf ret do not contain \0
        nr += 1;
        mgb_assert(nr < SPR_SIZE);
        // back-patch the placeholder u32 written just before the entry text
        memcpy(&val[pos - sizeof(uint32_t)], &nr, sizeof(nr));
        // shrink to the actual formatted length (incl. '\0')
        val.resize(pos + nr);
    }
    PersistentCache::inst().put(m_category, key.build_blob(),
            {val.data(), val.size()});
}
//! Serialize this key (input layouts + raw opr param) into a blob.
//! The encoding is built once and memoized in m_blob_storage; subsequent
//! calls return a view of the cached string. Per layout the text is:
//! comma-separated shape, then ";<comma-separated strides>" only when
//! non-contiguous, then ";<dtype name>|". Raw param bytes are appended last.
//! NOTE(review): the returned Blob borrows m_blob_storage, so it is only
//! valid while this Key (and its storage) is alive.
PersistentCache::Blob AlgoChooserProfileCache::Key::build_blob() const {
    auto &&ret = m_blob_storage;
    // memoized: non-empty storage means the blob was already built
    if (!m_blob_storage.empty())
        return {ret.data(), ret.size()};
    ret.reserve(sizeof(TensorLayout) * 3 * m_inp_layouts_size + m_param_size);
    for (size_t i = 0; i < m_inp_layouts_size; ++ i) {
        auto &&ly = m_inp_layouts_ptr[i];
        for (size_t j = 0; j < ly.ndim; ++ j) {
            if (j)
                ret.push_back(',');
            ret.append(std::to_string(ly.shape[j]));
        }
        // strides are encoded only when they carry information beyond shape
        if (!ly.is_contiguous()) {
            ret.push_back(';');
            for (size_t j = 0; j < ly.ndim; ++ j) {
                if (j)
                    ret.push_back(',');
                ret.append(std::to_string(ly.stride[j]));
            }
        }
        ret.push_back(';');
        ret.append(ly.dtype.name());
        ret.push_back('|');
        mgb_assert(ly.format.is_default() || (ly.format.is_lowbit_aligned() &&
                ly.dtype.is_low_bit()),
                "currently only default format is supported");
    }
    if (m_param_size) {
        // raw param bytes appended verbatim (not human-readable)
        ret.append(reinterpret_cast<const char*>(m_param), m_param_size);
    }
    return {ret.data(), ret.size()};
}
  243. #undef ENGRY_FMT
  244. #ifdef WIN32
  245. #undef snprintf
  246. #endif
  247. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台