From d9c4ef59febc7bbe139361fe7961ce78dec8a80e Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Thu, 24 Mar 2022 19:05:33 +0800 Subject: [PATCH] perf(imperative): using simple hash key in heuristic cache GitOrigin-RevId: 6fddd612e7cc193a140a401fd2a62a98a5056b1d --- dnn/include/megdnn/heuristic_cache.h | 20 ++-- dnn/src/common/heuristic_cache.cpp | 107 ++++++++++++-------- imperative/src/impl/algo_chooser.h | 5 +- imperative/src/impl/ops/convolution.cpp | 23 ++--- src/opr/impl/search_policy/algo_chooser.cpp | 4 +- 5 files changed, 91 insertions(+), 68 deletions(-) diff --git a/dnn/include/megdnn/heuristic_cache.h b/dnn/include/megdnn/heuristic_cache.h index f8daf65a..fbf24f86 100644 --- a/dnn/include/megdnn/heuristic_cache.h +++ b/dnn/include/megdnn/heuristic_cache.h @@ -29,15 +29,12 @@ public: MGE_WIN_DECLSPEC_FUC static HeuristicCache& instance(); struct KeyStorage { - std::string category; - std::string input; + size_t k1, k2; - bool operator==(const KeyStorage& k) const { - return category == k.category && input == k.input; - } + bool operator==(const KeyStorage& k) const { return k1 == k.k1 && k2 == k.k2; } }; - class Key { + struct Key { Handle* m_handle; uint32_t m_opr_type; const TensorLayout* m_inp_layouts_ptr; @@ -45,8 +42,7 @@ public: const void* m_param_ptr; size_t m_param_size; - mutable std::string m_category; - mutable std::string m_input; + mutable SmallVector m_buf; public: Key(Handle* opr_handle, Algorithm::OprType opr_type, @@ -65,6 +61,10 @@ public: struct Result { ExecutionPolicy policy; size_t workspace; + + // for cache collision + SmallVector m_buf; + SmallVector m_param_buf; }; MGE_WIN_DECLSPEC_FUC void put(const Key& key, Result& result); @@ -76,8 +76,8 @@ public: private: struct Hash { size_t operator()(const KeyStorage& k) const { - size_t h1 = std::hash{}(k.category); - size_t h2 = std::hash{}(k.input); + size_t h1 = k.k1; + size_t h2 = k.k2; h1 ^= h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2); return h1; } diff --git a/dnn/src/common/heuristic_cache.cpp b/dnn/src/common/heuristic_cache.cpp index 0d6296bf..189b289f 100644 --- a/dnn/src/common/heuristic_cache.cpp +++ b/dnn/src/common/heuristic_cache.cpp @@ -11,6 +11,8 @@ */ #include "megdnn/heuristic_cache.h" +#include "megdnn/tensor_format.h" +#include "src/common/hash_ct.h" #include "src/common/utils.h" #include "src/naive/handle.h" @@ -32,38 +34,27 @@ HeuristicCache& HeuristicCache::instance() { } HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { - auto&& ctg = m_category; - auto&& inp = m_input; + size_t buf_size = 16 * m_inp_layouts_size + 6; + size_t buf[buf_size]; - if (!m_category.empty() && !m_input.empty()) - return {ctg, inp}; - - inp.reserve(sizeof(TensorLayout) * 3 * m_inp_layouts_size + m_param_size); + size_t pos = 0; for (size_t i = 0; i < m_inp_layouts_size; i++) { - auto&& ly = m_inp_layouts_ptr[i]; - for (size_t j = 0; j < ly.ndim; j++) { - if (j) - inp.push_back(','); - inp.append(std::to_string(ly.shape[j])); + auto&& layout = m_inp_layouts_ptr[i]; + if (layout.dtype.valid()) { + buf[pos++] = static_cast(layout.dtype.enumv()); + } else { + buf[pos++] = static_cast(SIZE_MAX); } - inp.push_back(';'); - for (size_t j = 0; j < ly.ndim; j++) { - if (j) - inp.push_back(','); - inp.append(std::to_string(ly.stride[j])); + buf[pos++] = static_cast(layout.format.type()); + for (size_t j = 0; j < layout.ndim; j++) { + buf[pos++] = layout.shape[j]; + buf[pos++] = layout.stride[j]; } - inp.push_back(';'); - inp.append(ly.dtype.name()); - inp.push_back(';'); - inp.append(ly.format.to_string().c_str()); - inp.push_back('|'); - } - if (m_param_size) { - inp.append(reinterpret_cast(m_param_ptr), m_param_size); } - ctg = "plat:"; - ctg.append(std::to_string(static_cast(m_handle->type()))); + buf[pos++] = m_opr_type; + buf[pos++] = static_cast(m_handle->type()); + switch (m_handle->type()) { #if MEGDNN_WITH_CUDA case Handle::HandleType::CUDA: { @@ -72,9 +63,9 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { cuda_rt /= 1000; auto&& handle = static_cast(m_handle); auto&& prop = handle->device_prop(); - ctg.append(ssprintf( - ";dev=%s;cap=%d.%d;runtime=%d;", prop.name, prop.major, prop.minor, - cuda_rt)); + buf[pos++] = prop.major; + buf[pos++] = prop.minor; + buf[pos++] = cuda_rt; break; } #endif @@ -85,9 +76,10 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { int drv = -1, hip_rt = -1; hip_check(hipDriverGetVersion(&drv)); hip_check(hipRuntimeGetVersion(&hip_rt)); - ctg.append(ssprintf( - ";dev=%s;cap=%d.%d,drv=%d;runtime=%d;", prop.name, prop.major, - prop.minor, drv, hip_rt)); + buf[pos++] = prop.major; + buf[pos++] = prop.minor; + buf[pos++] = drv; + buf[pos++] = hip_rt; break; } #endif @@ -108,16 +100,21 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { size_t nr_threads = static_cast(m_handle) ->megcore_dispatcher() ->nr_threads(); - ctg.append(";"); - ctg.append(std::to_string(nr_threads)); - ctg.append(";"); + buf[pos++] = nr_threads; break; } default: - ctg.append(";"); + break; } - ctg.append(std::to_string(m_opr_type)); - return {ctg, inp}; + + m_buf.resize(pos); + SmallVector tmp(buf, buf + pos); + m_buf = std::move(tmp); + + size_t k1 = XXHash64CT::hash((const char*)buf, pos * sizeof(size_t), 20220328); + size_t k2 = XXHash64CT::hash((const char*)m_param_ptr, m_param_size, 20220328); + + return {k1, k2}; } void HeuristicCache::put(const Key& key, Result& result) { @@ -126,15 +123,41 @@ void HeuristicCache::put(const Key& key, Result& result) { m_heuristic_cache[key.build_key_storage()] = result; } +template +bool is_same_buf( + const T hash_buf[], const size_t buf_size, const T hash_buf_[], + const size_t buf_size_) { + if (buf_size != buf_size_) { + return false; + } + for (size_t i = 0; i < buf_size; i++) { + if (hash_buf[i] != hash_buf_[i]) { + return false; + } + } + return true; +} + HeuristicCache::Result HeuristicCache::get(const Key& key) { MEGDNN_LOCK_GUARD(m_mtx); KeyStorage ks = key.build_key_storage(); auto iter = m_heuristic_cache.find(ks); - if (iter == m_heuristic_cache.end()) { - return {}; - } else { - return iter->second; + if (iter != m_heuristic_cache.end()) { + if (is_same_buf( + key.m_buf.data(), key.m_buf.size(), iter->second.m_buf.data(), + iter->second.m_buf.size()) && + is_same_buf( + (char*)(key.m_param_ptr), key.m_param_size, + iter->second.m_param_buf.data(), iter->second.m_param_buf.size())) { + return iter->second; + } + megdnn_log_warn( + "hash collision occurs in heuristic cache with key: (%zu, %zu)", ks.k1, + ks.k2); } + SmallVector param_buf( + (char*)key.m_param_ptr, (char*)key.m_param_ptr + key.m_param_size); + return Result{{}, 0, key.m_buf, param_buf}; } void HeuristicCache::clear() { diff --git a/imperative/src/impl/algo_chooser.h b/imperative/src/impl/algo_chooser.h index 454c8723..d8e481f3 100644 --- a/imperative/src/impl/algo_chooser.h +++ b/imperative/src/impl/algo_chooser.h @@ -18,6 +18,8 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo( megdnn_opr->execution_policy() = rst.policy; return rst.workspace; } + SmallVector buf = rst.m_buf; + SmallVector param_buf = rst.m_param_buf; std::string param_str; megdnn::Algorithm::serialize_write_pod(megdnn_opr->param(), param_str); @@ -40,11 +42,10 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo( megdnn::ExecutionPolicy policy; policy = mgb::rdnn::AlgoChooser::get_policy(helper); size_t workspace = helper.get_workspace_size_bytes(policy, layouts); - megdnn_opr->execution_policy() = policy; if (execution_policy.strategy & rdnn::ExecutionStrategy::HEURISTIC) { - megdnn::HeuristicCache::Result cache_result{policy, workspace}; + megdnn::HeuristicCache::Result cache_result{policy, workspace, buf, param_buf}; megdnn::HeuristicCache::instance().put(cache_key, cache_result); } return workspace; diff --git a/imperative/src/impl/ops/convolution.cpp b/imperative/src/impl/ops/convolution.cpp index 0a576123..3c279104 100644 --- a/imperative/src/impl/ops/convolution.cpp +++ b/imperative/src/impl/ops/convolution.cpp @@ -123,8 +123,6 @@ TensorLayout do_shape_infer( std::tuple, bool> infer_output_attrs_fallible( const OpDef& def, const SmallVector& inputs) { - using Param = ::megdnn::param::Convolution; - SmallVector dests(1); auto&& desc = dests[0]; desc.comp_node = inputs[0].comp_node; @@ -166,15 +164,16 @@ SmallVector apply_on_physical_tensor( } oup_shapes[0] = out_layout; DnnOprCaller dnn_opr(cn); - dnn_opr.op->param().pad_h = conv.pad_h; - dnn_opr.op->param().pad_w = conv.pad_w; - dnn_opr.op->param().stride_h = conv.stride_h; - dnn_opr.op->param().stride_w = conv.stride_w; - dnn_opr.op->param().dilate_h = conv.dilate_h; - dnn_opr.op->param().dilate_w = conv.dilate_w; - dnn_opr.op->param().sparse = conv.sparse; - dnn_opr.op->param().compute_mode = conv.compute_mode; - dnn_opr.op->param().format = conv.format; + auto&& param = dnn_opr.op->param(); + param.pad_h = conv.pad_h; + param.pad_w = conv.pad_w; + param.stride_h = conv.stride_h; + param.stride_w = conv.stride_w; + param.dilate_h = conv.dilate_h; + param.dilate_w = conv.dilate_w; + param.sparse = conv.sparse; + param.compute_mode = conv.compute_mode; + param.format = conv.format; // shape infer TensorLayout shp({0}, inputs[0]->dtype()); @@ -513,8 +512,6 @@ TensorLayout do_shape_infer( std::tuple, bool> infer_output_attrs_fallible( const OpDef& def, const SmallVector& inputs) { - using Param = ::megdnn::param::Convolution3D; - SmallVector dests(1); auto&& desc = dests[0]; desc.comp_node = inputs[0].comp_node; diff --git a/src/opr/impl/search_policy/algo_chooser.cpp b/src/opr/impl/search_policy/algo_chooser.cpp index 5ee30dd6..25eeeeb3 100644 --- a/src/opr/impl/search_policy/algo_chooser.cpp +++ b/src/opr/impl/search_policy/algo_chooser.cpp @@ -42,6 +42,8 @@ size_t AlgoChooser::setup_algo( megdnn_opr->execution_policy() = rst.policy; return rst.workspace; } + SmallVector buf = rst.m_buf; + SmallVector param_buf = rst.m_param_buf; if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) { return 0; @@ -92,7 +94,7 @@ size_t AlgoChooser::setup_algo( megdnn_opr->execution_policy() = policy; if (mgb_opr->execution_policy().strategy & rdnn::ExecutionStrategy::HEURISTIC) { - HeuristicCache::Result cache_result{policy, workspace}; + HeuristicCache::Result cache_result{policy, workspace, buf, param_buf}; HeuristicCache::instance().put(cache_key, cache_result); } return workspace;