GitOrigin-RevId: 6fddd612e7
tags/v1.10.0
| @@ -29,15 +29,12 @@ public: | |||||
| MGE_WIN_DECLSPEC_FUC static HeuristicCache& instance(); | MGE_WIN_DECLSPEC_FUC static HeuristicCache& instance(); | ||||
| struct KeyStorage { | struct KeyStorage { | ||||
| std::string category; | |||||
| std::string input; | |||||
| size_t k1, k2; | |||||
| bool operator==(const KeyStorage& k) const { | |||||
| return category == k.category && input == k.input; | |||||
| } | |||||
| bool operator==(const KeyStorage& k) const { return k1 == k.k1 && k2 == k.k2; } | |||||
| }; | }; | ||||
| class Key { | |||||
| struct Key { | |||||
| Handle* m_handle; | Handle* m_handle; | ||||
| uint32_t m_opr_type; | uint32_t m_opr_type; | ||||
| const TensorLayout* m_inp_layouts_ptr; | const TensorLayout* m_inp_layouts_ptr; | ||||
| @@ -45,8 +42,7 @@ public: | |||||
| const void* m_param_ptr; | const void* m_param_ptr; | ||||
| size_t m_param_size; | size_t m_param_size; | ||||
| mutable std::string m_category; | |||||
| mutable std::string m_input; | |||||
| mutable SmallVector<size_t> m_buf; | |||||
| public: | public: | ||||
| Key(Handle* opr_handle, Algorithm::OprType opr_type, | Key(Handle* opr_handle, Algorithm::OprType opr_type, | ||||
| @@ -65,6 +61,10 @@ public: | |||||
| struct Result { | struct Result { | ||||
| ExecutionPolicy policy; | ExecutionPolicy policy; | ||||
| size_t workspace; | size_t workspace; | ||||
| // for cache collision | |||||
| SmallVector<size_t> m_buf; | |||||
| SmallVector<char> m_param_buf; | |||||
| }; | }; | ||||
| MGE_WIN_DECLSPEC_FUC void put(const Key& key, Result& result); | MGE_WIN_DECLSPEC_FUC void put(const Key& key, Result& result); | ||||
| @@ -76,8 +76,8 @@ public: | |||||
| private: | private: | ||||
| struct Hash { | struct Hash { | ||||
| size_t operator()(const KeyStorage& k) const { | size_t operator()(const KeyStorage& k) const { | ||||
| size_t h1 = std::hash<std::string>{}(k.category); | |||||
| size_t h2 = std::hash<std::string>{}(k.input); | |||||
| size_t h1 = k.k1; | |||||
| size_t h2 = k.k2; | |||||
| h1 ^= h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2); | h1 ^= h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2); | ||||
| return h1; | return h1; | ||||
| } | } | ||||
| @@ -11,6 +11,8 @@ | |||||
| */ | */ | ||||
| #include "megdnn/heuristic_cache.h" | #include "megdnn/heuristic_cache.h" | ||||
| #include "megdnn/tensor_format.h" | |||||
| #include "src/common/hash_ct.h" | |||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
| @@ -32,38 +34,27 @@ HeuristicCache& HeuristicCache::instance() { | |||||
| } | } | ||||
| HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { | HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { | ||||
| auto&& ctg = m_category; | |||||
| auto&& inp = m_input; | |||||
| size_t buf_size = 16 * m_inp_layouts_size + 6; | |||||
| size_t buf[buf_size]; | |||||
| if (!m_category.empty() && !m_input.empty()) | |||||
| return {ctg, inp}; | |||||
| inp.reserve(sizeof(TensorLayout) * 3 * m_inp_layouts_size + m_param_size); | |||||
| size_t pos = 0; | |||||
| for (size_t i = 0; i < m_inp_layouts_size; i++) { | for (size_t i = 0; i < m_inp_layouts_size; i++) { | ||||
| auto&& ly = m_inp_layouts_ptr[i]; | |||||
| for (size_t j = 0; j < ly.ndim; j++) { | |||||
| if (j) | |||||
| inp.push_back(','); | |||||
| inp.append(std::to_string(ly.shape[j])); | |||||
| auto&& layout = m_inp_layouts_ptr[i]; | |||||
| if (layout.dtype.valid()) { | |||||
| buf[pos++] = static_cast<size_t>(layout.dtype.enumv()); | |||||
| } else { | |||||
| buf[pos++] = static_cast<size_t>(SIZE_MAX); | |||||
| } | } | ||||
| inp.push_back(';'); | |||||
| for (size_t j = 0; j < ly.ndim; j++) { | |||||
| if (j) | |||||
| inp.push_back(','); | |||||
| inp.append(std::to_string(ly.stride[j])); | |||||
| buf[pos++] = static_cast<size_t>(layout.format.type()); | |||||
| for (size_t j = 0; j < layout.ndim; j++) { | |||||
| buf[pos++] = layout.shape[j]; | |||||
| buf[pos++] = layout.stride[j]; | |||||
| } | } | ||||
| inp.push_back(';'); | |||||
| inp.append(ly.dtype.name()); | |||||
| inp.push_back(';'); | |||||
| inp.append(ly.format.to_string().c_str()); | |||||
| inp.push_back('|'); | |||||
| } | |||||
| if (m_param_size) { | |||||
| inp.append(reinterpret_cast<const char*>(m_param_ptr), m_param_size); | |||||
| } | } | ||||
| ctg = "plat:"; | |||||
| ctg.append(std::to_string(static_cast<uint32_t>(m_handle->type()))); | |||||
| buf[pos++] = m_opr_type; | |||||
| buf[pos++] = static_cast<size_t>(m_handle->type()); | |||||
| switch (m_handle->type()) { | switch (m_handle->type()) { | ||||
| #if MEGDNN_WITH_CUDA | #if MEGDNN_WITH_CUDA | ||||
| case Handle::HandleType::CUDA: { | case Handle::HandleType::CUDA: { | ||||
| @@ -72,9 +63,9 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { | |||||
| cuda_rt /= 1000; | cuda_rt /= 1000; | ||||
| auto&& handle = static_cast<megdnn::cuda::HandleImpl*>(m_handle); | auto&& handle = static_cast<megdnn::cuda::HandleImpl*>(m_handle); | ||||
| auto&& prop = handle->device_prop(); | auto&& prop = handle->device_prop(); | ||||
| ctg.append(ssprintf( | |||||
| ";dev=%s;cap=%d.%d;runtime=%d;", prop.name, prop.major, prop.minor, | |||||
| cuda_rt)); | |||||
| buf[pos++] = prop.major; | |||||
| buf[pos++] = prop.minor; | |||||
| buf[pos++] = cuda_rt; | |||||
| break; | break; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -85,9 +76,10 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { | |||||
| int drv = -1, hip_rt = -1; | int drv = -1, hip_rt = -1; | ||||
| hip_check(hipDriverGetVersion(&drv)); | hip_check(hipDriverGetVersion(&drv)); | ||||
| hip_check(hipRuntimeGetVersion(&hip_rt)); | hip_check(hipRuntimeGetVersion(&hip_rt)); | ||||
| ctg.append(ssprintf( | |||||
| ";dev=%s;cap=%d.%d,drv=%d;runtime=%d;", prop.name, prop.major, | |||||
| prop.minor, drv, hip_rt)); | |||||
| buf[pos++] = prop.major; | |||||
| buf[pos++] = prop.minor; | |||||
| buf[pos++] = drv; | |||||
| buf[pos++] = hip_rt; | |||||
| break; | break; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -108,16 +100,21 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { | |||||
| size_t nr_threads = static_cast<megdnn::naive::HandleImpl*>(m_handle) | size_t nr_threads = static_cast<megdnn::naive::HandleImpl*>(m_handle) | ||||
| ->megcore_dispatcher() | ->megcore_dispatcher() | ||||
| ->nr_threads(); | ->nr_threads(); | ||||
| ctg.append(";"); | |||||
| ctg.append(std::to_string(nr_threads)); | |||||
| ctg.append(";"); | |||||
| buf[pos++] = nr_threads; | |||||
| break; | break; | ||||
| } | } | ||||
| default: | default: | ||||
| ctg.append(";"); | |||||
| break; | |||||
| } | } | ||||
| ctg.append(std::to_string(m_opr_type)); | |||||
| return {ctg, inp}; | |||||
| m_buf.resize(pos); | |||||
| SmallVector<size_t> tmp(buf, buf + pos); | |||||
| m_buf = std::move(tmp); | |||||
| size_t k1 = XXHash64CT::hash((const char*)buf, pos * sizeof(size_t), 20220328); | |||||
| size_t k2 = XXHash64CT::hash((const char*)m_param_ptr, m_param_size, 20220328); | |||||
| return {k1, k2}; | |||||
| } | } | ||||
| void HeuristicCache::put(const Key& key, Result& result) { | void HeuristicCache::put(const Key& key, Result& result) { | ||||
| @@ -126,15 +123,41 @@ void HeuristicCache::put(const Key& key, Result& result) { | |||||
| m_heuristic_cache[key.build_key_storage()] = result; | m_heuristic_cache[key.build_key_storage()] = result; | ||||
| } | } | ||||
| template <typename T> | |||||
| bool is_same_buf( | |||||
| const T hash_buf[], const size_t buf_size, const T hash_buf_[], | |||||
| const size_t buf_size_) { | |||||
| if (buf_size != buf_size_) { | |||||
| return false; | |||||
| } | |||||
| for (size_t i = 0; i < buf_size; i++) { | |||||
| if (hash_buf[i] != hash_buf_[i]) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| HeuristicCache::Result HeuristicCache::get(const Key& key) { | HeuristicCache::Result HeuristicCache::get(const Key& key) { | ||||
| MEGDNN_LOCK_GUARD(m_mtx); | MEGDNN_LOCK_GUARD(m_mtx); | ||||
| KeyStorage ks = key.build_key_storage(); | KeyStorage ks = key.build_key_storage(); | ||||
| auto iter = m_heuristic_cache.find(ks); | auto iter = m_heuristic_cache.find(ks); | ||||
| if (iter == m_heuristic_cache.end()) { | |||||
| return {}; | |||||
| } else { | |||||
| return iter->second; | |||||
| if (iter != m_heuristic_cache.end()) { | |||||
| if (is_same_buf( | |||||
| key.m_buf.data(), key.m_buf.size(), iter->second.m_buf.data(), | |||||
| iter->second.m_buf.size()) && | |||||
| is_same_buf( | |||||
| (char*)(key.m_param_ptr), key.m_param_size, | |||||
| iter->second.m_param_buf.data(), iter->second.m_param_buf.size())) { | |||||
| return iter->second; | |||||
| } | |||||
| megdnn_log_warn( | |||||
| "hash collision occurs in heuristic cache with key: (%zu, %zu)", ks.k1, | |||||
| ks.k2); | |||||
| } | } | ||||
| SmallVector<char> param_buf( | |||||
| (char*)key.m_param_ptr, (char*)key.m_param_ptr + key.m_param_size); | |||||
| return Result{{}, 0, key.m_buf, param_buf}; | |||||
| } | } | ||||
| void HeuristicCache::clear() { | void HeuristicCache::clear() { | ||||
| @@ -18,6 +18,8 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo( | |||||
| megdnn_opr->execution_policy() = rst.policy; | megdnn_opr->execution_policy() = rst.policy; | ||||
| return rst.workspace; | return rst.workspace; | ||||
| } | } | ||||
| SmallVector<size_t> buf = rst.m_buf; | |||||
| SmallVector<char> param_buf = rst.m_param_buf; | |||||
| std::string param_str; | std::string param_str; | ||||
| megdnn::Algorithm::serialize_write_pod(megdnn_opr->param(), param_str); | megdnn::Algorithm::serialize_write_pod(megdnn_opr->param(), param_str); | ||||
| @@ -40,11 +42,10 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo( | |||||
| megdnn::ExecutionPolicy policy; | megdnn::ExecutionPolicy policy; | ||||
| policy = mgb::rdnn::AlgoChooser<Opr>::get_policy(helper); | policy = mgb::rdnn::AlgoChooser<Opr>::get_policy(helper); | ||||
| size_t workspace = helper.get_workspace_size_bytes(policy, layouts); | size_t workspace = helper.get_workspace_size_bytes(policy, layouts); | ||||
| megdnn_opr->execution_policy() = policy; | megdnn_opr->execution_policy() = policy; | ||||
| if (execution_policy.strategy & rdnn::ExecutionStrategy::HEURISTIC) { | if (execution_policy.strategy & rdnn::ExecutionStrategy::HEURISTIC) { | ||||
| megdnn::HeuristicCache::Result cache_result{policy, workspace}; | |||||
| megdnn::HeuristicCache::Result cache_result{policy, workspace, buf, param_buf}; | |||||
| megdnn::HeuristicCache::instance().put(cache_key, cache_result); | megdnn::HeuristicCache::instance().put(cache_key, cache_result); | ||||
| } | } | ||||
| return workspace; | return workspace; | ||||
| @@ -123,8 +123,6 @@ TensorLayout do_shape_infer( | |||||
| std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | ||||
| const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | ||||
| using Param = ::megdnn::param::Convolution; | |||||
| SmallVector<LogicalTensorDesc> dests(1); | SmallVector<LogicalTensorDesc> dests(1); | ||||
| auto&& desc = dests[0]; | auto&& desc = dests[0]; | ||||
| desc.comp_node = inputs[0].comp_node; | desc.comp_node = inputs[0].comp_node; | ||||
| @@ -166,15 +164,16 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| } | } | ||||
| oup_shapes[0] = out_layout; | oup_shapes[0] = out_layout; | ||||
| DnnOprCaller<megdnn::ConvBiasForward> dnn_opr(cn); | DnnOprCaller<megdnn::ConvBiasForward> dnn_opr(cn); | ||||
| dnn_opr.op->param().pad_h = conv.pad_h; | |||||
| dnn_opr.op->param().pad_w = conv.pad_w; | |||||
| dnn_opr.op->param().stride_h = conv.stride_h; | |||||
| dnn_opr.op->param().stride_w = conv.stride_w; | |||||
| dnn_opr.op->param().dilate_h = conv.dilate_h; | |||||
| dnn_opr.op->param().dilate_w = conv.dilate_w; | |||||
| dnn_opr.op->param().sparse = conv.sparse; | |||||
| dnn_opr.op->param().compute_mode = conv.compute_mode; | |||||
| dnn_opr.op->param().format = conv.format; | |||||
| auto&& param = dnn_opr.op->param(); | |||||
| param.pad_h = conv.pad_h; | |||||
| param.pad_w = conv.pad_w; | |||||
| param.stride_h = conv.stride_h; | |||||
| param.stride_w = conv.stride_w; | |||||
| param.dilate_h = conv.dilate_h; | |||||
| param.dilate_w = conv.dilate_w; | |||||
| param.sparse = conv.sparse; | |||||
| param.compute_mode = conv.compute_mode; | |||||
| param.format = conv.format; | |||||
| // shape infer | // shape infer | ||||
| TensorLayout shp({0}, inputs[0]->dtype()); | TensorLayout shp({0}, inputs[0]->dtype()); | ||||
| @@ -513,8 +512,6 @@ TensorLayout do_shape_infer( | |||||
| std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | ||||
| const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | ||||
| using Param = ::megdnn::param::Convolution3D; | |||||
| SmallVector<LogicalTensorDesc> dests(1); | SmallVector<LogicalTensorDesc> dests(1); | ||||
| auto&& desc = dests[0]; | auto&& desc = dests[0]; | ||||
| desc.comp_node = inputs[0].comp_node; | desc.comp_node = inputs[0].comp_node; | ||||
| @@ -42,6 +42,8 @@ size_t AlgoChooser<Opr>::setup_algo( | |||||
| megdnn_opr->execution_policy() = rst.policy; | megdnn_opr->execution_policy() = rst.policy; | ||||
| return rst.workspace; | return rst.workspace; | ||||
| } | } | ||||
| SmallVector<size_t> buf = rst.m_buf; | |||||
| SmallVector<char> param_buf = rst.m_param_buf; | |||||
| if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) { | if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) { | ||||
| return 0; | return 0; | ||||
| @@ -92,7 +94,7 @@ size_t AlgoChooser<Opr>::setup_algo( | |||||
| megdnn_opr->execution_policy() = policy; | megdnn_opr->execution_policy() = policy; | ||||
| if (mgb_opr->execution_policy().strategy & rdnn::ExecutionStrategy::HEURISTIC) { | if (mgb_opr->execution_policy().strategy & rdnn::ExecutionStrategy::HEURISTIC) { | ||||
| HeuristicCache::Result cache_result{policy, workspace}; | |||||
| HeuristicCache::Result cache_result{policy, workspace, buf, param_buf}; | |||||
| HeuristicCache::instance().put(cache_key, cache_result); | HeuristicCache::instance().put(cache_key, cache_result); | ||||
| } | } | ||||
| return workspace; | return workspace; | ||||