|
- /**
- * \file imperative/src/impl/proxy_graph/mini_graph.h
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
- #include "megbrain/graph/operator_node.h"
- #include "megbrain/imperative/physical_tensor.h"
- #include "megbrain/imperative/op_def.h"
-
- #include "./common.h"
- #include "./proxy_graph_base.h"
-
- #include <optional>
- #include "range/v3/all.hpp"
-
-
- namespace mgb::imperative::proxy_graph {
-
- using cg::OperatorNodeBase;
-
-
/*!
 * \brief find the position of the first element of \p container equal to \p item
 *
 * Elements are visited in iteration order and compared with operator==. Only
 * forward iteration is needed, so any range usable in a range-based for loop
 * (including built-in arrays and node-based containers) is accepted.
 *
 * \return {true, index of the first match} if a match exists, otherwise
 *      {false, number of elements in the container}
 */
template <typename C, typename E>
std::pair<bool, size_t> find_index(const C& container, const E& item) {
    size_t index = 0;
    for (auto&& element : container) {
        if (element == item) {
            return {true, index};
        }
        ++index;
    }
    // not found: index now equals the element count, i.e. what
    // `end - begin` yields for a random-access container
    return {false, index};
}
-
-
- template <typename T, typename = void> class TensorAdaptor;
-
// SFINAE helper: names `void` when T and U denote the same type once their
// top-level const/volatile qualifiers are removed; has no type otherwise
template <typename T, typename U>
using enable_if_same_upto_cv_t = typename std::enable_if<
        std::is_same<typename std::remove_cv<T>::type,
                     typename std::remove_cv<U>::type>::value>::type;
-
- template<typename T>
- class TensorAdaptor<T, enable_if_same_upto_cv_t<T, LogicalTensorDesc>> {
- T& wrapped;
- template <typename U>
- using maybe_add_const_t = std::conditional_t<std::is_const_v<T>, const U, U>;
-
- public:
- using type = T;
-
- TensorAdaptor(T& desc) : wrapped(desc) {}
- TensorAdaptor(T* desc) : wrapped(*desc) {}
-
- DType dtype() {return wrapped.layout.dtype;}
- CompNode comp_node() {return wrapped.comp_node;}
- maybe_add_const_t<TensorShape>& shape() {return wrapped.layout;}
- bool has_value() {return wrapped.value.shape_valid();}
- auto& value() {return wrapped.value;}
-
- auto* operator->() {return &wrapped;}
- };
-
- template<typename T>
- class TensorAdaptor<T, enable_if_same_upto_cv_t<T, Tensor>> {
- Tensor& wrapped;
-
- public:
- using type = Tensor;
-
- TensorAdaptor(Tensor& tensor) : wrapped(tensor) {}
- TensorAdaptor(Tensor* tensor) : wrapped(*tensor) {}
-
- DType dtype() {return wrapped.dtype();}
- CompNode comp_node() {return wrapped.comp_node();}
- const TensorShape& shape() {return wrapped.shape();}
-
- type* operator->() {return &wrapped;}
- };
-
- // deduction guides
- template <typename T> TensorAdaptor(T&) -> TensorAdaptor<T, void>;
- template <typename T> TensorAdaptor(T*) -> TensorAdaptor<T, void>;
-
-
- // single opr graph, for static inference and execution
- // contains static inference descs
- class ProxyGraph::MiniGraph {
- protected:
- struct InferDepItem {
- bool is_input : 1;
- size_t idx : 63;
- cg::static_infer::DepType type;
- };
-
- enum class InferStatus {
- UNKOWN,
- READY,
- FAILED
- };
-
- // inference desc and pre-allocated storage for a single var
- template <typename T>
- struct InferData {
- SmallVector<InferDepItem> deps;
- thin_function<bool(T&, const cg::static_infer::InpVal&)> infer_func;
-
- // pre-allocated infer states
- InferStatus status = InferStatus::UNKOWN;
- cg::static_infer::InpVal inp_val;
- T dest;
-
- void initialize(OperatorNodeBase* opr, const cg::static_infer::DepVal& dep_val,
- const thin_function<bool(T&, const cg::static_infer::InpVal&)>& func) {
- mgb_assert(!infer_func);
- infer_func = func;
- inp_val.val.resize(dep_val.size());
- deps.reserve(dep_val.size());
-
- for (auto&& dep : dep_val) {
- auto [found, i] = find_index(opr->input(), dep.dest);
- if (found) {
- deps.push_back({true, i, dep.type});
- } else {
- auto [found, i] = find_index(opr->output(), dep.dest);
- mgb_assert(found);
- deps.push_back({false, i, dep.type});
- }
- }
- }
-
- void reset() {
- status = InferStatus::UNKOWN;
- if constexpr (std::is_same_v<T, TensorShape>) {
- dest.ndim = 0;
- } else {
- static_assert(std::is_same_v<T, DeviceTensorND>);
- dest.storage({});
- }
- }
- };
-
- struct OutputData {
- InferData<TensorShape> shape_infer;
- InferData<DeviceTensorND> value_infer;
- };
-
- struct InferSessionBase {
- virtual const TensorShape& infer_shape(VarNode*) {mgb_assert(0);}
- virtual const TensorShape* infer_shape_fallible(VarNode*) {mgb_assert(0);}
- virtual const DeviceTensorND& infer_value(VarNode*) {mgb_assert(0);}
- virtual const DeviceTensorND* infer_value_fallible(VarNode*) {mgb_assert(0);}
- };
-
- OperatorNodeBase* m_opr = nullptr;
- SmallVector<std::unique_ptr<OperatorNodeBase>> opr_ref_keeper;
-
- size_t run_id = 0;
- SmallVector<OutputData> output_data;
- SmallVector<size_t> input_remap;
- SmallVector<size_t> output_remap;
-
- // pre-allocated buffer for converted inputs
- SmallVector<std::optional<DeviceTensorND>> input_value_storage;
-
- InferSessionBase* m_sess = nullptr;
-
- template <typename T>
- struct InputAdaptor {
- T& wrapped;
- SmallVector<std::optional<DeviceTensorND>>& value_storage;
-
- InputAdaptor(MiniGraph& owner, T& inputs) : wrapped(inputs), value_storage(owner.input_value_storage) {}
- ~InputAdaptor() {
- for (auto& i : value_storage) {
- i.reset();
- }
- }
-
- const TensorShape* shape(size_t i) {
- TensorAdaptor tensor(wrapped[i]);
- auto& shape = tensor.shape();
- return shape.ndim ? &shape : nullptr;
- }
-
- const DeviceTensorND* value(size_t i, bool sync) {
- TensorAdaptor tensor(wrapped[i]);
- using tensor_t = std::remove_cv_t<typename decltype(tensor)::type>;
- if constexpr (std::is_same_v<tensor_t, Tensor>) {
- auto& storage = value_storage[i];
- if (!storage) {
- if (sync) {
- return &storage.emplace(tensor->get_value().proxy_to_default_cpu());
- } else {
- if (auto* hv = tensor->try_get_value()) {
- return &storage.emplace(hv->proxy_to_default_cpu());
- }
- return nullptr;
- }
- }
- } else {
- auto& value = tensor.value();
- return value.shape_valid() ? &value : nullptr;
- }
- }
- };
-
- public:
- template <typename I, typename G>
- MiniGraph(G& graph, const OpDef& opdef, const I& inputs) : input_value_storage(inputs.size()) {
- mgb_assert(!m_opr);
- auto _ = graph.scoped_attach(this);
- cg::VarNodeArray vinputs(inputs.size());
- for (auto&& [i, t] : ranges::views::enumerate(inputs)) {
- auto tensor = TensorAdaptor(t);
- opr_ref_keeper.emplace_back(new InputPlaceholder(graph, tensor.dtype(), tensor.comp_node()));
- vinputs[i] = opr_ref_keeper.back()->output(0);
- }
- auto ovars = OpDef::apply_on_var_node(opdef, vinputs);
- mgb_assert(m_opr);
- output_data.resize(m_opr->output().size());
- for (auto* v : ovars) {
- mgb_assert(v->owner_opr() == m_opr);
- }
- m_opr->init_output_static_infer_desc();
-
- // fix permuted input
- input_remap.reserve(m_opr->input().size());
- for (auto* v : m_opr->input()) {
- auto [found, i] = find_index(vinputs, v);
- mgb_assert(found);
- input_remap.push_back(i);
- }
- auto fix_dep_idx = [&](SmallVector<InferDepItem>& deps) {
- for (auto& dep : deps) {
- if (dep.is_input) {
- dep.idx = input_remap[dep.idx];
- }
- }
- };
- for (auto& data : output_data) {
- fix_dep_idx(data.shape_infer.deps);
- fix_dep_idx(data.value_infer.deps);
- }
-
- // fix permuted output
- output_remap.reserve(ovars.size());
- for (auto* v : ovars) {
- auto [found, i] = find_index(m_opr->output(), v);
- mgb_assert(found);
- output_remap.push_back(i);
- }
- }
-
- // methods for containing graph
-
- OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr_uniqp) {
- mgb_assert(!m_opr);
- m_opr = opr_uniqp.get();
- mgb_assert(opr_ref_keeper.back()->owner_graph() == m_opr->owner_graph());
- mgb_assert(!m_opr->inserted_in_graph());
- opr_ref_keeper.push_back(std::move(opr_uniqp));
- m_opr->set_inserted_in_graph();
- m_opr->init_output_comp_node();
- m_opr->init_output_dtype();
- return m_opr;
- }
-
- void register_shape_infer(VarNode* varnode, const cg::static_infer::ShapeInferDesc& desc) {
- auto [found, i] = find_index(m_opr->output(), varnode);
- mgb_assert(found);
- output_data[i].shape_infer.initialize(m_opr, desc.deps, desc.infer_func);
- }
-
- void register_value_infer(VarNode* varnode, const cg::static_infer::ValueInferDesc& desc) {
- auto [found, i] = find_index(m_opr->output(), varnode);
- mgb_assert(found);
- output_data[i].value_infer.initialize(m_opr, desc.deps, desc.infer_func);
- }
-
- const TensorShape& infer_shape(VarNode* var) {
- return m_sess->infer_shape(var);
- }
-
- const DeviceTensorND& infer_value(VarNode* var) {
- return m_sess->infer_value(var);
- }
-
- OperatorNodeBase* opr() {
- return m_opr;
- }
-
- // inference routine template for type of input
- template<typename I>
- class InferSession : protected InferSessionBase {
- MiniGraph& owner;
- SmallVector<OutputData>& output_data;
- InputAdaptor<I> inputs;
-
- template<typename T>
- const T* infer(InferData<T>& target, bool sync) {
- bool ret;
- if (target.status != InferStatus::UNKOWN) {
- ret = target.status == InferStatus::READY;
- } else {
- ret = target.infer_func && do_infer(target, sync);
- target.status = ret ? InferStatus::READY : InferStatus::FAILED;
- }
- return ret ? &target.dest : nullptr;
- }
-
- template<typename T>
- bool do_infer(InferData<T>& target, bool sync) {
- for (size_t i = 0; i < target.deps.size(); ++i) {
- target.inp_val.run_id = owner.run_id;
- auto& dep = target.deps[i];
- if (dep.is_input) {
- if (dep.type == cg::static_infer::DepType::SHAPE) {
- if (auto* val = inputs.shape(dep.idx)) {
- target.inp_val.val[i].m_shape = val;
- } else return false;
- } else {
- if (auto* val = inputs.value(dep.idx, sync)) {
- target.inp_val.val[i].m_value = val;
- } else return false;
- }
- } else {
- if (dep.type == cg::static_infer::DepType::SHAPE) {
- if (auto* val = infer(output_data[dep.idx].shape_infer, sync)) {
- target.inp_val.val[i].m_shape = val;
- } else return false;
- } else {
- if (auto* val = infer(output_data[dep.idx].value_infer, sync)) {
- target.inp_val.val[i].m_value = val;
- } else return false;
- }
- }
- }
- return target.infer_func(target.dest, target.inp_val);
- }
-
- // methods for owner mini graph
- // corresponding methods of containing ComputingGraph will be redirected here
-
- const TensorShape& infer_shape(VarNode* var) override {
- mgb_assert(owner.m_opr);
- auto [found, i] = find_index(owner.m_opr->input(), var);
- mgb_assert(found);
- i = owner.input_remap[i];
- auto* shape = inputs.shape(i);
- mgb_assert(shape);
- return *shape;
- }
-
- const DeviceTensorND& infer_value(VarNode* var) override {
- mgb_assert(owner.m_opr);
- auto [found, i] = find_index(owner.m_opr->input(), var);
- mgb_assert(found);
- i = owner.input_remap[i];
- auto* value = inputs.value(i, false);
- mgb_assert(value);
- return *value;
- }
-
- public:
- InferSession(MiniGraph& mgraph, I& inputs_)
- : owner(mgraph), output_data(mgraph.output_data), inputs(mgraph, inputs_) {
- mgraph.run_id++;
- mgb_assert(!owner.m_sess);
- owner.m_sess = this;
- }
- ~InferSession() {
- owner.m_sess = nullptr;
- for (auto& i : output_data) {
- i.shape_infer.reset();
- i.value_infer.reset();
- }
- }
-
- const TensorShape* infer_shape(size_t i, bool sync) {
- i = owner.output_remap[i];
- return infer(output_data[i].shape_infer, sync);
- }
-
- const DeviceTensorND* infer_value(size_t i, bool sync) {
- i = owner.output_remap[i];
- return infer(output_data[i].shape_infer, sync);
- }
- };
-
- template <typename T>
- InferSession<T> infer_session(T& inputs) {return InferSession(*this, inputs);}
-
- size_t output_size() {
- return output_remap.size();
- }
-
- VarNode* output_var(size_t i) {
- i = output_remap[i];
- return m_opr->output(i);
- }
- };
-
-
- class CompNodeTracker {
- static constexpr size_t bucket_size = 100;
- static constexpr size_t bucket_count = 10;
-
- CompNode comp_node;
- std::array<std::unique_ptr<CompNode::Event>, bucket_count> events;
-
- size_t free_slots = bucket_size;
- size_t head = 0; // events[head] is not recorded
- size_t tail = 0; // events[tail] is not finished
-
- void rotate() {
- while (tail < head && events[tail % bucket_count]->finished()) {
- ++tail;
- }
- auto& ev = events[head % bucket_count];
- if (head == tail + bucket_count) {
- // do not wait if head == tail
- ev->host_wait();
- ++tail;
- }
- ev->record();
- ++head;
- free_slots = bucket_size;
- }
-
- public:
- CompNodeTracker(CompNode cn) : comp_node(cn) {
- for (auto& e : events) {
- e = cn.create_event();
- }
- }
-
- size_t add_opr() {
- if (!free_slots) rotate();
- --free_slots;
- return head;
- }
-
- size_t progress() {
- return tail;
- }
- };
-
-
- class ExecMiniGraph : public ProxyGraph::MiniGraph {
- union BusyListItem {
- size_t finish_time;
- OperatorNodeBase* opr;
- };
-
- SmallVector<CompNodeTracker*> comp_node_trackers;
- std::deque<BusyListItem> busy_oprs;
- SmallVector<OperatorNodeBase*> idle_oprs;
-
- OperatorNodeBase* acquire_opr() {
- mgb_assert(!m_opr);
- if (!idle_oprs.empty()) {
- m_opr = idle_oprs.back();
- idle_oprs.pop_back();
- return m_opr;
- }
- mgb_assert(busy_oprs.size() > comp_node_trackers.size());
- bool can_pop = true;
- for (auto [item, tracker] : ranges::views::zip(busy_oprs, comp_node_trackers)) {
- if (item.finish_time >= tracker->progress()) {
- can_pop = false;
- break;
- }
- }
- if (can_pop) {
- for (auto _ : comp_node_trackers) {
- MGB_MARK_USED_VAR(_);
- busy_oprs.pop_front();
- }
- m_opr = busy_oprs.front().opr;
- busy_oprs.pop_front();
- return m_opr;
- }
-
- }
-
- template <bool in_use>
- void release_opr() {
- if constexpr (in_use) {
- for (auto tracker : comp_node_trackers) {
- tracker->add_opr();
- }
- }
- }
- };
-
-
- class ProxyGraphTypeI : public ProxyGraphBase {
- class StaticInferManager : public StaticInferManagerBase {
- ProxyGraph::MiniGraph* target = nullptr;
-
- friend class ProxyGraphTypeI;
-
- public:
- void register_shape_infer(VarNode* var, const cg::static_infer::ShapeInferDesc& desc) override {
- target->register_shape_infer(var, desc);
- };
- void register_value_infer(VarNode* var, const cg::static_infer::ValueInferDesc& desc) override {
- target->register_value_infer(var, desc);
- };
- cg::static_infer::InferType get_infer_type(VarNode*) override {
- return {cg::static_infer::InferType::MISSING_INP, cg::static_infer::InferType::MISSING_INP};
- }
- // some poorly written inference func would call infer_{shape,value}
- const TensorShape& infer_shape(VarNode* var) override {
- return target->infer_shape(var);
- }
- const DeviceTensorND& infer_value(VarNode* var) override {
- return target->infer_value(var);
- }
- };
-
- ProxyGraph::MiniGraph* target = nullptr;
- StaticInferManager m_static_infer_manager;
- std::unordered_map<size_t, ProxyGraph::MiniGraph> m_mini_graph_cache;
- size_t opr_count = 0;
-
- static thread_local std::unique_ptr<ProxyGraphTypeI> sm_instance;
-
- friend class ProxyGraph::MiniGraph;
-
- size_t nr_oprs_in_graph() const override {
- return opr_count;
- }
-
- size_t next_node_id() override {
- return opr_count;
- }
-
- std::shared_ptr<void> on_comp_node_finalize() override {
- sm_instance.reset();
- return {};
- }
-
- cg::static_infer::StaticInferManager& static_infer_manager() override {
- return m_static_infer_manager;
- }
-
- void attach(ProxyGraph::MiniGraph* target_) {
- target = target_;
- m_static_infer_manager.target = target_;
- }
-
- struct AttachGuard {
- ProxyGraphTypeI* owner = nullptr;
- ProxyGraph::MiniGraph* target = nullptr;
-
- AttachGuard(ProxyGraphTypeI* owner_ = nullptr, ProxyGraph::MiniGraph* target_ = nullptr)
- : owner(owner_), target(target_) {}
- AttachGuard(AttachGuard&) = delete;
- AttachGuard& operator=(AttachGuard&) = delete;
- AttachGuard(AttachGuard&& rhs) : owner(rhs.owner), target(rhs.target) {rhs.owner = nullptr;}
- AttachGuard& operator=(AttachGuard&& rhs) = delete;
- ~AttachGuard() {if (owner) owner->attach(target);}
- };
-
- [[nodiscard]]
- AttachGuard scoped_attach(ProxyGraph::MiniGraph* target_) {
- attach(target_);
- return attach_guard();
- }
-
- [[nodiscard]]
- AttachGuard attach_guard(ProxyGraph::MiniGraph* target_ = nullptr) {
- return {this, target_};
- }
-
- public:
- OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr_uniqp) override {
- return target->insert_opr(std::move(opr_uniqp));
- }
-
- static ProxyGraphTypeI& inst() {
- if (!sm_instance) {
- sm_instance.reset(new ProxyGraphTypeI);
- }
- return *sm_instance;
- }
-
- std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(const OpDef& def,
- const SmallVector<LogicalTensorDesc>& inputs) {
- size_t buf_size = 2 * inputs.size() + 1;
- size_t buf[buf_size];
- size_t pos = 0;
- buf[pos++] = def.hash();
- for (auto&& desc : inputs) {
- buf[pos++] = mgb::hash(desc.layout.dtype.handle());
- buf[pos++] = mgb::hash(desc.comp_node);
- }
- mgb_assert(pos == buf_size);
- auto key = XXHash{}.update(buf, buf_size*sizeof(size_t)).digest();
- auto it = m_mini_graph_cache.find(key);
- if (it == m_mini_graph_cache.end()) {
- auto&& result = m_mini_graph_cache.emplace(
- std::piecewise_construct,
- std::make_tuple(key),
- std::forward_as_tuple(*this, def, inputs));
- mgb_assert(result.second);
- it = result.first;
- }
- auto& minigraph = it->second;
- auto _ = scoped_attach(&minigraph);
- auto sess = minigraph.infer_session(inputs);
- std::tuple<SmallVector<LogicalTensorDesc>, bool> ret;
- auto& [descs, noerr] = ret;
- descs.reserve(minigraph.output_size());
- for (size_t i = 0; i < minigraph.output_size(); ++i) {
- descs.emplace_back();
- auto& desc = descs.back();
- desc.layout.dtype = minigraph.output_var(i)->dtype();
- desc.comp_node = minigraph.output_var(i)->comp_node();
- if (auto* shape = sess.infer_shape(i, false)) {
- desc.layout.init_contiguous_stride(*shape);
- noerr = true;
- } else {
- noerr = false;
- }
- }
- return ret;
- }
- };
-
- } // namespace mgb::imperative::proxy_graph
|