GitOrigin-RevId: d05b114668
tags/v1.0.0-rc1
| @@ -129,7 +129,7 @@ else() | |||
| if(ANDROID) | |||
| set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -DNDEBUG") | |||
| set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -DNDEBUG -g") | |||
| else() | |||
| set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") | |||
| set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -DNDEBUG -g") | |||
| @@ -224,6 +224,7 @@ endif() | |||
| option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON) | |||
| option(MGE_BUILD_XXX "Build _xxx.so instead of mgb.so " OFF) | |||
| if(MGE_BUILD_XXX) | |||
| add_compile_definitions(MGB_ENABLE_IMPERATIVE_RUNTIME) | |||
| set(CMAKE_CXX_STANDARD 17) | |||
| endif() | |||
| @@ -662,14 +663,14 @@ endif() | |||
| configure_file(cmake/megengine.pc.in | |||
| ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc | |||
| @ONLY) | |||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc | |||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) | |||
| # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready. | |||
| if (NOT MGE_WITH_DISTRIBUTED) | |||
| include(CMakePackageConfigHelpers) | |||
| set (MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine) | |||
| configure_package_config_file(cmake/MegEngineConfig.cmake.in | |||
| configure_package_config_file(cmake/MegEngineConfig.cmake.in | |||
| ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake | |||
| INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR} | |||
| ) | |||
| @@ -674,7 +674,7 @@ void ComputingGraphImpl::share_device_memory_with(ComputingGraph& other) { | |||
| mgb_assert( | |||
| !m_current_comp_seq, | |||
| "share_device_memory_with must be called before compiling graph"); | |||
| auto&& oimpl = static_cast<ComputingGraphImpl&>(other); | |||
| auto&& oimpl = *ComputingGraphImpl::downcast(&other); | |||
| var_node_mem_manager().static_device_memory_manager( | |||
| oimpl.var_node_mem_manager().static_device_memory_manager()); | |||
| } | |||
| @@ -707,7 +707,7 @@ size_t ComputingGraphImpl::clear_device_memory() { | |||
| } | |||
| void ComputingGraphImpl::set_as_subgraph(ComputingGraph& par_graph) { | |||
| m_parent_graph = static_cast<ComputingGraphImpl*>(&par_graph); | |||
| m_parent_graph = ComputingGraphImpl::downcast(&par_graph); | |||
| m_parent_graph->m_subgraphs.emplace_back(this); | |||
| m_node_id_counter = m_parent_graph->m_node_id_counter; | |||
| options().var_sanity_check_first_run = | |||
| @@ -122,6 +122,15 @@ public: | |||
| ComputingGraphImpl(); | |||
| ~ComputingGraphImpl(); | |||
| template<typename T> static ComputingGraphImpl* downcast(T* ptr) = delete; | |||
| inline static ComputingGraphImpl* downcast(ComputingGraph* graph) { | |||
| #ifdef MGB_ENABLE_IMPERATIVE_RUNTIME | |||
| mgb_assert(!graph->options().imperative_proxy_graph); | |||
| #endif | |||
| return static_cast<ComputingGraphImpl*>(graph); | |||
| } | |||
| friend struct ComputingGraph::Options; | |||
| std::unique_ptr<AsyncExecutable> compile( | |||
| @@ -100,7 +100,7 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable { | |||
| public: | |||
| ComputingSequence(const std::shared_ptr<ComputingGraph>& graph) | |||
| : m_owner_graph_refkeep{graph}, | |||
| m_owner_graph{static_cast<ComputingGraphImpl*>(graph.get())}, | |||
| m_owner_graph{ComputingGraphImpl::downcast(graph.get())}, | |||
| m_have_parent_graph{m_owner_graph->m_parent_graph} {} | |||
| GraphExecutable::ExecEnv& exec_env() { return m_exec_env; } | |||
| @@ -76,7 +76,7 @@ EagerEvalManager::~EagerEvalManager() noexcept { | |||
| if (m_first_opr_enable_status == 1) { | |||
| m_var_sync_mgr_pool.disable_freelist(); | |||
| for (auto&& i : | |||
| static_cast<ComputingGraphImpl*>(m_owner_graph)->all_oprs()) { | |||
| ComputingGraphImpl::downcast(m_owner_graph)->all_oprs()) { | |||
| for (auto var : i->output()) { | |||
| auto mgr = VarNodeMemManager::var_node_cn_sync_manager(var); | |||
| if (mgr) { | |||
| @@ -223,7 +223,7 @@ void EagerEvalManager::prepare_for_exec(OperatorNodeBase* opr) { | |||
| } | |||
| void EagerEvalManager::update_static_infer_result(OperatorNodeBase* opr) { | |||
| auto&& mgr = static_cast<ComputingGraphImpl*>(m_owner_graph) | |||
| auto&& mgr = ComputingGraphImpl::downcast(m_owner_graph) | |||
| ->static_infer_manager_impl(); | |||
| auto sync_missing_trait = | |||
| [&](static_infer::StaticInferManagerImpl::TagHandler* handler) { | |||
| @@ -260,7 +260,7 @@ void EagerEvalManager::update_static_infer_result(OperatorNodeBase* opr) { | |||
| } | |||
| void EagerEvalManager::ensure_input_layout(VarNode* var) { | |||
| auto&& mem_mgr = static_cast<ComputingGraphImpl*>(var->owner_graph()) | |||
| auto&& mem_mgr = ComputingGraphImpl::downcast(var->owner_graph()) | |||
| ->var_node_mem_manager(); | |||
| auto trait = mem_mgr.get_var_node_mem_trait_nullable(var); | |||
| @@ -287,7 +287,7 @@ void EagerEvalManager::alloc_output_mem(OperatorNodeBase* opr) { | |||
| } | |||
| } | |||
| auto&& mgr = static_cast<ComputingGraphImpl*>(m_owner_graph) | |||
| auto&& mgr = ComputingGraphImpl::downcast(m_owner_graph) | |||
| ->var_node_mem_manager(); | |||
| OprNodeArray opr_seq{opr}; | |||
| @@ -348,7 +348,7 @@ void EagerEvalManager::do_on_opr_insert(OperatorNodeBase* opr) { | |||
| if (status) { | |||
| update_static_infer_result(opr); | |||
| alloc_output_mem(opr); | |||
| auto&& mgr = static_cast<ComputingGraphImpl*>(m_owner_graph) | |||
| auto&& mgr = ComputingGraphImpl::downcast(m_owner_graph) | |||
| ->var_node_mem_manager(); | |||
| mgr.on_graph_compile_finished(); | |||
| opr->execute(*m_exec_env); | |||
| @@ -40,7 +40,7 @@ class GradShapeChecker { | |||
| void do_on_var_shape(VarNode *var) { | |||
| MGB_MARK_USED_VAR(m_opr); | |||
| auto graph = static_cast<ComputingGraphImpl*>(var->owner_graph()); | |||
| auto graph = ComputingGraphImpl::downcast(var->owner_graph()); | |||
| auto seq = graph->current_comp_seq(); | |||
| if (seq) { | |||
| @@ -90,7 +90,7 @@ class GradShapeChecker { | |||
| } | |||
| static void make(OperatorNodeBase *opr, VarNode *wrt, VarNode *grad) { | |||
| if (static_cast<ComputingGraphImpl*>(wrt->owner_graph()) | |||
| if (ComputingGraphImpl::downcast(wrt->owner_graph()) | |||
| ->eager_eval_manager().enabled()) | |||
| return; | |||
| using namespace std::placeholders; | |||
| @@ -650,13 +650,13 @@ void GradManager::add_var_virtual_receiver( | |||
| } | |||
| void cg::add_grad_transformer(VarNode *var, const GradTransformer &cb) { | |||
| static_cast<ComputingGraphImpl*>(var->owner_graph())-> | |||
| ComputingGraphImpl::downcast(var->owner_graph())-> | |||
| grad_manager(). | |||
| add_grad_transformer(var, cb); | |||
| } | |||
| void cg::add_extra_dep_for_grad(VarNode *inp, VarNode *out) { | |||
| static_cast<ComputingGraphImpl*>(inp->owner_graph())->grad_manager(). | |||
| ComputingGraphImpl::downcast(inp->owner_graph())->grad_manager(). | |||
| add_extra_dep_for_grad(inp, out); | |||
| } | |||
| @@ -667,7 +667,7 @@ void cg::add_var_virtual_receiver( | |||
| desc->inputs = inputs; | |||
| desc->outputs = outputs; | |||
| desc->grad = grad; | |||
| static_cast<ComputingGraphImpl*>(inputs.at(0)->owner_graph())-> | |||
| ComputingGraphImpl::downcast(inputs.at(0)->owner_graph())-> | |||
| grad_manager(). | |||
| add_var_virtual_receiver(desc); | |||
| } | |||
| @@ -99,8 +99,8 @@ SymbolVarArray cg::grad(SymbolVar target_, SymbolVarArray wrts_, bool warn_mid_w | |||
| grads.reserve(wrts_.size()); | |||
| VarNodeArray dest_vars; | |||
| auto&& graph = target->owner_graph(); | |||
| auto&& eager_mgr = static_cast<ComputingGraphImpl*>(graph)->eager_eval_manager(); | |||
| auto&& grad_mgr = static_cast<ComputingGraphImpl*>(graph)->grad_manager(); | |||
| auto&& eager_mgr = ComputingGraphImpl::downcast(graph)->eager_eval_manager(); | |||
| auto&& grad_mgr = ComputingGraphImpl::downcast(graph)->grad_manager(); | |||
| bool already_recorded = eager_mgr.enter_record_mode(); | |||
| for (auto&& wrt_ : wrts_) { | |||
| auto wrt = wrt_.node(); | |||
| @@ -139,7 +139,7 @@ SymbolVarArray cg::grad(SymbolVar target_, SymbolVarArray wrts_, bool warn_mid_w | |||
| SymbolVar cg::current_grad_target(ComputingGraph &graph) { | |||
| #if MGB_ENABLE_GRAD | |||
| auto var = static_cast<ComputingGraphImpl&>(graph).grad_manager( | |||
| auto var = ComputingGraphImpl::downcast(&graph)->grad_manager( | |||
| ).current_grad_target(); | |||
| mgb_throw_if(!var, GraphError, "current_grad_target() called outside " | |||
| "grad computing environment"); | |||
| @@ -93,7 +93,7 @@ OperatorNodeBase::OperatorNodeBase(ComputingGraph *owner, | |||
| } | |||
| OperatorNodeBase::~OperatorNodeBase() noexcept { | |||
| auto &&pool = static_cast<ComputingGraphImpl*>( | |||
| auto &&pool = ComputingGraphImpl::cast( | |||
| owner_graph())->var_node_pool(); | |||
| for (auto i: m_output) { | |||
| pool.free(i); | |||
| @@ -124,7 +124,7 @@ void OperatorNodeBase::execute(ExecEnv &env) { | |||
| } | |||
| // allocate output with dynamic storage | |||
| static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager() | |||
| .alloc_var_node_mem_dynamic(env, this); | |||
| @@ -135,11 +135,11 @@ void OperatorNodeBase::execute(ExecEnv &env) { | |||
| // static_infer_manager so the value would be up-to-date; however for shape | |||
| // deps, oprs would access the shape directly, so we need to insert some | |||
| // code here to ensure it is up-to-date. | |||
| if (!static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| if (!ComputingGraphImpl::downcast(owner_graph()) | |||
| ->eager_eval_manager() | |||
| .enabled()) { | |||
| VarNodeArray vars_to_set; | |||
| auto cg = static_cast<ComputingGraphImpl*>(owner_graph()); | |||
| auto cg = ComputingGraphImpl::downcast(owner_graph()); | |||
| auto step_cur = cg->opr_step_num_in_cur_comp_seq(this).val(); | |||
| mgb_assert(step_cur < std::numeric_limits<size_t>::max()); | |||
| using DT = NodeProp::DepType; | |||
| @@ -264,7 +264,7 @@ VarNode* OperatorNodeBase::add_output(const Maybe<std::string> &name) { | |||
| mgb_assert(!m_inserted_in_graph && !m_node_prop.valid(), | |||
| "add output on opr after it has been inserted into graph"); | |||
| auto ptr = static_cast<ComputingGraphImpl*>( | |||
| auto ptr = ComputingGraphImpl::cast( | |||
| owner_graph())->var_node_pool().alloc( | |||
| name.valid() ? this->name() + ":" + name.val() : name, this); | |||
| m_output.push_back(ptr); | |||
| @@ -676,7 +676,7 @@ void mixin::IOSameShapeOperatorNode::get_output_var_shape( | |||
| void PostExecActions::add(VarNode* var) { | |||
| mgb_assert(m_comp_node == var->comp_node()); | |||
| auto graph = static_cast<ComputingGraphImpl*>(var->owner_graph()); | |||
| auto graph = ComputingGraphImpl::downcast(var->owner_graph()); | |||
| auto&& infer_mgr = graph->static_infer_manager_impl(); | |||
| auto&& extra_info = graph->current_comp_seq_extra_info(); | |||
| @@ -813,7 +813,7 @@ StaticInferManagerImpl::~StaticInferManagerImpl() noexcept { | |||
| m_mem_pool_value_trait.disable_freelist(); | |||
| for (auto &&i: m_dtor_callbacks) | |||
| i.second(); | |||
| for (auto &&i: static_cast<ComputingGraphImpl*>( | |||
| for (auto &&i: ComputingGraphImpl::downcast( | |||
| m_owner_graph)->all_oprs()) { | |||
| for (auto j: i->output()) { | |||
| clear_tag_handler(j); | |||
| @@ -1212,7 +1212,7 @@ class StaticInferManagerImpl::SubgraphStaticInferHelperImpl final: | |||
| void check_graph_par(VarNode *var) { | |||
| if (mgb_unlikely(!m_par_graph)) { | |||
| m_par_graph = static_cast<ComputingGraphImpl*>(var->owner_graph()); | |||
| m_par_graph = ComputingGraphImpl::downcast(var->owner_graph()); | |||
| mgb_assert(m_par_graph != m_sub_graph); | |||
| auto cb = [this]() { | |||
| @@ -1230,7 +1230,7 @@ class StaticInferManagerImpl::SubgraphStaticInferHelperImpl final: | |||
| void check_graph_sub(VarNode *var) { | |||
| if (mgb_unlikely(!m_sub_graph)) { | |||
| m_sub_graph = static_cast<ComputingGraphImpl*>(var->owner_graph()); | |||
| m_sub_graph = ComputingGraphImpl::downcast(var->owner_graph()); | |||
| mgb_assert(m_sub_graph != m_par_graph); | |||
| } else { | |||
| mgb_assert(m_sub_graph == var->owner_graph()); | |||
| @@ -132,7 +132,7 @@ const DeviceTensorND& SymbolVar::eager_eval_get_value() const { | |||
| #if MGB_BUILD_SLIM_SERVING | |||
| mgb_throw(MegBrainError, "eager eval disabled at compile time"); | |||
| #else | |||
| auto og = static_cast<ComputingGraphImpl*>(node()->owner_graph()); | |||
| auto og = ComputingGraphImpl::downcast(node()->owner_graph()); | |||
| mgb_assert(og->options().eager_evaluation); | |||
| return node()->dev_tensor(); | |||
| #endif | |||
| @@ -260,7 +260,7 @@ void TopoSorter::DFSDepDiscover::proc_add_dep_comp_order1() { | |||
| void TopoSorter::DFSDepDiscover::proc_find_missing_inp() { | |||
| auto frame = m_cur_frame; | |||
| auto opr = frame->opr; | |||
| auto&& mgr = static_cast<ComputingGraphImpl*>(opr->owner_graph()) | |||
| auto&& mgr = ComputingGraphImpl::downcast(opr->owner_graph()) | |||
| ->static_infer_manager_impl(); | |||
| auto&& missing_inp = frame->missing_inputs; | |||
| @@ -233,12 +233,12 @@ bool VarNode::set_fwd_in2out_readonly( | |||
| if (owner_graph()->options().imperative_proxy_graph) { | |||
| return false; | |||
| } | |||
| return static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| return ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager().fwd_in2out_readonly(input, sub, this); | |||
| } | |||
| VarNode& VarNode::set_fwd_in2out_writable(VarNode *input) { | |||
| static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager().fwd_in2out_writable(input, this); | |||
| return *this; | |||
| } | |||
| @@ -246,20 +246,20 @@ VarNode& VarNode::set_fwd_in2out_writable(VarNode *input) { | |||
| VarNode& VarNode::set_fwd_in2out_writable_force(VarNode *input) { | |||
| mgb_assert(!owner_graph()->options().imperative_proxy_graph); | |||
| static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager().fwd_in2out_writable_force(input, this); | |||
| return *this; | |||
| } | |||
| VarNode& VarNode::add_layout_constraint(LayoutConstraintCallback callback) { | |||
| static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager().add_layout_constraint( | |||
| this, std::move(callback)); | |||
| return *this; | |||
| } | |||
| VarNode& VarNode::add_layout_constraint_contiguous() { | |||
| static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager() | |||
| .add_layout_constraint_level( | |||
| this, VarNodeMemManager::LayoutConstraintLevel::CONTIG); | |||
| @@ -267,7 +267,7 @@ VarNode& VarNode::add_layout_constraint_contiguous() { | |||
| } | |||
| VarNode& VarNode::add_layout_constraint_monotone() { | |||
| static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager() | |||
| .add_layout_constraint_level( | |||
| this, VarNodeMemManager::LayoutConstraintLevel::MONOTONE); | |||
| @@ -315,7 +315,7 @@ VarNode& VarNode::shape_alloc(const TensorShape &shape) { | |||
| "shape_alloc() could only be used for vars with" | |||
| " NO_SYS_MEM_ALLOC flag; actual var: %s", | |||
| cg::dump_var_info({this}).c_str()); | |||
| static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager().var_alloc_with_shape(this, shape); | |||
| return *this; | |||
| } | |||
| @@ -330,7 +330,7 @@ bool VarNode::reset_dev_tensor_from_other_var(VarNode* src_var) { | |||
| "dynamic storage on src is required for dynamic readonly " | |||
| "forwarding: vars=%s", | |||
| dump_var_info({src_var, this}).c_str()); | |||
| auto&& trait = static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| auto&& trait = ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager() | |||
| .get_var_node_mem_trait_at(src_var); | |||
| if (trait.seq_force_update_dest || | |||
| @@ -403,7 +403,7 @@ std::shared_ptr<json::Value> VarNode::to_json() const { | |||
| return json::Null::make(); | |||
| }; | |||
| auto &&trait = static_cast<ComputingGraphImpl*>(owner_graph() | |||
| auto &&trait = ComputingGraphImpl::downcast(owner_graph() | |||
| )->var_node_mem_manager().get_var_node_mem_trait(this); | |||
| auto flag = json::Array::make(); | |||
| { | |||
| @@ -459,7 +459,7 @@ std::shared_ptr<json::Value> VarNode::to_json() const { | |||
| #endif | |||
| MemAllocPlan& VarNode::init_mem_plan(const DeviceTensorND* fixed_alloc) { | |||
| static_cast<ComputingGraphImpl*>(owner_graph()) | |||
| ComputingGraphImpl::downcast(owner_graph()) | |||
| ->var_node_mem_manager() | |||
| .init_single_var_mem_plan(this, fixed_alloc); | |||
| return m_mem_plan; | |||
| @@ -477,7 +477,7 @@ void VarNode::modify_flag(Flag delta, Flag new_flag) { | |||
| Flag::NO_SYS_STATIC_MEM_ALLOC | | |||
| Flag::RT_FORCE_DYNAMIC_MEM_ALLOC)) == delta); | |||
| mgb_assert(!static_cast<ComputingGraphImpl*>(owner_graph())-> | |||
| mgb_assert(!ComputingGraphImpl::downcast(owner_graph())-> | |||
| var_node_mem_manager().optimize_started(), | |||
| "could not modify var flags after optimization started"); | |||
| } | |||
| @@ -340,7 +340,7 @@ VarNodeMemManager::DynamicAllocOprInfo::DynamicAllocOprInfo( | |||
| prev_dev_val_input.clear(); | |||
| static_infer_inp.clear(); | |||
| dev_val_input.clear(); | |||
| auto &&mgr = static_cast<ComputingGraphImpl*>(opr->owner_graph())-> | |||
| auto &&mgr = ComputingGraphImpl::downcast(opr->owner_graph())-> | |||
| static_infer_manager_impl(); | |||
| CompNode single_cn; | |||
| @@ -73,7 +73,7 @@ void VarDevMemDefragmenter::defrag(VarNode* req_var, | |||
| const CompNodeInfo& cn_info, | |||
| size_t extra_size) { | |||
| // pause all other comp nodes before calling defrag_impl() | |||
| auto exec_env = static_cast<ComputingGraphImpl*>(req_var->owner_graph()) | |||
| auto exec_env = ComputingGraphImpl::downcast(req_var->owner_graph()) | |||
| ->current_exec_env(); | |||
| mgb_assert(exec_env); | |||
| exec_env->pause_exec(); | |||