
graph_rt.cpp

/**
 * \file imperative/python/src/graph_rt.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "./graph_rt.h"

#include "megbrain/imperative/opr_utility.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/imperative.h"
#include "./helper.h"

namespace py = pybind11;

using namespace mgb;
using namespace imperative;

#define DEF_READWRITE(name) .def_readwrite(#name, &CURRENT_CLASS::name)

// Expose Rendezvous<T> (a set/get handoff used to pass values between graph
// execution and Python) as a Python class.
template<typename T>
auto def_rendezvous(py::object m, const char* name) {
    return py::class_<Rendezvous<T>, std::shared_ptr<Rendezvous<T>>>(m, name)
        .def(py::init([](){ return std::make_shared<Rendezvous<T>>(); }))
        .def("set", [](Rendezvous<T>& r, T v) { r.set(std::move(v)); })
        .def("get", [](Rendezvous<T>& r) { return r.get(); }, py::call_guard<py::gil_scoped_release>())
        .def("reset", &Rendezvous<T>::reset);
}

using TensorAttr = LogicalTensorDesc;

void init_graph_rt(py::module m) {
    def_rendezvous<DeviceTensorND>(m, "DeviceTensorNDRendezvous");
    def_rendezvous<TensorAttr>(m, "TensorAttrRendezvous");

    py::class_<cg::VarNode, GraphNodePtr<cg::VarNode>>(m, "VarNode")
        .def_property_readonly("owner", [](cg::VarNode* v) { return v->owner_opr(); })
        .def_property_readonly("graph", [](cg::VarNode* v) { return v->owner_graph(); })
        .def_property_readonly("dtype", [](cg::VarNode* v) { return v->dtype(); })
        .def_property_readonly("comp_node", [](cg::VarNode* v) { return v->comp_node(); });

    py::class_<cg::OperatorNodeBase, GraphNodePtr<cg::OperatorNodeBase>>(m, "OperatorNode")
        .def_property_readonly("graph", [](cg::OperatorNodeBase* opr) { return opr->owner_graph(); })
        .def_property_readonly("inputs", [](cg::OperatorNodeBase* opr) {
            return to_tuple(opr->input());
        })
        .def_property_readonly("outputs", [](cg::OperatorNodeBase* opr) {
            return to_tuple(opr->output());
        });

    py::class_<cg::AsyncExecutable>(m, "AsyncExecutable")
        .def("execute", &cg::AsyncExecutable::execute, py::call_guard<py::gil_scoped_release>())
        .def("wait", &cg::AsyncExecutable::wait, py::call_guard<py::gil_scoped_release>());

    auto PyComputingGraph = py::class_<cg::ComputingGraph, std::shared_ptr<cg::ComputingGraph>>(m, "ComputingGraph")
        .def(py::init(py::overload_cast<>(&cg::ComputingGraph::make)))
        .def("compile", [](cg::ComputingGraph& graph, const std::vector<cg::VarNode*>& dest_vars) {
            mgb_assert(!dest_vars.empty());
            cg::ComputingGraph::OutputSpec spec;
            for (auto v : dest_vars) {
                spec.emplace_back(v, nullptr);
            }
            return graph.compile(spec);
        })
        .def_property_readonly("options", py::overload_cast<>(&cg::ComputingGraph::options));

#define CURRENT_CLASS cg::ComputingGraph::Options

    auto PyComputingGraphOptions = py::class_<cg::ComputingGraph::Options>(PyComputingGraph, "Options")
        // DEF_READWRITE(opr_attribute)
        DEF_READWRITE(seq_opt)
        DEF_READWRITE(graph_opt)
        DEF_READWRITE(graph_opt_level)
        DEF_READWRITE(log_level)
        DEF_READWRITE(async_exec_level)
        DEF_READWRITE(force_dynamic_alloc)
        DEF_READWRITE(var_sanity_check_first_run)
        DEF_READWRITE(allocate_static_mem_after_graph_compile)
        DEF_READWRITE(fake_next_exec)
        DEF_READWRITE(enable_sublinear_memory_opt)
        DEF_READWRITE(no_profiling_on_shape_change)
        DEF_READWRITE(enable_var_mem_defragment)
        DEF_READWRITE(enable_grad_var_static_reshape)
        DEF_READWRITE(enable_memory_swap)
        DEF_READWRITE(comp_node_seq_record_level)
        // DEF_READWRITE(eager_evaluation)
        // DEF_READWRITE(imperative_proxy_graph)
        // DEF_READWRITE(extra_vardeps)
        // DEF_READWRITE(user_data)
        ;

#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::SeqOpt

    py::class_<cg::ComputingGraph::Options::SeqOpt>(PyComputingGraphOptions, "SeqOpt")
        DEF_READWRITE(enable_mem_plan_opt)
        DEF_READWRITE(enable_mem_reuse_alloc)
        DEF_READWRITE(enable_seq_comp_node_opt);

#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::GraphOpt

    py::class_<cg::ComputingGraph::Options::GraphOpt>(PyComputingGraphOptions, "GraphOpt")
        DEF_READWRITE(jit)
        DEF_READWRITE(tensorrt);

#undef CURRENT_CLASS

    auto common = rel_import("common", m, 1);

    common.def("invoke_op", [](const OpDef& def, const std::vector<cg::VarNode*> inputs, cg::ComputingGraph* graph) {
            cg::VarNodeArray vinputs(inputs.begin(), inputs.end());
            auto opr = OpDef::apply_on_var_node(def, vinputs);
            auto outputs = opr->output();
            return to_tuple(outputs);
        },
        py::arg(), py::arg(), py::arg("graph") = py::none());

    // Shared helper: wrap a callable that produces a DeviceTensorND as an
    // InputCallback operator and return the resulting output VarNodes.
    auto input_callback = [](auto callback,
                             const CompNode& comp_node,
                             const DType& dtype,
                             const std::vector<cg::VarNode*>& inputs,
                             cg::ComputingGraph* graph) {
        if (!graph) {
            graph = inputs[0]->owner_graph();
        }
        SymbolVarArray sinputs;
        for (auto i : inputs) {
            sinputs.emplace_back(i);
        }
        static_assert(!std::is_reference<decltype(callback)>::value);
        auto soutputs = opr::InputCallback::make(*graph, std::move(callback), comp_node, dtype, sinputs);
        std::vector<VarNode*> outputs;
        outputs.reserve(soutputs.size());
        for (auto i : soutputs) {
            outputs.push_back(i.node());
        }
        return outputs;
    };

    m.def("input_callback", [input_callback](std::function<DeviceTensorND(void)> callback,
                                             const CompNode& comp_node,
                                             const DType& dtype,
                                             const std::vector<cg::VarNode*>& inputs,
                                             cg::ComputingGraph* graph) {
            return input_callback([f=std::move(callback)](){ py::gil_scoped_acquire _; return f(); },
                                  comp_node, dtype, inputs, graph);
        },
        py::arg(), py::arg(), py::arg(), py::arg() = py::tuple(), py::arg("graph") = py::none());

    m.def("input_callback", [input_callback](std::shared_ptr<Rendezvous<DeviceTensorND>> p,
                                             const CompNode& comp_node,
                                             const DType& dtype,
                                             const std::vector<cg::VarNode*>& inputs,
                                             cg::ComputingGraph* graph) {
            auto f = [p]() -> DeviceTensorND {
                return p->get();
            };
            return input_callback(std::move(f), comp_node, dtype, inputs, graph);
        },
        py::arg(), py::arg(), py::arg(), py::arg() = py::tuple(), py::arg("graph") = py::none());

    // Shared helper: wrap a callable that consumes a DeviceTensorND as an
    // OutputCallback operator and return its output VarNode.
    auto output_callback = [](auto callback, const std::vector<cg::VarNode*>& inputs, bool borrow = false) {
        SymbolVarArray sinputs;
        for (auto i : inputs) {
            sinputs.emplace_back(i);
        }
        static_assert(!std::is_reference<decltype(callback)>::value);
        opr::OutputCallback::Param param{std::move(callback), borrow};
        auto output = opr::OutputCallback::make(std::move(param), sinputs);
        return output.node();
    };

    m.def("output_callback", [output_callback](std::function<void(DeviceTensorND)> callback, std::vector<cg::VarNode*> inputs) {
        auto f = [f=std::move(callback)](DeviceTensorND dv) {
            auto task = [f=std::move(f), dv=std::move(dv)]() {
                f(dv);
            };
            py_task_q.add_task(std::move(task));
        };
        return output_callback(std::move(f), std::move(inputs));
    });

    m.def("output_callback", [output_callback](std::shared_ptr<Rendezvous<DeviceTensorND>> p, std::vector<cg::VarNode*> inputs) {
        auto f = [p](DeviceTensorND dv) {
            p->set(std::move(dv));
        };
        return output_callback(std::move(f), std::move(inputs));
    });

    m.def("attr_output_callback", [output_callback](std::shared_ptr<Rendezvous<TensorAttr>> p, std::vector<cg::VarNode*> inputs) {
        auto f = [p](DeviceTensorND dv) {
            p->set(TensorAttr{TensorLayout{dv.shape(), dv.dtype()}, dv.comp_node()});
        };
        return output_callback(std::move(f), std::move(inputs), true);
    });
}
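The DEF_READWRITE macro above pairs with a CURRENT_CLASS macro so that each Python attribute name is generated by stringifying the C++ member, keeping the two in sync. A minimal, self-contained sketch of the same pattern is shown below; DemoOptions and the demo_options module name are hypothetical stand-ins, not part of MegEngine.

// Minimal sketch (hypothetical names) of the DEF_READWRITE / CURRENT_CLASS
// pattern used in graph_rt.cpp: the macro stringifies the member name so the
// exposed Python attribute always matches the C++ field.
#include <pybind11/pybind11.h>
namespace py = pybind11;

// Hypothetical struct standing in for cg::ComputingGraph::Options.
struct DemoOptions {
    int log_level = 0;
    bool force_dynamic_alloc = false;
};

#define CURRENT_CLASS DemoOptions
#define DEF_READWRITE(name) .def_readwrite(#name, &CURRENT_CLASS::name)

PYBIND11_MODULE(demo_options, m) {
    py::class_<DemoOptions>(m, "DemoOptions")
        .def(py::init<>())
        DEF_READWRITE(log_level)            // expands to .def_readwrite("log_level", &DemoOptions::log_level)
        DEF_READWRITE(force_dynamic_alloc); // expands likewise for the second field
}

#undef DEF_READWRITE
#undef CURRENT_CLASS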

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU installed along with the appropriate driver. If you would like to try deep learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.