You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

graph_rt.cpp 29 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. /**
  2. * \file imperative/python/src/graph_rt.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "./graph_rt.h"
  12. #include "megbrain/graph/cg.h"
  13. #include "megbrain/serialization/serializer.h"
  14. #include "megbrain/imperative/opr_utility.h"
  15. #include "megbrain/opr/io.h"
  16. #include "megbrain/opr/utility.h"
  17. #include "megbrain/opr/basic_arith.h"
  18. #include "megbrain/imperative.h"
  19. #include "./helper.h"
  20. #include "megbrain/plugin/profiler.h"
  21. #include "./common.h"
  22. #include "./ops.h"
  23. #include "megbrain/gopt/inference.h"
  24. #include "megbrain/imperative/profiler_plugin.h"
  25. namespace py = pybind11;
  26. using namespace mgb;
  27. using namespace imperative;
  28. namespace ser = mgb::serialization;
  29. using _OptimizeForInferenceOptions = mgb::gopt::OptimizeForInferenceOptions;
  30. using _LayoutTransform = _OptimizeForInferenceOptions::LayoutTransform;
  31. using _AlgoStrategy = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
  32. using _SerializationMetadata = mgb::serialization::Metadata;
  33. namespace {
  34. class _CompGraphProfilerImpl {
  35. std::shared_ptr<ComputingGraph> m_comp_graph;
  36. GraphProfiler m_profiler;
  37. public:
  38. _CompGraphProfilerImpl(std::shared_ptr<ComputingGraph> cg):
  39. m_comp_graph{cg},
  40. m_profiler{m_comp_graph.get()}
  41. {
  42. }
  43. std::string _get_result() {
  44. auto json = m_profiler.to_json_full(
  45. m_comp_graph->current_comp_seq());
  46. return json->to_string();
  47. }
  48. };
//! A vector of weak references to rendezvous objects that can be attached to
//! a UserDataContainer (it is stored in the graph options' user_data by
//! output_callback and read back by AsyncExecutable._all_rendezvous).
struct WeakRendezvousArray:
        public std::vector<std::weak_ptr<RendezvousBase>>,
        public UserDataContainer::UserData {
    MGB_TYPEINFO_OBJ_DECL;
};
MGB_TYPEINFO_OBJ_IMPL(WeakRendezvousArray);
  55. }
//! Expands to a chained `.def_readwrite("name", &CURRENT_CLASS::name)` call.
//! CURRENT_CLASS must be #define'd to the class being bound before each use.
#define DEF_READWRITE(name) .def_readwrite(#name, &CURRENT_CLASS::name)
  57. template<typename T>
  58. auto def_rendezvous(py::object m, const char* name) {
  59. return py::class_<Rendezvous<T>, std::shared_ptr<Rendezvous<T>>>(m, name)
  60. .def(py::init([](){return Rendezvous<T>::make();}))
  61. .def("set", [](Rendezvous<T>& r, T v) {r.set(std::move(v));})
  62. .def("get", [](Rendezvous<T>& r) {return r.get();}, py::call_guard<py::gil_scoped_release>())
  63. .def("drop", &Rendezvous<T>::drop)
  64. .def("reset", &Rendezvous<T>::reset)
  65. .def("set_exception", [](Rendezvous<T>& r, std::string&& message) {
  66. r.set_exception(std::make_exception_ptr(
  67. std::runtime_error(std::move(message))));
  68. });
  69. }
//! Tensor attribute record delivered by attr_output_callback.
using TensorAttr = LogicalTensorDesc;
//! Host tensor value paired with a comp-node event (see value_output_callback,
//! which records the event right after issuing the copy).
using HostNDWithEvent = std::pair<HostTensorND, std::shared_ptr<CompNode::Event>>;
  72. std::vector<mgb::cg::VarNode*> _replace_vars(const std::vector<mgb::cg::VarNode*>& repl_src,
  73. const std::vector<mgb::cg::VarNode*>& repl_dst,
  74. const std::vector<mgb::cg::VarNode*>& vars) {
  75. mgb::ThinHashMap<SymbolVar, SymbolVar> varmap;
  76. for (size_t i = 0; i < repl_src.size(); ++i) {
  77. varmap[SymbolVar(repl_src[i])] = SymbolVar(repl_dst[i]);
  78. }
  79. SymbolVarArray symvars(vars.begin(), vars.end());
  80. auto sym_result = mgb::cg::replace_vars(symvars, varmap);
  81. std::vector<mgb::cg::VarNode*> result;
  82. for (auto symvar : sym_result){
  83. result.push_back(symvar.node());
  84. }
  85. return result;
  86. }
  87. typedef std::vector<mgb::cg::OperatorNodeBase*> OperatorArray;
  88. std::vector<mgb::cg::VarNode*> _replace_oprs(const OperatorArray& repl_src,
  89. const OperatorArray& repl_dst,
  90. const std::vector<mgb::cg::VarNode*>& vars) {
  91. mgb::ThinHashMap<mgb::cg::OperatorNodeBase*, mgb::cg::OperatorNodeBase*>
  92. oprmap;
  93. for (size_t i = 0; i < repl_src.size(); ++i) {
  94. oprmap[repl_src[i]] = repl_dst[i];
  95. }
  96. const SymbolVarArray symvars(vars.begin(), vars.end());
  97. auto sym_result = mgb::cg::replace_oprs(symvars, oprmap);
  98. std::vector<mgb::cg::VarNode*> result;
  99. for (auto symvar : sym_result){
  100. result.push_back(symvar.node());
  101. }
  102. return result;
  103. }
  104. void _set_priority_to_id(const std::vector<mgb::cg::VarNode*>& dest_vars) {
  105. auto on_opr = [](mgb::cg::OperatorNodeBase* opr) {
  106. if (opr->node_prop().attribute().priority == 0) {
  107. opr->node_prop().attribute().priority = opr->id();
  108. }
  109. };
  110. mgb::cg::DepOprIter dep_iter{on_opr};
  111. for (const auto& var : dest_vars) {
  112. dep_iter.add(SymbolVar(var));
  113. }
  114. }
/*!
 * \brief Register the computing-graph runtime bindings on module \p m:
 *        rendezvous classes, graph/var/operator wrappers, graph options,
 *        (de)serialization helpers and callback operators.
 */
void init_graph_rt(py::module m) {
    // used by the "params" property of the OperatorNode binding
    static const std::unique_ptr<mgb::OprFootprint> _imperative_sm_opr_footprint_ptr{std::make_unique<mgb::OprFootprint>()};

    // one Python rendezvous class per payload type
    def_rendezvous<DeviceTensorND>(m, "DeviceTensorNDRendezvous");

    def_rendezvous<HostNDWithEvent>(m, "HostTensorNDRendezvous");

    def_rendezvous<TensorAttr>(m, "TensorAttrRendezvous");
  120. py::class_<cg::VarNode, GraphNodePtr<cg::VarNode>>(m, "VarNode")
  121. .def_property_readonly("owner", [](cg::VarNode* v) {return v->owner_opr();})
  122. .def_property_readonly("graph", [](cg::VarNode* v) {return v->owner_graph();})
  123. .def_property("name", py::overload_cast<>(&VarNode::name, py::const_),
  124. py::overload_cast<std::string>(&VarNode::name))
  125. .def_property_readonly("dtype", [](cg::VarNode* v) {return v->dtype();})
  126. .def_property_readonly("comp_node", [](cg::VarNode* v) {return v->comp_node();})
  127. .def_property_readonly("shape", [](cg::VarNode* v) -> const TensorShape* {
  128. auto&& mgr = v->owner_graph()->static_infer_manager();
  129. return mgr.infer_shape_fallible(v);
  130. })
  131. .def_property_readonly("value", [](cg::VarNode* v) -> py::object {
  132. auto&& mgr = v->owner_graph()->static_infer_manager();
  133. auto&& type = mgr.get_infer_type(v);
  134. using InferType = cg::static_infer::InferType;
  135. if (!(type.value & (InferType::CONST | InferType::RT_STATIC))) {
  136. return py::none();
  137. }
  138. auto* val = mgr.infer_value_fallible(v);
  139. if (!val) {
  140. return py::none();
  141. }
  142. return py::cast(*val).attr("numpy")();
  143. })
  144. .def_property_readonly("id",[](cg::VarNode* v){
  145. return (v->id());
  146. })
  147. .def("__repr__", [](cg::VarNode* v) {
  148. return "Var:" + v->name();
  149. });
  150. py::class_<cg::OperatorNodeBase, GraphNodePtr<cg::OperatorNodeBase>>(m, "OperatorNode")
  151. .def_property_readonly("graph", [](cg::OperatorNodeBase* opr) {return opr->owner_graph();})
  152. .def_property("name", py::overload_cast<>(&cg::OperatorNodeBase::name, py::const_),
  153. py::overload_cast<std::string>(&cg::OperatorNodeBase::name))
  154. .def_property_readonly("inputs", [](cg::OperatorNodeBase* opr) {
  155. return to_tuple(opr->input());
  156. })
  157. .def_property_readonly("outputs", [](cg::OperatorNodeBase* opr) {
  158. return to_tuple(opr->usable_output());
  159. })
  160. .def_property_readonly("id",[](cg::OperatorNodeBase* opr){
  161. return opr->id();
  162. })
  163. .def_property_readonly("params",[](cg::OperatorNodeBase* opr){
  164. return _imperative_sm_opr_footprint_ptr->calc_footprint(opr).param->to_string();
  165. })
  166. .def_property_readonly("type",[](cg::OperatorNodeBase* opr){
  167. return opr->dyn_typeinfo()->name;
  168. })
  169. .def("__repr__", [](cg::OperatorNodeBase* opr){
  170. return "Opr:" + opr->name();
  171. })
  172. .def_property("priority",
  173. [](cg::OperatorNodeBase* opr) {
  174. return opr->node_prop().attribute().priority;
  175. },
  176. [](cg::OperatorNodeBase* opr, int priority) {
  177. opr->node_prop().attribute().priority = priority;
  178. });
  179. py::class_<cg::AsyncExecutable>(m, "AsyncExecutable")
  180. .def("execute", &cg::AsyncExecutable::execute, py::call_guard<py::gil_scoped_release>())
  181. .def("wait", &cg::AsyncExecutable::wait, py::call_guard<py::gil_scoped_release>())
  182. .def("get_prev_exec_time", &cg::AsyncExecutable::get_prev_exec_time, py::call_guard<py::gil_scoped_release>())
  183. .def("_to_json", [](cg::AsyncExecutable* exec) {
  184. py::call_guard<py::gil_scoped_release>();
  185. // dump currently compiled computing graph for debugging
  186. return exec->to_json()->to_string();
  187. })
  188. // only used for exception handle
  189. .def_property_readonly("_all_rendezvous", [](cg::AsyncExecutable* exec) {
  190. auto ud = exec->owner_graph()->options().user_data
  191. .get_user_data<WeakRendezvousArray>();
  192. std::vector<std::shared_ptr<RendezvousBase>> ret;
  193. if (ud.second) {
  194. for (auto&& r: *ud.first[0]) {
  195. if (auto p = r.lock()) {
  196. ret.emplace_back(std::move(p));
  197. }
  198. }
  199. }
  200. return ret;
  201. })
  202. .def("get_static_memory_alloc_info",
  203. &cg::AsyncExecutable::get_static_memory_alloc_info,
  204. py::call_guard<py::gil_scoped_release>());
  205. auto PyComputingGraph = py::class_<cg::ComputingGraph, std::shared_ptr<cg::ComputingGraph>>(m, "ComputingGraph")
  206. .def(py::init(py::overload_cast<>(&cg::ComputingGraph::make)))
  207. .def("compile", [](cg::ComputingGraph& graph, const std::vector<cg::VarNode*>& dest_vars) {
  208. mgb_assert(!dest_vars.empty());
  209. cg::ComputingGraph::OutputSpec spec;
  210. for (auto v : dest_vars) {
  211. spec.emplace_back(v, nullptr);
  212. }
  213. return graph.compile(spec);
  214. })
  215. .def_property_readonly("options", py::overload_cast<>(&cg::ComputingGraph::options));
    // GraphProfiler: Python wrapper over _CompGraphProfilerImpl; get() returns
    // the profiling result as a JSON string.
    py::class_<_CompGraphProfilerImpl, std::shared_ptr<_CompGraphProfilerImpl>>(m, "GraphProfiler")
        .def(py::init([](std::shared_ptr<ComputingGraph> graph) {
            return std::make_shared<_CompGraphProfilerImpl>(graph);
        }))
        .def("get", [](_CompGraphProfilerImpl& profiler) { return profiler._get_result(); });

    // GraphProfiler2: exposes ProfilerPlugin, constructed from a graph pointer.
    using interpreter::intl::ProfilerPlugin;
    py::class_<ProfilerPlugin, std::shared_ptr<ProfilerPlugin>>(m, "GraphProfiler2")
        .def(py::init<cg::ComputingGraph*>());
    // GraphOptimizeOptions: field-by-field binding of
    // gopt::OptimizeForInferenceOptions, plus serialize/deserialize.
    auto GraphOptimizeOptions = py::class_<_OptimizeForInferenceOptions>(m, "GraphOptimizeOptions")
        .def(py::init())
        .def("serialize", &_OptimizeForInferenceOptions::serialize)
        .def_static("deserialize", &_OptimizeForInferenceOptions::deserialize)
        .def_readwrite("f16_io_f32_comp", &_OptimizeForInferenceOptions::f16_io_f32_comp)
        .def_readwrite("f16_io_comp", &_OptimizeForInferenceOptions::f16_io_comp)
        .def_readwrite("fuse_conv_bias_nonlinearity", &_OptimizeForInferenceOptions::fuse_conv_bias_nonlinearity)
        .def_readwrite("fuse_conv_bias_with_z", &_OptimizeForInferenceOptions::fuse_conv_bias_with_z)
        .def_readwrite("fuse_preprocess", &_OptimizeForInferenceOptions::fuse_preprocess)
        .def_readwrite("layout_transform", &_OptimizeForInferenceOptions::layout_transform)
        ;

    // LayoutTransform enum, nested in GraphOptimizeOptions; export_values()
    // also makes the enumerators attributes of the enclosing class.
    py::enum_<_LayoutTransform>(GraphOptimizeOptions, "LayoutTransform")
        .value("DEFAULT", _LayoutTransform::DEFAULT)
        .value("NCHW4", _LayoutTransform::NCHW4)
        .value("NHWCD4", _LayoutTransform::NHWCD4)
        .value("NCHW88", _LayoutTransform::NCHW88)
        .value("NCHW44", _LayoutTransform::NCHW44)
        .value("NCHW44_DOT", _LayoutTransform::NCHW44_DOT)
        .value("NCHW32", _LayoutTransform::NCHW32)
        .value("CHWN4", _LayoutTransform::CHWN4)
        .value("NCHW64", _LayoutTransform::NCHW64)
        .export_values()
        ;
  247. m.def("optimize_for_inference", [](const VarNodeArray& dest_vars, const _OptimizeForInferenceOptions& opt) {
  248. SymbolVarArray symvars(dest_vars.begin(), dest_vars.end());
  249. auto res_symvars = mgb::gopt::optimize_for_inference(symvars, opt);
  250. VarNodeArray vars;
  251. for (auto& si: res_symvars)
  252. vars.push_back(si.node());
  253. return vars;
  254. });
  255. m.def("modify_opr_algo_strategy_inplace", [](const VarNodeArray& dest_vars,
  256. const _AlgoStrategy& strategy) {
  257. mgb::gopt::modify_opr_algo_strategy_inplace(dest_vars, strategy);
  258. });
  259. m.def("get_info_for_strip", [](const std::vector<VarNode*>& dest_vars) {
  260. std::unordered_set<const char*> opr_types, dtype_names, elemwise_modes;
  261. auto on_opr = [&](cg::OperatorNodeBase *opr) {
  262. if (ser::GraphDumper::should_remove_in_dump(opr))
  263. return;
  264. opr_types.insert(opr->dyn_typeinfo()->name);
  265. for (auto i : opr->output())
  266. dtype_names.insert(i->dtype().name());
  267. if (opr->same_type<opr::Elemwise>()) {
  268. auto mode = opr->cast_final<opr::Elemwise>().param().mode;
  269. elemwise_modes.insert(
  270. megdnn::Elemwise::ModeTrait::from_mode(mode).name);
  271. }
  272. };
  273. cg::DepOprIter opr_iter{on_opr};
  274. for (auto i : dest_vars)
  275. opr_iter.add(i->owner_opr());
  276. auto to_json = [](const std::unordered_set<const char*> &v) {
  277. std::vector<std::string> vs(v.begin(), v.end());
  278. std::sort(vs.begin(), vs.end());
  279. auto ret = json::Array::make();
  280. for (auto &&i : vs)
  281. ret->add(json::String::make(i));
  282. return ret;
  283. };
  284. return json::Object::make({
  285. {"opr_types", to_json(opr_types)},
  286. {"dtypes", to_json(dtype_names)},
  287. {"elemwise_modes", to_json(elemwise_modes)},
  288. })->to_string();
  289. });
    // SerializationMetadata: binding of serialization::Metadata.
    py::class_<_SerializationMetadata>(m, "SerializationMetadata")
        .def(py::init())
        // user_info is returned to Python as bytes
        .def_property("user_info", [](const _SerializationMetadata& meta){return py::bytes(meta.get_user_info()); },
                      &_SerializationMetadata::set_user_info)
        // read-only from Python
        .def_readonly("optimized_for_inference", &_SerializationMetadata::optimized_for_inference)
        .def_property("optimize_options", &_SerializationMetadata::get_optimize_options,
                      &_SerializationMetadata::set_optimize_options)
        .def_readwrite("graph_modified", &_SerializationMetadata::graph_modified)
        .def_readwrite("is_valid", &_SerializationMetadata::is_valid)
        ;
  300. m.def("dump_graph", [](
  301. const std::vector<VarNode*>& dest_vars,
  302. int keep_var_name,
  303. bool keep_opr_name,
  304. bool keep_param_name,
  305. bool keep_opr_priority,
  306. std::optional<_SerializationMetadata> metadata,
  307. py::list& stat,
  308. py::list& inputs,
  309. py::list& outputs,
  310. py::list& params
  311. ) {
  312. std::vector<uint8_t> buf;
  313. auto dumper = ser::GraphDumper::make(ser::OutputFile::make_vector_proxy(&buf));
  314. SymbolVarArray symvars(dest_vars.begin(), dest_vars.end());
  315. ser::GraphDumper::DumpConfig config{keep_var_name, keep_param_name,
  316. keep_opr_priority, keep_opr_name};
  317. ser::GraphDumper::DumpResult rst;
  318. if (metadata)
  319. rst = dumper->dump(symvars, config, *metadata);
  320. else
  321. rst = dumper->dump(symvars, config);
  322. for (auto i : rst.inputs) {
  323. inputs.append(py::cast(i));
  324. }
  325. for (auto i : rst.outputs) {
  326. outputs.append(py::cast(i));
  327. }
  328. for (auto i : rst.params) {
  329. params.append(py::cast(i));
  330. }
  331. auto rst_stat =
  332. std::vector{rst.nr_opr, rst.tot_bytes, rst.tensor_value_bytes,
  333. static_cast<size_t>(rst.content_hash)};
  334. for (auto i : rst_stat) {
  335. stat.append(py::cast(i));
  336. }
  337. return py::bytes(reinterpret_cast<const char*>(&buf[0]), buf.size());
  338. });
  339. m.def("load_graph", [](
  340. std::string& buf,
  341. py::list& output_var_map,
  342. py::list& output_var_list
  343. ) {
  344. auto file = ser::InputFile::make_mem_proxy(buf.c_str(), buf.length());
  345. auto format = ser::GraphLoader::identify_graph_dump_format(*file);
  346. auto loader = ser::GraphLoader::make(std::move(file), format.val());
  347. ser::GraphLoader::LoadConfig config;
  348. auto rst = loader->load(config);
  349. for (auto i : rst.output_var_map) {
  350. output_var_map.append(py::make_tuple(i.first, i.second.node()));
  351. }
  352. for (auto i : rst.output_var_list) {
  353. output_var_list.append(i.node());
  354. }
  355. std::unordered_map<HostTensorND*, const std::string*> tensor2name;
  356. for (const auto& pair : rst.tensor_map) {
  357. tensor2name[pair.second.get()] = &pair.first;
  358. }
  359. auto cb = [&tensor2name, graph=rst.graph](cg::OperatorNodeBase* opr) {
  360. if (!opr->same_type<opr::Host2DeviceCopy>())
  361. return;
  362. auto& h2d = opr->cast_final_safe<opr::Host2DeviceCopy>();
  363. auto it = tensor2name.find(h2d.host_data().get());
  364. mgb_throw_if(it == tensor2name.end(), GraphError,
  365. "unbound Host2DeviceCopy in loaded graph");
  366. h2d.output(0)->name(*it->second);
  367. };
  368. cg::DepOprIter iter{cb};
  369. for (const auto& var : rst.output_var_list) {
  370. iter.add(var);
  371. }
  372. auto ret = py::tuple(2);
  373. ret[0] = py::cast(rst.graph);
  374. ret[1] = py::cast(rst.metadata);
  375. return ret;
  376. });
    // ComputingGraph.Options and its nested option structs. Each section
    // points CURRENT_CLASS at the struct being bound so that DEF_READWRITE
    // can expose its fields by name.
#define CURRENT_CLASS cg::ComputingGraph::Options

    auto PyComputingGraphOptions = py::class_<cg::ComputingGraph::Options>(PyComputingGraph, "Options")
        // DEF_READWRITE(opr_attribute)
        DEF_READWRITE(seq_opt)
        DEF_READWRITE(graph_opt)
        DEF_READWRITE(graph_opt_level)
        DEF_READWRITE(log_level)
        DEF_READWRITE(async_exec_level)
        DEF_READWRITE(force_dynamic_alloc)
        DEF_READWRITE(var_sanity_check_first_run)
        DEF_READWRITE(allocate_static_mem_after_graph_compile)
        DEF_READWRITE(fake_next_exec)
        DEF_READWRITE(enable_sublinear_memory_opt)
        DEF_READWRITE(enable_dtr_memory_opt)
        DEF_READWRITE(no_profiling_on_shape_change)
        DEF_READWRITE(enable_var_mem_defragment)
        DEF_READWRITE(enable_grad_var_static_reshape)
        DEF_READWRITE(enable_memory_swap)
        DEF_READWRITE(comp_node_seq_record_level)
        DEF_READWRITE(no_force_inplace)
        DEF_READWRITE(sublinear_mem_config)
        DEF_READWRITE(dtr_config)
        // DEF_READWRITE(eager_evaluation)
        // DEF_READWRITE(imperative_proxy_graph)
        // DEF_READWRITE(extra_vardeps)
        // DEF_READWRITE(user_data)
        ;

#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::SeqOpt

    // Options.SeqOpt
    py::class_<cg::ComputingGraph::Options::SeqOpt>(PyComputingGraphOptions, "SeqOpt")
        DEF_READWRITE(enable_mem_plan_opt)
        DEF_READWRITE(enable_mem_reuse_alloc)
        DEF_READWRITE(enable_seq_comp_node_opt);

#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::GraphOpt

    // Options.GraphOpt; the class object is kept so JITConfig can nest in it
    auto PyGraphOpt = py::class_<cg::ComputingGraph::Options::GraphOpt>(
            PyComputingGraphOptions, "GraphOpt")
        DEF_READWRITE(jit)
        DEF_READWRITE(jit_config)
        DEF_READWRITE(tensorrt);

#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::GraphOpt::JITConfig

    // Options.GraphOpt.JITConfig
    py::class_<cg::ComputingGraph::Options::GraphOpt::JITConfig>(PyGraphOpt, "JITConfig")
        DEF_READWRITE(fuse_dimshuffle)
        DEF_READWRITE(fuse_reduce);

#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::SublinearMemConfig

    // Options.SublinearMemConfig
    py::class_<cg::ComputingGraph::Options::SublinearMemConfig>(PyComputingGraphOptions, "SublinearMemConfig")
        DEF_READWRITE(thresh_nr_try)
        DEF_READWRITE(genetic_nr_iter)
        DEF_READWRITE(genetic_pool_size)
        DEF_READWRITE(lb_memory_mb)
        DEF_READWRITE(num_worker);

#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::DTRConfig

    // Options.DTRConfig
    py::class_<cg::ComputingGraph::Options::DTRConfig>(PyComputingGraphOptions, "DTRConfig")
        DEF_READWRITE(eviction_threshold)
        DEF_READWRITE(evictee_minimum_size);

#undef CURRENT_CLASS
  436. auto common = rel_import("common", m, 1);
  437. common.def("invoke_op", [](const OpDef& def, const std::vector<cg::VarNode*> inputs, cg::ComputingGraph* graph) {
  438. cg::VarNodeArray vinputs(inputs.begin(), inputs.end());
  439. return to_tuple(OpDef::apply_on_var_node(def, vinputs));
  440. },
  441. py::arg(), py::arg(), py::arg("graph") = py::none());
  442. auto input_callback = [](auto callback,
  443. const CompNode& comp_node,
  444. const DType& dtype,
  445. const TensorShape& shape,
  446. const std::vector<cg::VarNode*>& inputs,
  447. cg::ComputingGraph* graph,
  448. bool use_static_shape) {
  449. if (!graph) {
  450. graph = inputs[0]->owner_graph();
  451. }
  452. SymbolVarArray sinputs;
  453. for (auto i : inputs) {
  454. sinputs.emplace_back(i);
  455. }
  456. static_assert(!std::is_reference<decltype(callback)>::value);
  457. auto soutputs = opr::InputCallback::make(*graph, std::move(callback),
  458. comp_node, dtype, shape,
  459. sinputs, use_static_shape);
  460. std::vector<VarNode*> outputs;
  461. outputs.reserve(soutputs.size());
  462. for (auto i : soutputs) {
  463. outputs.push_back(i.node());
  464. }
  465. return outputs;
  466. };
  467. m.def("make_shared", [](cg::ComputingGraph* graph, const DeviceTensorND& data) {
  468. return opr::SharedDeviceTensor::make(*graph, std::make_shared<DeviceTensorND>(data)).node();
  469. });
  470. m.def("make_const", [](cg::ComputingGraph* graph, py::array data, CompNode cn, DType dtype, std::optional<std::string> name) {
  471. if (!cn.valid()) {
  472. cn = CompNode::load(get_default_device());
  473. }
  474. OperatorNodeConfig config(cn);
  475. if (name) {
  476. config.name(*name);
  477. }
  478. auto hv = npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype);
  479. return opr::ImmutableTensor::make(*graph, hv, config).node();
  480. }, py::arg(), py::arg(), py::arg(), py::arg(), py::arg() = py::none());
  481. m.def("make_h2d", [](cg::ComputingGraph& graph, CompNode cn, DType dtype, TensorShape shape, std::optional<std::string> name) {
  482. if (!cn.valid()) {
  483. throw py::type_error("device must be valid");
  484. }
  485. if (!dtype.valid()) {
  486. throw py::type_error("dtype must be valid");
  487. }
  488. OperatorNodeConfig config;
  489. if (name) {
  490. config.name(*name);
  491. }
  492. return opr::Host2DeviceCopy::make(graph, std::make_shared<HostTensorND>(cn, shape, dtype), config).node();
  493. }, py::arg(), py::arg(), py::arg(), py::arg() = py::none(), py::arg() = py::none());
  494. m.def("_replace_vars", &_replace_vars,py::arg(),py::arg(),py::arg());
  495. m.def("_replace_oprs", &_replace_oprs,py::arg(),py::arg(),py::arg());
  496. m.def("_set_priority_to_id",&_set_priority_to_id,py::arg());
  497. m.def("input_callback", [input_callback](std::function<DeviceTensorND(void)> callback,
  498. const CompNode& comp_node,
  499. const DType& dtype,
  500. const TensorShape& shape,
  501. const std::vector<cg::VarNode*>& inputs,
  502. cg::ComputingGraph* graph,
  503. bool use_static_shape) {
  504. return input_callback(
  505. [f=std::move(callback)](){py::gil_scoped_acquire _; return f();},
  506. comp_node, dtype, shape, inputs, graph, use_static_shape);
  507. },
  508. py::arg(), py::arg(), py::arg(), py::arg() = py::none(), py::arg() = py::tuple(),
  509. py::arg("graph") = py::none(), py::arg("use_static_shape") = false);
  510. m.def("input_callback", [input_callback](std::shared_ptr<Rendezvous<DeviceTensorND>> p,
  511. const CompNode& comp_node,
  512. const DType& dtype,
  513. const TensorShape& shape,
  514. const std::vector<cg::VarNode*>& inputs,
  515. cg::ComputingGraph* graph,
  516. bool use_static_shape) {
  517. auto f = [p]() -> DeviceTensorND {
  518. return p->get();
  519. };
  520. return input_callback(std::move(f), comp_node, dtype, shape, inputs, graph, use_static_shape);
  521. },
  522. py::arg(), py::arg(), py::arg(), py::arg() = py::none(), py::arg() = py::tuple(),
  523. py::arg("graph") = py::none(), py::arg("use_static_shape") = false);
  524. auto output_callback = [](auto callback, const std::vector<cg::VarNode*>& inputs,
  525. std::shared_ptr<RendezvousBase> r = {}, bool borrow = false, bool prefer_host_value = false) {
  526. if (r) {
  527. mgb_assert(inputs.size());
  528. auto cg = inputs[0]->owner_graph();
  529. cg->options().user_data.get_user_data_or_create<WeakRendezvousArray>()
  530. ->emplace_back(r);
  531. }
  532. SymbolVarArray sinputs;
  533. for (auto i : inputs) {
  534. sinputs.emplace_back(i);
  535. }
  536. static_assert(!std::is_reference<decltype(callback)>::value);
  537. opr::OutputCallback::Param param{std::move(callback), borrow, prefer_host_value};
  538. auto output = opr::OutputCallback::make(std::move(param), sinputs);
  539. return output.node();
  540. };
  541. m.def("output_callback", [output_callback](std::function<void(DeviceTensorND)> callback, std::vector<cg::VarNode*> inputs) {
  542. auto f = [f=std::move(callback)](DeviceTensorND dv) {
  543. auto task = [f=std::move(f), dv=std::move(dv)]() {
  544. f(dv);
  545. };
  546. py_task_q.add_task(std::move(task));
  547. };
  548. return output_callback(std::move(f), std::move(inputs));
  549. });
  550. m.def("output_callback", [output_callback](std::shared_ptr<Rendezvous<DeviceTensorND>> p, std::vector<cg::VarNode*> inputs) {
  551. auto f = [p](DeviceTensorND dv) {
  552. p->set(std::move(dv));
  553. };
  554. return output_callback(std::move(f), std::move(inputs), p);
  555. });
  556. m.def("value_output_callback", [output_callback](std::shared_ptr<Rendezvous<HostNDWithEvent>> p, std::vector<cg::VarNode*> inputs) {
  557. auto f = [p](DeviceTensorND dv) {
  558. HostNDWithEvent hv_with_event;
  559. hv_with_event.first.copy_from(dv);
  560. hv_with_event.second = dv.comp_node().create_event();
  561. hv_with_event.second->record();
  562. p->set(std::move(hv_with_event));
  563. };
  564. return output_callback(std::move(f), std::move(inputs), p, true, true);
  565. });
  566. m.def("attr_output_callback", [output_callback](std::shared_ptr<Rendezvous<TensorAttr>> p, std::vector<cg::VarNode*> inputs) {
  567. auto f = [p](DeviceTensorND dv) {
  568. p->set(TensorAttr{TensorLayout{dv.shape(), dv.dtype()}, dv.comp_node()});
  569. };
  570. return output_callback(std::move(f), std::move(inputs), p, true);
  571. });
  572. m.def("virtual_dep", [](std::vector<cg::VarNode*> inputs, std::string device) {
  573. auto&& graph = inputs[0]->owner_graph();
  574. VarNodeArray inps(inputs.begin(), inputs.end());
  575. cg::OperatorNodeConfig config;
  576. if (device.length() > 0) {
  577. config.comp_node(CompNode::load(device));
  578. }
  579. cg::OperatorNodeBase* opr = graph->insert_opr(
  580. std::make_unique<mgb::opr::VirtualDep>(inps, config));
  581. return opr;
  582. });
  583. }

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台