You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

imperative_rt.cpp 3.9 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. #include "./imperative_rt.h"
  2. #include <future>
  3. #include <variant>
  4. #include <unordered_map>
  5. #include <pybind11/numpy.h>
  6. #include <pybind11/operators.h>
  7. #include "megbrain/imperative.h"
  8. #include "megbrain/imperative/interpreter.h"
  9. #include "megbrain/imperative/ops/opr_attr.h"
  10. #include "./helper.h"
  11. namespace py = pybind11;
  12. using namespace mgb;
  13. using namespace imperative;
  14. using namespace interpreter;
  15. namespace {
  16. std::optional<std::tuple<std::shared_ptr<OpDef>, std::vector<bool>, std::vector<bool>>>
  17. make_backward_graph(
  18. const OpDef& opdef, std::vector<LogicalTensorDesc> inputs,
  19. std::vector<bool> input_requires_grad,
  20. std::vector<bool> output_has_grad) {
  21. auto res = OpDef::make_backward_graph(opdef,
  22. SmallVector<LogicalTensorDesc>(inputs.begin(), inputs.end()),
  23. SmallVector<bool>(input_requires_grad.begin(), input_requires_grad.end()),
  24. SmallVector<bool>(output_has_grad.begin(), output_has_grad.end()));
  25. if (res.backward) {
  26. return std::optional<std::tuple<std::shared_ptr<OpDef>, std::vector<bool>, std::vector<bool>>>{
  27. std::in_place, res.backward, res.save_for_backward, res.input_has_grad};
  28. } else {
  29. return {};
  30. }
  31. }
  32. } // namespace
  33. void init_imperative_rt(py::module m) {
  34. py::class_<Interpreter::Channel>(m, "Interpreter")
  35. .def("put", [](Interpreter::Channel& self, py::array data, DType dtype, CompNode cn) {
  36. if (!cn.valid()) {
  37. cn = CompNode::load("xpux");
  38. }
  39. constexpr int size_threshhold = TensorShape::MAX_NDIM;
  40. if (data.size() > size_threshhold) {
  41. return self.put(npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype));
  42. } else {
  43. HostTensorND ret(cn);
  44. return self.put(npy::np2tensor(data.ptr(), npy::Meth::copy_into(&ret), dtype));
  45. }
  46. }, py::arg(), py::arg("dtype") = py::none(), py::arg("device") = py::none())
  47. .def("delete", [](Interpreter::Channel& self, Interpreter::Handle handle) {
  48. return self.del(handle);
  49. })
  50. .def("get_value", [](Interpreter::Channel& self, Interpreter::Handle handle) {
  51. PyObject* optr = npy::ndarray_from_tensor(self.get_value(handle), npy::ShareType::TRY_SHARE);
  52. return py::reinterpret_steal<py::object>(optr);
  53. })
  54. .def("get_dtype", &Interpreter::Channel::get_dtype)
  55. .def("get_device", &Interpreter::Channel::get_device)
  56. .def("get_shape", &Interpreter::Channel::get_shape)
  57. .def("_get_dev_tensor", &Interpreter::Channel::get_dev_tensor)
  58. .def("apply_op", &Interpreter::Channel::apply_op)
  59. .def("sync", &Interpreter::Channel::sync);
  60. std::unique_ptr<Interpreter::Channel> ch = Interpreter::inst().create_channel();
  61. m.attr("interpreter") = py::detail::make_caster<decltype(ch)>::cast(
  62. std::move(ch), py::return_value_policy::move, {});
  63. for (auto name : {"put", "delete", "get_value", "get_dtype", "get_device", "get_shape", "_get_dev_tensor", "apply_op"}) {
  64. m.attr(name) = m.attr("interpreter").attr(name);
  65. }
  66. m.def("sync", [m]() {
  67. m.attr("interpreter").attr("sync")();
  68. py_task_q.wait_all_task_finish();
  69. });
  70. m.def("make_backward_graph", &make_backward_graph);
  71. py::class_<OpDef, std::shared_ptr<OpDef>>(m, "OpDef")
  72. .def("ctype", [](const OpDef& opdef) {
  73. if (auto attr = opdef.try_cast_final<OprAttr>()) {
  74. return attr->type.c_str();
  75. }
  76. return opdef.dyn_typeinfo()->name;
  77. })
  78. .def("__eq__", [](const OpDef& lhs, const OpDef& rhs) {
  79. return lhs.is_same(rhs);
  80. })
  81. .def("__hash__", &OpDef::hash);
  82. }

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台