You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

common.cpp 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. #include "./common.h"
  2. #include <pybind11/operators.h>
  3. #include <pybind11/pytypes.h>
  4. #include "./helper.h"
  5. #include "./numpy_dtypes.h"
  6. #include "megbrain/comp_node.h"
  7. #include "megbrain/graph.h"
  8. #include "megbrain/imperative/physical_tensor.h"
  9. #if MEGDNN_WITH_CUDA
  10. #include "cuda_sm_gen.h"
  11. #endif
  12. namespace py = pybind11;
  13. using namespace mgb;
  14. using namespace imperative;
  15. namespace {
  16. template <typename XTensorND>
  17. auto def_TensorND(py::object parent, const char* name) {
  18. return py::class_<XTensorND>(parent, name)
  19. .def_property_readonly(
  20. "shape", py::overload_cast<>(&XTensorND::shape, py::const_))
  21. .def_property_readonly(
  22. "dtype", py::overload_cast<>(&XTensorND::dtype, py::const_))
  23. .def_property_readonly(
  24. "comp_node", py::overload_cast<>(&XTensorND::comp_node, py::const_))
  25. .def("copy_from", &XTensorND::template copy_from<DeviceTensorStorage>)
  26. .def("copy_from", &XTensorND::template copy_from<HostTensorStorage>)
  27. .def("copy_from_fixlayout",
  28. py::overload_cast<const DeviceTensorND&>(
  29. &XTensorND::template copy_from_fixlayout<DeviceTensorStorage>))
  30. .def("copy_from_fixlayout",
  31. py::overload_cast<const HostTensorND&>(
  32. &XTensorND::template copy_from_fixlayout<HostTensorStorage>));
  33. }
  34. std::string default_device = "xpux";
  35. } // namespace
  36. void set_default_device(const std::string& device) {
  37. default_device = device;
  38. }
  39. std::string get_default_device() {
  40. return default_device;
  41. }
// Python type object of CompNode; assigned (and kept alive via an explicit
// inc_ref) in init_common() below, so it remains valid for the lifetime of
// the interpreter.
py::handle py_comp_node_type;
  43. void init_common(py::module m) {
  44. auto PyCompNode =
  45. py::class_<CompNode>(m, "CompNode")
  46. .def(py::init())
  47. .def(py::init(
  48. py::overload_cast<const std::string&>(&CompNode::load)))
  49. .def_property_readonly(
  50. "logical_name",
  51. [](const CompNode& cn) { return cn.to_string_logical(); })
  52. .def_property_readonly(
  53. "physical_name",
  54. [](const CompNode& cn) { return cn.to_string_physical(); })
  55. .def_property_readonly(
  56. "get_mem_status_bytes",
  57. [](const CompNode& cn) {
  58. return cn.get_mem_status_bytes();
  59. })
  60. .def_property_readonly(
  61. "get_used_memory",
  62. [](const CompNode& cn) { return cn.get_used_memory(); })
  63. .def_property_readonly(
  64. "get_max_used_memory",
  65. [](const CompNode& cn) { return cn.get_max_used_memory(); })
  66. .def_property_readonly(
  67. "get_reserved_memory",
  68. [](const CompNode& cn) { return cn.get_reserved_memory(); })
  69. .def_property_readonly(
  70. "get_max_reserved_memory",
  71. [](const CompNode& cn) {
  72. return cn.get_max_reserved_memory();
  73. })
  74. .def_static(
  75. "reset_max_memory_stats",
  76. [](const CompNode& cn) {
  77. cn.reset_max_used_memory();
  78. cn.reset_max_reserved_memory();
  79. })
  80. .def("create_event", &CompNode::create_event,
  81. py::arg("flags") = 0ul)
  82. .def_static("_set_default_device", &set_default_device)
  83. .def_static("_get_default_device", &get_default_device)
  84. .def("__str__", &CompNode::to_string_logical)
  85. .def("__repr__",
  86. [](const CompNode& cn) {
  87. return mgb::ssprintf(
  88. "CompNode(\"%s\" from \"%s\")",
  89. cn.to_string_physical().c_str(),
  90. cn.to_string_logical().c_str());
  91. })
  92. .def("__hash__", [](CompNode cn) { return mgb::hash(cn); })
  93. .def_static("_sync_all", &CompNode::sync_all)
  94. .def(py::self == py::self)
  95. .def_static(
  96. "_get_device_count", &CompNode::get_device_count,
  97. "Get total number of specific devices on this system")
  98. .def(py::pickle(
  99. [](const CompNode& cn) {
  100. return py::str(cn.to_string_logical());
  101. },
  102. [](py::str cn) { return CompNode::load(cn); }));
  103. py_comp_node_type = PyCompNode.inc_ref();
  104. py::class_<CompNode::Event, std::shared_ptr<CompNode::Event>>(PyCompNode, "Event")
  105. .def("record", &CompNode::Event::record)
  106. .def("wait", &CompNode::Event::host_wait);
  107. py::implicitly_convertible<std::string, CompNode>();
  108. py::class_<CompNode::DeviceProperties>(m, "DeviceProperties")
  109. .def(py::init())
  110. .def_property_readonly(
  111. "name",
  112. [](const CompNode::DeviceProperties prop) { return prop.name; })
  113. .def_property_readonly(
  114. "total_memory",
  115. [](const CompNode::DeviceProperties prop) {
  116. return prop.total_memory;
  117. })
  118. .def_property_readonly(
  119. "major",
  120. [](const CompNode::DeviceProperties prop) { return prop.major; })
  121. .def_property_readonly("minor", [](const CompNode::DeviceProperties prop) {
  122. return prop.minor;
  123. });
  124. def_TensorND<DeviceTensorND>(m, "DeviceTensorND")
  125. .def("numpy", [](const DeviceTensorND& self) {
  126. HostTensorND hv;
  127. hv.copy_from(self).sync();
  128. return py::handle(
  129. npy::ndarray_from_tensor(hv, npy::ShareType::TRY_SHARE));
  130. });
  131. def_TensorND<HostTensorND>(m, "HostTensorND")
  132. .def(py::init([](py::array data, CompNode cn, DType dtype) {
  133. if (!cn.valid()) {
  134. throw py::type_error("device must not be None");
  135. }
  136. return npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype);
  137. }))
  138. .def("numpy", [](const HostTensorND& self) {
  139. return py::reinterpret_steal<py::object>(
  140. npy::ndarray_from_tensor(self, npy::ShareType::TRY_SHARE));
  141. });
  142. py::class_<cg::OperatorNodeConfig>(m, "OperatorNodeConfig")
  143. .def(py::init())
  144. .def_property(
  145. "name",
  146. [](const OperatorNodeConfig& config) -> py::object {
  147. auto name = config.name();
  148. if (name.valid()) {
  149. return py::str(name.val());
  150. } else {
  151. return py::none();
  152. }
  153. },
  154. [](OperatorNodeConfig& config, std::string name) {
  155. config.name(std::move(name));
  156. })
  157. .def_property(
  158. "dtype",
  159. [](const OperatorNodeConfig& config) {
  160. return config.output_dtype();
  161. },
  162. [](OperatorNodeConfig& config, DType dtype) {
  163. config.output_dtype(dtype);
  164. })
  165. .def_property(
  166. "comp_node_arr",
  167. [](const OperatorNodeConfig& config) -> py::tuple {
  168. auto arr = config.comp_node();
  169. std::vector<CompNode> tmp(arr.begin(), arr.end());
  170. return py::cast(tmp);
  171. },
  172. [](OperatorNodeConfig& config, std::vector<CompNode> cns) {
  173. config.comp_node_arr({cns.begin(), cns.end()});
  174. })
  175. .def_property(
  176. "comp_node",
  177. [](const OperatorNodeConfig& config) {
  178. auto arr = config.comp_node();
  179. if (arr.size() != 1) {
  180. throw py::value_error("invalid number of comp_node");
  181. }
  182. return arr[0];
  183. },
  184. [](OperatorNodeConfig& config, CompNode cn) {
  185. OperatorNodeConfig::CompNodeArray arr{cn};
  186. config.comp_node_arr(arr);
  187. });
  188. py::class_<LogicalTensorDesc>(m, "TensorAttr")
  189. .def(py::init())
  190. .def(py::init([](const TensorShape& shape, const DType& dtype,
  191. const CompNode& comp_node) {
  192. return LogicalTensorDesc{TensorLayout{shape, dtype}, comp_node};
  193. }))
  194. .def_property(
  195. "shape",
  196. [](const LogicalTensorDesc& desc) {
  197. return static_cast<TensorShape>(desc.layout);
  198. },
  199. [](LogicalTensorDesc& desc, TensorShape shape) {})
  200. .def_property(
  201. "dtype",
  202. [](const LogicalTensorDesc& desc) { return desc.layout.dtype; },
  203. [](LogicalTensorDesc& desc, DType dtype) {
  204. desc.layout.dtype = dtype;
  205. })
  206. .def_readwrite("comp_node", &LogicalTensorDesc::comp_node);
  207. py::enum_<CompNode::DeviceType>(m, "DeviceType")
  208. .value("UNSPEC", CompNode::DeviceType::UNSPEC)
  209. .value("CUDA", CompNode::DeviceType::CUDA)
  210. .value("ROCM", CompNode::DeviceType::ROCM)
  211. .value("CPU", CompNode::DeviceType::CPU)
  212. .value("CAMBRICON", CompNode::DeviceType::CAMBRICON)
  213. .value("ATLAS", CompNode::DeviceType::ATLAS)
  214. .value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD)
  215. .value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID);
  216. m.def("set_prealloc_config", &CompNode::set_prealloc_config,
  217. "specifies how to pre-allocate from raw dev allocator");
  218. m.def("get_device_prop", &CompNode::get_device_prop);
  219. m.def("get_supported_sm_versions", []() {
  220. #if MEGDNN_WITH_CUDA
  221. static const char* mge_gen_code = MGE_CUDA_GENCODE;
  222. #else
  223. static const char* mge_gen_code = "-1";
  224. #endif
  225. return mge_gen_code;
  226. });
  227. m.def("what_is_xpu",
  228. [] { return CompNode::Locator::parse("xpux").to_physical().type; });
  229. init_npy_num_bfloat16(m);
  230. init_npy_num_intbx(m);
  231. init_dtypes(m);
  232. }