
tensor.cpp

/**
 * \file imperative/python/src/tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "./tensor.h"
#include "./grad.h"
#include "./common.h"
#include "./numpy_dtypes.h"

#include <pybind11/numpy.h>
#include <pybind11/operators.h>

#include "./helper.h"

namespace py = pybind11;

namespace mgb::imperative::python {

std::unique_ptr<interpreter::Interpreter::Channel> interpreter_for_py;
apply_result_t apply(ApplyContext& ctx) {
    // Emulating scalars should be handled in each specific op's apply, e.g.
    // elementwise, reduce, typecvt. Currently it is still handled on the
    // Python side; it could be moved to the C++ side if it has an impact
    // on performance.
    if (ctx.flags & Tensor::Flags::SCALAR) {
        // TODO: emulate scalar
    }

    if (ctx.flags & Tensor::Flags::GRAD) {
        return apply_grad(ctx);
    }

    if (ctx.flags & Tensor::Flags::TRACE) {
        // TODO: trace
    } else {
        SmallVector<interpreter::Interpreter::Handle> handles(ctx.nargs);
        for (size_t i = 0; i < ctx.nargs; ++i) {
            handles[i] = ctx.args[i]->m_handle.get();
        }

        auto output_handles = interpreter_for_py->apply_op(ctx.op, handles);

        apply_result_t outputs;
        outputs.reserve(output_handles.size());
        for (auto h : output_handles) {
            outputs.emplace_back(std::make_shared<Tensor>(h));
        }
        return outputs;
    }

    mgb_assert(0);  // unreachable: the TRACE path is not implemented yet
}
PyObject* py_apply(PyObject* self, PyObject* const* args, size_t nargs /* , PyObject* kwnames */) {
    try {
        // if (kwnames && PyTuple_GET_SIZE(kwnames)) {
        //     PyErr_SetString(PyExc_TypeError, "keyword argument not allowed");
        //     return nullptr;
        // }
        if (!nargs) {
            PyErr_SetString(PyExc_TypeError, "expect Op");
            return nullptr;
        }

        auto* op = args[0];
        ++args;
        --nargs;

        if (!nargs) {
            // guard against reading past the end of the argument array below
            PyErr_SetString(PyExc_TypeError, "expect at least one Tensor");
            return nullptr;
        }
        // outputs are constructed with the same Python type as the first
        // Tensor argument
        PyTypeObject* pytype = args[0]->ob_type;

        ApplyContext ctx;
        ctx.flags = 0;
        ctx.op = py::handle(op).cast<std::shared_ptr<OpDef>>();
        SmallVector<Tensor*, 64> tensors(nargs);
        ctx.args = &tensors[0];
        ctx.nargs = nargs;

        for (size_t i = 0; i < nargs; ++i) {
            TensorWrapper* tw = TensorWrapper::cast_safe(args[i]);
            if (!tw) {
                PyErr_SetString(PyExc_TypeError, "expect Tensor");
                return nullptr;
            }
            auto* t = tensors[i] = tw->m_tensor.get();
            ctx.flags |= t->m_flags;
        }

        // TODO: set TRACE flag

        auto outputs = apply(ctx);
        size_t nout = outputs.size();
        auto ret = py::tuple(nout);
        for (size_t i = 0; i < nout; ++i) {
            ret[i] = TensorWrapper::make(pytype, std::move(outputs[i]));
        }
        return ret.release().ptr();
    } catch (std::exception& e) {
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return nullptr;
    }
}
TensorWrapper::TensorWrapper(PyObject* args, PyObject* kwargs) {
    if (kwargs && PyDict_Size(kwargs)) {
        throw py::type_error("keyword argument not allowed");
    }
    auto nargs = PyTuple_Size(args);
    auto tup = py::reinterpret_borrow<py::tuple>(args);
    if (nargs == 0) {
        throw py::type_error("too few arguments");
    }
    if (auto* t = cast_safe(tup[0].ptr())) {
        // copy constructor: share the underlying Tensor
        if (nargs > 1) {
            throw py::type_error("expect 1 argument");
        }
        m_tensor = t->m_tensor;
    } else {
        // construct from (numpy array, dtype, comp_node)
        if (nargs != 3) {
            throw py::type_error("expect 3 arguments");
        }
        py::detail::loader_life_support life_sup;  // required to cast DType
        auto data = tup[0].cast<py::array>();
        DType dtype = tup[1].cast<DType>();
        CompNode cn = tup[2].cast<CompNode>();

        interpreter::Interpreter::Handle handle;
        constexpr auto size_threshold = TensorShape::MAX_NDIM;
        if (data.size() > size_threshold) {
            // large arrays: borrow the numpy buffer to avoid a copy
            handle = interpreter_for_py->put(npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype));
        } else {
            // small arrays: copy into a fresh host tensor
            HostTensorND ret(cn);
            handle = interpreter_for_py->put(npy::np2tensor(data.ptr(), npy::Meth::copy_into(&ret), dtype));
        }

        m_tensor = std::make_shared<Tensor>(handle);
        if (data.ndim() == 0) {
            m_tensor->m_flags |= Tensor::Flags::SCALAR;
        }
    }
}
PyObject* TensorWrapper::shape() {
    if (m_tensor->m_flags & Tensor::Flags::SCALAR) {
        return PyTuple_New(0);
    }
    auto&& shape = m_tensor->shape();
    if (!shape.ndim) {
        Py_RETURN_NONE;
    }
    py::tuple ret(shape.ndim);
    for (size_t i = 0; i < shape.ndim; ++i) {
        ret[i] = shape[i];
    }
    return ret.release().ptr();
}

PyObject* TensorWrapper::dtype() {
    return py::cast(m_tensor->dtype()).release().ptr();
}

PyObject* TensorWrapper::device() {
    return py::cast(m_tensor->comp_node()).release().ptr();
}

PyObject* TensorWrapper::numpy() {
    auto&& hv = interpreter_for_py->get_value(m_tensor->m_handle.get());
    auto arr = py::reinterpret_steal<py::array>(npy::ndarray_from_tensor(hv, npy::ShareType::TRY_SHARE));
    if (!arr) return nullptr;
    if (m_tensor->m_flags & Tensor::Flags::SCALAR) {
        mgb_assert(PyArray_Check(arr.ptr()));
        return PyArray_Squeeze(reinterpret_cast<PyArrayObject*>(arr.ptr()));
    }
    return arr.release().ptr();
}

void TensorWrapper::reset(PyObject* tensor) {
    TensorWrapper* t = TensorWrapper::cast_safe(tensor);
    if (!t) {
        throw py::type_error("expect Tensor");
    }
    m_tensor = t->m_tensor;
}

PyObject* TensorWrapper::detach() {
    PyObject* self = wrap_t::pycast(this);
    PyTypeObject* pytype = self->ob_type;
    auto new_tensor = std::make_shared<Tensor>(m_tensor->m_handle);
    auto ret = TensorWrapper::make(pytype, std::move(new_tensor));
    return ret.release().ptr();
}

PyObject* TensorWrapper::_dev_tensor() {
    auto dev_tensor = interpreter_for_py->get_dev_tensor(m_tensor->m_handle.get());
    return py::cast(dev_tensor).release().ptr();
}

void TensorWrapper::_swap_out() {
    interpreter_for_py->swap_out(m_tensor->m_handle.get());
}

void TensorWrapper::_swap_in() {
    interpreter_for_py->swap_in(m_tensor->m_handle.get());
}

void TensorWrapper::_drop() {
    interpreter_for_py->drop(m_tensor->m_handle.get());
}

PyObject* TensorWrapper::isscalar() {
    if (m_tensor->m_flags & Tensor::Flags::SCALAR) {
        Py_RETURN_TRUE;
    } else {
        Py_RETURN_FALSE;
    }
}

void TensorWrapper::setscalar() {
    m_tensor->m_flags |= Tensor::Flags::SCALAR;
}

struct TensorWeakRef {
    std::weak_ptr<Tensor> wptr;

    TensorWeakRef(const TensorWrapper& tw) : wptr(tw.m_tensor) {}

    py::object operator()() {
        if (auto p = wptr.lock()) {
            return TensorWrapper::make(p);
        }
        return py::none();
    }
};
void init_tensor(py::module m) {
    interpreter_for_py = interpreter::Interpreter::inst().create_channel();

    auto* tensor_type = TensorWrapper::wrap_t::type()
        .def<&TensorWrapper::numpy>("numpy")
        .def_getset<&TensorWrapper::shape>("shape")
        .def_getset<&TensorWrapper::dtype>("dtype")
        .def_getset<&TensorWrapper::device>("device")
        .def<&TensorWrapper::reset>("_reset")
        .def<&TensorWrapper::isscalar>("isscalar")
        .def<&TensorWrapper::setscalar>("setscalar")
        .def<&TensorWrapper::detach>("detach")
        .def<&TensorWrapper::_dev_tensor>("_dev_tensor")
        .def<&TensorWrapper::_swap_out>("_swap_out")
        .def<&TensorWrapper::_swap_in>("_swap_in")
        .def<&TensorWrapper::_drop>("_drop")
        .finalize();
    if (!tensor_type) throw py::error_already_set();
    py::setattr(m, "Tensor", tensor_type);

    py::class_<TensorWeakRef>(m, "TensorWeakRef")
        .def(py::init<const TensorWrapper&>())
        .def("__call__", &TensorWeakRef::operator());

    static PyMethodDef apply_def{"apply", (PyCFunction)py_apply, METH_FASTCALL, nullptr};
    auto* apply_func = PyCFunction_NewEx(&apply_def, nullptr, nullptr);
    if (!apply_func) throw py::error_already_set();
    py::setattr(m, "apply", apply_func);

    m.def("_set_swap_flag",
          [](bool flag) { interpreter_for_py->set_swap_flag(flag); });
    m.def("_set_drop_flag",
          [](bool flag) { interpreter_for_py->set_drop_flag(flag); });
    m.def("config_async_level",
          [](int level) { interpreter_for_py->config_async_level(level); });
    m.def("get_async_level",
          []() { return interpreter_for_py->get_async_level(); });
    m.def("sync",
          []() {
              interpreter_for_py->sync();
              py_task_q.wait_all_task_finish();
          },
          py::call_guard<py::gil_scoped_release>());

    py::handle grad_key_type = GradKeyWrapper::wrap_t::type()
        .def<&GradKeyWrapper::attach>("attach")
        .finalize();
    if (!grad_key_type) throw py::error_already_set();
    py::setattr(m, "GradKey", grad_key_type);
    py::setattr(m, "backward", py::cpp_function(&GradKeyWrapper::backward));
}

} // namespace mgb::imperative::python
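
For orientation, here is a minimal Python-side sketch of the names that init_tensor() registers (Tensor, apply, sync, and so on). The import path is an assumption made for illustration only; the real MegEngine package re-exports these internal names through its Python layer, and whether a numpy dtype and a device string convert implicitly to DType and CompNode depends on casters defined elsewhere in the codebase.

import numpy as np
# module path assumed for illustration; not part of the file above
from megengine.core._imperative_rt import Tensor, apply, sync

# the 3-argument constructor path: (numpy array, dtype, comp node)
x = Tensor(np.ones((2, 3), dtype=np.float32), np.float32, "xpux")
print(x.shape, x.dtype, x.device)   # getset properties defined above
print(x.numpy())                    # fetch the value back as ndarray

# apply(op, *tensors) takes an OpDef plus Tensor arguments and returns
# a tuple of Tensors; OpDef instances come from other binding modules.
sync()  # block until the async interpreter finishes all queued work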

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there are no separate CPU and GPU builds. To run GPU programs, make sure the machine has a GPU and that the driver is properly installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit MegStudio.
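
Since the same package serves both CPU and GPU machines, a quick runtime check tells you which backend is actually available. A minimal sketch, assuming the standard top-level MegEngine Python API:

import megengine

# is_cuda_available() reports whether a usable CUDA device was detected
if megengine.is_cuda_available():
    print("CUDA device detected; GPU kernels will be used")
else:
    print("no CUDA device; running on CPU only")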