GitOrigin-RevId: 7c1993979c
tags/v1.9.0
| @@ -285,7 +285,8 @@ struct TensorLayout : public TensorShape { | |||||
| * stride | * stride | ||||
| */ | */ | ||||
| void add_axis_cont_inplace(size_t axis) { | void add_axis_cont_inplace(size_t axis) { | ||||
| add_axis_inplace(axis, 1, stride[axis] * shape[axis]); | |||||
| ptrdiff_t stride_ = axis < ndim ? stride[axis] * shape[axis] : 1; | |||||
| add_axis_inplace(axis, 1, stride_); | |||||
| } | } | ||||
| /*! | /*! | ||||
| @@ -382,7 +382,7 @@ bool TensorLayout::eq_layout(const TensorLayout& rhs) const { | |||||
| MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code"); | MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code"); | ||||
| auto ax = [](size_t shape0, size_t shape1, ptrdiff_t stride0, ptrdiff_t stride1) { | auto ax = [](size_t shape0, size_t shape1, ptrdiff_t stride0, ptrdiff_t stride1) { | ||||
| return (shape0 == shape1) & ((shape0 == 1) | (stride0 == stride1)); | |||||
| return (shape0 == shape1) & ((shape0 <= 1) | (stride0 == stride1)); | |||||
| }; | }; | ||||
| if (ndim == rhs.ndim) { | if (ndim == rhs.ndim) { | ||||
| size_t eq = 0; | size_t eq = 0; | ||||
| @@ -13,7 +13,8 @@ | |||||
| using namespace megdnn; | using namespace megdnn; | ||||
| const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle(int debug_level) { | |||||
| MGE_WIN_DECLSPEC_FUC const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle( | |||||
| int debug_level) { | |||||
| auto make = [](int deb_level) { | auto make = [](int deb_level) { | ||||
| megcoreDeviceHandle_t dev_handle; | megcoreDeviceHandle_t dev_handle; | ||||
| megcoreCreateDeviceHandle(&dev_handle, megcorePlatformCPU); | megcoreCreateDeviceHandle(&dev_handle, megcorePlatformCPU); | ||||
| @@ -32,6 +32,7 @@ | |||||
| #include "./module_trace.h" | #include "./module_trace.h" | ||||
| #include "./numpy_dtypes.h" | #include "./numpy_dtypes.h" | ||||
| #include "./tensor.h" | #include "./tensor.h" | ||||
| #include "./tensor_utils.h" | |||||
| #include "./transformation.h" | #include "./transformation.h" | ||||
| #include <object.h> | #include <object.h> | ||||
| @@ -549,557 +550,6 @@ CompNode _get_device(PyObject* const* args, size_t nargs) { | |||||
| return cn; | return cn; | ||||
| } | } | ||||
| bool is_scalar(PyObject* tensor) { | |||||
| if (py::isinstance<PySymbolVar>(py::handle(tensor))) { | |||||
| auto var = py::handle(tensor).cast<PySymbolVar*>(); | |||||
| return var->is_scalar; | |||||
| } | |||||
| auto* tw = TensorWrapper::try_cast(tensor); | |||||
| if (tw) { | |||||
| return tw->m_tensor->is_scalar(); | |||||
| } | |||||
| return PyArray_CheckAnyScalar(tensor); | |||||
| } | |||||
| bool is_bool_list(PyObject* arg) { | |||||
| if (!PyList_Check(arg)) { | |||||
| return false; | |||||
| } | |||||
| size_t sz = PyList_Size(arg); | |||||
| if (!sz) { | |||||
| return false; | |||||
| } | |||||
| for (size_t i = 0; i < sz; ++i) { | |||||
| PyObject* handle = PyList_GetItem(arg, i); | |||||
| if (!PyBool_Check(handle)) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool is_bool_dtype(PyObject* args) { | |||||
| if (!PyObject_HasAttrString(args, "dtype")) | |||||
| return false; | |||||
| PyObject* dobj = PyObject_GetAttrString(args, "dtype"); | |||||
| PyArray_Descr* dtype; | |||||
| PyArray_DescrConverter(dobj, &dtype); | |||||
| bool ret = (dtype->kind == 'b'); | |||||
| Py_XDECREF(dtype); | |||||
| Py_XDECREF(dobj); | |||||
| return ret; | |||||
| } | |||||
| py::object _Const( | |||||
| py::handle value, py::handle dtype, py::handle device, py::handle ref) { | |||||
| py::object val = py::reinterpret_borrow<py::object>(value); | |||||
| if (PyArray_Check(value.ptr())) { | |||||
| py::tuple strides = | |||||
| py::reinterpret_borrow<py::tuple>(getattr(value, "strides")); | |||||
| bool need_squeeze = false; | |||||
| for (size_t i = 0; i < strides.size(); ++i) { | |||||
| if (strides[i].cast<ptrdiff_t>() == 0) { | |||||
| need_squeeze = true; | |||||
| } | |||||
| } | |||||
| if (need_squeeze) { | |||||
| val = py::reinterpret_borrow<py::array>(value); | |||||
| val = val.attr("squeeze")(); | |||||
| val = val.attr("reshape")(val.attr("shape")); | |||||
| } | |||||
| } | |||||
| if (py::isinstance<PySymbolVar>(ref)) { | |||||
| auto ref_var = ref.cast<PySymbolVar*>(); | |||||
| auto* graph = ref_var->m_node->owner_graph(); | |||||
| auto cn = device.cast<CompNode>(); | |||||
| OperatorNodeConfig config(cn); | |||||
| auto hv = npy::np2tensor( | |||||
| val.ptr(), npy::Meth::borrow(cn), dtype.cast<mgb::DType>()); | |||||
| auto typeobj = ref.get_type(); | |||||
| return typeobj(opr::ImmutableTensor::make(*graph, hv, config).node()); | |||||
| } | |||||
| py::tuple tup = py::make_tuple(val, dtype, device, true, false, py::none()); | |||||
| return TensorWrapper::make(py_tensor_type, tup.ptr(), nullptr); | |||||
| } | |||||
| py::tuple _make_shape_tuple(py::handle shape) { | |||||
| py::list orig; | |||||
| py::list ret(0); | |||||
| auto solve_one = [&](py::handle val) { | |||||
| if (TensorWrapper::try_cast(val.ptr()) || py::isinstance<PySymbolVar>(val)) { | |||||
| py::object np = getattr(val, "numpy")(); | |||||
| PyArrayObject* arr = (PyArrayObject*)np.ptr(); | |||||
| PyObject* maybe_list = PyArray_ToList(arr); | |||||
| if (PyList_Check(maybe_list)) { | |||||
| py::list may = py::reinterpret_steal<py::list>(maybe_list); | |||||
| for (size_t i = 0; i < may.size(); ++i) { | |||||
| ret.append(may[i]); | |||||
| } | |||||
| } else { | |||||
| mgb_assert(PyLong_Check(maybe_list)); | |||||
| ret.append(PyLong_AsLong(maybe_list)); | |||||
| Py_XDECREF(maybe_list); | |||||
| } | |||||
| } else if (PyArray_Check(val.ptr())) { | |||||
| ret.append(PyArray_PyIntAsInt(val.ptr())); | |||||
| } else { | |||||
| ret.append(PyLong_AsLong(val.ptr())); | |||||
| } | |||||
| }; | |||||
| if (PyArray_Check(shape.ptr()) && !PyArray_CheckAnyScalar(shape.ptr())) { | |||||
| orig = py::reinterpret_steal<py::list>( | |||||
| PyArray_ToList((PyArrayObject*)shape.ptr())); | |||||
| for (size_t i = 0; i < orig.size(); ++i) { | |||||
| solve_one(orig[i]); | |||||
| } | |||||
| } else if (PyList_Check(shape.ptr())) { | |||||
| orig = py::reinterpret_borrow<py::list>(shape); | |||||
| for (size_t i = 0; i < orig.size(); ++i) { | |||||
| solve_one(orig[i]); | |||||
| } | |||||
| } else if (PyTuple_Check(shape.ptr())) { | |||||
| py::tuple tup = py::reinterpret_borrow<py::tuple>(shape); | |||||
| for (size_t i = 0; i < tup.size(); ++i) { | |||||
| solve_one(tup[i]); | |||||
| } | |||||
| } else { | |||||
| solve_one(shape); | |||||
| } | |||||
| return py::reinterpret_steal<py::tuple>(PyList_AsTuple(ret.ptr())); | |||||
| } | |||||
| py::object _get_index(py::object tensor, py::object src) { | |||||
| if (!TensorWrapper::try_cast(tensor.ptr()) && | |||||
| !py::isinstance<PySymbolVar>(tensor)) { | |||||
| auto get_const = [&](mgb::DType dtype) -> py::object { | |||||
| return _Const(tensor, py::cast(dtype), src.attr("device"), src); | |||||
| }; | |||||
| if (is_bool_list(tensor.ptr()) || is_bool_dtype(tensor.ptr())) { | |||||
| tensor = get_const(dtype::Bool()); | |||||
| } else { | |||||
| tensor = get_const(dtype::Int32()); | |||||
| } | |||||
| if (!is_bool_dtype(tensor.ptr())) { | |||||
| return tensor; | |||||
| } | |||||
| } else { | |||||
| if (!is_bool_dtype(tensor.ptr())) { | |||||
| return tensor; | |||||
| } | |||||
| } | |||||
| static std::shared_ptr<OpDef> op = CondTake::make(); | |||||
| std::vector<PyObject*> p; | |||||
| p.resize(3); | |||||
| py::object Op = py::cast(op); | |||||
| p[0] = Op.ptr(); | |||||
| p[1] = tensor.ptr(); | |||||
| p[2] = tensor.ptr(); | |||||
| py::tuple ret = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
| return ret[1]; | |||||
| } | |||||
| py::tuple _try_cond_take(py::handle tensor, py::handle index) { | |||||
| if (!hasattr(index, "dtype") || !hasattr(index, "shape")) { | |||||
| return py::tuple(); | |||||
| } | |||||
| if (!is_bool_dtype(index.ptr()) || | |||||
| _make_shape_tuple(getattr(index, "shape")) | |||||
| .not_equal(_make_shape_tuple(getattr(tensor, "shape")))) { | |||||
| return py::tuple(); | |||||
| } | |||||
| py::object iobj; | |||||
| if (PyArray_Check(index.ptr())) { | |||||
| iobj = | |||||
| _Const(index, py::cast((mgb::DType)dtype::Bool()), | |||||
| getattr(tensor, "device"), tensor); | |||||
| } else { | |||||
| iobj = py::reinterpret_borrow<py::object>(index); | |||||
| } | |||||
| static std::shared_ptr<OpDef> op = CondTake::make(); | |||||
| std::vector<PyObject*> p; | |||||
| p.resize(3); | |||||
| py::object Op = py::cast(op); | |||||
| p[0] = Op.ptr(); | |||||
| p[1] = tensor.ptr(); | |||||
| p[2] = iobj.ptr(); | |||||
| py::tuple ret = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
| return ret; | |||||
| } | |||||
| py::tuple _remove_ellipsis(py::object tensor, py::tuple tuple_val) { | |||||
| size_t tuple_size = tuple_val.size(); | |||||
| size_t ndim_sum = 0, cur_sum = 0; | |||||
| int pos = -1; | |||||
| bool has_unknown_ndim_bool_index = false; | |||||
| for (size_t i = 0; i < tuple_size; ++i) { | |||||
| py::object handle = tuple_val[i]; | |||||
| if (handle.ptr() == Py_Ellipsis) { | |||||
| pos = static_cast<int>(i); | |||||
| for (size_t j = 0; j < i; ++j) { | |||||
| py::object t = tuple_val[j]; | |||||
| if (t.ptr() == Py_Ellipsis) { | |||||
| throw py::index_error("only one ellipsis is allowed."); | |||||
| } | |||||
| } | |||||
| } else { | |||||
| size_t ndim_incr = 1; | |||||
| if (hasattr(handle, "dtype") && is_bool_dtype(handle.ptr()) && | |||||
| hasattr(handle, "ndim")) { | |||||
| py::object ndim = getattr(handle, "ndim"); | |||||
| if (PyLong_Check(ndim.ptr())) { | |||||
| ndim_incr = PyLong_AsLong(ndim.ptr()); | |||||
| } else { | |||||
| has_unknown_ndim_bool_index = true; | |||||
| } | |||||
| } | |||||
| cur_sum += ndim_incr; | |||||
| } | |||||
| } | |||||
| if (pos == -1) { | |||||
| return tuple_val; | |||||
| } else { | |||||
| if (has_unknown_ndim_bool_index) { | |||||
| throw py::index_error( | |||||
| "does not support bool index with unknown shape when using " | |||||
| "Ellipsis."); | |||||
| } | |||||
| try { | |||||
| ndim_sum = getattr(tensor, "ndim").cast<size_t>(); | |||||
| } catch (py::error_already_set& err) { | |||||
| throw py::index_error( | |||||
| "does not support Ellipsis when tensor's ndim is unknown."); | |||||
| } | |||||
| py::tuple ret(ndim_sum - cur_sum + tuple_size - 1); | |||||
| size_t idx = 0; | |||||
| for (size_t i = 0; i < tuple_size; ++i) { | |||||
| if (i == pos) { | |||||
| for (size_t j = cur_sum; j < ndim_sum; ++j) { | |||||
| ret[idx++] = PySlice_New(NULL, NULL, NULL); | |||||
| } | |||||
| } else { | |||||
| ret[idx++] = tuple_val[i]; | |||||
| } | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| } | |||||
// Reshapes `tensor` so that every boolean index with more than one dimension
// lines up with a single tensor axis.
//
// For each entry of tuple_val that has a boolean dtype and ndim > 1, the
// index's shape is compared with the tensor dimensions it covers (an
// index_error is thrown on mismatch) and the tensor is reshaped so that those
// dimensions are replaced by one dimension whose length is shape[0] of the
// flattened index. The index entries themselves are appended to the result
// unchanged (the flattened index is only used to read its shape).
//
// Returns a 2-tuple: (possibly reshaped tensor, tuple of the index entries).
| py::tuple _expand_bool_dim(py::object tensor, py::tuple tuple_val) { | |||||
| py::tuple cur_shape = _make_shape_tuple(py::handle(getattr(tensor, "shape"))); | |||||
| py::list new_tuple_val(0); | |||||
// offset: number of multi-dimensional boolean indices expanded so far.
// tdim: number of tensor dimensions consumed by the entries visited so far.
// NOTE(review): offset grows by 1 per expanded index while the reshaped tensor
// loses ndim - 1 dimensions; confirm the `tdim + j - offset` mapping when an
// index with ndim > 2 is followed by another multi-dimensional boolean index.
| size_t offset = 0; | |||||
| size_t tdim = 0; | |||||
| for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
| py::handle k = tuple_val[i]; | |||||
| if (is_bool_dtype(k.ptr())) { | |||||
| size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||||
| if (ndim > 1) { | |||||
| py::tuple ishape = _make_shape_tuple(py::handle(getattr(k, "shape"))); | |||||
// Every dimension of the boolean index must equal the tensor dimension it
// is matched against.
| for (size_t j = 0; j < ndim; ++j) { | |||||
| if (cur_shape[tdim + j - offset].cast<size_t>() != | |||||
| ishape[j].cast<size_t>()) { | |||||
| std::string msg = | |||||
| "boolean index did not match tensor along dimension " + | |||||
| std::to_string(tdim + j) + "; dimension is " + | |||||
| std::to_string( | |||||
| cur_shape[tdim + j - offset].cast<size_t>()) + | |||||
| " but corresponding boolean dimension is " + | |||||
| std::to_string(ishape[j].cast<size_t>()); | |||||
| throw py::index_error(msg.c_str()); | |||||
| } | |||||
| } | |||||
// Flatten the index only to obtain the length of the merged dimension.
| py::object new_k = getattr(k, "reshape")(-1); | |||||
| py::object kshape = getattr(new_k, "shape"); | |||||
| py::list new_shape(0); | |||||
// Ask the Python side whether symbolic shapes are in use; the result is
// compared against Py_True by identity.
| PyObject* sym = PyObject_CallObject(cpp_use_symbolic_shape, nullptr); | |||||
| bool is_sym = (sym == Py_True); | |||||
| Py_XDECREF(sym); | |||||
| if (is_sym) { | |||||
// Symbolic path: the leading entries and the merged length are taken from the
// shape objects themselves, then turned into a shape tensor via cpp_astensor1d.
| py::object tshape = getattr(tensor, "shape"); | |||||
| for (size_t j = 0; j < i; ++j) { | |||||
| new_shape.append(tshape[py::int_(j)]); | |||||
| } | |||||
| new_shape.append(kshape[py::int_(0)]); | |||||
| for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||||
| new_shape.append(cur_shape[j]); | |||||
| } | |||||
| py::tuple args = py::make_tuple(new_shape); | |||||
| PyObject* shape_tensor = | |||||
| PyObject_CallObject(cpp_astensor1d, args.ptr()); | |||||
| py::object reshape_func = getattr(tensor, "reshape"); | |||||
// PyTuple_SetItem steals a reference, so one extra reference is taken first to
// keep shape_tensor usable after Args is released.
| Py_INCREF(shape_tensor); | |||||
| PyObject* Args = PyTuple_New(1); | |||||
| PyTuple_SetItem(Args, 0, shape_tensor); | |||||
| PyObject* new_tensor = | |||||
| PyObject_CallObject(reshape_func.ptr(), Args); | |||||
| Py_XDECREF(Args); | |||||
| tensor = py::reinterpret_steal<py::object>(new_tensor); | |||||
| cur_shape = _make_shape_tuple(py::handle(shape_tensor)); | |||||
| Py_XDECREF(shape_tensor); | |||||
| } else { | |||||
// Static path: build the new shape from the integer tuple and reshape directly.
| for (size_t j = 0; j < i; ++j) { | |||||
| new_shape.append(cur_shape[j]); | |||||
| } | |||||
| new_shape.append(py::reinterpret_borrow<py::tuple>(kshape)[0]); | |||||
| for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||||
| new_shape.append(cur_shape[j]); | |||||
| } | |||||
| cur_shape = new_shape; | |||||
| tensor = getattr(tensor, "reshape")(cur_shape); | |||||
| } | |||||
| offset++; | |||||
| tdim += ndim; | |||||
| } | |||||
// Boolean indices with ndim <= 1 are passed through and do not advance tdim.
| new_tuple_val.append(k); | |||||
| } else { | |||||
| new_tuple_val.append(k); | |||||
| tdim++; | |||||
| } | |||||
| } | |||||
| return py::make_tuple(tensor, py::reinterpret_borrow<py::tuple>(new_tuple_val)); | |||||
| } | |||||
// Normalises a Python index expression for the indexing operators.
//
// idx_hdl is treated as a tuple of index entries (a non-tuple is wrapped into a
// 1-tuple). None entries are rejected, an Ellipsis is expanded through
// _remove_ellipsis and multi-dimensional boolean indices through
// _expand_bool_dim.
//
// Returns a 5-tuple:
//   [0] the input tensor (reshaped when boolean dimensions were expanded),
//   [1] list of index tensors produced by _get_index,
//   [2] list of items, each [axis, has_start, has_stop, has_step, has_index],
//   [3] use_subtensor: true when every entry is a scalar or a slice,
//   [4] need_expand_bool_dim.
| py::tuple _unpack_indexes(py::handle inp_hdl, py::handle idx_hdl) { | |||||
| py::object inp = py::reinterpret_borrow<py::object>(inp_hdl); | |||||
| py::tuple tuple_val; | |||||
| if (py::isinstance<py::tuple>(idx_hdl)) { | |||||
| tuple_val = py::reinterpret_borrow<py::tuple>(idx_hdl); | |||||
| } else { | |||||
| tuple_val = py::make_tuple(idx_hdl); | |||||
| } | |||||
| bool use_subtensor = true; | |||||
| bool need_remove_ellipsis = false; | |||||
| bool need_expand_bool_dim = false; | |||||
// idx_ndim: number of tensor dimensions addressed by the entries; a boolean
// index with an "ndim" attribute counts as ndim, an Ellipsis as none, anything
// else as one.
| size_t idx_ndim = 0; | |||||
| for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
| py::object k = tuple_val[i]; | |||||
| if (k.ptr() == Py_None) { | |||||
| throw py::index_error("newaxis is not allowed here"); | |||||
| } else if (k.ptr() == Py_Ellipsis) { | |||||
| need_remove_ellipsis = true; | |||||
| } else { | |||||
| if (is_bool_dtype(k.ptr()) && hasattr(k, "ndim")) { | |||||
| size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||||
| idx_ndim += ndim; | |||||
| if (ndim > 1) { | |||||
| need_expand_bool_dim = true; | |||||
| } | |||||
| } else { | |||||
| idx_ndim++; | |||||
| } | |||||
| } | |||||
| } | |||||
// Reject index expressions that address more dimensions than the input has.
// Only py::error_already_set is swallowed here, presumably so that inputs
// without a usable "ndim" skip the check.
// NOTE(review): a present but non-integer ndim would raise a cast error rather
// than error_already_set - confirm that case cannot occur.
| try { | |||||
| size_t inp_ndim = getattr(inp, "ndim").cast<size_t>(); | |||||
| if (idx_ndim > inp_ndim) { | |||||
| std::string msg = "too many indices for tensor: tensor is " + | |||||
| std::to_string(inp_ndim) + "-dimensional, but " + | |||||
| std::to_string(idx_ndim) + " were indexed"; | |||||
| throw py::index_error(msg.c_str()); | |||||
| } | |||||
| } catch (py::error_already_set& err) { | |||||
| ; // ignore | |||||
| } | |||||
| if (need_remove_ellipsis) { | |||||
| tuple_val = _remove_ellipsis(inp, tuple_val); | |||||
| } | |||||
// Boolean dimensions are only expanded when the input reports a shape
// (its "shape" attribute is not None).
| if (need_expand_bool_dim) { | |||||
| py::object shape = getattr(inp, "shape"); | |||||
| if (shape.ptr() != Py_None) { | |||||
| py::tuple ret = _expand_bool_dim(inp, tuple_val); | |||||
| inp = ret[0]; | |||||
| tuple_val = ret[1]; | |||||
| } | |||||
| } | |||||
| py::list items; | |||||
| py::list tensors; | |||||
// cur_axis is advanced for every entry, including full slices that are skipped
// below, so it always equals the entry's position in tuple_val.
| int cur_axis = -1; | |||||
| for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
| py::object handle = tuple_val[i]; | |||||
| cur_axis++; | |||||
| if (!is_scalar(handle.ptr()) && !PySlice_Check(handle.ptr())) { | |||||
| use_subtensor = false; | |||||
| } | |||||
| py::list item; | |||||
| item.append(cur_axis); | |||||
// push records whether a component is present (false for None) and, when it
// is, appends the corresponding index tensor to `tensors`.
| auto push = [&](PyObject* v) { | |||||
| if (v == Py_None) { | |||||
| item.append(false); | |||||
| } else { | |||||
| item.append(true); | |||||
| tensors.append(_get_index(py::reinterpret_borrow<py::object>(v), inp)); | |||||
| } | |||||
| }; | |||||
| if (PySlice_Check(handle.ptr())) { | |||||
| PySliceObject* s = (PySliceObject*)handle.ptr(); | |||||
// A slice with no start, stop or step selects the whole axis: emit no item.
| if (s->start == Py_None && s->stop == Py_None && s->step == Py_None) { | |||||
| continue; | |||||
| } | |||||
| push(s->start); | |||||
| push(s->stop); | |||||
| push(s->step); | |||||
| item.append(false); | |||||
| } else { | |||||
// Non-slice entry: no start/stop/step, only the index component.
| for (size_t j = 0; j < 3; j++) | |||||
| item.append(false); | |||||
| push(handle.ptr()); | |||||
| } | |||||
| items.append(item); | |||||
| } | |||||
| return py::make_tuple(inp, tensors, items, use_subtensor, need_expand_bool_dim); | |||||
| } | |||||
| py::object _getitem_cpp(py::handle inp_hdl, py::handle idx_hdl) { | |||||
| py::tuple try_res = _try_cond_take(inp_hdl, idx_hdl); | |||||
| if (try_res.size() == 2) { | |||||
| return try_res[0]; | |||||
| } | |||||
| py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||||
| py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||||
| py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||||
| py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||||
| std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||||
| for (size_t i = 0; i < py_items.size(); ++i) { | |||||
| py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||||
| cpp_items.push_back( | |||||
| {item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||||
| item[3].cast<bool>(), item[4].cast<bool>()}); | |||||
| } | |||||
| static std::shared_ptr<OpDef> op; | |||||
| if (up[3].cast<bool>()) { | |||||
| op = Subtensor::make(cpp_items); | |||||
| } else { | |||||
| op = IndexingMultiAxisVec::make(cpp_items); | |||||
| } | |||||
| std::vector<PyObject*> p; | |||||
| p.resize(tensors.size() + 2); | |||||
| py::object Op = py::cast(op); | |||||
| p[0] = Op.ptr(); | |||||
| p[1] = tensor.ptr(); | |||||
| for (size_t i = 0; i < tensors.size(); ++i) { | |||||
| p[i + 2] = tensors[i].ptr(); | |||||
| } | |||||
| py::tuple ret = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
| return ret[0]; | |||||
| } | |||||
| py::object _setitem_cpp(py::handle inp_hdl, py::handle idx_hdl, py::handle val_hdl) { | |||||
| py::object org_shape = getattr(inp_hdl, "shape"); | |||||
| py::object val = py::reinterpret_borrow<py::object>(val_hdl); | |||||
| if (!TensorWrapper::try_cast(val.ptr()) && !py::isinstance<PySymbolVar>(val)) { | |||||
| val = | |||||
| _Const(val_hdl, getattr(inp_hdl, "dtype"), getattr(inp_hdl, "device"), | |||||
| inp_hdl); | |||||
| } | |||||
| py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||||
| py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||||
| py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||||
| py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||||
| std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||||
| for (size_t i = 0; i < py_items.size(); ++i) { | |||||
| py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||||
| cpp_items.push_back( | |||||
| {item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||||
| item[3].cast<bool>(), item[4].cast<bool>()}); | |||||
| } | |||||
| static std::shared_ptr<OpDef> op, set_op; | |||||
| if (up[3].cast<bool>()) { | |||||
| op = Subtensor::make(cpp_items); | |||||
| } else { | |||||
| op = IndexingMultiAxisVec::make(cpp_items); | |||||
| } | |||||
| std::vector<PyObject*> p; | |||||
| p.resize(tensors.size() + 2); | |||||
| py::object Op = py::cast(op); | |||||
| p[0] = Op.ptr(); | |||||
| p[1] = tensor.ptr(); | |||||
| for (size_t i = 0; i < tensors.size(); ++i) { | |||||
| p[i + 2] = tensors[i].ptr(); | |||||
| } | |||||
| py::tuple ret = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
| py::object tmp_result = ret[0]; | |||||
| try { | |||||
| py::object value_tuple_shape = val.attr("_tuple_shape"); | |||||
| py::object tmp_result_tuple_shape = tmp_result.attr("_tuple_shape"); | |||||
| py::tuple value_shape = py::reinterpret_borrow<py::tuple>(value_tuple_shape); | |||||
| py::tuple tmp_result_shape = | |||||
| py::reinterpret_borrow<py::tuple>(tmp_result_tuple_shape); | |||||
| for (size_t i = 0; i < value_shape.size() && i < tmp_result_shape.size(); ++i) { | |||||
| size_t vs = value_shape[value_shape.size() - i - 1].cast<size_t>(); | |||||
| size_t ts = | |||||
| tmp_result_shape[tmp_result_shape.size() - i - 1].cast<size_t>(); | |||||
| if (vs != 1 && vs != ts) { | |||||
| std::string lhs = "", rhs = ""; | |||||
| for (size_t j = 0; j < tmp_result_shape.size(); ++j) { | |||||
| lhs += std::to_string(tmp_result_shape[j].cast<size_t>()); | |||||
| if (j) | |||||
| lhs += ","; | |||||
| } | |||||
| for (size_t j = 0; j < value_shape.size(); ++j) { | |||||
| rhs += std::to_string(value_shape[j].cast<size_t>()); | |||||
| if (j) | |||||
| rhs += ","; | |||||
| } | |||||
| throw py::value_error( | |||||
| "cannot copy tensor with shape (" + rhs + | |||||
| ") to subtensor with shape (" + lhs + ")"); | |||||
| } | |||||
| } | |||||
| } catch (py::error_already_set& err) { | |||||
| ; | |||||
| } | |||||
| py::object broadcast_func = getattr(val, "_broadcast"); | |||||
| PyObject* Args = PyTuple_New(1); | |||||
| PyTuple_SetItem(Args, 0, getattr(tmp_result, "shape").release().ptr()); | |||||
| PyObject* new_val = PyObject_CallObject(broadcast_func.ptr(), Args); | |||||
| Py_XDECREF(Args); | |||||
| val = py::reinterpret_steal<py::object>(new_val); | |||||
| if (up[3].cast<bool>()) { | |||||
| set_op = SetSubtensor::make(cpp_items); | |||||
| } else { | |||||
| set_op = IndexingSetMultiAxisVec::make(cpp_items); | |||||
| } | |||||
| std::vector<PyObject*> q; | |||||
| q.resize(tensors.size() + 3); | |||||
| py::object Set_Op = py::cast(set_op); | |||||
| q[0] = Set_Op.ptr(); | |||||
| q[1] = tensor.ptr(); | |||||
| q[2] = val.ptr(); | |||||
| for (size_t i = 0; i < tensors.size(); ++i) { | |||||
| q[i + 3] = tensors[i].ptr(); | |||||
| } | |||||
| py::tuple result = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, q.data(), q.size())); | |||||
| py::object res = result[0]; | |||||
| if (up[4].cast<bool>()) { | |||||
| py::object reshape_func = getattr(res, "reshape"); | |||||
| PyObject* Args = PyTuple_New(1); | |||||
| PyTuple_SetItem(Args, 0, org_shape.release().ptr()); | |||||
| PyObject* new_tensor = PyObject_CallObject(reshape_func.ptr(), Args); | |||||
| Py_XDECREF(Args); | |||||
| res = py::reinterpret_steal<py::object>(new_tensor); | |||||
| } | |||||
| return res; | |||||
| } | |||||
| // Returns the dtype that would result from performing an arithmetic | // Returns the dtype that would result from performing an arithmetic | ||||
| // operation on the provided input tensors and scalars. | // operation on the provided input tensors and scalars. | ||||
| PyObject* dtype_promotion(PyObject* self, PyObject* const* args, size_t nargs) { | PyObject* dtype_promotion(PyObject* self, PyObject* const* args, size_t nargs) { | ||||
| @@ -1126,30 +576,6 @@ PyObject* get_device(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
| PYEXT17_TRANSLATE_EXC_RET(nullptr) | PYEXT17_TRANSLATE_EXC_RET(nullptr) | ||||
| } | } | ||||
| PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
| try { | |||||
| return _make_shape_tuple(py::handle(args[0])).release().ptr(); | |||||
| } | |||||
| PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
| } | |||||
| PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
| try { | |||||
| return _getitem_cpp(py::handle(args[0]), py::handle(args[1])).release().ptr(); | |||||
| } | |||||
| PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
| } | |||||
| PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
| try { | |||||
| return _setitem_cpp( | |||||
| py::handle(args[0]), py::handle(args[1]), py::handle(args[2])) | |||||
| .release() | |||||
| .ptr(); | |||||
| } | |||||
| PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
| } | |||||
| #ifdef METH_FASTCALL | #ifdef METH_FASTCALL | ||||
| #define MGE_PY_INTERFACE(NAME, FUNC) \ | #define MGE_PY_INTERFACE(NAME, FUNC) \ | ||||
| { #NAME, (PyCFunction)FUNC, METH_FASTCALL, nullptr } | { #NAME, (PyCFunction)FUNC, METH_FASTCALL, nullptr } | ||||
| @@ -38,6 +38,8 @@ namespace mgb::imperative::python { | |||||
| extern interpreter::Interpreter::Channel* interpreter_for_py; | extern interpreter::Interpreter::Channel* interpreter_for_py; | ||||
| extern PyTypeObject* py_tensor_type; | extern PyTypeObject* py_tensor_type; | ||||
| extern PyObject* cpp_use_symbolic_shape; | |||||
| extern PyObject* cpp_astensor1d; | |||||
| struct Tensor : NonCopyableObj { | struct Tensor : NonCopyableObj { | ||||
| private: | private: | ||||
| @@ -0,0 +1,630 @@ | |||||
| /** | |||||
| * \file imperative/python/src/tensor.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| */ | |||||
| #include "megbrain/common.h" | |||||
| #include "megbrain/dtype.h" | |||||
| #include "megbrain/imperative/ops/autogen.h" | |||||
| #include "megbrain/imperative/ops/backward_graph.h" | |||||
| #include "megbrain/imperative/ops/utility.h" | |||||
| #include "megbrain/imperative/profiler.h" | |||||
| #include "megbrain/imperative/transformations/eval.h" | |||||
| #include "megbrain/imperative/transformations/lazy.h" | |||||
| #include "megbrain/imperative/transformations/scalar.h" | |||||
| #include "megbrain/imperative/transformations/symbol.h" | |||||
| #include "megbrain/imperative/transformations/trace.h" | |||||
| #include "megbrain/imperative/utils/map.h" | |||||
| #include "megbrain/imperative/utils/stats.h" | |||||
| #include "megbrain/opr/io.h" | |||||
| #include "megbrain/plugin/profiler.h" | |||||
| #include "./common.h" | |||||
| #include "./grad.h" | |||||
| #include "./graph_rt.h" | |||||
| #include "./helper.h" | |||||
| #include "./module_trace.h" | |||||
| #include "./numpy_dtypes.h" | |||||
| #include "./tensor.h" | |||||
| #include "./tensor_utils.h" | |||||
| #include "./transformation.h" | |||||
| #include <object.h> | |||||
| #include <pybind11/numpy.h> | |||||
| #include <pybind11/operators.h> | |||||
| #include <pybind11/pytypes.h> | |||||
| #include <pyerrors.h> | |||||
| #include <range/v3/all.hpp> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include "../../src/impl/mgb_cg_impl.h" | |||||
| namespace py = pybind11; | |||||
| namespace views = ranges::views; | |||||
| namespace mgb::imperative::python { | |||||
| bool is_scalar(PyObject* tensor) { | |||||
| if (py::isinstance<PySymbolVar>(py::handle(tensor))) { | |||||
| auto var = py::handle(tensor).cast<PySymbolVar*>(); | |||||
| return var->is_scalar; | |||||
| } | |||||
| auto* tw = TensorWrapper::try_cast(tensor); | |||||
| if (tw) { | |||||
| return tw->m_tensor->is_scalar(); | |||||
| } | |||||
| return PyArray_CheckAnyScalar(tensor); | |||||
| } | |||||
| bool is_bool_list(PyObject* arg) { | |||||
| if (!PyList_Check(arg)) { | |||||
| return false; | |||||
| } | |||||
| size_t sz = PyList_Size(arg); | |||||
| if (!sz) { | |||||
| return false; | |||||
| } | |||||
| for (size_t i = 0; i < sz; ++i) { | |||||
| PyObject* handle = PyList_GetItem(arg, i); | |||||
| if (!PyBool_Check(handle)) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool is_bool_dtype(PyObject* args) { | |||||
| if (!PyObject_HasAttrString(args, "dtype")) | |||||
| return false; | |||||
| PyObject* dobj = PyObject_GetAttrString(args, "dtype"); | |||||
| PyArray_Descr* dtype; | |||||
| PyArray_DescrConverter(dobj, &dtype); | |||||
| bool ret = (dtype->kind == 'b'); | |||||
| Py_XDECREF(dtype); | |||||
| Py_XDECREF(dobj); | |||||
| return ret; | |||||
| } | |||||
| py::object _Const( | |||||
| py::handle value, py::handle dtype, py::handle device, py::handle ref) { | |||||
| py::object val = py::reinterpret_borrow<py::object>(value); | |||||
| if (PyArray_Check(value.ptr())) { | |||||
| py::tuple strides = | |||||
| py::reinterpret_borrow<py::tuple>(getattr(value, "strides")); | |||||
| bool need_squeeze = false; | |||||
| for (size_t i = 0; i < strides.size(); ++i) { | |||||
| if (strides[i].cast<ptrdiff_t>() == 0) { | |||||
| need_squeeze = true; | |||||
| } | |||||
| } | |||||
| if (need_squeeze) { | |||||
| val = py::reinterpret_borrow<py::array>(value); | |||||
| val = val.attr("squeeze")(); | |||||
| val = val.attr("reshape")(val.attr("shape")); | |||||
| } | |||||
| } | |||||
| if (py::isinstance<PySymbolVar>(ref)) { | |||||
| auto ref_var = ref.cast<PySymbolVar*>(); | |||||
| auto* graph = ref_var->m_node->owner_graph(); | |||||
| auto cn = device.cast<CompNode>(); | |||||
| OperatorNodeConfig config(cn); | |||||
| auto hv = npy::np2tensor( | |||||
| val.ptr(), npy::Meth::borrow(cn), dtype.cast<mgb::DType>()); | |||||
| auto typeobj = ref.get_type(); | |||||
| return typeobj(opr::ImmutableTensor::make(*graph, hv, config).node()); | |||||
| } | |||||
| py::tuple tup = py::make_tuple(val, dtype, device, true, false, py::none()); | |||||
| return TensorWrapper::make(py_tensor_type, tup.ptr(), nullptr); | |||||
| } | |||||
| py::tuple _make_shape_tuple(py::handle shape) { | |||||
| py::list orig; | |||||
| py::list ret(0); | |||||
| auto solve_one = [&](py::handle val) { | |||||
| if (TensorWrapper::try_cast(val.ptr()) || py::isinstance<PySymbolVar>(val)) { | |||||
| py::object np = getattr(val, "numpy")(); | |||||
| PyArrayObject* arr = (PyArrayObject*)np.ptr(); | |||||
| PyObject* maybe_list = PyArray_ToList(arr); | |||||
| if (PyList_Check(maybe_list)) { | |||||
| py::list may = py::reinterpret_steal<py::list>(maybe_list); | |||||
| for (size_t i = 0; i < may.size(); ++i) { | |||||
| ret.append(may[i]); | |||||
| } | |||||
| } else { | |||||
| mgb_assert(PyLong_Check(maybe_list)); | |||||
| ret.append(PyLong_AsLong(maybe_list)); | |||||
| Py_XDECREF(maybe_list); | |||||
| } | |||||
| } else if (PyArray_Check(val.ptr())) { | |||||
| ret.append(PyArray_PyIntAsInt(val.ptr())); | |||||
| } else { | |||||
| ret.append(PyLong_AsLong(val.ptr())); | |||||
| } | |||||
| }; | |||||
| if (PyArray_Check(shape.ptr()) && !PyArray_CheckAnyScalar(shape.ptr())) { | |||||
| orig = py::reinterpret_steal<py::list>( | |||||
| PyArray_ToList((PyArrayObject*)shape.ptr())); | |||||
| for (size_t i = 0; i < orig.size(); ++i) { | |||||
| solve_one(orig[i]); | |||||
| } | |||||
| } else if (PyList_Check(shape.ptr())) { | |||||
| orig = py::reinterpret_borrow<py::list>(shape); | |||||
| for (size_t i = 0; i < orig.size(); ++i) { | |||||
| solve_one(orig[i]); | |||||
| } | |||||
| } else if (PyTuple_Check(shape.ptr())) { | |||||
| py::tuple tup = py::reinterpret_borrow<py::tuple>(shape); | |||||
| for (size_t i = 0; i < tup.size(); ++i) { | |||||
| solve_one(tup[i]); | |||||
| } | |||||
| } else { | |||||
| solve_one(shape); | |||||
| } | |||||
| return py::reinterpret_steal<py::tuple>(PyList_AsTuple(ret.ptr())); | |||||
| } | |||||
| py::object _get_index(py::object tensor, py::object src) { | |||||
| if (!TensorWrapper::try_cast(tensor.ptr()) && | |||||
| !py::isinstance<PySymbolVar>(tensor)) { | |||||
| auto get_const = [&](mgb::DType dtype) -> py::object { | |||||
| return _Const(tensor, py::cast(dtype), src.attr("device"), src); | |||||
| }; | |||||
| if (is_bool_list(tensor.ptr()) || is_bool_dtype(tensor.ptr())) { | |||||
| tensor = get_const(dtype::Bool()); | |||||
| } else { | |||||
| tensor = get_const(dtype::Int32()); | |||||
| } | |||||
| if (!is_bool_dtype(tensor.ptr())) { | |||||
| return tensor; | |||||
| } | |||||
| } else { | |||||
| if (!is_bool_dtype(tensor.ptr())) { | |||||
| return tensor; | |||||
| } | |||||
| } | |||||
| static std::shared_ptr<OpDef> op = CondTake::make(); | |||||
| std::vector<PyObject*> p; | |||||
| p.resize(3); | |||||
| py::object Op = py::cast(op); | |||||
| p[0] = Op.ptr(); | |||||
| p[1] = tensor.ptr(); | |||||
| p[2] = tensor.ptr(); | |||||
| py::tuple ret = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
| return ret[1]; | |||||
| } | |||||
| py::tuple _try_cond_take(py::handle tensor, py::handle index) { | |||||
| if (!hasattr(index, "dtype") || !hasattr(index, "shape")) { | |||||
| return py::tuple(); | |||||
| } | |||||
| if (!is_bool_dtype(index.ptr()) || | |||||
| _make_shape_tuple(getattr(index, "shape")) | |||||
| .not_equal(_make_shape_tuple(getattr(tensor, "shape")))) { | |||||
| return py::tuple(); | |||||
| } | |||||
| py::object iobj; | |||||
| if (PyArray_Check(index.ptr())) { | |||||
| iobj = | |||||
| _Const(index, py::cast((mgb::DType)dtype::Bool()), | |||||
| getattr(tensor, "device"), tensor); | |||||
| } else { | |||||
| iobj = py::reinterpret_borrow<py::object>(index); | |||||
| } | |||||
| static std::shared_ptr<OpDef> op = CondTake::make(); | |||||
| std::vector<PyObject*> p; | |||||
| p.resize(3); | |||||
| py::object Op = py::cast(op); | |||||
| p[0] = Op.ptr(); | |||||
| p[1] = tensor.ptr(); | |||||
| p[2] = iobj.ptr(); | |||||
| py::tuple ret = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
| return ret; | |||||
| } | |||||
| py::tuple _remove_ellipsis(py::object tensor, py::tuple tuple_val) { | |||||
| size_t tuple_size = tuple_val.size(); | |||||
| size_t ndim_sum = 0, cur_sum = 0; | |||||
| int pos = -1; | |||||
| bool has_unknown_ndim_bool_index = false; | |||||
| for (size_t i = 0; i < tuple_size; ++i) { | |||||
| py::object handle = tuple_val[i]; | |||||
| if (handle.ptr() == Py_Ellipsis) { | |||||
| pos = static_cast<int>(i); | |||||
| for (size_t j = 0; j < i; ++j) { | |||||
| py::object t = tuple_val[j]; | |||||
| if (t.ptr() == Py_Ellipsis) { | |||||
| throw py::index_error("only one ellipsis is allowed."); | |||||
| } | |||||
| } | |||||
| } else { | |||||
| size_t ndim_incr = 1; | |||||
| if (hasattr(handle, "dtype") && is_bool_dtype(handle.ptr()) && | |||||
| hasattr(handle, "ndim")) { | |||||
| py::object ndim = getattr(handle, "ndim"); | |||||
| if (PyLong_Check(ndim.ptr())) { | |||||
| ndim_incr = PyLong_AsLong(ndim.ptr()); | |||||
| } else { | |||||
| has_unknown_ndim_bool_index = true; | |||||
| } | |||||
| } | |||||
| cur_sum += ndim_incr; | |||||
| } | |||||
| } | |||||
| if (pos == -1) { | |||||
| return tuple_val; | |||||
| } else { | |||||
| if (has_unknown_ndim_bool_index) { | |||||
| throw py::index_error( | |||||
| "does not support bool index with unknown shape when using " | |||||
| "Ellipsis."); | |||||
| } | |||||
| try { | |||||
| ndim_sum = getattr(tensor, "ndim").cast<size_t>(); | |||||
| } catch (py::error_already_set& err) { | |||||
| throw py::index_error( | |||||
| "does not support Ellipsis when tensor's ndim is unknown."); | |||||
| } | |||||
| py::tuple ret(ndim_sum - cur_sum + tuple_size - 1); | |||||
| size_t idx = 0; | |||||
| for (size_t i = 0; i < tuple_size; ++i) { | |||||
| if (i == pos) { | |||||
| for (size_t j = cur_sum; j < ndim_sum; ++j) { | |||||
| ret[idx++] = PySlice_New(NULL, NULL, NULL); | |||||
| } | |||||
| } else { | |||||
| ret[idx++] = tuple_val[i]; | |||||
| } | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| py::tuple _expand_bool_dim(py::object tensor, py::tuple tuple_val) { | |||||
| py::tuple cur_shape = _make_shape_tuple(py::handle(getattr(tensor, "shape"))); | |||||
| py::list new_tuple_val(0); | |||||
| size_t offset = 0; | |||||
| size_t tdim = 0; | |||||
| for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
| py::handle k = tuple_val[i]; | |||||
| if (is_bool_dtype(k.ptr())) { | |||||
| size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||||
| if (ndim > 1) { | |||||
| py::tuple ishape = _make_shape_tuple(py::handle(getattr(k, "shape"))); | |||||
| for (size_t j = 0; j < ndim; ++j) { | |||||
| if (cur_shape[tdim + j - offset].cast<size_t>() != | |||||
| ishape[j].cast<size_t>()) { | |||||
| std::string msg = | |||||
| "boolean index did not match tensor along dimension " + | |||||
| std::to_string(tdim + j) + "; dimension is " + | |||||
| std::to_string( | |||||
| cur_shape[tdim + j - offset].cast<size_t>()) + | |||||
| " but corresponding boolean dimension is " + | |||||
| std::to_string(ishape[j].cast<size_t>()); | |||||
| throw py::index_error(msg.c_str()); | |||||
| } | |||||
| } | |||||
| py::object new_k = getattr(k, "reshape")(-1); | |||||
| py::object kshape = getattr(new_k, "shape"); | |||||
| py::list new_shape(0); | |||||
| PyObject* sym = PyObject_CallObject(cpp_use_symbolic_shape, nullptr); | |||||
| bool is_sym = (sym == Py_True); | |||||
| Py_XDECREF(sym); | |||||
| if (is_sym) { | |||||
| py::object tshape = getattr(tensor, "shape"); | |||||
| for (size_t j = 0; j < i; ++j) { | |||||
| new_shape.append(tshape[py::int_(j)]); | |||||
| } | |||||
| new_shape.append(kshape[py::int_(0)]); | |||||
| for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||||
| new_shape.append(cur_shape[j]); | |||||
| } | |||||
| py::tuple args = py::make_tuple(new_shape); | |||||
| PyObject* shape_tensor = | |||||
| PyObject_CallObject(cpp_astensor1d, args.ptr()); | |||||
| py::object reshape_func = getattr(tensor, "reshape"); | |||||
| Py_INCREF(shape_tensor); | |||||
| PyObject* Args = PyTuple_New(1); | |||||
| PyTuple_SetItem(Args, 0, shape_tensor); | |||||
| PyObject* new_tensor = | |||||
| PyObject_CallObject(reshape_func.ptr(), Args); | |||||
| Py_XDECREF(Args); | |||||
| tensor = py::reinterpret_steal<py::object>(new_tensor); | |||||
| cur_shape = _make_shape_tuple(py::handle(shape_tensor)); | |||||
| Py_XDECREF(shape_tensor); | |||||
| } else { | |||||
| for (size_t j = 0; j < i; ++j) { | |||||
| new_shape.append(cur_shape[j]); | |||||
| } | |||||
| new_shape.append(py::reinterpret_borrow<py::tuple>(kshape)[0]); | |||||
| for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||||
| new_shape.append(cur_shape[j]); | |||||
| } | |||||
| cur_shape = new_shape; | |||||
| tensor = getattr(tensor, "reshape")(cur_shape); | |||||
| } | |||||
| offset++; | |||||
| tdim += ndim; | |||||
| } | |||||
| new_tuple_val.append(k); | |||||
| } else { | |||||
| new_tuple_val.append(k); | |||||
| tdim++; | |||||
| } | |||||
| } | |||||
| return py::make_tuple(tensor, py::reinterpret_borrow<py::tuple>(new_tuple_val)); | |||||
| } | |||||
| py::tuple _unpack_indexes(py::handle inp_hdl, py::handle idx_hdl) { | |||||
| py::object inp = py::reinterpret_borrow<py::object>(inp_hdl); | |||||
| py::tuple tuple_val; | |||||
| if (py::isinstance<py::tuple>(idx_hdl)) { | |||||
| tuple_val = py::reinterpret_borrow<py::tuple>(idx_hdl); | |||||
| } else { | |||||
| tuple_val = py::make_tuple(idx_hdl); | |||||
| } | |||||
| bool use_subtensor = true; | |||||
| bool need_remove_ellipsis = false; | |||||
| bool need_expand_bool_dim = false; | |||||
| size_t idx_ndim = 0; | |||||
| for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
| py::object k = tuple_val[i]; | |||||
| if (k.ptr() == Py_None) { | |||||
| throw py::index_error("newaxis is not allowed here"); | |||||
| } else if (k.ptr() == Py_Ellipsis) { | |||||
| need_remove_ellipsis = true; | |||||
| } else { | |||||
| if (is_bool_dtype(k.ptr()) && hasattr(k, "ndim")) { | |||||
| size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||||
| idx_ndim += ndim; | |||||
| if (ndim > 1) { | |||||
| need_expand_bool_dim = true; | |||||
| } | |||||
| } else { | |||||
| idx_ndim++; | |||||
| } | |||||
| } | |||||
| } | |||||
| try { | |||||
| size_t inp_ndim = getattr(inp, "ndim").cast<size_t>(); | |||||
| if (idx_ndim > inp_ndim) { | |||||
| std::string msg = "too many indices for tensor: tensor is " + | |||||
| std::to_string(inp_ndim) + "-dimensional, but " + | |||||
| std::to_string(idx_ndim) + " were indexed"; | |||||
| throw py::index_error(msg.c_str()); | |||||
| } | |||||
| } catch (py::error_already_set& err) { | |||||
| ; // ignore | |||||
| } | |||||
| if (need_remove_ellipsis) { | |||||
| tuple_val = _remove_ellipsis(inp, tuple_val); | |||||
| } | |||||
| if (need_expand_bool_dim) { | |||||
| py::object shape = getattr(inp, "shape"); | |||||
| if (shape.ptr() != Py_None) { | |||||
| py::tuple ret = _expand_bool_dim(inp, tuple_val); | |||||
| inp = ret[0]; | |||||
| tuple_val = ret[1]; | |||||
| } | |||||
| } | |||||
| py::list items; | |||||
| py::list tensors; | |||||
| int cur_axis = -1; | |||||
| for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
| py::object handle = tuple_val[i]; | |||||
| cur_axis++; | |||||
| if (!is_scalar(handle.ptr()) && !PySlice_Check(handle.ptr())) { | |||||
| use_subtensor = false; | |||||
| } | |||||
| py::list item; | |||||
| item.append(cur_axis); | |||||
| auto push = [&](PyObject* v) { | |||||
| if (v == Py_None) { | |||||
| item.append(false); | |||||
| } else { | |||||
| item.append(true); | |||||
| tensors.append(_get_index(py::reinterpret_borrow<py::object>(v), inp)); | |||||
| } | |||||
| }; | |||||
| if (PySlice_Check(handle.ptr())) { | |||||
| PySliceObject* s = (PySliceObject*)handle.ptr(); | |||||
| if (s->start == Py_None && s->stop == Py_None && s->step == Py_None) { | |||||
| continue; | |||||
| } | |||||
| push(s->start); | |||||
| push(s->stop); | |||||
| push(s->step); | |||||
| item.append(false); | |||||
| } else { | |||||
| for (size_t j = 0; j < 3; j++) | |||||
| item.append(false); | |||||
| push(handle.ptr()); | |||||
| } | |||||
| items.append(item); | |||||
| } | |||||
| return py::make_tuple(inp, tensors, items, use_subtensor, need_expand_bool_dim); | |||||
| } | |||||
| py::object _getitem_cpp(py::handle inp_hdl, py::handle idx_hdl) { | |||||
| py::tuple try_res = _try_cond_take(inp_hdl, idx_hdl); | |||||
| if (try_res.size() == 2) { | |||||
| return try_res[0]; | |||||
| } | |||||
| py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||||
| py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||||
| py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||||
| py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||||
| std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||||
| for (size_t i = 0; i < py_items.size(); ++i) { | |||||
| py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||||
| cpp_items.push_back( | |||||
| {item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||||
| item[3].cast<bool>(), item[4].cast<bool>()}); | |||||
| } | |||||
| static std::shared_ptr<OpDef> op; | |||||
| if (up[3].cast<bool>()) { | |||||
| op = Subtensor::make(cpp_items); | |||||
| } else { | |||||
| op = IndexingMultiAxisVec::make(cpp_items); | |||||
| } | |||||
| std::vector<PyObject*> p; | |||||
| p.resize(tensors.size() + 2); | |||||
| py::object Op = py::cast(op); | |||||
| p[0] = Op.ptr(); | |||||
| p[1] = tensor.ptr(); | |||||
| for (size_t i = 0; i < tensors.size(); ++i) { | |||||
| p[i + 2] = tensors[i].ptr(); | |||||
| } | |||||
| py::tuple ret = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
| return ret[0]; | |||||
| } | |||||
| py::object _setitem_cpp(py::handle inp_hdl, py::handle idx_hdl, py::handle val_hdl) { | |||||
| py::object org_shape = getattr(inp_hdl, "shape"); | |||||
| py::object val = py::reinterpret_borrow<py::object>(val_hdl); | |||||
| if (!TensorWrapper::try_cast(val.ptr()) && !py::isinstance<PySymbolVar>(val)) { | |||||
| val = | |||||
| _Const(val_hdl, getattr(inp_hdl, "dtype"), getattr(inp_hdl, "device"), | |||||
| inp_hdl); | |||||
| } | |||||
| py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||||
| py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||||
| py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||||
| py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||||
| std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||||
| for (size_t i = 0; i < py_items.size(); ++i) { | |||||
| py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||||
| cpp_items.push_back( | |||||
| {item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||||
| item[3].cast<bool>(), item[4].cast<bool>()}); | |||||
| } | |||||
| static std::shared_ptr<OpDef> op, set_op; | |||||
| if (up[3].cast<bool>()) { | |||||
| op = Subtensor::make(cpp_items); | |||||
| } else { | |||||
| op = IndexingMultiAxisVec::make(cpp_items); | |||||
| } | |||||
| std::vector<PyObject*> p; | |||||
| p.resize(tensors.size() + 2); | |||||
| py::object Op = py::cast(op); | |||||
| p[0] = Op.ptr(); | |||||
| p[1] = tensor.ptr(); | |||||
| for (size_t i = 0; i < tensors.size(); ++i) { | |||||
| p[i + 2] = tensors[i].ptr(); | |||||
| } | |||||
| py::tuple ret = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
| py::object tmp_result = ret[0]; | |||||
| try { | |||||
| py::object value_tuple_shape = val.attr("_tuple_shape"); | |||||
| py::object tmp_result_tuple_shape = tmp_result.attr("_tuple_shape"); | |||||
| py::tuple value_shape = py::reinterpret_borrow<py::tuple>(value_tuple_shape); | |||||
| py::tuple tmp_result_shape = | |||||
| py::reinterpret_borrow<py::tuple>(tmp_result_tuple_shape); | |||||
| for (size_t i = 0; i < value_shape.size() && i < tmp_result_shape.size(); ++i) { | |||||
| size_t vs = value_shape[value_shape.size() - i - 1].cast<size_t>(); | |||||
| size_t ts = | |||||
| tmp_result_shape[tmp_result_shape.size() - i - 1].cast<size_t>(); | |||||
| if (vs != 1 && vs != ts) { | |||||
| std::string lhs = "", rhs = ""; | |||||
| for (size_t j = 0; j < tmp_result_shape.size(); ++j) { | |||||
| lhs += std::to_string(tmp_result_shape[j].cast<size_t>()); | |||||
| if (j) | |||||
| lhs += ","; | |||||
| } | |||||
| for (size_t j = 0; j < value_shape.size(); ++j) { | |||||
| rhs += std::to_string(value_shape[j].cast<size_t>()); | |||||
| if (j) | |||||
| rhs += ","; | |||||
| } | |||||
| throw py::value_error( | |||||
| "cannot copy tensor with shape (" + rhs + | |||||
| ") to subtensor with shape (" + lhs + ")"); | |||||
| } | |||||
| } | |||||
| } catch (py::error_already_set& err) { | |||||
| ; | |||||
| } | |||||
| py::object broadcast_func = getattr(val, "_broadcast"); | |||||
| PyObject* Args = PyTuple_New(1); | |||||
| PyTuple_SetItem(Args, 0, getattr(tmp_result, "shape").release().ptr()); | |||||
| PyObject* new_val = PyObject_CallObject(broadcast_func.ptr(), Args); | |||||
| Py_XDECREF(Args); | |||||
| val = py::reinterpret_steal<py::object>(new_val); | |||||
| if (up[3].cast<bool>()) { | |||||
| set_op = SetSubtensor::make(cpp_items); | |||||
| } else { | |||||
| set_op = IndexingSetMultiAxisVec::make(cpp_items); | |||||
| } | |||||
| std::vector<PyObject*> q; | |||||
| q.resize(tensors.size() + 3); | |||||
| py::object Set_Op = py::cast(set_op); | |||||
| q[0] = Set_Op.ptr(); | |||||
| q[1] = tensor.ptr(); | |||||
| q[2] = val.ptr(); | |||||
| for (size_t i = 0; i < tensors.size(); ++i) { | |||||
| q[i + 3] = tensors[i].ptr(); | |||||
| } | |||||
| py::tuple result = | |||||
| py::reinterpret_steal<py::object>(py_apply(NULL, q.data(), q.size())); | |||||
| py::object res = result[0]; | |||||
| if (up[4].cast<bool>()) { | |||||
| py::object reshape_func = getattr(res, "reshape"); | |||||
| PyObject* Args = PyTuple_New(1); | |||||
| PyTuple_SetItem(Args, 0, org_shape.release().ptr()); | |||||
| PyObject* new_tensor = PyObject_CallObject(reshape_func.ptr(), Args); | |||||
| Py_XDECREF(Args); | |||||
| res = py::reinterpret_steal<py::object>(new_tensor); | |||||
| } | |||||
| return res; | |||||
| } | |||||
| PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
| try { | |||||
| return _make_shape_tuple(py::handle(args[0])).release().ptr(); | |||||
| } | |||||
| PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
| } | |||||
| PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
| try { | |||||
| return _getitem_cpp(py::handle(args[0]), py::handle(args[1])).release().ptr(); | |||||
| } | |||||
| PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
| } | |||||
| PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
| try { | |||||
| return _setitem_cpp( | |||||
| py::handle(args[0]), py::handle(args[1]), py::handle(args[2])) | |||||
| .release() | |||||
| .ptr(); | |||||
| } | |||||
| PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
| } | |||||
| } // namespace mgb::imperative::python | |||||
| @@ -0,0 +1,11 @@ | |||||
| #pragma once | |||||
| namespace mgb::imperative::python { | |||||
// C-level entry points for the tensor indexing helpers. Each one takes an
// argument array `args` of length `nargs` and returns a PyObject*; the
// definitions return nullptr through PYEXT17_TRANSLATE_EXC_RET on failure.
// NOTE(review): PyObject is not declared by this header itself; it presumably
// relies on the including file to pull in the Python headers first - confirm.

// args[0]: shape-like object; the result comes from _make_shape_tuple.
| PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs); | |||||
// args[0]: tensor, args[1]: index; the result comes from _getitem_cpp.
| PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs); | |||||
// args[0]: tensor, args[1]: index, args[2]: value; the result comes from
// _setitem_cpp.
| PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs); | |||||
| } // namespace mgb::imperative::python | |||||
| @@ -642,7 +642,7 @@ void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr) { | |||||
| m_dtr.update_used_time(dest); | m_dtr.update_used_time(dest); | ||||
| MGB_RECORD_EVENT( | MGB_RECORD_EVENT( | ||||
| TensorProduceEvent, dest->id, ptr->layout(), ptr->comp_node(), | TensorProduceEvent, dest->id, ptr->layout(), ptr->comp_node(), | ||||
| ptr->dev_tensor().raw_ptr()); | |||||
| ptr->dev_tensor(false).raw_ptr()); | |||||
| // update tensor desc for static infer | // update tensor desc for static infer | ||||
| if (dest->desc.layout.ndim) { | if (dest->desc.layout.ndim) { | ||||
| mgb_assert( | mgb_assert( | ||||
| @@ -730,10 +730,20 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) { | |||||
| inputs, apply_functor, const_functor); | inputs, apply_functor, const_functor); | ||||
| return outputs; | return outputs; | ||||
| } | } | ||||
| return OpDef::apply_on_physical_tensor(def, inputs, output_descs, validated); | |||||
| // Check Input Layout | |||||
| // Get the input layout constraints, and if the constraint is not satisfied | |||||
| // inplace update the layout and blob to make the tensor contiguous | |||||
| auto&& constraints = OpDef::get_input_layout_constraint(def, inputs); | |||||
| for (size_t idx = 0; idx < inputs.size(); ++idx) { | |||||
| auto&& layout_checker = constraints[idx]; | |||||
| if (layout_checker) { | |||||
| inputs[idx]->to_contiguous_inplace(layout_checker); | |||||
| } | |||||
| } | |||||
| return OpDef::apply_on_physical_tensor( | |||||
| def, std::move(inputs), output_descs, validated); | |||||
| }; | }; | ||||
| MGB_RECORD_EVENT(OpExecuteEvent, apply_id, {}, reason); | MGB_RECORD_EVENT(OpExecuteEvent, apply_id, {}, reason); | ||||
| // Begin profiling operator | |||||
| SmallVector<std::pair<CompNode, uint64_t>> kernels; | SmallVector<std::pair<CompNode, uint64_t>> kernels; | ||||
| if (profiling_device) { | if (profiling_device) { | ||||
| // Collecting devices | // Collecting devices | ||||
| @@ -1 +1,2 @@ | |||||
| #include "../../../src/core/impl/graph/cg_impl.h" | #include "../../../src/core/impl/graph/cg_impl.h" | ||||
| #include "../../../src/core/impl/graph/var_node_mem_mgr.h" | |||||
| @@ -60,6 +60,11 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> OpDef::infer_output_attrs_falli | |||||
| return def.trait()->infer_output_attrs_fallible(def, inputs); | return def.trait()->infer_output_attrs_fallible(def, inputs); | ||||
| } | } | ||||
// Forward to the get_input_layout_constraint entry of this op's trait.
// Returns what the trait returns: a vector of layout-constraint callbacks.
// In ChannelImpl::do_apply_op the vector is indexed per input, and an empty
// callback is treated as "no constraint" for that input.
| SmallVector<VarNode::LayoutConstraintCallback> OpDef::get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
| return def.trait()->get_input_layout_constraint(def, inputs); | |||||
| } | |||||
| EncodedSubgraph OpDef::make_backward_graph( | EncodedSubgraph OpDef::make_backward_graph( | ||||
| const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs, | const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs, | ||||
| const SmallVector<bool>& input_requires_grad, | const SmallVector<bool>& input_requires_grad, | ||||
| @@ -47,6 +47,10 @@ void OpMethFallbackByProxyGraph::impl( | |||||
| InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | ||||
| func.Base::operator=(proxy_graph_detail::infer_output_attrs_fallible); | func.Base::operator=(proxy_graph_detail::infer_output_attrs_fallible); | ||||
| } | } | ||||
// Fallback for GetInputLayoutConstraint: install the proxy-graph based
// implementation into `func`.
| void OpMethFallbackByProxyGraph::impl( | |||||
| GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint) { | |||||
| func.Base::operator=(proxy_graph_detail::get_input_layout_constraint); | |||||
| } | |||||
| void OpMethFallbackByProxyGraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | void OpMethFallbackByProxyGraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | ||||
| func.Base::operator=(proxy_graph_detail::make_backward_graph); | func.Base::operator=(proxy_graph_detail::make_backward_graph); | ||||
| } | } | ||||
| @@ -63,6 +67,10 @@ void OpMethFallbackFromSubgraph::impl( | |||||
| InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | ||||
| func.Base::operator=(subgraph_detail::infer_output_attrs_fallible); | func.Base::operator=(subgraph_detail::infer_output_attrs_fallible); | ||||
| } | } | ||||
// Fallback for GetInputLayoutConstraint: install the subgraph based
// implementation into `func`.
| void OpMethFallbackFromSubgraph::impl( | |||||
| GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint) { | |||||
| func.Base::operator=(subgraph_detail::get_input_layout_constraint); | |||||
| } | |||||
| void OpMethFallbackFromSubgraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | void OpMethFallbackFromSubgraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | ||||
| func.Base::operator=(subgraph_detail::make_backward_graph); | func.Base::operator=(subgraph_detail::make_backward_graph); | ||||
| } | } | ||||
| @@ -73,6 +73,9 @@ OpMethType(ApplyOnVarNode, | |||||
| OpMethType(InferOutputAttrsFallible, | OpMethType(InferOutputAttrsFallible, | ||||
| decltype(OpDef::infer_output_attrs_fallible)); | decltype(OpDef::infer_output_attrs_fallible)); | ||||
| OpMethType(GetInputLayoutConstraint, | |||||
| decltype(OpDef::get_input_layout_constraint)); | |||||
| OpMethType(GradMaker, | OpMethType(GradMaker, | ||||
| decltype(OpDef::make_backward_graph)); | decltype(OpDef::make_backward_graph)); | ||||
| @@ -119,6 +122,8 @@ struct OpMethFallbackByProxyGraph : OpMethImplBase { | |||||
| static void impl(ApplyOnPhysicalTensor& func, op_meth_tag::ApplyOnPhysicalTensor); | static void impl(ApplyOnPhysicalTensor& func, op_meth_tag::ApplyOnPhysicalTensor); | ||||
| static void impl( | static void impl( | ||||
| InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | ||||
| static void impl( | |||||
| GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint); | |||||
| static void impl(GradMaker& func, op_meth_tag::GradMaker); | static void impl(GradMaker& func, op_meth_tag::GradMaker); | ||||
| }; | }; | ||||
| @@ -128,6 +133,8 @@ struct OpMethFallbackFromSubgraph : OpMethImplBase { | |||||
| static void impl(ApplyOnVarNode& func, op_meth_tag::ApplyOnVarNode); | static void impl(ApplyOnVarNode& func, op_meth_tag::ApplyOnVarNode); | ||||
| static void impl( | static void impl( | ||||
| InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | ||||
| static void impl( | |||||
| GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint); | |||||
| static void impl(GradMaker& func, op_meth_tag::GradMaker); | static void impl(GradMaker& func, op_meth_tag::GradMaker); | ||||
| }; | }; | ||||
| @@ -179,6 +186,7 @@ struct OpTrait { | |||||
| ApplyOnDeviceTensorND apply_on_device_tensornd; | ApplyOnDeviceTensorND apply_on_device_tensornd; | ||||
| ApplyOnVarNode apply_on_var_node; | ApplyOnVarNode apply_on_var_node; | ||||
| InferOutputAttrsFallible infer_output_attrs_fallible; | InferOutputAttrsFallible infer_output_attrs_fallible; | ||||
| GetInputLayoutConstraint get_input_layout_constraint; | |||||
| GradMaker make_backward_graph; | GradMaker make_backward_graph; | ||||
| Props props; | Props props; | ||||
| HashFunc hash; | HashFunc hash; | ||||
| @@ -199,6 +207,7 @@ struct OpTrait { | |||||
| cb(apply_on_device_tensornd) \ | cb(apply_on_device_tensornd) \ | ||||
| cb(apply_on_var_node) \ | cb(apply_on_var_node) \ | ||||
| cb(infer_output_attrs_fallible) \ | cb(infer_output_attrs_fallible) \ | ||||
| cb(get_input_layout_constraint) \ | |||||
| cb(make_backward_graph) \ | cb(make_backward_graph) \ | ||||
| cb(props) \ | cb(props) \ | ||||
| cb(hash) \ | cb(hash) \ | ||||
| @@ -117,7 +117,7 @@ void InputCallback::scn_do_execute() { | |||||
| layout.init_contiguous_stride(); | layout.init_contiguous_stride(); | ||||
| dev_tensor.reset(dev_tensor.storage(), layout); | dev_tensor.reset(dev_tensor.storage(), layout); | ||||
| } | } | ||||
| output(0)->reset_dev_tensor_from_tensor(dev_tensor); | |||||
| output(0)->force_assign_dev_tensor_from_tensor(dev_tensor); | |||||
| } | } | ||||
| cg::OperatorNodeBase* InputCallback::shallow_copy( | cg::OperatorNodeBase* InputCallback::shallow_copy( | ||||
| @@ -311,7 +311,7 @@ cg::OperatorNodeBase::NodeProp* MutableTensor::do_make_node_prop() const { | |||||
| } | } | ||||
| void MutableTensor::scn_do_execute() { | void MutableTensor::scn_do_execute() { | ||||
| output(0)->reset_dev_tensor_from_tensor(*m_dev_tensor); | |||||
| output(0)->force_assign_dev_tensor_from_tensor(*m_dev_tensor); | |||||
| } | } | ||||
| void MutableTensor::init_output_static_infer_desc() { | void MutableTensor::init_output_static_infer_desc() { | ||||
| @@ -83,28 +83,18 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | |||||
| SmallVector<TensorPtr> apply_on_physical_tensor( | SmallVector<TensorPtr> apply_on_physical_tensor( | ||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs, | const OpDef& def, const SmallVector<TensorPtr>& inputs, | ||||
| SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | ||||
| auto& input = inputs[0]; | |||||
| TensorShape target_shape; | |||||
| if (validated) { | |||||
| target_shape = output_descs[0].layout; | |||||
| } else { | |||||
| cg::copy_tensor_value_to_shape( | |||||
| target_shape, inputs[1]->get_value().proxy_to_default_cpu()); | |||||
| } | |||||
| TensorPtr output = Tensor::make( | |||||
| TensorLayout(target_shape, input->dtype()), input->comp_node()); | |||||
| if (output->layout().is_empty()) { | |||||
| return {output}; | |||||
| } | |||||
| if (input->shape().eq_shape(output->shape())) { | |||||
| mgb_assert(input->layout().eq_layout(output->layout())); | |||||
| output->dev_tensor().copy_from_fixlayout(input->dev_tensor()); | |||||
| } else { | |||||
| TensorLayout input_layout = input->layout().broadcast(output->shape()); | |||||
| output->dev_tensor().copy_from_fixlayout( | |||||
| input->dev_tensor().sub(SubTensorSpec::make_from_layout(input_layout))); | |||||
| } | |||||
| return {output}; | |||||
| def.cast_final_safe<Broadcast>(); | |||||
| size_t nr_inp = inputs.size(); | |||||
| mgb_assert(nr_inp == 2, "Broadcast expects 2 inputs; got %lu actually", nr_inp); | |||||
| auto&& src = inputs[0]; | |||||
| auto&& tshp_nd = inputs[1]; | |||||
| auto slayout = src->layout(); | |||||
| TensorShape tshp; | |||||
| cg::copy_tensor_value_to_shape(tshp, tshp_nd->get_value().proxy_to_default_cpu()); | |||||
| TensorLayout tlayout = slayout.broadcast(tshp); | |||||
| // memory forward | |||||
| return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||||
| } | } | ||||
| OP_TRAIT_REG(Broadcast, Broadcast, opr::Broadcast) | OP_TRAIT_REG(Broadcast, Broadcast, opr::Broadcast) | ||||
| @@ -184,10 +174,6 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| auto&& tshp_nd = inputs[1]; | auto&& tshp_nd = inputs[1]; | ||||
| auto slayout = src->layout(); | auto slayout = src->layout(); | ||||
| if (validated) { | |||||
| return {Tensor::make(src->blob(), 0, output_descs[0].layout)}; | |||||
| } | |||||
| TensorShape tshp; | TensorShape tshp; | ||||
| cg::copy_tensor_value_to_shape(tshp, tshp_nd->get_value().proxy_to_default_cpu()); | cg::copy_tensor_value_to_shape(tshp, tshp_nd->get_value().proxy_to_default_cpu()); | ||||
| if (op_def.axis != opr::Reshape::Param::INVALID_AXIS) { | if (op_def.axis != opr::Reshape::Param::INVALID_AXIS) { | ||||
| @@ -195,13 +181,39 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| tshp[op_def.axis] = 1; | tshp[op_def.axis] = 1; | ||||
| tshp[op_def.axis] = src->layout().total_nr_elems() / tshp.total_nr_elems(); | tshp[op_def.axis] = src->layout().total_nr_elems() / tshp.total_nr_elems(); | ||||
| } | } | ||||
| return {Tensor::make(src->blob(), 0, slayout.reshape(tshp))}; | |||||
| TensorLayout tlayout; | |||||
| mgb_assert(slayout.try_reshape(tlayout, tshp)); | |||||
| return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||||
| } | |||||
| SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
| auto&& op_def = def.cast_final_safe<Reshape>(); | |||||
| SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||||
| layout_checker[0] = [&](const TensorLayout& layout) { | |||||
| TensorShape tshp; | |||||
| TensorLayout ret; | |||||
| cg::copy_tensor_value_to_shape( | |||||
| tshp, inputs[1]->get_value().proxy_to_default_cpu()); | |||||
| if (op_def.axis != opr::Reshape::Param::INVALID_AXIS) { | |||||
| mgb_assert(tshp[op_def.axis] == -1); | |||||
| tshp[op_def.axis] = 1; | |||||
| tshp[op_def.axis] = layout.total_nr_elems() / tshp.total_nr_elems(); | |||||
| } | |||||
| if (layout.try_reshape(ret, tshp)) { | |||||
| return true; | |||||
| } else { | |||||
| return false; | |||||
| } | |||||
| }; | |||||
| return layout_checker; | |||||
| } | } | ||||
| OP_TRAIT_REG(Reshape, Reshape) | OP_TRAIT_REG(Reshape, Reshape) | ||||
| .apply_on_var_node(apply_on_var_node) | .apply_on_var_node(apply_on_var_node) | ||||
| .infer_output_attrs_fallible(infer_output_attrs_fallible) | .infer_output_attrs_fallible(infer_output_attrs_fallible) | ||||
| .apply_on_physical_tensor(apply_on_physical_tensor) | .apply_on_physical_tensor(apply_on_physical_tensor) | ||||
| .get_input_layout_constraint(get_input_layout_constraint) | |||||
| .fallback(); | .fallback(); | ||||
| } // namespace reshape | } // namespace reshape | ||||
| @@ -220,12 +220,22 @@ cg::OperatorNodeBase* apply_inplace_add_on_var_node( | |||||
| SmallVector<TensorPtr> apply_inplace_add_on_physical_tensor( | SmallVector<TensorPtr> apply_inplace_add_on_physical_tensor( | ||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs, | const OpDef& def, const SmallVector<TensorPtr>& inputs, | ||||
| SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | ||||
| mgb_assert( | |||||
| inputs[0]->blob().use_count() == 1 && inputs[0]->blob()->storage().unique(), | |||||
| "This inplace modification may change the elements of other tensors. " | |||||
| "Please set MEGENGINE_INPLACE_UPDATE to 0 to ensure the program runs " | |||||
| "correctly."); | |||||
| auto dest = inputs[0], delta = inputs[1], alpha = inputs[2], beta = inputs[3]; | auto dest = inputs[0], delta = inputs[1], alpha = inputs[2], beta = inputs[3]; | ||||
| if (!(inputs[0]->blob().unique() && inputs[0]->blob()->storage().unique())) { | |||||
| mgb_log_warn( | |||||
| "This inplace modification may change the elements of other tensors. " | |||||
| "Fallback to non-inplace update."); | |||||
| DeviceTensorStorage storage; | |||||
| storage.reset(dest->comp_node(), dest->blob()->size(), dest->blob()->storage()); | |||||
| storage = storage.sub(dest->offset()); | |||||
| DeviceTensorND dv; | |||||
| dv.reset(storage, dest->layout()); | |||||
| DeviceTensorND dv_new; | |||||
| dv_new.copy_from(dv); | |||||
| dest = Tensor::make(dv_new); | |||||
| } | |||||
| auto tensor_to_scalar = [](const TensorPtr& tensor) -> float { | auto tensor_to_scalar = [](const TensorPtr& tensor) -> float { | ||||
| return *tensor->get_value().ptr<float>(); | return *tensor->get_value().ptr<float>(); | ||||
| }; | }; | ||||
| @@ -54,7 +54,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs, | const OpDef& def, const SmallVector<TensorPtr>& inputs, | ||||
| SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | ||||
| if (memory_forward_success(def, inputs)) { | if (memory_forward_success(def, inputs)) { | ||||
| return {Tensor::make(inputs[0]->blob(), 0, inputs[0]->layout())}; | |||||
| return {Tensor::make( | |||||
| inputs[0]->blob(), inputs[0]->offset(), inputs[0]->layout())}; | |||||
| } | } | ||||
| return proxy_graph_detail::apply_on_physical_tensor( | return proxy_graph_detail::apply_on_physical_tensor( | ||||
| def, inputs, output_descs, validated); | def, inputs, output_descs, validated); | ||||
| @@ -73,11 +74,21 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | |||||
| return {output_descs, validated}; | return {output_descs, validated}; | ||||
| } | } | ||||
| SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
| SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||||
| layout_checker[0] = [](const TensorLayout& layout) { | |||||
| return layout.is_contiguous(); | |||||
| }; | |||||
| return layout_checker; | |||||
| } | |||||
| OP_TRAIT_REG(Reduce, Reduce, opr::Reduce) | OP_TRAIT_REG(Reduce, Reduce, opr::Reduce) | ||||
| .make_from_op_node(make_from_op_node) | .make_from_op_node(make_from_op_node) | ||||
| .apply_on_var_node(apply_on_var_node) | .apply_on_var_node(apply_on_var_node) | ||||
| .apply_on_physical_tensor(apply_on_physical_tensor) | .apply_on_physical_tensor(apply_on_physical_tensor) | ||||
| .infer_output_attrs_fallible(infer_output_attrs_fallible) | .infer_output_attrs_fallible(infer_output_attrs_fallible) | ||||
| .get_input_layout_constraint(get_input_layout_constraint) | |||||
| .fallback(); | .fallback(); | ||||
| } // namespace reduce | } // namespace reduce | ||||
| } // namespace | } // namespace | ||||
| @@ -594,6 +594,13 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible<Dro | |||||
| return {dests, true}; | return {dests, true}; | ||||
| } | } | ||||
| template <typename Op> | |||||
| SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
| SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||||
| return layout_checker; | |||||
| } | |||||
| } // anonymous namespace | } // anonymous namespace | ||||
| Handle new_handle(CompNode comp_node, uint64_t seed) { | Handle new_handle(CompNode comp_node, uint64_t seed) { | ||||
| @@ -622,6 +629,7 @@ CompNode get_rng_handle_compnode(Handle handle) { | |||||
| .apply_on_var_node(apply_on_var_node<NAME, Output>) \ | .apply_on_var_node(apply_on_var_node<NAME, Output>) \ | ||||
| .apply_on_physical_tensor(apply_on_physical_tensor<NAME>) \ | .apply_on_physical_tensor(apply_on_physical_tensor<NAME>) \ | ||||
| .infer_output_attrs_fallible(infer_output_attrs_fallible<NAME>) \ | .infer_output_attrs_fallible(infer_output_attrs_fallible<NAME>) \ | ||||
| .get_input_layout_constraint(get_input_layout_constraint<NAME>) \ | |||||
| .fallback(); \ | .fallback(); \ | ||||
| } | } | ||||
| @@ -60,9 +60,55 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||||
| return opr::Dimshuffle::make(inputs[0], ds.pattern, 0UL, config); | return opr::Dimshuffle::make(inputs[0], ds.pattern, 0UL, config); | ||||
| } | } | ||||
| SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||||
| SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||||
| auto&& ds = static_cast<const Dimshuffle&>(def); | |||||
| mgb_assert( | |||||
| ds.pattern.size() <= TensorShape::MAX_NDIM, | |||||
| "Dimshuffle pattern exceeds max length of %zd", TensorShape::MAX_NDIM); | |||||
| size_t nr_inp = inputs.size(); | |||||
| mgb_assert(nr_inp == 1, "Dimshuffle expects 1 inputs; got %lu actually", nr_inp); | |||||
| auto&& src = inputs[0]; | |||||
| auto inp_layout = src->layout(); | |||||
| size_t pattern_ndim = *std::max_element(ds.pattern.begin(), ds.pattern.end()) + 1; | |||||
| mgb_assert( | |||||
| inp_layout.ndim == pattern_ndim, | |||||
| "input ndim mismatch for Dimshuffle: expect=%zd actual=%zd", pattern_ndim, | |||||
| inp_layout.ndim); | |||||
| TensorLayout out_layout{inp_layout.dtype}; | |||||
| out_layout.ndim = ds.pattern.size(); | |||||
| size_t idx = 0; | |||||
| bool input_used[TensorLayout::MAX_NDIM] = {0}; | |||||
| for (auto i : ds.pattern) { | |||||
| if (i < 0) { | |||||
| out_layout.shape[idx] = 1; | |||||
| out_layout.stride[idx] = 1; | |||||
| } else { | |||||
| input_used[i] = true; | |||||
| out_layout.shape[idx] = inp_layout.shape[i]; | |||||
| out_layout.stride[idx] = inp_layout.stride[i]; | |||||
| } | |||||
| ++idx; | |||||
| } | |||||
| if (out_layout.is_contiguous()) { | |||||
| out_layout.init_contiguous_stride(); | |||||
| } | |||||
| for (size_t i = 0; i < pattern_ndim; ++i) { | |||||
| mgb_assert( | |||||
| input_used[i] || inp_layout.shape[i] == 1, | |||||
| "non-1 dim discarded in Dimshuffle: ishp=%s dim=%zd", | |||||
| inp_layout.megdnn::TensorShape::to_string().c_str(), i); | |||||
| } | |||||
| // memory forward | |||||
| return {Tensor::make(src->blob(), src->offset(), out_layout)}; | |||||
| } | |||||
| OP_TRAIT_REG(Dimshuffle, Dimshuffle, opr::Dimshuffle) | OP_TRAIT_REG(Dimshuffle, Dimshuffle, opr::Dimshuffle) | ||||
| .make_from_op_node(make_from_op_node) | .make_from_op_node(make_from_op_node) | ||||
| .apply_on_var_node(apply_on_var_node) | .apply_on_var_node(apply_on_var_node) | ||||
| .apply_on_physical_tensor(apply_on_physical_tensor) | |||||
| .fallback(); | .fallback(); | ||||
| } // namespace dimshuffle | } // namespace dimshuffle | ||||
| } // namespace | } // namespace | ||||
| @@ -80,7 +126,25 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||||
| return opr::AxisAddRemove::make(inputs[0], param, config); | return opr::AxisAddRemove::make(inputs[0], param, config); | ||||
| } | } | ||||
| OP_TRAIT_REG(AddAxis, AddAxis).apply_on_var_node(apply_on_var_node).fallback(); | |||||
| SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||||
| SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||||
| auto&& op_def = def.cast_final_safe<AddAxis>(); | |||||
| size_t nr_inp = inputs.size(); | |||||
| mgb_assert(nr_inp == 1, "AddAxis expects 1 inputs; got %lu actually", nr_inp); | |||||
| auto&& src = inputs[0]; | |||||
| auto tlayout = src->layout(); | |||||
| for (auto&& i : op_def.axis) { | |||||
| tlayout.add_axis_cont_inplace(i); | |||||
| } | |||||
| // memory forward | |||||
| return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||||
| } | |||||
| OP_TRAIT_REG(AddAxis, AddAxis) | |||||
| .apply_on_var_node(apply_on_var_node) | |||||
| .apply_on_physical_tensor(apply_on_physical_tensor) | |||||
| .fallback(); | |||||
| } // namespace add_axis | } // namespace add_axis | ||||
| } // namespace | } // namespace | ||||
| @@ -97,7 +161,36 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||||
| return opr::AxisAddRemove::make(inputs[0], param, config); | return opr::AxisAddRemove::make(inputs[0], param, config); | ||||
| } | } | ||||
| OP_TRAIT_REG(RemoveAxis, RemoveAxis).apply_on_var_node(apply_on_var_node).fallback(); | |||||
| SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||||
| SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||||
| auto&& op_def = def.cast_final_safe<RemoveAxis>(); | |||||
| size_t nr_inp = inputs.size(); | |||||
| mgb_assert(nr_inp == 1, "RemoveAxis expects 1 inputs; got %lu actually", nr_inp); | |||||
| auto&& src = inputs[0]; | |||||
| auto tlayout = src->layout(); | |||||
| for (auto&& i : op_def.axis) { | |||||
| if (tlayout.ndim == 1) { | |||||
| mgb_assert( | |||||
| tlayout.shape[0] == 1 && i == 0, | |||||
| "can not remove axis %u from tensor of shape=%s", i, | |||||
| tlayout.megdnn::TensorShape::to_string().c_str()); | |||||
| } else { | |||||
| mgb_assert( | |||||
| i < tlayout.ndim && tlayout.shape[i] == 1, | |||||
| "can not remove axis %u from tensor of shape=%s", i, | |||||
| tlayout.megdnn::TensorShape::to_string().c_str()); | |||||
| tlayout.remove_axis_inplace(i); | |||||
| } | |||||
| } | |||||
| // memory forward | |||||
| return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||||
| } | |||||
| OP_TRAIT_REG(RemoveAxis, RemoveAxis) | |||||
| .apply_on_var_node(apply_on_var_node) | |||||
| .apply_on_physical_tensor(apply_on_physical_tensor) | |||||
| .fallback(); | |||||
| } // namespace remove_axis | } // namespace remove_axis | ||||
| } // namespace | } // namespace | ||||
| @@ -411,7 +411,7 @@ struct ComputingGraphHolder { | |||||
| executable->wait(); | executable->wait(); | ||||
| size_t nr_inputs = inputs.size(); | size_t nr_inputs = inputs.size(); | ||||
| for (size_t i = 0; i < nr_inputs; ++i) { | for (size_t i = 0; i < nr_inputs; ++i) { | ||||
| auto input_dev_tensor = input_tensors[i]->dev_tensor(); | |||||
| auto input_dev_tensor = input_tensors[i]->dev_tensor(false); | |||||
| inputs[i].device_value->reset( | inputs[i].device_value->reset( | ||||
| input_dev_tensor.storage(), input_dev_tensor.layout()); | input_dev_tensor.storage(), input_dev_tensor.layout()); | ||||
| if (inputs[i].host_value) { | if (inputs[i].host_value) { | ||||
| @@ -95,7 +95,13 @@ const Blob::RawStorage& Blob::storage() { | |||||
| Tensor::Tensor( | Tensor::Tensor( | ||||
| BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv) | BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv) | ||||
| : m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) {} | |||||
| : m_cn(blob->comp_node()), | |||||
| m_shape(layout), | |||||
| m_dtype(layout.dtype), | |||||
| m_layout(layout), | |||||
| m_blob(std::move(blob)), | |||||
| m_offset(offset), | |||||
| m_value(hv) {} | |||||
| Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | ||||
| constexpr int size_threshold = TensorShape::MAX_NDIM; | constexpr int size_threshold = TensorShape::MAX_NDIM; | ||||
| @@ -107,7 +113,12 @@ Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | |||||
| MGB_RECORD_EVENT( | MGB_RECORD_EVENT( | ||||
| profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(), | profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(), | ||||
| dev_tensor().raw_ptr()); | dev_tensor().raw_ptr()); | ||||
| dev_tensor().copy_from_fixlayout(hv); | |||||
| DeviceTensorStorage storage; | |||||
| storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||||
| storage = storage.sub(m_offset); | |||||
| DeviceTensorND dv; | |||||
| dv.reset(storage, m_layout); | |||||
| dv.copy_from_fixlayout(hv); | |||||
| // even though hv is saved in m_value, Tensor itself could be | // even though hv is saved in m_value, Tensor itself could be | ||||
| // released before copy completes | // released before copy completes | ||||
| MGB_RECORD_EVENT( | MGB_RECORD_EVENT( | ||||
| @@ -117,25 +128,36 @@ Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | |||||
| } | } | ||||
| } | } | ||||
| Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) { | |||||
| Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) | |||||
| : m_offset(dv.storage().offset()), | |||||
| m_cn(dv.comp_node()), | |||||
| m_shape(dv.layout()), | |||||
| m_dtype(dv.layout().dtype), | |||||
| m_blob(Blob::make(dv.storage())), | |||||
| m_layout(dv.layout()) { | |||||
| if (!hv.empty()) { | if (!hv.empty()) { | ||||
| mgb_assert(dv.comp_node() == hv.comp_node()); | mgb_assert(dv.comp_node() == hv.comp_node()); | ||||
| mgb_assert(dv.dtype() == hv.dtype()); | mgb_assert(dv.dtype() == hv.dtype()); | ||||
| mgb_assert(dv.shape().eq_shape(hv.shape())); | mgb_assert(dv.shape().eq_shape(hv.shape())); | ||||
| m_value = hv; | m_value = hv; | ||||
| } | } | ||||
| m_layout = dv.layout(); | |||||
| m_blob = Blob::make(dv.storage()); | |||||
| m_offset = dv.storage().offset(); | |||||
| } | } | ||||
| Tensor::Tensor(const TensorLayout& layout, const CompNode& cn) | Tensor::Tensor(const TensorLayout& layout, const CompNode& cn) | ||||
| : m_layout{layout}, | : m_layout{layout}, | ||||
| m_blob{Blob::make(cn, layout.span().dist_byte())}, | m_blob{Blob::make(cn, layout.span().dist_byte())}, | ||||
| m_offset{0} {} | |||||
| m_offset{0}, | |||||
| m_cn(cn), | |||||
| m_shape(layout), | |||||
| m_dtype(layout.dtype) {} | |||||
| Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout) | Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout) | ||||
| : m_layout{layout}, m_blob{blob}, m_offset{offset} {} | |||||
| : m_layout{layout}, | |||||
| m_blob{blob}, | |||||
| m_offset{offset}, | |||||
| m_cn(blob->comp_node()), | |||||
| m_shape(layout), | |||||
| m_dtype(layout.dtype) {} | |||||
| TensorPtr Tensor::make(const HostTensorND& hv) { | TensorPtr Tensor::make(const HostTensorND& hv) { | ||||
| auto&& blob = MultiCNConstTensorCache::inst().lookup(hv); | auto&& blob = MultiCNConstTensorCache::inst().lookup(hv); | ||||
| @@ -145,10 +167,45 @@ TensorPtr Tensor::make(const HostTensorND& hv) { | |||||
| return std::make_shared<Tensor>(hv); | return std::make_shared<Tensor>(hv); | ||||
| } | } | ||||
| DeviceTensorND Tensor::dev_tensor() { | |||||
| void Tensor::to_contiguous_inplace(VarNode::LayoutConstraintCallback& layout_checker) { | |||||
| MGB_LOCK_GUARD(m_blob_mtx); | |||||
| if (!m_layout.is_empty() && !layout_checker(m_layout)) { | |||||
| DeviceTensorStorage storage; | |||||
| storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||||
| storage = storage.sub(m_offset); | |||||
| DeviceTensorND dv; | |||||
| dv.reset(storage, m_layout); | |||||
| DeviceTensorND dv_contig; | |||||
| dv_contig.copy_from(dv); | |||||
| m_layout = dv_contig.layout(); | |||||
| std::atomic_store(&m_blob, Blob::make(dv_contig.storage())); | |||||
| mgb_assert(m_layout.is_contiguous()); | |||||
| m_offset = 0; | |||||
| } | |||||
| } | |||||
| void Tensor::to_contiguous_inplace() { | |||||
| static VarNode::LayoutConstraintCallback default_cb = | |||||
| [](const TensorLayout& layout) { return layout.is_contiguous(); }; | |||||
| to_contiguous_inplace(default_cb); | |||||
| } | |||||
| void Tensor::assign_from_dev_tensor(DeviceTensorND dv) { | |||||
| MGB_LOCK_GUARD(m_blob_mtx); | |||||
| std::atomic_store(&m_blob, Blob::make(dv.storage())); | |||||
| m_offset = dv.storage().offset(); | |||||
| m_layout = dv.layout(); | |||||
| } | |||||
| DeviceTensorND Tensor::dev_tensor(bool contiguous) { | |||||
| mgb_assert(m_blob, "uninitialized tensor."); | mgb_assert(m_blob, "uninitialized tensor."); | ||||
| if (contiguous) { | |||||
| to_contiguous_inplace(); | |||||
| } | |||||
| MGB_LOCK_GUARD(m_blob_mtx); | |||||
| DeviceTensorStorage storage; | DeviceTensorStorage storage; | ||||
| storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage()); | |||||
| storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||||
| storage = storage.sub(m_offset); | storage = storage.sub(m_offset); | ||||
| DeviceTensorND ret; | DeviceTensorND ret; | ||||
| ret.reset(storage, m_layout); | ret.reset(storage, m_layout); | ||||
| @@ -156,16 +213,22 @@ DeviceTensorND Tensor::dev_tensor() { | |||||
| } | } | ||||
| void Tensor::fetch_value() { | void Tensor::fetch_value() { | ||||
| MGB_LOCK_GUARD(m_mtx); | |||||
| MGB_LOCK_GUARD(m_blob_mtx); | |||||
| MGB_LOCK_GUARD(m_value_mtx); | |||||
| if (m_value.empty()) { | if (m_value.empty()) { | ||||
| m_value.copy_from(dev_tensor()); | |||||
| DeviceTensorStorage storage; | |||||
| storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||||
| storage = storage.sub(m_offset); | |||||
| DeviceTensorND dv; | |||||
| dv.reset(storage, m_layout); | |||||
| m_value.copy_from(dv); | |||||
| m_value_ready.reset(EventPool::without_timer().alloc(comp_node())); | m_value_ready.reset(EventPool::without_timer().alloc(comp_node())); | ||||
| m_value_ready->record(); | m_value_ready->record(); | ||||
| } | } | ||||
| } | } | ||||
| bool Tensor::value_fetched() { | bool Tensor::value_fetched() { | ||||
| MGB_LOCK_GUARD(m_mtx); | |||||
| MGB_LOCK_GUARD(m_value_mtx); | |||||
| return m_value.layout().ndim != 0; | return m_value.layout().ndim != 0; | ||||
| } | } | ||||
| @@ -178,7 +241,7 @@ const HostTensorND& Tensor::get_value() { | |||||
| } | } | ||||
| const HostTensorND* Tensor::try_get_value() { | const HostTensorND* Tensor::try_get_value() { | ||||
| MGB_LOCK_GUARD(m_mtx); | |||||
| MGB_LOCK_GUARD(m_value_mtx); | |||||
| if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) { | if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) { | ||||
| return &m_value; | return &m_value; | ||||
| } | } | ||||
| @@ -193,7 +256,7 @@ TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) { | |||||
| } | } | ||||
| TensorPtr Tensor::sub(size_t offset, TensorShape shape) { | TensorPtr Tensor::sub(size_t offset, TensorShape shape) { | ||||
| TensorLayout layout(shape, m_layout.dtype); | |||||
| TensorLayout layout(shape, m_dtype); | |||||
| return Tensor::make(m_blob, offset + m_offset, layout); | return Tensor::make(m_blob, offset + m_offset, layout); | ||||
| } | } | ||||
| @@ -73,7 +73,7 @@ public: | |||||
| static SymbolVar make(ComputingGraph& graph, Tensor& tensor) { | static SymbolVar make(ComputingGraph& graph, Tensor& tensor) { | ||||
| auto opr = graph.insert_opr(std::make_unique<InputPlaceholder>(graph, &tensor)); | auto opr = graph.insert_opr(std::make_unique<InputPlaceholder>(graph, &tensor)); | ||||
| auto var = opr->output(0); | auto var = opr->output(0); | ||||
| auto&& dev_tensor = tensor.dev_tensor(); | |||||
| auto&& dev_tensor = tensor.dev_tensor(false); | |||||
| var->m_comp_node = dev_tensor.comp_node(); | var->m_comp_node = dev_tensor.comp_node(); | ||||
| var->m_shape = dev_tensor.shape(); | var->m_shape = dev_tensor.shape(); | ||||
| if (dev_tensor.empty()) { | if (dev_tensor.empty()) { | ||||
| @@ -81,10 +81,7 @@ public: | |||||
| layout.init_contiguous_stride(); | layout.init_contiguous_stride(); | ||||
| dev_tensor.reset(dev_tensor.storage(), layout); | dev_tensor.reset(dev_tensor.storage(), layout); | ||||
| } | } | ||||
| var->m_dev_tensor = dev_tensor; | |||||
| var->m_mem_plan.reset_from_owner_var() | |||||
| .chunk() | |||||
| .mem_alloc_status.set_from_owner_var(); | |||||
| var->force_assign_dev_tensor_from_tensor(dev_tensor); | |||||
| return var; | return var; | ||||
| } | } | ||||
| @@ -314,15 +314,11 @@ public: | |||||
| size_t idx = 0; | size_t idx = 0; | ||||
| for (auto&& input : opr_inputs) { | for (auto&& input : opr_inputs) { | ||||
| mgb_assert(input->owner_opr()->same_type<InputPlaceholder>()); | mgb_assert(input->owner_opr()->same_type<InputPlaceholder>()); | ||||
| input->m_dev_tensor.storage({}); | |||||
| auto&& dev_tensor = inputs[input_remap[idx]]->dev_tensor(); | |||||
| auto&& dev_tensor = inputs[input_remap[idx]]->dev_tensor(false); | |||||
| auto&& layout = dev_tensor.layout(); | auto&& layout = dev_tensor.layout(); | ||||
| input->shape(dev_tensor.shape()); | input->shape(dev_tensor.shape()); | ||||
| auto&& chk = input->m_mem_plan.reset_from_owner_var().chunk(); | |||||
| input->m_dev_tensor.reset(dev_tensor.storage(), layout); | |||||
| input->m_mem_plan.layout(layout); | |||||
| chk.mem_alloc_status.set_from_owner_var(); | |||||
| input->force_assign_dev_tensor_from_tensor(dev_tensor); | |||||
| mgb_assert(input->comp_node() == dev_tensor.comp_node()); | mgb_assert(input->comp_node() == dev_tensor.comp_node()); | ||||
| mgb_assert(input->shape().eq_shape(layout)); | mgb_assert(input->shape().eq_shape(layout)); | ||||
| @@ -335,9 +331,14 @@ public: | |||||
| mgb_assert(m_opr->usable_output().size() == outputs.size()); | mgb_assert(m_opr->usable_output().size() == outputs.size()); | ||||
| ::mgb::opr::intl::WorkspaceLimitHook::set_impl( | ::mgb::opr::intl::WorkspaceLimitHook::set_impl( | ||||
| m_opr->owner_graph(), get_workspace_limit); | m_opr->owner_graph(), get_workspace_limit); | ||||
| size_t j = 0; | |||||
| for (auto&& var : m_opr->output()) { | for (auto&& var : m_opr->output()) { | ||||
| auto&& chk = var->m_mem_plan.reset_from_owner_var().chunk(); | auto&& chk = var->m_mem_plan.reset_from_owner_var().chunk(); | ||||
| chk.mem_alloc_status.set_from_owner_var(); | |||||
| } | |||||
| m_opr->mem_plan_fwd_in2out_readonly(); | |||||
| size_t j = 0; | |||||
| for (auto&& var : m_opr->output()) { | |||||
| if (var->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) { | if (var->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) { | ||||
| TensorLayout layout{var->shape(), var->dtype(), var->format()}; | TensorLayout layout{var->shape(), var->dtype(), var->format()}; | ||||
| var->m_dev_tensor = BlobManager::inst()->alloc_workspace_with_defrag( | var->m_dev_tensor = BlobManager::inst()->alloc_workspace_with_defrag( | ||||
| @@ -349,18 +350,16 @@ public: | |||||
| mgb_assert(var->comp_node() == tensor->comp_node()); | mgb_assert(var->comp_node() == tensor->comp_node()); | ||||
| mgb_assert(var->shape().eq_shape(layout)); | mgb_assert(var->shape().eq_shape(layout)); | ||||
| mgb_assert(var->dtype() == layout.dtype); | mgb_assert(var->dtype() == layout.dtype); | ||||
| var->assign_dev_tensor_from_tensor(tensor->dev_tensor()); | |||||
| if (var->m_mem_plan.chunk().owner_var != var) { | |||||
| tensor->assign_from_dev_tensor( | |||||
| var->m_dev_tensor); // memory forwarding | |||||
| } else { | |||||
| var->assign_dev_tensor_from_tensor(tensor->dev_tensor()); | |||||
| } | |||||
| ++j; | ++j; | ||||
| } | } | ||||
| chk.mem_alloc_status.set_from_owner_var(); | |||||
| } | } | ||||
| mgb_assert(j == outputs.size()); | mgb_assert(j == outputs.size()); | ||||
| // Memory forwarding was bypassed in megbrain with graph option | |||||
| // imerative_proxy_graph on, here we call mem_plan_fwd_in2out_readonly | |||||
| // to initialize some opr(e.g. Subtensor)'s internal state | |||||
| // TODO: implement memory forwarding | |||||
| m_opr->mem_plan_fwd_in2out_readonly(); | |||||
| { | { | ||||
| // some opr (e.g. Reduce) rely on on_mem_status_changed to set | // some opr (e.g. Reduce) rely on on_mem_status_changed to set | ||||
| // input/output tensor correctly, since we bypass var_node_mem_mgr | // input/output tensor correctly, since we bypass var_node_mem_mgr | ||||
| @@ -840,7 +839,7 @@ public: | |||||
| Tensor::make(output_descs[i].layout, output_descs[i].comp_node); | Tensor::make(output_descs[i].layout, output_descs[i].comp_node); | ||||
| } | } | ||||
| auto raw_outputs = to_raw_ptr_array(outputs); | |||||
| auto raw_outputs = to_raw_ptr_array(outputs, false); | |||||
| CompNode::UnorderedSet used_cns; | CompNode::UnorderedSet used_cns; | ||||
| for (auto&& out : raw_outputs) { | for (auto&& out : raw_outputs) { | ||||
| auto cn = out->comp_node(); | auto cn = out->comp_node(); | ||||
| @@ -9,8 +9,12 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #include "../mgb_cg_impl.h" | |||||
| #include "./mini_graph.h" | #include "./mini_graph.h" | ||||
| #include "megbrain/opr/io.h" | |||||
| using LayoutConstraintLevel = mgb::cg::VarNodeMemManager::LayoutConstraintLevel; | |||||
| using LayoutConstraintCallback = mgb::VarNode::LayoutConstraintCallback; | |||||
| namespace mgb::imperative::proxy_graph { | namespace mgb::imperative::proxy_graph { | ||||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder); | MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder); | ||||
| @@ -34,4 +38,81 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| std::unordered_map<size_t, SmallVector<LayoutConstraintCallback>> | |||||
| input_layout_constraints_cache; | |||||
| SmallVector<LayoutConstraintCallback> get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
| auto get_input_layout_constraint_hash_key = | |||||
| [](const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
| XXHash state; | |||||
| size_t length = 0, data[1 + inputs.size()]; | |||||
| data[length++] = def.hash(); | |||||
| for (auto&& i : inputs) { | |||||
| data[length++] = mgb::hash(i->comp_node()); | |||||
| } | |||||
| state.update(data, length * sizeof(size_t)); | |||||
| return state.digest(); | |||||
| }; | |||||
| auto hash_key = get_input_layout_constraint_hash_key(def, inputs); | |||||
| auto&& iter = input_layout_constraints_cache.find(hash_key); | |||||
| if (iter != input_layout_constraints_cache.end()) { | |||||
| return iter->second; | |||||
| } | |||||
| static cg::ComputingGraphImpl* graph = | |||||
| imperative::ResourceManager::create_global<cg::ComputingGraphImpl>(); | |||||
| VarNodeArray vinputs(inputs.size()); | |||||
| for (size_t i = 0; i < inputs.size(); ++i) { | |||||
| OperatorNodeConfig config; | |||||
| auto&& layout = inputs[i]->layout(); | |||||
| layout.init_contiguous_stride(); | |||||
| vinputs[i] = graph->insert_opr(std::make_unique<mgb::opr::SharedDeviceTensor>( | |||||
| *graph, | |||||
| std::make_shared<DeviceTensorND>( | |||||
| inputs[i]->comp_node(), layout), | |||||
| false, config)) | |||||
| ->output(0); | |||||
| } | |||||
| auto&& opr = OpDef::apply_on_var_node(def, vinputs)[0]->owner_opr(); | |||||
| opr->add_input_layout_constraint(); | |||||
| SmallVector<LayoutConstraintCallback> res(inputs.size()); | |||||
| auto& mem_mgr = graph->var_node_mem_manager(); | |||||
| for (size_t i = 0; i < vinputs.size(); ++i) { | |||||
| auto& trait = mem_mgr.get_var_node_mem_trait(vinputs[i]); | |||||
| switch (trait.layout_constraint.level) { | |||||
| case LayoutConstraintLevel::CONTIG: | |||||
| res[i] = [](const TensorLayout& layout) { | |||||
| return layout.is_contiguous(); | |||||
| }; | |||||
| break; | |||||
| case LayoutConstraintLevel::MONOTONE: | |||||
| res[i] = [&trait](const TensorLayout& layout) { | |||||
| if (!layout.is_abs_monotonous_allow_brdcst()) { | |||||
| return false; | |||||
| } | |||||
| for (auto&& i : trait.layout_constraint.custom) | |||||
| if (!i(layout)) | |||||
| return false; | |||||
| return true; | |||||
| }; | |||||
| break; | |||||
| case LayoutConstraintLevel::NONE: | |||||
| if (!trait.layout_constraint.custom.empty()) { | |||||
| res[i] = [&trait](const TensorLayout& layout) { | |||||
| for (auto&& i : trait.layout_constraint.custom) | |||||
| if (!i(layout)) | |||||
| return false; | |||||
| return true; | |||||
| }; | |||||
| } | |||||
| break; | |||||
| default: | |||||
| mgb_throw(InternalError, "invalid layout_constraint_level"); | |||||
| } | |||||
| } | |||||
| input_layout_constraints_cache.emplace(hash_key, res); | |||||
| return res; | |||||
| } | |||||
| } // namespace mgb::imperative::proxy_graph_detail | } // namespace mgb::imperative::proxy_graph_detail | ||||
| @@ -17,6 +17,8 @@ | |||||
| #include "./op_trait.h" | #include "./op_trait.h" | ||||
| using LayoutConstraintCallback = mgb::VarNode::LayoutConstraintCallback; | |||||
| namespace mgb { | namespace mgb { | ||||
| namespace imperative { | namespace imperative { | ||||
| namespace subgraph_detail { | namespace subgraph_detail { | ||||
| @@ -73,6 +75,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| const std::shared_ptr<OpDef>& op, | const std::shared_ptr<OpDef>& op, | ||||
| const SmallVector<TensorPtr>& inputs, | const SmallVector<TensorPtr>& inputs, | ||||
| size_t nr_outputs) { | size_t nr_outputs) { | ||||
| auto&& constraints = OpDef::get_input_layout_constraint(*op, inputs); | |||||
| for (size_t idx = 0; idx < inputs.size(); ++idx) { | |||||
| auto&& layout_checker = constraints[idx]; | |||||
| if (layout_checker) { | |||||
| inputs[idx]->to_contiguous_inplace(layout_checker); | |||||
| } | |||||
| } | |||||
| // do not use inferred output_desc in subgraph | // do not use inferred output_desc in subgraph | ||||
| return OpDef::apply_on_physical_tensor(*op, inputs, output_descs, false); | return OpDef::apply_on_physical_tensor(*op, inputs, output_descs, false); | ||||
| }; | }; | ||||
| @@ -81,6 +90,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| return outputs; | return outputs; | ||||
| } | } | ||||
| SmallVector<LayoutConstraintCallback> get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
| SmallVector<LayoutConstraintCallback> res(inputs.size()); | |||||
| return res; | |||||
| } | |||||
| static EncodedSubgraph make_backward_graph_from_forward( | static EncodedSubgraph make_backward_graph_from_forward( | ||||
| const SmallVector<LogicalTensorDesc>& inputs, | const SmallVector<LogicalTensorDesc>& inputs, | ||||
| const SmallVector<bool>& input_requires_grad, | const SmallVector<bool>& input_requires_grad, | ||||
| @@ -78,6 +78,9 @@ public: | |||||
| static EncodedSubgraph make_forward_graph( | static EncodedSubgraph make_forward_graph( | ||||
| const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs); | const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs); | ||||
| static SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||||
| const OpTrait* trait() const; | const OpTrait* trait() const; | ||||
| std::string to_string() const; | std::string to_string() const; | ||||
| @@ -14,6 +14,7 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <mutex> | #include <mutex> | ||||
| #include "megbrain/graph.h" | |||||
| #include "megbrain/imperative/resource_manager.h" | #include "megbrain/imperative/resource_manager.h" | ||||
| #include "megbrain/tensor.h" | #include "megbrain/tensor.h" | ||||
| @@ -90,18 +91,24 @@ public: | |||||
| CompNode comp_node() const { | CompNode comp_node() const { | ||||
| mgb_assert(m_blob, "uninitialized tensor."); | mgb_assert(m_blob, "uninitialized tensor."); | ||||
| return m_blob->comp_node(); | |||||
| return m_cn; | |||||
| } | } | ||||
| DType dtype() const { return m_layout.dtype; } | |||||
| DType dtype() const { return m_dtype; } | |||||
| TensorLayout layout() const { return m_layout; } | TensorLayout layout() const { return m_layout; } | ||||
| const TensorShape& shape() const { return m_layout; } | |||||
| const TensorShape& shape() const { return m_shape; } | |||||
| size_t offset() const { return m_offset; } | size_t offset() const { return m_offset; } | ||||
| DeviceTensorND dev_tensor(); | |||||
| void to_contiguous_inplace(VarNode::LayoutConstraintCallback&); | |||||
| void to_contiguous_inplace(); | |||||
| DeviceTensorND dev_tensor(bool contiguous = true); | |||||
| void assign_from_dev_tensor(DeviceTensorND); | |||||
| static TensorPtr make_scalar(DTypeScalar value, CompNode cn); | static TensorPtr make_scalar(DTypeScalar value, CompNode cn); | ||||
| @@ -110,7 +117,7 @@ public: | |||||
| return make_scalar(value, m_blob->comp_node()); | return make_scalar(value, m_blob->comp_node()); | ||||
| } | } | ||||
| BlobPtr& blob() { return m_blob; } | |||||
| BlobPtr blob() { return m_blob; } | |||||
| void fetch_value(); | void fetch_value(); | ||||
| bool value_fetched(); | bool value_fetched(); | ||||
| @@ -131,10 +138,16 @@ public: | |||||
| static void static_initialize(); | static void static_initialize(); | ||||
| private: | private: | ||||
| TensorLayout m_layout; | |||||
| BlobPtr m_blob; | |||||
| size_t m_offset; | size_t m_offset; | ||||
| std::mutex m_mtx; | |||||
| const CompNode m_cn; | |||||
| const TensorShape m_shape; | |||||
| const DType m_dtype; | |||||
| std::mutex m_blob_mtx; | |||||
| BlobPtr m_blob; | |||||
| TensorLayout m_layout; | |||||
| std::mutex m_value_mtx; | |||||
| HostTensorND m_value; | HostTensorND m_value; | ||||
| EventPtr m_value_ready = nullptr; | EventPtr m_value_ready = nullptr; | ||||
| }; | }; | ||||
| @@ -33,6 +33,9 @@ EncodedSubgraph make_backward_graph( | |||||
| const SmallVector<bool>& input_requires_grad, | const SmallVector<bool>& input_requires_grad, | ||||
| const SmallVector<bool>& output_has_grad); | const SmallVector<bool>& output_has_grad); | ||||
| SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||||
| } // namespace proxy_graph_detail | } // namespace proxy_graph_detail | ||||
| } // namespace imperative | } // namespace imperative | ||||
| } // namespace mgb | } // namespace mgb | ||||
| @@ -36,6 +36,9 @@ EncodedSubgraph make_backward_graph( | |||||
| const SmallVector<bool>& input_requires_grad, | const SmallVector<bool>& input_requires_grad, | ||||
| const SmallVector<bool>& output_has_grad); | const SmallVector<bool>& output_has_grad); | ||||
| SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
| const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||||
| } // namespace subgraph_detail | } // namespace subgraph_detail | ||||
| } // namespace imperative | } // namespace imperative | ||||
| } // namespace mgb | } // namespace mgb | ||||
| @@ -322,7 +322,7 @@ void ComputingGraphImpl::free_varnode_storage(void* ptr) { | |||||
| m_var_node_pool.free_raw(ptr); | m_var_node_pool.free_raw(ptr); | ||||
| }; | }; | ||||
| OperatorNodeBase* ComputingGraphImpl::insert_opr( | |||||
| MGE_WIN_DECLSPEC_FUC OperatorNodeBase* ComputingGraphImpl::insert_opr( | |||||
| std::unique_ptr<OperatorNodeBase> opr_uniqp) { | std::unique_ptr<OperatorNodeBase> opr_uniqp) { | ||||
| auto opr = opr_uniqp.get(); | auto opr = opr_uniqp.get(); | ||||
| @@ -148,8 +148,8 @@ class ComputingGraphImpl final : public ComputingGraph { | |||||
| public: | public: | ||||
| class ComputingSequence; | class ComputingSequence; | ||||
| ComputingGraphImpl(); | |||||
| ~ComputingGraphImpl(); | |||||
| MGE_WIN_DECLSPEC_FUC ComputingGraphImpl(); | |||||
| MGE_WIN_DECLSPEC_FUC ~ComputingGraphImpl(); | |||||
| template <typename T> | template <typename T> | ||||
| static ComputingGraphImpl* downcast(T* ptr) = delete; | static ComputingGraphImpl* downcast(T* ptr) = delete; | ||||
| @@ -166,7 +166,8 @@ public: | |||||
| SmallVector<std::unique_ptr<AsyncExecutable>> compile_multi_part( | SmallVector<std::unique_ptr<AsyncExecutable>> compile_multi_part( | ||||
| const SmallVector<OutputSpec>& out_specs) override; | const SmallVector<OutputSpec>& out_specs) override; | ||||
| OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr) override; | |||||
| MGE_WIN_DECLSPEC_FUC OperatorNodeBase* insert_opr( | |||||
| std::unique_ptr<OperatorNodeBase> opr) override; | |||||
| void* alloc_varnode_storage() override; | void* alloc_varnode_storage() override; | ||||
| @@ -93,6 +93,23 @@ MemAllocPlan& MemAllocPlan::assign_for_forward( | |||||
| return *this; | return *this; | ||||
| } | } | ||||
| // Make this plan share the chunk of `src`, using the layout given by `sub`. | |||||
| // Requires both plans to be valid and the current shape to equal that of | |||||
| // sub.layout(); the shared chunk's reference count is incremented. | |||||
| // Returns *this. | |||||
| MemAllocPlan& MemAllocPlan::force_assign_for_forward( | |||||
| const MemAllocPlan& src, const SubTensorSpec& sub) { | |||||
| mgb_assert(valid() && src.valid() && m_layout.eq_shape(sub.layout())); | |||||
| m_chunk = src.m_chunk; | |||||
| ++m_chunk->m_refcnt; | |||||
| m_layout = sub.layout(); | |||||
| // make layout strong-contig: visit axes from last to first so that a | |||||
| // size-1 axis sees the already-updated stride of the axis after it | |||||
| const int nr_axis = static_cast<int>(m_layout.ndim); | |||||
| for (int axis = nr_axis; axis-- > 0;) { | |||||
| if (m_layout.shape[axis] != 1) { | |||||
| continue; | |||||
| } | |||||
| if (axis + 1 < nr_axis) { | |||||
| // stride of the following axis times its extent | |||||
| m_layout.stride[axis] = m_layout.stride[axis + 1] * m_layout.shape[axis + 1]; | |||||
| } else { | |||||
| // last axis | |||||
| m_layout.stride[axis] = 1; | |||||
| } | |||||
| } | |||||
| m_layout.dtype = dtype(); | |||||
| return *this; | |||||
| } | |||||
| MemAllocPlan& MemAllocPlan::reset_from_owner_var() { | MemAllocPlan& MemAllocPlan::reset_from_owner_var() { | ||||
| auto owner_var = m_chunk_storage.owner_var; | auto owner_var = m_chunk_storage.owner_var; | ||||
| m_layout.dtype = dtype(); | m_layout.dtype = dtype(); | ||||
| @@ -223,7 +240,12 @@ VarNode& VarNode::format(TensorFormat format) { | |||||
| bool VarNode::set_fwd_in2out_readonly(VarNode* input, const SubTensorSpec& sub) { | bool VarNode::set_fwd_in2out_readonly(VarNode* input, const SubTensorSpec& sub) { | ||||
| if (owner_graph()->options().imperative_proxy_graph) { | if (owner_graph()->options().imperative_proxy_graph) { | ||||
| return false; | |||||
| if (input->comp_node() != comp_node()) { | |||||
| return false; | |||||
| } | |||||
| m_mem_plan.force_assign_for_forward(input->m_mem_plan, sub); | |||||
| m_dev_tensor = input->dev_tensor().sub(sub); | |||||
| return true; | |||||
| } | } | ||||
| return ComputingGraphImpl::downcast(owner_graph()) | return ComputingGraphImpl::downcast(owner_graph()) | ||||
| ->var_node_mem_manager() | ->var_node_mem_manager() | ||||
| @@ -361,6 +383,13 @@ VarNode& VarNode::reset_dev_tensor_from_tensor(const DeviceTensorND& value) { | |||||
| return *this; | return *this; | ||||
| } | } | ||||
| // Point this var at `value`: store the tensor, adopt its shape, reset the | |||||
| // memory plan from the owner var, call set_from_owner_var() on the chunk's | |||||
| // mem_alloc_status, and then record value's layout in the plan. | |||||
| // No assertion on the layout of `value` is made here. | |||||
| void VarNode::force_assign_dev_tensor_from_tensor(const DeviceTensorND& value) { | |||||
| m_dev_tensor = value; | |||||
| shape(value.shape()); | |||||
| auto& plan = m_mem_plan.reset_from_owner_var(); | |||||
| plan.chunk().mem_alloc_status.set_from_owner_var(); | |||||
| // the layout is written last, after the plan has been reset | |||||
| m_mem_plan.layout(value.layout()); | |||||
| } | |||||
| void VarNode::assign_dev_tensor_from_tensor(const DeviceTensorND& value) { | void VarNode::assign_dev_tensor_from_tensor(const DeviceTensorND& value) { | ||||
| mgb_assert( | mgb_assert( | ||||
| (value.layout().is_contiguous() || value.empty()) && | (value.layout().is_contiguous() || value.empty()) && | ||||
| @@ -475,7 +475,7 @@ DEF(CompNode node, const TensorShape& shape, DType dtype, TensorFormat format) | |||||
| DEF(CompNode node, const TensorLayout& layout) | DEF(CompNode node, const TensorLayout& layout) | ||||
| : TensorND(node, layout, layout.dtype, layout.format) { | : TensorND(node, layout, layout.dtype, layout.format) { | ||||
| mgb_assert( | mgb_assert( | ||||
| layout.is_contiguous(), | |||||
| layout.is_contiguous() || layout.is_empty(), | |||||
| "non-contiguous layout used for initializing a tensor: %s", | "non-contiguous layout used for initializing a tensor: %s", | ||||
| layout.to_string().c_str()); | layout.to_string().c_str()); | ||||
| } | } | ||||
| @@ -241,7 +241,8 @@ public: | |||||
| * \return the node in the graph (maybe another node due to | * \return the node in the graph (maybe another node due to | ||||
| * deduplication) | * deduplication) | ||||
| */ | */ | ||||
| virtual OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr) = 0; | |||||
| MGE_WIN_DECLSPEC_FUC virtual OperatorNodeBase* insert_opr( | |||||
| std::unique_ptr<OperatorNodeBase> opr) = 0; | |||||
| /*! | /*! | ||||
| * \brief used by OperatorNodeBase to allocate its outputs | * \brief used by OperatorNodeBase to allocate its outputs | ||||
| @@ -194,6 +194,10 @@ public: | |||||
| MGE_WIN_DECLSPEC_FUC MemAllocPlan& assign_for_forward( | MGE_WIN_DECLSPEC_FUC MemAllocPlan& assign_for_forward( | ||||
| const MemAllocPlan& src, const SubTensorSpec& sub); | const MemAllocPlan& src, const SubTensorSpec& sub); | ||||
| //! force assign for readonly forward | |||||
| MGE_WIN_DECLSPEC_FUC MemAllocPlan& force_assign_for_forward( | |||||
| const MemAllocPlan& src, const SubTensorSpec& sub); | |||||
| /*! | /*! | ||||
| * \brief next readonly-forward reader of this MemAllocPlan | * \brief next readonly-forward reader of this MemAllocPlan | ||||
| * | * | ||||
| @@ -509,6 +513,9 @@ public: | |||||
| //! NO_SYS_MEM_ALLOC can be modified. | //! NO_SYS_MEM_ALLOC can be modified. | ||||
| MGE_WIN_DECLSPEC_FUC bool is_graph_dest_varnode(); | MGE_WIN_DECLSPEC_FUC bool is_graph_dest_varnode(); | ||||
| MGE_WIN_DECLSPEC_FUC void force_assign_dev_tensor_from_tensor( | |||||
| const DeviceTensorND& value); | |||||
| private: | private: | ||||
| //! whether its memory should be allocated by mgb system during graph | //! whether its memory should be allocated by mgb system during graph | ||||
| //! execution; initialized in VarNodeMemManager::reset_opr_seq() | //! execution; initialized in VarNodeMemManager::reset_opr_seq() | ||||
| @@ -24,7 +24,7 @@ namespace intl { | |||||
| * \brief base class for IO nodes between device and host | * \brief base class for IO nodes between device and host | ||||
| */ | */ | ||||
| class HostIONodeBase : public cg::SingleCNOperatorNodeBase { | class HostIONodeBase : public cg::SingleCNOperatorNodeBase { | ||||
| void init_output_static_infer_desc() override final; | |||||
| MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override final; | |||||
| protected: | protected: | ||||
| using cg::SingleCNOperatorNodeBase::SingleCNOperatorNodeBase; | using cg::SingleCNOperatorNodeBase::SingleCNOperatorNodeBase; | ||||
| @@ -32,9 +32,10 @@ protected: | |||||
| /*! | /*! | ||||
| * \brief src_type for static shape and value infer | * \brief src_type for static shape and value infer | ||||
| */ | */ | ||||
| virtual cg::static_infer::SourceType static_infer_src_type() const; | |||||
| MGE_WIN_DECLSPEC_FUC virtual cg::static_infer::SourceType static_infer_src_type() | |||||
| const; | |||||
| virtual const TensorShape& get_output_shape() = 0; | |||||
| MGE_WIN_DECLSPEC_FUC virtual const TensorShape& get_output_shape() = 0; | |||||
| /*! | /*! | ||||
| * \brief fill value in *dest* for static inference | * \brief fill value in *dest* for static inference | ||||
| @@ -52,10 +53,10 @@ protected: | |||||
| class DeviceTensorHolder : public HostIONodeBase { | class DeviceTensorHolder : public HostIONodeBase { | ||||
| class DevValueExecDep; | class DevValueExecDep; | ||||
| void init_output_format() override; | |||||
| void init_output_mem_plan(bool dynamic) override final; | |||||
| void scn_do_execute() override final; | |||||
| void record_execute_deps(ExecDependencyArray& deps) override; | |||||
| MGE_WIN_DECLSPEC_FUC void init_output_format() override; | |||||
| MGE_WIN_DECLSPEC_FUC void init_output_mem_plan(bool dynamic) override final; | |||||
| MGE_WIN_DECLSPEC_FUC void scn_do_execute() override final; | |||||
| MGE_WIN_DECLSPEC_FUC void record_execute_deps(ExecDependencyArray& deps) override; | |||||
| protected: | protected: | ||||
| using HostIONodeBase::HostIONodeBase; | using HostIONodeBase::HostIONodeBase; | ||||
| @@ -77,20 +78,20 @@ MGB_DEFINE_CLS_WITH_SUPER(SharedDeviceTensorBase, DeviceTensorHolder) // { | |||||
| std::shared_ptr<DeviceTensorND> m_dev_data; | std::shared_ptr<DeviceTensorND> m_dev_data; | ||||
| bool m_const_value; | bool m_const_value; | ||||
| const TensorShape& get_output_shape() override; | |||||
| MGE_WIN_DECLSPEC_FUC const TensorShape& get_output_shape() override; | |||||
| bool fill_in_static_infer(DeviceTensorND* dest) override { | bool fill_in_static_infer(DeviceTensorND* dest) override { | ||||
| MGB_MARK_USED_VAR(dest); | MGB_MARK_USED_VAR(dest); | ||||
| return false; | return false; | ||||
| } | } | ||||
| void init_output_comp_node() override; | |||||
| MGE_WIN_DECLSPEC_FUC void init_output_comp_node() override; | |||||
| public: | public: | ||||
| //! const_value marks whether the device value of this operator should | //! const_value marks whether the device value of this operator should | ||||
| //! be treated as constant during graph execution. Should be false in | //! be treated as constant during graph execution. Should be false in | ||||
| //! most cases. | //! most cases. | ||||
| SharedDeviceTensorBase( | |||||
| MGE_WIN_DECLSPEC_FUC SharedDeviceTensorBase( | |||||
| ComputingGraph& graph, const std::shared_ptr<DeviceTensorND>& dev_data, | ComputingGraph& graph, const std::shared_ptr<DeviceTensorND>& dev_data, | ||||
| bool const_value, const OperatorNodeConfig& config); | bool const_value, const OperatorNodeConfig& config); | ||||
| @@ -248,7 +249,8 @@ private: | |||||
| */ | */ | ||||
| MGB_DEFINE_OPR_CLASS_WITH_EXPORT( | MGB_DEFINE_OPR_CLASS_WITH_EXPORT( | ||||
| SharedDeviceTensor, intl::SharedDeviceTensorBase) // { | SharedDeviceTensor, intl::SharedDeviceTensorBase) // { | ||||
| cg::static_infer::SourceType static_infer_src_type() const override; | |||||
| MGE_WIN_DECLSPEC_FUC cg::static_infer::SourceType static_infer_src_type() | |||||
| const override; | |||||
| public: | public: | ||||
| using Super::Super; | using Super::Super; | ||||