@@ -20,23 +20,6 @@ from .._imperative_rt import core2, ops
 from ..ops.builtin import Elemwise, OpDef, RemoteSend
 from ..ops.special import Const
 
-""" Some notes:
-1. Initialize the optimizer:
-    for each trainable parameter:
-        call wrt(param, callback)
-    Each parameter tensor will be assciated with a Tracer object saved in Tensor._extra_data
-2. Tracer has one member: node, which is a VariableNode
-3. VariableNode has a OpNode member: opnode
-4. OpNode has four members:
-    a. id
-    b. inputs, which is made of VariableNode
-    c. outputs, which are weakref's to VariableNode
-    d. backward: call back function
-    e. has_grad_fn: call has_grad_fn(opnode, reached) to check grad exist
-    f. backward_allow_noinput: whether backward allow noinput
-"""
-
 _grad_count = 0
 _grad_manager_dict = weakref.WeakValueDictionary()
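The notes removed above describe the pattern this module implements: register each tensor of interest with ``wrt(tensor, callback)``, run the forward computation, then trigger backward propagation so that every callback receives the gradient of its tensor. Below is a minimal sketch of that pattern, assuming the ``Grad`` class defined in this file keeps the ``wrt(tensor, callback=...)`` and ``grad(ys, dys)`` call shapes the notes refer to; the ``save_to`` helper and the tensor ``x`` are illustrative, not part of the diff.

.. code-block:: python

    import numpy as np
    from megengine import Tensor
    from megengine.core.autodiff.grad import Grad

    grads = {}

    def save_to(name):
        # Callback invoked with the computed gradient of the registered tensor.
        def callback(g):
            grads[name] = g
        return callback

    x = Tensor(np.random.randn(3).astype("float32"))

    grad = Grad()
    # "for each trainable parameter: call wrt(param, callback)"
    grad.wrt(x, callback=save_to("x"))

    y = (x * x).sum()
    # Propagate dy = 1 from y back to every tensor registered via wrt().
    grad(y, Tensor(np.ones((), dtype="float32")))
    print(grads["x"])  # expected: 2 * x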
@@ -97,6 +80,64 @@ class Grad:
 class Function(ops.PyOpBase):
| """ | |||||
| Defines a block of operations with customizable differentiation. | |||||
| The computation should be defined in ``forward`` method, with gradient | |||||
| computation defined in ``backward`` method. | |||||
| Each instance of ``Function`` should be used only once during forwardding. | |||||
| Examples: | |||||
| .. code-block:: | |||||
| class Sigmoid(Function): | |||||
| def forward(self, x): | |||||
| y = 1 / (1 + F.exp(-x)) | |||||
| self.y = y | |||||
| return y | |||||
| def backward(self, dy): | |||||
| y = self.y | |||||
| return dy * y * (1-y) | |||||
| """ | |||||
| def forward(self, *args, **kwargs): | |||||
| """ | |||||
| Applies operations to ``inputs`` and returns results. It must be overriden by all subclasses. | |||||
| :param input: input tensors. | |||||
| :return: a tuple of Tensor or a single Tensor. | |||||
| .. note:: | |||||
| This method should return a tuple of Tensor or a single Tensor representing the output | |||||
| of the function. | |||||
| """ | |||||
| raise NotImplementedError | |||||
| def backward(self, *output_grads): | |||||
| """ | |||||
| Compute the gradient of the forward function. It must be overriden by all subclasses. | |||||
| :param output_grads: gradients of outputs that are returned by :meth:`forward`. | |||||
| .. note:: | |||||
| In case when some tensors of outputs are not related to loss function, the corresponding | |||||
| values in ``output_grads`` would be ``None``. | |||||
| .. note:: | |||||
| This method should return a tuple which containing the gradients of all inputs, in the same order | |||||
| as the ``inputs`` argument of :meth:`forward` . A ``Tensor`` could be returned | |||||
| instead if there is only one input. If users want to stop the propagation of some gradients, | |||||
| the corresponding returned values should be set ``None`` . | |||||
| """ | |||||
| raise NotImplementedError | |||||
     def _default_rule(self, *args):
         ret = self.forward(*args)
         self.__single_output = isinstance(ret, core2.Tensor)
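As a usage illustration of the docstrings added above, here is a hedged sketch that runs the ``Sigmoid`` example end to end. It assumes ``Function`` and ``GradManager`` are re-exported from ``megengine.autodiff`` and that tensors attached to a ``GradManager`` receive their gradient in ``.grad`` after ``backward()``; the tensor ``x`` is illustrative.

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import Tensor
    from megengine.autodiff import Function, GradManager

    class Sigmoid(Function):
        def forward(self, x):
            y = 1 / (1 + F.exp(-x))
            self.y = y
            return y

        def backward(self, dy):
            y = self.y
            # Returning None here instead would stop gradient
            # propagation to the corresponding input.
            return dy * y * (1 - y)

    x = Tensor(np.random.randn(4).astype("float32"))
    gm = GradManager()
    gm.attach([x])
    with gm:
        # A fresh Function instance per forward pass, matching the
        # single-use requirement in the class docstring.
        y = Sigmoid()(x)
        gm.backward(y.sum())
    print(x.grad)  # gradient of y.sum() with respect to x

Constructing a new ``Sigmoid()`` for every forward pass follows the single-use rule stated in the class docstring, and the ``backward`` comment shows how returning ``None`` would cut off gradient flow to an input.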