| @@ -11,38 +11,37 @@ from ..core.tensor import amp | |||
| class autocast: | |||
| r""" | |||
| A class to control autocast mode for amp as a context manager or a decorator. | |||
| r"""A class to control autocast mode for amp as a context manager or a decorator. | |||
| :param enabled: Whether autocast mode is enabled. | |||
| :param low_prec_dtype: Set amp autocast mode's lower precision dtype. It will change | |||
| the target dtype in tensor casting for better speed and memory. Default: float16. | |||
| :param high_prec_dtype: Set amp autocast mode's higher precision dtype. It will | |||
| change the target dtype in tensor casting for better precision. Default: float32. | |||
| Args: | |||
| enabled: Whether autocast mode is enabled. | |||
| low_prec_dtype: Set amp autocast mode's lower precision dtype. It will change | |||
| the target dtype in tensor casting for better speed and memory. Default: float16. | |||
| high_prec_dtype: Set amp autocast mode's higher precision dtype. It will | |||
| change the target dtype in tensor casting for better precision. Default: float32. | |||
| Examples: | |||
| .. code-block:: | |||
| .. code-block:: | |||
| # used as decorator | |||
| @autocast() | |||
| def train_step(image, label): | |||
| with gm: | |||
| logits = model(image) | |||
| loss = F.nn.cross_entropy(logits, label) | |||
| gm.backward(loss) | |||
| opt.step().clear_grad() | |||
| return loss | |||
| # used as decorator | |||
| @autocast() | |||
| def train_step(image, label): | |||
| with gm: | |||
| logits = model(image) | |||
| loss = F.nn.cross_entropy(logits, label) | |||
| gm.backward(loss) | |||
| opt.step().clear_grad() | |||
| return loss | |||
| # used as context manager | |||
| def train_step(image, label): | |||
| with autocast(): | |||
| with gm: | |||
| logits = model(image) | |||
| loss = F.nn.cross_entropy(logits, label) | |||
| gm.backward(loss) | |||
| opt.step().clear_grad() | |||
| return loss | |||
| # used as context manager | |||
| def train_step(image, label): | |||
| with autocast(): | |||
| with gm: | |||
| logits = model(image) | |||
| loss = F.nn.cross_entropy(logits, label) | |||
| gm.backward(loss) | |||
| opt.step().clear_grad() | |||
| return loss | |||
| """ | |||
| def __init__( | |||
| @@ -16,50 +16,51 @@ from ..tensor import Tensor | |||
| class GradScaler: | |||
| r""" | |||
| A helper class that performs grad scaling to prevent from data overflow in | |||
| r"""A helper class that performs grad scaling to prevent from data overflow in | |||
| :class:`~.autocast` mode. | |||
| :param init_scale: Initial scale factor. | |||
| :param growth_factor: Factor that the scale is multiplied by in actual | |||
| :meth:`update` stage. If growth_factor is 0, scale_factor will not update. | |||
| :param backoff_factor: Factor that the scale is multiplied by when encountering | |||
| overflow grad. | |||
| :param growth_interval: The interval between two scale update stages. | |||
| Example:: | |||
| gm = GradManager() | |||
| opt = ... | |||
| scaler = GradScaler() | |||
| gm.attach(model.parameters()) | |||
| @autocast() | |||
| def train_step(image, label): | |||
| with gm: | |||
| logits = model(image) | |||
| loss = F.nn.cross_entropy(logits, label) | |||
| scaler.backward(gm, loss) | |||
| opt.step().clear_grad() | |||
| return loss | |||
| If need more flexible usage, could split ``scaler.backward`` into three lines: | |||
| .. code-block:: | |||
| @autocast() | |||
| def train_step(image, label): | |||
| with gm: | |||
| logits = model(image) | |||
| loss = F.nn.cross_entropy(logits, label) | |||
| gm.backward(loss, dy=megengine.tensor(scaler.scale_factor)) | |||
| scaler.unscale(gm.attached_tensors()) | |||
| scaler.update() | |||
| opt.step().clear_grad() | |||
| return loss | |||
| This is useful when need to accumulate grads for multi batches. | |||
| Args: | |||
| init_scale: Initial scale factor. | |||
| growth_factor: Factor that the scale is multiplied by in actual | |||
| :meth:`update` stage. If growth_factor is 0, scale_factor will not be updated. | |||
| backoff_factor: Factor that the scale is multiplied by when encountering | |||
| overflow grad. | |||
| growth_interval: The interval between two scale update stages. | |||
| Example: | |||
| .. code-block:: | |||
| gm = GradManager() | |||
| opt = ... | |||
| scaler = GradScaler() | |||
| gm.attach(model.parameters()) | |||
| @autocast() | |||
| def train_step(image, label): | |||
| with gm: | |||
| logits = model(image) | |||
| loss = F.nn.cross_entropy(logits, label) | |||
| scaler.backward(gm, loss) | |||
| opt.step().clear_grad() | |||
| return loss | |||
| If more flexible usage is needed, ``scaler.backward`` can be split into three lines: | |||
| .. code-block:: | |||
| @autocast() | |||
| def train_step(image, label): | |||
| with gm: | |||
| logits = model(image) | |||
| loss = F.nn.cross_entropy(logits, label) | |||
| gm.backward(loss, dy=megengine.tensor(scaler.scale_factor)) | |||
| scaler.unscale(gm.attached_tensors()) | |||
| scaler.update() | |||
| opt.step().clear_grad() | |||
| return loss | |||
| This is useful when you need to accumulate gradients over multiple batches. | |||
| """ | |||
| def __init__( | |||
| @@ -86,18 +87,18 @@ class GradScaler: | |||
| unscale_grad: bool = True, | |||
| update_scale: bool = "if_unscale_grad" | |||
| ): | |||
| r""" | |||
| A wrapper of GradManager's :meth:`~.GradManager.backward`, used to scale | |||
| r"""A wrapper of GradManager's :meth:`~.GradManager.backward`, used to scale | |||
| ``y``'s grad and unscale parameters' grads. | |||
| :param gm: The to be wrapped GradManager. | |||
| :param y: Same as GradManager backward's ``y``. | |||
| :param dy: Same as GradManager backward's ``dy``. Will be multiplied | |||
| by ``scale_factor``. | |||
| :param unscale_grad: Whether do :meth:`unscale` at the same time. Could be | |||
| ``False`` if needs to accumulate grads. | |||
| :param update_scale: Same as :meth:`unscale`'s ``update``. Will be ignored | |||
| if ``unscale_grad`` is ``False``. | |||
| Args: | |||
| gm: The GradManager to be wrapped. | |||
| y: Same as GradManager backward's ``y``. | |||
| dy: Same as GradManager backward's ``dy``. Will be multiplied | |||
| by ``scale_factor``. | |||
| unscale_grad: Whether to do :meth:`unscale` at the same time. Could be | |||
| ``False`` if you need to accumulate grads. | |||
| update_scale: Same as :meth:`unscale`'s ``update``. Will be ignored | |||
| if ``unscale_grad`` is ``False``. | |||
| """ | |||
| # These checks should be consistent with GradManager's | |||
| if y is None: | |||
| @@ -121,11 +122,11 @@ class GradScaler: | |||
| self.update() | |||
| def unscale(self, grad_tensors: Iterable[Tensor]): | |||
| r""" | |||
| Unscale all ``grad_tensors``'s grad. | |||
| r"""Unscale all ``grad_tensors``'s grad. | |||
| :param grad_tensors: Tensors needed to unscale grads. Should be all tensors | |||
| that are affected by ``target`` tensor in GradManager's backward. | |||
| Args: | |||
| grad_tensors: Tensors whose grads need to be unscaled. Should be all tensors | |||
| that are affected by the ``target`` tensor in GradManager's backward. | |||
| """ | |||
| # use float64 for better precision | |||
| inv_scale = Tensor(1.0 / self.scale_factor) | |||
| @@ -151,7 +152,8 @@ class GradScaler: | |||
| def update(self, new_scale: float = None): | |||
| r"""Update the scale factor according to whether encountered overflow grad. | |||
| If ``new_scale`` is provided, internal update mechanism will be ignored.""" | |||
| If ``new_scale`` is provided, internal update mechanism will be ignored. | |||
| """ | |||
| if self.growth_interval == 0: | |||
| return | |||
| @@ -32,8 +32,7 @@ _global_priority = 0 | |||
| class GradManager: | |||
| r""" | |||
| GradManager computes gradients or more generally, vector-Jacobian product, by reverse mode | |||
| r"""GradManager computes gradients or more generally, vector-Jacobian product, by reverse mode | |||
| automatic differentiation (a.k.a. back propagation). | |||
| Reverse mode autodiff normally reuses many intermediate tensors for best computation efficiency. | |||
| @@ -120,7 +119,6 @@ class GradManager: | |||
| gm = GradManager() | |||
| gm.attach(model.parameters(), callback=dist.make_allreduce_cb("MEAN")) | |||
| """ | |||
| def __init__(self): | |||
| @@ -136,8 +134,7 @@ class GradManager: | |||
| return [spec.tensor() for spec in self._attach_specs.values()] | |||
| def attach(self, tensors: Iterable[Tensor], callbacks=None): | |||
| r""" | |||
| Instruct GradManager to track operations on tensors, so that gradients with respect | |||
| r"""Instruct GradManager to track operations on tensors, so that gradients with respect | |||
| to those tensors could be evaluated later. | |||
| :meth:`attach` also accepts a list of callbacks, which will be called with the tensor and | |||
| @@ -188,8 +185,9 @@ class GradManager: | |||
| multiple uses of a GradManager, which is unrelated to whether resources are timely | |||
| released within a single use. | |||
| :param tensors: tensor or list of tensors to track | |||
| :param callbacks: callback or list of callbacks | |||
| Args: | |||
| tensors: tensor or list of tensors to track | |||
| callbacks: callback or list of callbacks | |||
| """ | |||
| if callbacks is None: | |||
| callbacks = [] | |||
| @@ -234,8 +232,7 @@ class GradManager: | |||
| y: Union[Tensor, List[Tensor]] = None, | |||
| dy: Union[Tensor, List[Tensor]] = None, | |||
| ): | |||
| r""" | |||
| Compute gradients (or vector-Jacobian product) for all attached tensors, accumulate to | |||
| r"""Compute gradients (or vector-Jacobian product) for all attached tensors, accumulate to | |||
| corresponding .grad attribute, and release resources along the way. | |||
| :meth:`backward` computes the vector-Jacobian product :math:`dx_j = \sum_{i} dy_i J_{ij}` | |||
| @@ -257,8 +254,9 @@ class GradManager: | |||
| process of this call. When the call successfully finishes, the GradManager will be put back | |||
| to an inactive state. | |||
| :param y: tensor or list of tensors | |||
| :param dy: tensor or list of tensors. Defaults to 1 if y is scalar | |||
| Args: | |||
| y: tensor or list of tensors | |||
| dy: tensor or list of tensors. Defaults to 1 if y is scalar | |||
| """ | |||
| push_scope("backward") | |||
| set_option("record_computing_path", 0) | |||
| @@ -310,8 +308,7 @@ class GradManager: | |||
| pop_scope("backward") | |||
| def record(self): | |||
| r""" | |||
| Start recording operations | |||
| r"""Start recording operations | |||
| After this call, you will be able to call :meth:`backward`. | |||
| """ | |||
| @@ -342,8 +339,7 @@ class GradManager: | |||
| self._grad.wrt(tensor, callback=callback) | |||
| def release(self): | |||
| r""" | |||
| Stop recording operations and release resources kept for gradient computation | |||
| r"""Stop recording operations and release resources kept for gradient computation | |||
| After this call, you will not be able to call :meth:`backward`. | |||
| """ | |||
| @@ -15,16 +15,12 @@ if os.environ.get("MEGENGINE_USE_SYMBOLIC_SHAPE"): | |||
| def use_symbolic_shape() -> bool: | |||
| """ | |||
| Returns whether tensor.shape returns a tensor instead of a tuple | |||
| """ | |||
| r"""Returns whether tensor.shape returns a tensor instead of a tuple""" | |||
| return _use_symbolic_shape | |||
| def set_symbolic_shape(option: bool): | |||
| """ Sets whether tensor.shape returns a tensor instead of a tuple | |||
| """ | |||
| r"""Sets whether tensor.shape returns a tensor instead of a tuple""" | |||
| global _use_symbolic_shape | |||
| _org = _use_symbolic_shape | |||
| _use_symbolic_shape = option | |||
| @@ -88,67 +88,56 @@ class Grad: | |||
| class Function(ops.PyOpBase): | |||
| """ | |||
| Defines a block of operations with customizable differentiation. | |||
| r"""Defines a block of operations with customizable differentiation. | |||
| The computation should be defined in ``forward`` method, with gradient | |||
| computation defined in ``backward`` method. | |||
| Each instance of ``Function`` should be used only once during the forward pass. | |||
| Examples: | |||
| .. code-block:: | |||
| class Sigmoid(Function): | |||
| def forward(self, x): | |||
| y = 1 / (1 + F.exp(-x)) | |||
| self.y = y | |||
| return y | |||
| def backward(self, dy): | |||
| y = self.y | |||
| return dy * y * (1-y) | |||
| .. code-block:: | |||
| class Sigmoid(Function): | |||
| def forward(self, x): | |||
| y = 1 / (1 + F.exp(-x)) | |||
| self.y = y | |||
| return y | |||
| def backward(self, dy): | |||
| y = self.y | |||
| return dy * y * (1-y) | |||
| """ | |||
| def forward(self, *args, **kwargs): | |||
| """ | |||
| Applies operations to ``inputs`` and returns results. It must be overriden by all subclasses. | |||
| :param input: input tensors. | |||
| :return: a tuple of Tensor or a single Tensor. | |||
| .. note:: | |||
| This method should return a tuple of Tensor or a single Tensor representing the output | |||
| of the function. | |||
| .. note:: | |||
| positional arguments should all be Tensor | |||
| r"""Applies operations to ``inputs`` and returns results. It must be overriden by all subclasses. | |||
| Args: | |||
| input: input tensors. | |||
| Returns: | |||
| a tuple of Tensor or a single Tensor. | |||
| Note: | |||
| * This method should return a tuple of Tensor or a single Tensor representing the output | |||
| of the function. | |||
| * positional arguments should all be Tensor | |||
| """ | |||
| raise NotImplementedError | |||
| def backward(self, *output_grads): | |||
| """ | |||
| Compute the gradient of the forward function. It must be overriden by all subclasses. | |||
| :param output_grads: gradients of outputs that are returned by :meth:`forward`. | |||
| .. note:: | |||
| In case when some tensors of outputs are not related to loss function, the corresponding | |||
| values in ``output_grads`` would be ``None``. | |||
| .. note:: | |||
| This method should return a tuple which containing the gradients of all inputs, in the same order | |||
| as the ``inputs`` argument of :meth:`forward` . A ``Tensor`` could be returned | |||
| instead if there is only one input. If users want to stop the propagation of some gradients, | |||
| the corresponding returned values should be set ``None`` . | |||
| r"""Compute the gradient of the forward function. It must be overriden by all subclasses. | |||
| Args: | |||
| output_grads: gradients of outputs that are returned by :meth:`forward`. | |||
| Note: | |||
| * In case when some tensors of outputs are not related to loss function, the corresponding | |||
| values in ``output_grads`` would be ``None``. | |||
| * This method should return a tuple containing the gradients of all inputs, in the same order | |||
| as the ``inputs`` argument of :meth:`forward`. A ``Tensor`` could be returned | |||
| instead if there is only one input. If users want to stop the propagation of some gradients, | |||
| the corresponding returned values should be set to ``None``. | |||
| """ | |||
| raise NotImplementedError | |||
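As a hedged illustration of the second note above (stopping propagation by returning ``None``), assuming ``Function`` is exposed as ``megengine.Function`` and both positional inputs are Tensors; the class name is hypothetical:

.. code-block:: python

    import megengine

    class MulStopRhs(megengine.Function):
        def forward(self, x, w):
            self.w = w
            return x * w

        def backward(self, dy):
            # gradient flows back to x only; propagation to w is stopped
            return dy * self.w, None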
| @@ -12,16 +12,14 @@ _low_prec_dtype = "float16" | |||
| @property | |||
| def enabled(mod): | |||
| r""" | |||
| Get or set amp autocast mode enabled or not. | |||
| r"""Get or set amp autocast mode enabled or not. | |||
| Examples: | |||
| .. code-block:: | |||
| .. code-block:: | |||
| import megengine as mge | |||
| mge.amp.enabled = True | |||
| import megengine as mge | |||
| mge.amp.enabled = True | |||
| """ | |||
| return _enabled | |||
| @@ -34,17 +32,15 @@ def enabled(mod, enabled: bool): | |||
| @property | |||
| def high_prec_dtype(mod): | |||
| r""" | |||
| Get or set amp autocast mode's higher precision dtype. It will change the | |||
| r"""Get or set amp autocast mode's higher precision dtype. It will change the | |||
| target dtype in tensor casting for better precision. Default: float32. | |||
| Examples: | |||
| .. code-block:: | |||
| .. code-block:: | |||
| import megengine as mge | |||
| mge.amp.high_prec_dtype = "float32" | |||
| import megengine as mge | |||
| mge.amp.high_prec_dtype = "float32" | |||
| """ | |||
| return _high_prec_dtype | |||
| @@ -57,17 +53,15 @@ def high_prec_dtype(mod, dtype: str): | |||
| @property | |||
| def low_prec_dtype(mod): | |||
| r""" | |||
| Get or set amp autocast mode's lower precision dtype. It will change the | |||
| r"""Get or set amp autocast mode's lower precision dtype. It will change the | |||
| target dtype in tensor casting for better speed and memory. Default: float16. | |||
| Examples: | |||
| .. code-block:: | |||
| .. code-block:: | |||
| import megengine as mge | |||
| mge.amp.low_prec_dtype = "float16" | |||
| import megengine as mge | |||
| mge.amp.low_prec_dtype = "float16" | |||
| """ | |||
| return _low_prec_dtype | |||
| @@ -389,9 +389,7 @@ class ArrayMethodMixin(abc.ABC): | |||
| @property | |||
| def ndim(self): | |||
| r""" | |||
| Returns the number of dimensions of self :class:`~.Tensor`. | |||
| """ | |||
| r"""Returns the number of dimensions of self :class:`~.Tensor`.""" | |||
| shape = self._tuple_shape | |||
| if shape is None: | |||
| raise ValueError("unkown ndim") | |||
| @@ -399,8 +397,7 @@ class ArrayMethodMixin(abc.ABC): | |||
| @property | |||
| def size(self): | |||
| r""" | |||
| Returns the size of the self :class:`~.Tensor`. | |||
| r"""Returns the size of the self :class:`~.Tensor`. | |||
| The returned value is a subclass of :class:`tuple`. | |||
| """ | |||
| shape = self.shape | |||
| @@ -410,14 +407,11 @@ class ArrayMethodMixin(abc.ABC): | |||
| @property | |||
| def T(self): | |||
| r""" | |||
| alias of :attr:`~.Tensor.transpose`. | |||
| """ | |||
| r"""alias of :attr:`~.Tensor.transpose`.""" | |||
| return self.transpose() | |||
| def item(self, *args): | |||
| r""" | |||
| Returns the value of this :class:`~.Tensor` as a standard Python :class:`numbers.Number`. | |||
| r"""Returns the value of this :class:`~.Tensor` as a standard Python :class:`numbers.Number`. | |||
| This only works for tensors with one element. For other cases, see :meth:`~.tolist`. | |||
| """ | |||
| if not args: | |||
| @@ -427,8 +421,7 @@ class ArrayMethodMixin(abc.ABC): | |||
| return self[args].item() | |||
| def tolist(self): | |||
| r""" | |||
| Returns the tensor as a (nested) list. | |||
| r"""Returns the tensor as a (nested) list. | |||
| For scalars, a standard Python number is returned, just like with :meth:`~.item`. | |||
| Tensors are automatically moved to the CPU first if necessary. | |||
| @@ -437,16 +430,13 @@ class ArrayMethodMixin(abc.ABC): | |||
| return self.numpy().tolist() | |||
| def astype(self, dtype): | |||
| r""" | |||
| Returns a :class:`Tensor` with the same data and number of elements | |||
| r"""Returns a :class:`Tensor` with the same data and number of elements | |||
| but with the specified :attr:`~.Tensor.dtype`. | |||
| """ | |||
| return astype(self, dtype) | |||
| def reshape(self, *args): | |||
| r""" | |||
| See :func:`~.reshape`. | |||
| """ | |||
| r"""See :func:`~.reshape`.""" | |||
| return _reshape(self, _expand_args(args)) | |||
| # FIXME: remove this method | |||
| @@ -454,9 +444,7 @@ class ArrayMethodMixin(abc.ABC): | |||
| return _broadcast(self, _expand_args(args)) | |||
| def transpose(self, *args): | |||
| r""" | |||
| See :func:`~.transpose`. | |||
| """ | |||
| r"""See :func:`~.transpose`.""" | |||
| if self.ndim == 0: | |||
| assert ( | |||
| len(args) == 0 | |||
| @@ -469,172 +457,170 @@ class ArrayMethodMixin(abc.ABC): | |||
| return _transpose(self, _expand_args(args)) | |||
| def flatten(self): | |||
| r""" | |||
| See :func:`~.flatten`. | |||
| """ | |||
| r"""See :func:`~.flatten`.""" | |||
| return self.reshape(-1) | |||
| def sum(self, axis=None, keepdims: bool = False): | |||
| r""" | |||
| Returns the sum of each row of the input tensor in the given dimension ``axis``. | |||
| r"""Returns the sum of each row of the input tensor in the given dimension ``axis``. | |||
| If ``axis`` is a list of axes, reduce over all of them. | |||
| If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
| except in the dimension(s) ``axis`` where it is of size 1. | |||
| Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
| :param axis: the dimension or dimensions to reduce. | |||
| :param keepdims: whether the output tensor has ndim retained or not. | |||
| :return: output tensor. | |||
| Args: | |||
| axis: the dimension or dimensions to reduce. | |||
| keepdims: whether the output tensor has ndim retained or not. | |||
| Examples: | |||
| .. testcode:: | |||
| Returns: | |||
| output tensor. | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.sum().numpy()) | |||
| print(b.sum().numpy()) | |||
| Examples: | |||
| .. testcode:: | |||
| Outputs: | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.sum().numpy()) | |||
| print(b.sum().numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| 2 | |||
| 10.0 | |||
| .. testoutput:: | |||
| 2 | |||
| 10.0 | |||
| """ | |||
| return _reduce("sum")(self, axis, keepdims) | |||
| def prod(self, axis=None, keepdims: bool = False): | |||
| r""" | |||
| Returns the product of each row of the input tensor in the given dimension ``axis``. | |||
| r"""Returns the product of each row of the input tensor in the given dimension ``axis``. | |||
| If ``axis`` is a list of axes, reduce over all of them. | |||
| If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
| except in the dimension(s) ``axis`` where it is of size 1. | |||
| Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
| :param axis: the dimension or dimensions to reduce. | |||
| :param keepdims: whether the output tensor has ndim retained or not. | |||
| :return: output tensor. | |||
| Examples: | |||
| Args: | |||
| axis: the dimension or dimensions to reduce. | |||
| keepdims: whether the output tensor has ndim retained or not. | |||
| .. testcode:: | |||
| Returns: | |||
| output tensor. | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.prod().numpy()) | |||
| print(b.prod().numpy()) | |||
| Examples: | |||
| .. testcode:: | |||
| Outputs: | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.prod().numpy()) | |||
| print(b.prod().numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| 0 | |||
| 24.0 | |||
| .. testoutput:: | |||
| 0 | |||
| 24.0 | |||
| """ | |||
| return _reduce("product")(self, axis, keepdims) | |||
| def min(self, axis=None, keepdims: bool = False): | |||
| r""" | |||
| Returns the min value of each row of the input tensor in the given dimension ``axis``. | |||
| r"""Returns the min value of each row of the input tensor in the given dimension ``axis``. | |||
| If ``axis`` is a list of axes, reduce over all of them. | |||
| If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
| except in the dimension(s) ``axis`` where it is of size 1. | |||
| Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
| :param axis: the dimension or dimensions to reduce. | |||
| :param keepdims: whether the output tensor has ndim retained or not. | |||
| :return: output tensor. | |||
| Examples: | |||
| Args: | |||
| axis: the dimension or dimensions to reduce. | |||
| keepdims: whether the output tensor has ndim retained or not. | |||
| .. testcode:: | |||
| Returns: | |||
| output tensor. | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.min().numpy()) | |||
| print(b.min().numpy()) | |||
| Examples: | |||
| .. testcode:: | |||
| Outputs: | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.min().numpy()) | |||
| print(b.min().numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| False | |||
| 1.0 | |||
| .. testoutput:: | |||
| False | |||
| 1.0 | |||
| """ | |||
| return _reduce("min")(self, axis, keepdims) | |||
| def max(self, axis=None, keepdims: bool = False): | |||
| r""" | |||
| Returns the max value of each row of the input tensor in the given dimension ``axis``. | |||
| r"""Returns the max value of each row of the input tensor in the given dimension ``axis``. | |||
| If ``axis`` is a list of axes, reduce over all of them. | |||
| If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
| except in the dimension(s) ``axis`` where it is of size 1. | |||
| Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
| :param axis: the dimension or dimensions to reduce. | |||
| :param keepdims: whether the output tensor has ndim retained or not. | |||
| :return: output tensor. | |||
| Examples: | |||
| Args: | |||
| axis: the dimension or dimensions to reduce. | |||
| keepdims: whether the output tensor has ndim retained or not. | |||
| .. testcode:: | |||
| Returns: | |||
| output tensor. | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.max().numpy()) | |||
| print(b.max().numpy()) | |||
| Examples: | |||
| .. testcode:: | |||
| Outputs: | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.max().numpy()) | |||
| print(b.max().numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| True | |||
| 4.0 | |||
| .. testoutput:: | |||
| True | |||
| 4.0 | |||
| """ | |||
| return _reduce("max")(self, axis, keepdims) | |||
| def mean(self, axis=None, keepdims: bool = False): | |||
| r""" | |||
| Returns the mean value of each row of the input tensor in the given dimension ``axis``. | |||
| r"""Returns the mean value of each row of the input tensor in the given dimension ``axis``. | |||
| If ``axis`` is a list of axes, reduce over all of them. | |||
| If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
| except in the dimension(s) ``axis`` where it is of size 1. | |||
| Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
| :param axis: the dimension or dimensions to reduce. | |||
| :param keepdims: whether the output tensor has ndim retained or not. | |||
| :return: output tensor. | |||
| Args: | |||
| axis: the dimension or dimensions to reduce. | |||
| keepdims: whether the output tensor has ndim retained or not. | |||
| Examples: | |||
| Returns: | |||
| output tensor. | |||
| .. testcode:: | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.mean().numpy()) | |||
| print(b.mean().numpy()) | |||
| Examples: | |||
| .. testcode:: | |||
| Outputs: | |||
| from megengine import tensor | |||
| a = tensor([False, True, True, False]) | |||
| b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
| print(a.mean().numpy()) | |||
| print(b.mean().numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| 0.5 | |||
| 2.5 | |||
| .. testoutput:: | |||
| 0.5 | |||
| 2.5 | |||
| """ | |||
| return _reduce("mean")(self, axis, keepdims) | |||
| @@ -47,17 +47,17 @@ class QuantDtypeMeta( | |||
| ["name", "cname", "np_dtype_str", "qmin", "qmax", "is_unsigned"], | |||
| ) | |||
| ): | |||
| r""" | |||
| Store metadata for quantize dtype. Could be used to create custom quant dtype | |||
| r"""Store metadata for quantize dtype. Could be used to create custom quant dtype | |||
| for QAT when the network doesn't need to be converted for inference, but only | |||
| to export network metadata for third-party platform inference. | |||
| :param name: a unique name string. | |||
| :param cname: used in :func:`~.create_quantized_dtype` for model dump and inference. | |||
| :param np_dtype_str: used in :func:`~.create_quantized_dtype` to generate ``np.dtype``. | |||
| :param qmin: a int number indicating quant dtype's lowerbound. | |||
| :param qmax: a int number indicating quant dtype's upperbound. | |||
| :param is_unsigned: a helper value that could be inference from np_dtype_str. | |||
| Args: | |||
| name: a unique name string. | |||
| cname: used in :func:`~.create_quantized_dtype` for model dump and inference. | |||
| np_dtype_str: used in :func:`~.create_quantized_dtype` to generate ``np.dtype``. | |||
| qmin: an int indicating the quant dtype's lower bound. | |||
| qmax: an int indicating the quant dtype's upper bound. | |||
| is_unsigned: a helper value that can be inferred from np_dtype_str. | |||
| """ | |||
| def __new__( | |||
| @@ -77,7 +77,7 @@ class QuantDtypeMeta( | |||
| return self | |||
| def __deepcopy__(self, _): | |||
| """ | |||
| r""" | |||
| Ignore deepcopy so that a dtype meta can be treated as singleton, for more | |||
| strict check in :meth:`~.FakeQuantize.fake_quant_forward`. | |||
| """ | |||
| @@ -113,17 +113,17 @@ def _check_zero_point(zp: int, dtype_meta: QuantDtypeMeta): | |||
| def create_quantized_dtype( | |||
| dtype_meta: QuantDtypeMeta, scale: float, zp: Union[int, None] | |||
| ): | |||
| r""" | |||
| Get quantized dtype with metadata attribute according to _metadata_dict. | |||
| r"""Get quantized dtype with metadata attribute according to _metadata_dict. | |||
| Note that unsigned dtype must have ``zero_point`` and signed dtype must | |||
| not have ``zero_point``, to be consistent with tensors generated by calling | |||
| compiled function from `CompGraph.compile(inputs, outspec)`. | |||
| :param dtype_meta: a QuantDtypeMeta indicating which dtype to return. the | |||
| ``cname`` attribute cannot be ``None``. | |||
| :param scale: a number for scale to store in dtype's metadata | |||
| :param zp: a number for zero_point to store in dtype's metadata | |||
| Args: | |||
| dtype_meta: a QuantDtypeMeta indicating which dtype to return. the | |||
| ``cname`` attribute cannot be ``None``. | |||
| scale: a number for scale to store in dtype's metadata | |||
| zp: a number for zero_point to store in dtype's metadata | |||
| """ | |||
| if dtype_meta.cname is None: | |||
| raise ValueError("dtype {} without cname attr is not supported.") | |||
| @@ -152,8 +152,7 @@ def create_quantized_dtype( | |||
| def quint8(scale, zero_point): | |||
| """ | |||
| Consturct a quantized unsigned int8 data type with ``scale`` (float) and | |||
| r"""Consturct a quantized unsigned int8 data type with ``scale`` (float) and | |||
| ``zero_point`` (uint8). The real value represented by a quint8 data type is | |||
| float_val = scale * (uint8_val - zero_point) | |||
| """ | |||
| @@ -161,24 +160,21 @@ def quint8(scale, zero_point): | |||
| def qint8(scale): | |||
| """ | |||
| Construct a quantized int8 data type with ``scale`` (float). The real value | |||
| r"""Construct a quantized int8 data type with ``scale`` (float). The real value | |||
| represented by a qint8 data type is float_val = scale * int8_val | |||
| """ | |||
| return create_quantized_dtype(_builtin_quant_dtypes["qint8"], scale, None) | |||
| def qint32(scale): | |||
| """ | |||
| Construct a quantized int32 data type with ``scale`` (float). The real value | |||
| r"""Construct a quantized int32 data type with ``scale`` (float). The real value | |||
| represented by a qint32 data type is float_val = scale * int32_val | |||
| """ | |||
| return create_quantized_dtype(_builtin_quant_dtypes["qint32"], scale, None) | |||
| def quint4(scale, zero_point): | |||
| """ | |||
| Consturct a quantized unsigned int4 data type with ``scale`` (float) and | |||
| r"""Consturct a quantized unsigned int4 data type with ``scale`` (float) and | |||
| ``zero_point`` (uint8). The real value represented by a quint4 data type is | |||
| float_val = scale * (uint4_val - zero_point) | |||
| """ | |||
| @@ -186,8 +182,7 @@ def quint4(scale, zero_point): | |||
| def qint4(scale): | |||
| """ | |||
| Construct a quantized int4 data type with ``scale`` (float). The real value | |||
| r"""Construct a quantized int4 data type with ``scale`` (float). The real value | |||
| represented by a qint4 data type is float_val = scale * int4_val | |||
| """ | |||
| return create_quantized_dtype(_builtin_quant_dtypes["qint4"], scale, None) | |||
| @@ -244,95 +239,95 @@ def _convert_from_quantized_dtype(arr: np.ndarray, dtype_meta: QuantDtypeMeta): | |||
| def convert_to_quint8(arr: np.ndarray, q: np.dtype): | |||
| """ | |||
| Quantize a float NumPy ndarray into a quint8 one with specified params. | |||
| r"""Quantize a float NumPy ndarray into a quint8 one with specified params. | |||
| :param arr: Input ndarray. | |||
| :param q: Target data type, should be a quint8. | |||
| Args: | |||
| arr: Input ndarray. | |||
| q: Target data type, should be a quint8. | |||
| """ | |||
| return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["quint8"]) | |||
| def convert_from_quint8(arr: np.ndarray): | |||
| """ | |||
| Dequantize a quint8 NumPy ndarray into a float one. | |||
| r"""Dequantize a quint8 NumPy ndarray into a float one. | |||
| :param arr: Input ndarray. | |||
| Args: | |||
| arr: Input ndarray. | |||
| """ | |||
| return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["quint8"]) | |||
| def convert_to_qint8(arr: np.ndarray, q: np.dtype): | |||
| """ | |||
| Quantize a float NumPy ndarray into a qint8 one with specified params. | |||
| r"""Quantize a float NumPy ndarray into a qint8 one with specified params. | |||
| :param arr: Input ndarray. | |||
| :param q: Target data type, should be a qint8. | |||
| Args: | |||
| arr: Input ndarray. | |||
| q: Target data type, should be a qint8. | |||
| """ | |||
| return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint8"]) | |||
| def convert_from_qint8(arr: np.ndarray): | |||
| """ | |||
| Dequantize a qint8 NumPy ndarray into a float one. | |||
| r"""Dequantize a qint8 NumPy ndarray into a float one. | |||
| :param arr: Input ndarray. | |||
| Args: | |||
| arr: Input ndarray. | |||
| """ | |||
| return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint8"]) | |||
| def convert_to_qint32(arr: np.ndarray, q: np.dtype): | |||
| """ | |||
| Quantize a float NumPy ndarray into a qint32 one with specified params. | |||
| r"""Quantize a float NumPy ndarray into a qint32 one with specified params. | |||
| :param arr: Input ndarray. | |||
| :param q: Target data type, should be a qint8. | |||
| Args: | |||
| arr: Input ndarray. | |||
| q: Target data type, should be a qint8. | |||
| """ | |||
| return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint32"]) | |||
| def convert_from_qint32(arr): | |||
| """ | |||
| Dequantize a qint32 NumPy ndarray into a float one. | |||
| r"""Dequantize a qint32 NumPy ndarray into a float one. | |||
| :param arr: Input ndarray. | |||
| Args: | |||
| arr: Input ndarray. | |||
| """ | |||
| return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint32"]) | |||
| def convert_to_quint4(arr: np.ndarray, q: np.dtype): | |||
| """ | |||
| Quantize a float NumPy ndarray into a quint4 one with specified params. | |||
| r"""Quantize a float NumPy ndarray into a quint4 one with specified params. | |||
| :param arr: Input ndarray. | |||
| :param q: Target data type, should be a quint4. | |||
| Args: | |||
| arr: Input ndarray. | |||
| q: Target data type, should be a quint4. | |||
| """ | |||
| return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["quint4"]) | |||
| def convert_from_quint4(arr: np.ndarray): | |||
| """ | |||
| Dequantize a quint4 NumPy ndarray into a float one. | |||
| r"""Dequantize a quint4 NumPy ndarray into a float one. | |||
| :param arr: Input ndarray. | |||
| Args: | |||
| arr: Input ndarray. | |||
| """ | |||
| return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["quint4"]) | |||
| def convert_to_qint4(arr: np.ndarray, q: np.dtype): | |||
| """ | |||
| Quantize a float NumPy ndarray into a qint4 one with specified params. | |||
| r"""Quantize a float NumPy ndarray into a qint4 one with specified params. | |||
| :param arr: Input ndarray. | |||
| :param q: Target data type, should be a qint4. | |||
| Args: | |||
| arr: Input ndarray. | |||
| q: Target data type, should be a qint4. | |||
| """ | |||
| return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint4"]) | |||
| def convert_from_qint4(arr: np.ndarray): | |||
| """ | |||
| Dequantize a qint4 NumPy ndarray into a float one. | |||
| r"""Dequantize a qint4 NumPy ndarray into a float one. | |||
| :param arr: Input ndarray. | |||
| Args: | |||
| arr: Input ndarray. | |||
| """ | |||
| return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint4"]) | |||
| @@ -24,11 +24,11 @@ from .core import TensorBase | |||
| def set_priority_to_id(dest_vars): | |||
| """ | |||
| For all oprs in the subgraph constructed by dest_vars, | |||
| r"""For all oprs in the subgraph constructed by dest_vars, | |||
| sets their priority to their id if the original priority is zero. | |||
| :param dest_vars: target vars representing the graph. | |||
| Args: | |||
| dest_vars: target vars representing the graph. | |||
| """ | |||
| dest_vec = [] | |||
| for i in dest_vars: | |||
| @@ -220,54 +220,50 @@ class OpNode: | |||
| def optimize_for_inference(dest_vars, **kwargs): | |||
| r""" | |||
| Applies optimize_for_inference pass for computing graph. | |||
| :param dest_vars: list of output vars in the computing graph | |||
| :Keyword Arguments: | |||
| * enable_io16xc32 -- | |||
| whether to use float16 for I/O between oprs and use | |||
| float32 as internal computation precision. Note the output var would be | |||
| changed to float16. | |||
| * enable_ioc16 -- | |||
| whether to use float16 for both I/O and computation | |||
| precision. | |||
| * enable_hwcd4 -- | |||
| whether to use NHWCD4 data layout. This is faster on some | |||
| OpenCL backend. | |||
| * enable_nchw88 -- | |||
| whether to use NCHW88 data layout, currently | |||
| used in X86 AVX backend. | |||
| * enable_nchw44 -- | |||
| whether to use NCHW44 data layout, currently | |||
| used in arm backend. | |||
| * enable_nchw44_dot -- | |||
| whether to use NCHW44_dot data layout, currently | |||
| used in armv8.2+dotprod backend. | |||
| * enable_nchw4 -- | |||
| whether to use NCHW4 data layout, currently | |||
| used in nvidia backend(based on cudnn). | |||
| * enable_nchw32 -- | |||
| whether to use NCHW32 data layout, currently | |||
| used in nvidia backend with tensorcore(based on cudnn). | |||
| * enable_chwn4 -- | |||
| whether to use CHWN4 data layout, currently | |||
| used in nvidia backend with tensorcore. | |||
| * enable_nchw64 -- | |||
| whether to use NCHW64 data layout, used for fast int4 | |||
| support on Nvidia GPU. | |||
| * enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearty | |||
| into one opr. | |||
| * enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z | |||
| input for inference on nvidia backend(this optimization pass will | |||
| result in mismatch of the precision of output of training and | |||
| inference) | |||
| * enable_fuse_preprocess: whether to fuse astype\pad channel\dimshuffle and | |||
| etc opr from h2d opr. | |||
| r"""Applies optimize_for_inference pass for computing graph. | |||
| Args: | |||
| dest_vars: list of output vars in the computing graph | |||
| Keyword Arguments: | |||
| * enable_io16xc32 -- | |||
| whether to use float16 for I/O between oprs and use | |||
| float32 as internal computation precision. Note the output var would be | |||
| changed to float16. | |||
| * enable_ioc16 -- | |||
| whether to use float16 for both I/O and computation | |||
| precision. | |||
| * enable_hwcd4 -- | |||
| whether to use NHWCD4 data layout. This is faster on some | |||
| OpenCL backend. | |||
| * enable_nchw88 -- | |||
| whether to use NCHW88 data layout, currently | |||
| used in X86 AVX backend. | |||
| * enable_nchw44 -- | |||
| whether to use NCHW44 data layout, currently | |||
| used in arm backend. | |||
| * enable_nchw44_dot -- | |||
| whether to use NCHW44_dot data layout, currently | |||
| used in armv8.2+dotprod backend. | |||
| * enable_nchw4 -- | |||
| whether to use NCHW4 data layout, currently | |||
| used in nvidia backend(based on cudnn). | |||
| * enable_nchw32 -- | |||
| whether to use NCHW32 data layout, currently | |||
| used in nvidia backend with tensorcore(based on cudnn). | |||
| * enable_chwn4 -- | |||
| whether to use CHWN4 data layout, currently | |||
| used in nvidia backend with tensorcore. | |||
| * enable_nchw64 -- | |||
| whether to use NCHW64 data layout, used for fast int4 | |||
| support on Nvidia GPU. | |||
| * enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearity | |||
| into one opr. | |||
| * enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z | |||
| input for inference on nvidia backend(this optimization pass will | |||
| result in mismatch of the precision of output of training and | |||
| inference) | |||
| """ | |||
| inference_options = GraphOptimizeOptions() | |||
| inference_optimize_layout_transform_map = { | |||
| @@ -305,11 +301,13 @@ def optimize_for_inference(dest_vars, **kwargs): | |||
| def deserialize_infer_option(x: int) -> Dict[str, bool]: | |||
| r""" | |||
| Deserailize optimize options generated by ``imperative_rt.GraphOptimizeOptions``. | |||
| r"""Deserailize optimize options generated by ``imperative_rt.GraphOptimizeOptions``. | |||
| :param x: inference options represented by int. | |||
| :return: inference options represented by dict. | |||
| Args: | |||
| x: inference options represented by int. | |||
| Returns: | |||
| inference options represented by dict. | |||
| """ | |||
| inference_options = GraphOptimizeOptions.deserialize(x) | |||
| @@ -346,13 +344,12 @@ def deserialize_infer_option(x: int) -> Dict[str, bool]: | |||
| def modify_opr_algo_strategy_inplace(dest_vars, strategy: str): | |||
| """ | |||
| C++ graph version of :func:`~.set_execution_strategy`. Used to inplacely modify | |||
| r"""C++ graph version of :func:`~.set_execution_strategy`. Used to inplacely modify | |||
| dumped graph's fast-run strategy. | |||
| :param dest_vars: list of output vars in the computing graph. | |||
| :param strategy: fast-run algorithms strategy. | |||
| Args: | |||
| dest_vars: list of output vars in the computing graph. | |||
| strategy: fast-run algorithms strategy. | |||
| """ | |||
| dest_vars = _unwrap(dest_vars) | |||
| _imperative_rt.modify_opr_algo_strategy_inplace(dest_vars, strategy) | |||
| @@ -383,39 +380,40 @@ def dump_graph( | |||
| append_json=False, | |||
| metadata=None | |||
| ) -> Tuple[bytes, CompGraphDumpResult]: | |||
| """ | |||
| serialize the computing graph of `output_vars` and get byte result. | |||
| :param output_vars: output variables which are the graph's end point. | |||
| .. note:: | |||
| The underlying C++ API only accepts a var list. If a dict is given, | |||
| the vars would be renamed to the given names. | |||
| :param keep_var_name: level for keeping variable names: | |||
| * 0: none of the names are kept | |||
| * 1: (default)keep names of output vars | |||
| * 2: keep names of all (output and internal) vars | |||
| :param keep_opr_name: whether to keep operator names. | |||
| :param keep_param_name: whether to keep param names, so param values can be | |||
| easily manipulated after loading model | |||
| :param keep_opr_priority: whether to keep priority setting for operators | |||
| :param strip_info_file: a string for path or a file handler. if is not None, | |||
| then the dump information for code strip would be written to ``strip_info_file`` | |||
| :param append_json: will be check when `strip_info_file` is not None. if set | |||
| true, the information for code strip will be append to strip_info_file. | |||
| if set false, will rewrite strip_info_file | |||
| :return: dump result as byte string, and an instance of namedtuple | |||
| r"""serialize the computing graph of `output_vars` and get byte result. | |||
| Args: | |||
| output_vars: output variables which are the graph's end point. | |||
| keep_var_name: level for keeping variable names: | |||
| * 0: none of the names are kept | |||
| * 1: (default)keep names of output vars | |||
| * 2: keep names of all (output and internal) vars | |||
| keep_opr_name: whether to keep operator names. | |||
| keep_param_name: whether to keep param names, so param values can be | |||
| easily manipulated after loading model | |||
| keep_opr_priority: whether to keep priority setting for operators | |||
| strip_info_file: a string for a path or a file handler. If it is not None, | |||
| the dump information for code strip will be written to ``strip_info_file``. | |||
| append_json: only checked when `strip_info_file` is not None. If set to | |||
| True, the information for code strip will be appended to strip_info_file; | |||
| if set to False, strip_info_file will be overwritten. | |||
| Note: | |||
| The underlying C++ API only accepts a var list. If a dict is given, | |||
| the vars would be renamed to the given names. | |||
| Returns: | |||
| dump result as byte string, and an instance of namedtuple | |||
| :class:`CompGraphDumpResult`, whose fields are: | |||
| * ``nr_opr`` number of operators dumped | |||
| * ``tot_bytes`` total bytes for the whole graph | |||
| * ``tensor_value_bytes`` bytes consumed for dumping tensor values | |||
| * ``inputs`` names of input tensors | |||
| * ``params`` list of names of dumped params | |||
| * ``outputs`` names of output vars | |||
| * ``nr_opr`` number of operators dumped | |||
| * ``tot_bytes`` total bytes for the whole graph | |||
| * ``tensor_value_bytes`` bytes consumed for dumping tensor values | |||
| * ``inputs`` names of input tensors | |||
| * ``params`` list of names of dumped params | |||
| * ``outputs`` names of output vars | |||
| """ | |||
| if isinstance(output_vars, dict): | |||
| used_vars = set() | |||
| @@ -483,17 +481,19 @@ CompGraphLoadResult = collections.namedtuple( | |||
| def load_graph(fpath) -> CompGraphLoadResult: | |||
| """ | |||
| Load a serialized computing graph from file. | |||
| r"""Load a serialized computing graph from file. | |||
| Args: | |||
| fpath: Path or Handle of the input file | |||
| :param fpath: Path or Handle of the input file | |||
| :return: An instance of namedtuple :class:`CompGraphLoadResult`, | |||
| Returns: | |||
| An instance of namedtuple :class:`CompGraphLoadResult`, | |||
| whose fields are: | |||
| * ``graph`` loaded CompGraph | |||
| * ``output_vars_dict`` A Python dict, mapping name to output SymbolVar | |||
| * ``output_vars_list`` A Python list, containing output vars in the | |||
| order passed to serialize_comp_graph_to_file | |||
| * ``graph`` loaded CompGraph | |||
| * ``output_vars_dict`` A Python dict, mapping name to output SymbolVar | |||
| * ``output_vars_list`` A Python list, containing output vars in the | |||
| order passed to serialize_comp_graph_to_file | |||
| """ | |||
| output_vars_map = [] | |||
| output_vars_list = [] | |||
| @@ -24,12 +24,12 @@ _enable_convert_inputs = True | |||
| def get_convert_inputs(): | |||
| """ get the curerent state of `_enable_convert_inputs` """ | |||
| r"""get the curerent state of `_enable_convert_inputs`""" | |||
| return _enable_convert_inputs | |||
| def set_convert_inputs(flag): | |||
| """ This function is a temporary workaround for reducing the overhead of operator | |||
| r"""This function is a temporary workaround for reducing the overhead of operator | |||
| invocations. The function `convert_inputs` is disabled if the global state | |||
| `_enable_convert_inputs` is set to `False`, otherwise enabled. This function is for | |||
| internal use only, and should be removed when the tensor-like system is refactored. | |||
| @@ -137,11 +137,11 @@ def setscalar(x): | |||
| def astensor1d(x, *reference, dtype=None, device=None): | |||
| """ | |||
| Convert something to 1D tensor. Support following types | |||
| * sequence of scalar literal / tensor | |||
| * numpy array | |||
| * tensor (returned as is, regardless of dtype and device) | |||
| """Convert something to 1D tensor. Support following types | |||
| * sequence of scalar literal / tensor | |||
| * numpy array | |||
| * tensor (returned as is, regardless of dtype and device) | |||
| """ | |||
| try: | |||
| ndim = x.ndim | |||
| @@ -33,16 +33,11 @@ default_collate_err_msg_format = ( | |||
| class Collator: | |||
| r""" | |||
| Used for merging a list of samples to form a mini-batch of Tensor(s). Used when using batched loading from a dataset. | |||
| r"""Used for merging a list of samples to form a mini-batch of Tensor(s). Used when using batched loading from a dataset. | |||
| Modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py | |||
| """ | |||
| def apply(self, inputs): | |||
| """ | |||
| :param inputs: sequence_N(tuple(CHW, C, CK)). | |||
| :return: tuple(NCHW, NC, NCK). | |||
| """ | |||
| elem = inputs[0] | |||
| elem_type = type(elem) | |||
| if ( | |||
| @@ -44,28 +44,28 @@ def raise_timeout_error(): | |||
| class DataLoader: | |||
| r"""Provides a convenient way to iterate on a given dataset. | |||
| DataLoader combines a dataset with | |||
| :class:`~.Sampler`, :class:`~.Transform` and :class:`~.Collator`, | |||
| making it flexible to continually get minibatches from a dataset. | |||
| :param dataset: dataset from which to load the minibatch. | |||
| :param sampler: defines the strategy to sample data from the dataset. | |||
| :param transform: defined the transforming strategy for a sampled batch. | |||
| Default: None | |||
| :param collator: defined the merging strategy for a transformed batch. | |||
| Default: None | |||
| :param num_workers: the number of sub-process to load, transform and collate | |||
| the batch. ``0`` means using single-process. Default: 0 | |||
| :param timeout: if positive, means the timeout value(second) for collecting a | |||
| batch from workers. Default: 0 | |||
| :param timeout_event: callback function triggered by timeout, default to raise | |||
| runtime error. | |||
| :param divide: define the paralleling strategy in multi-processing mode. | |||
| ``True`` means one batch is divided into :attr:`num_workers` pieces, and | |||
| the workers will process these pieces parallelly. ``False`` means | |||
| different sub-process will process different batch. Default: False | |||
| Args: | |||
| dataset: dataset from which to load the minibatch. | |||
| sampler: defines the strategy to sample data from the dataset. | |||
| transform: defines the transforming strategy for a sampled batch. | |||
| Default: None | |||
| collator: defines the merging strategy for a transformed batch. | |||
| Default: None | |||
| num_workers: the number of sub-processes to load, transform and collate | |||
| the batch. ``0`` means using single-process. Default: 0 | |||
| timeout: if positive, the timeout value (in seconds) for collecting a | |||
| batch from workers. Default: 0 | |||
| timeout_event: callback function triggered by timeout, defaults to raising a | |||
| runtime error. | |||
| divide: defines the parallelization strategy in multi-processing mode. | |||
| ``True`` means one batch is divided into :attr:`num_workers` pieces, and | |||
| the workers will process these pieces in parallel. ``False`` means | |||
| different sub-processes will process different batches. Default: False | |||
| """ | |||
| __initialized = False | |||
| @@ -11,8 +11,7 @@ from typing import Tuple | |||
| class Dataset(ABC): | |||
| r""" | |||
| An abstract base class for all datasets. | |||
| r"""An abstract base class for all datasets. | |||
| ``__getitem__`` and ``__len__`` methods are additionally needed. | |||
| """ | |||
| @@ -31,8 +30,7 @@ class Dataset(ABC): | |||
| class StreamDataset(Dataset): | |||
| r""" | |||
| An abstract class for stream data. | |||
| r"""An abstract class for stream data. | |||
| An ``__iter__`` method is additionally needed. | |||
| """ | |||
| @@ -53,10 +51,9 @@ class StreamDataset(Dataset): | |||
| class ArrayDataset(Dataset): | |||
| r""" | |||
| ArrayDataset is a dataset for numpy array data. | |||
| r"""ArrayDataset is a dataset for numpy array data. | |||
| One or more numpy arrays are needed to initiate the dataset. | |||
| One or more numpy arrays are needed to initialize the dataset. | |||
| The dimensions representing the sample number are expected to be the same. | |||
| """ | |||
| @@ -21,8 +21,7 @@ logger = get_logger(__name__) | |||
| class CIFAR10(VisionDataset): | |||
| r""" :class:`~.Dataset` for CIFAR10 meta data. | |||
| """ | |||
| r""":class:`~.Dataset` for CIFAR10 meta data.""" | |||
| url_path = "http://www.cs.utoronto.ca/~kriz/" | |||
| raw_file_name = "cifar-10-python.tar.gz" | |||
| @@ -138,8 +137,7 @@ class CIFAR10(VisionDataset): | |||
| class CIFAR100(CIFAR10): | |||
| r""" :class:`~.Dataset` for CIFAR100 meta data. | |||
| """ | |||
| r""":class:`~.Dataset` for CIFAR100 meta data.""" | |||
| url_path = "http://www.cs.utoronto.ca/~kriz/" | |||
| raw_file_name = "cifar-100-python.tar.gz" | |||
| @@ -23,9 +23,7 @@ from .meta_vision import VisionDataset | |||
| class Cityscapes(VisionDataset): | |||
| r""" | |||
| `Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset. | |||
| """ | |||
| r"""`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset.""" | |||
| supported_order = ( | |||
| "image", | |||
| @@ -46,9 +46,7 @@ def has_valid_annotation(anno, order): | |||
| class COCO(VisionDataset): | |||
| r""" | |||
| `MS COCO <http://cocodataset.org/#home>`_ Dataset. | |||
| """ | |||
| r"""`MS COCO <http://cocodataset.org/#home>`_ Dataset.""" | |||
| supported_order = ( | |||
| "image", | |||
| @@ -26,22 +26,21 @@ from .utils import is_img | |||
| class ImageFolder(VisionDataset): | |||
| r""" | |||
| ImageFolder is a class for loading image data and labels from a organized folder. | |||
| r"""ImageFolder is a class for loading image data and labels from a organized folder. | |||
| The folder is expected to be organized as followed: root/cls/xxx.img_ext | |||
| Labels are indices of sorted classes in the root directory. | |||
| :param root: root directory of an image folder. | |||
| :param loader: a function used to load image from path, | |||
| if ``None``, default function that loads | |||
| images with PIL will be called. | |||
| :param check_valid_func: a function used to check if files in folder are | |||
| expected image files, if ``None``, default function | |||
| that checks file extensions will be called. | |||
| :param class_name: if ``True``, return class name instead of class index. | |||
| Args: | |||
| root: root directory of an image folder. | |||
| loader: a function used to load image from path, | |||
| if ``None``, default function that loads | |||
| images with PIL will be called. | |||
| check_valid_func: a function used to check if files in folder are | |||
| expected image files, if ``None``, default function | |||
| that checks file extensions will be called. | |||
| class_name: if ``True``, return class name instead of class index. | |||
| """ | |||
| def __init__(self, root: str, check_valid_func=None, class_name: bool = False): | |||
| @@ -30,11 +30,10 @@ logger = get_logger(__name__) | |||
| class ImageNet(ImageFolder): | |||
| r""" | |||
| Load ImageNet from raw files or folder. Expected folder looks like: | |||
| .. code-block:: bash | |||
| r"""Load ImageNet from raw files or folder. Expected folder looks like: | |||
| .. code-block:: shell | |||
| ${root}/ | |||
| | [REQUIRED TAR FILES] | |||
| |- ILSVRC2012_img_train.tar | |||
| @@ -45,22 +44,8 @@ class ImageNet(ImageFolder): | |||
| |- val/cls/xxx.${img_ext} | |||
| |- ILSVRC2012_devkit_t12/data/meta.mat | |||
| |- ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt | |||
| If the image folders don't exist, the raw tar files are required so that they can be extracted and processed. | |||
| """ | |||
| raw_file_meta = { | |||
| "train": ("ILSVRC2012_img_train.tar", "1d675b47d978889d74fa0da5fadfb00e"), | |||
| "val": ("ILSVRC2012_img_val.tar", "29b22e2961454d5413ddabcf34fc5622"), | |||
| "devkit": ("ILSVRC2012_devkit_t12.tar.gz", "fa75699e90414af021442c21a62c3abf"), | |||
| } # ImageNet raw files | |||
| default_train_dir = "train" | |||
| default_val_dir = "val" | |||
| default_devkit_dir = "ILSVRC2012_devkit_t12" | |||
| def __init__(self, root: str = None, train: bool = True, **kwargs): | |||
| r""" | |||
| Initialization: | |||
| * if ``root`` contains ``self.target_folder`` depending on ``train``: | |||
| @@ -77,10 +62,22 @@ class ImageNet(ImageFolder): | |||
| * raise error. | |||
| :param root: root directory of imagenet data, if root is ``None``, use default_dataset_root. | |||
| :param train: if ``True``, load the train split, otherwise load the validation split. | |||
| """ | |||
| Args: | |||
| root: root directory of imagenet data, if root is ``None``, use default_dataset_root. | |||
| train: if ``True``, load the train split, otherwise load the validation split. | |||
| """ | |||
| raw_file_meta = { | |||
| "train": ("ILSVRC2012_img_train.tar", "1d675b47d978889d74fa0da5fadfb00e"), | |||
| "val": ("ILSVRC2012_img_val.tar", "29b22e2961454d5413ddabcf34fc5622"), | |||
| "devkit": ("ILSVRC2012_devkit_t12.tar.gz", "fa75699e90414af021442c21a62c3abf"), | |||
| } # ImageNet raw files | |||
| default_train_dir = "train" | |||
| default_val_dir = "val" | |||
| default_devkit_dir = "ILSVRC2012_devkit_t12" | |||
| def __init__(self, root: str = None, train: bool = True, **kwargs): | |||
| # process the root path | |||
| if root is None: | |||
| self.root = self._default_root | |||
| @@ -22,8 +22,7 @@ logger = get_logger(__name__) | |||
| class MNIST(VisionDataset): | |||
| r""" :class:`~.Dataset` for MNIST meta data. | |||
| """ | |||
| r""":class:`~.Dataset` for MNIST meta data.""" | |||
| url_path = "http://yann.lecun.com/exdb/mnist/" | |||
| """ | |||
| @@ -23,9 +23,7 @@ from .meta_vision import VisionDataset | |||
| class Objects365(VisionDataset): | |||
| r""" | |||
| `Objects365 <https://www.objects365.org/overview.html>`_ Dataset. | |||
| """ | |||
| r"""`Objects365 <https://www.objects365.org/overview.html>`_ Dataset.""" | |||
| supported_order = ( | |||
| "image", | |||
| @@ -24,9 +24,7 @@ from .meta_vision import VisionDataset | |||
| class PascalVOC(VisionDataset): | |||
| r""" | |||
| `Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset. | |||
| """ | |||
| r"""`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset.""" | |||
| supported_order = ( | |||
| "image", | |||
| @@ -17,9 +17,7 @@ import megengine.distributed as dist | |||
| class Sampler(ABC): | |||
| r""" | |||
| An abstract base class for all Sampler | |||
| """ | |||
| r"""An abstract base class for all Sampler""" | |||
| @abstractmethod | |||
| def __init__(self): | |||
| @@ -27,19 +25,19 @@ class Sampler(ABC): | |||
| class MapSampler(Sampler): | |||
| r""" | |||
| Sampler for map dataset. | |||
| :param dataset: dataset to sample from. | |||
| :param batch_size: batch size for batch method. | |||
| :param drop_last: set ``True`` to drop the last incomplete batch, | |||
| if the dataset size is not divisible by the batch size. If ``False`` and | |||
| the size of dataset is not divisible by the batch_size, then the last batch will | |||
| be smaller. Default: False | |||
| :param num_samples: number of samples assigned to one rank. | |||
| :param world_size: number of ranks. | |||
| :param rank: rank id, non-negative interger within 0 and ``world_size``. | |||
| :param seed: seed for random operators. | |||
| r"""Sampler for map dataset. | |||
| Args: | |||
| dataset: dataset to sample from. | |||
| batch_size: batch size for batch method. | |||
| drop_last: set ``True`` to drop the last incomplete batch, | |||
| if the dataset size is not divisible by the batch size. If ``False`` and | |||
| the size of dataset is not divisible by the batch_size, then the last batch will | |||
| be smaller. Default: False | |||
| num_samples: number of samples assigned to one rank. | |||
| world_size: number of ranks. | |||
| rank: rank id, non-negative integer between 0 and ``world_size``. | |||
| seed: seed for random operators. | |||
| """ | |||
| def __init__( | |||
| @@ -106,14 +104,11 @@ class MapSampler(Sampler): | |||
| return int(math.ceil(self.num_samples / self.batch_size)) | |||
| def sample(self): | |||
| """ | |||
| Return a list contains all sample indices. | |||
| """ | |||
| r"""Return a list contains all sample indices.""" | |||
| raise NotImplementedError | |||
| def scatter(self, indices) -> List: | |||
| r""" | |||
| Scatter method is used for splitting indices into subset, each subset | |||
| r"""Scatter method is used for splitting indices into subset, each subset | |||
| will be assigned to a rank. Indices are evenly splitted by default. | |||
| If customized indices assignment method is needed, please rewrite this method. | |||
| """ | |||
| @@ -130,9 +125,7 @@ class MapSampler(Sampler): | |||
| return indices | |||
| def batch(self) -> Iterator[List[Any]]: | |||
| r""" | |||
| Batch method provides a batch indices generator. | |||
| """ | |||
| r"""Batch method provides a batch indices generator.""" | |||
| indices = list(self.sample()) | |||
| # user might pass the world_size parameter without dist, | |||
| @@ -150,18 +143,15 @@ class MapSampler(Sampler): | |||
| class StreamSampler(Sampler): | |||
| r""" | |||
| Sampler for stream dataset. | |||
| .. warning:: | |||
| r"""Sampler for stream dataset. | |||
| Warning: | |||
| In the case of multiple machines, the sampler should ensure that each worker gets | |||
| different data. This class cannot do that yet; please build your own | |||
| dataset and sampler to achieve this goal. | |||
| Usually, :meth:`~.StreamDataset.__iter__` can return a different iterator per | |||
| ``rank = dist.get_rank()``, so that each worker gets different data. | |||
| """ | |||
| def __init__(self, batch_size=1): | |||
| @@ -175,18 +165,18 @@ class StreamSampler(Sampler): | |||
| class SequentialSampler(MapSampler): | |||
| r""" | |||
| Sample elements sequentially. | |||
| :param dataset: dataset to sample from. | |||
| :param batch_size: batch size for batch method. | |||
| :param drop_last: set ``True`` to drop the last incomplete batch, | |||
| if the dataset size is not divisible by the batch size. If ``False`` and | |||
| the size of dataset is not divisible by the batch_size, then the last batch will | |||
| be smaller. Default: False | |||
| :param indices: indice of samples. | |||
| :param world_size: number of ranks. | |||
| :param rank: rank id, non-negative interger within 0 and ``world_size``. | |||
| r"""Sample elements sequentially. | |||
| Args: | |||
| dataset: dataset to sample from. | |||
| batch_size: batch size for batch method. | |||
| drop_last: set ``True`` to drop the last incomplete batch, | |||
| if the dataset size is not divisible by the batch size. If ``False`` and | |||
| the size of dataset is not divisible by the batch_size, then the last batch will | |||
| be smaller. Default: False | |||
| indices: indices of samples. | |||
| world_size: number of ranks. | |||
| rank: rank id, non-negative integer between 0 and ``world_size``. | |||
| """ | |||
| def __init__( | |||
| @@ -207,9 +197,7 @@ class SequentialSampler(MapSampler): | |||
| self.indices = indices | |||
| def sample(self) -> Iterator[Any]: | |||
| r""" | |||
| Return a generator. | |||
| """ | |||
| r"""Return a generator.""" | |||
| if self.indices is None: | |||
| return iter(range(len(self.dataset))) | |||
| else: | |||
| @@ -217,19 +205,19 @@ class SequentialSampler(MapSampler): | |||
| class RandomSampler(MapSampler): | |||
| r""" | |||
| Sample elements randomly without replacement. | |||
| :param dataset: dataset to sample from. | |||
| :param batch_size: batch size for batch method. | |||
| :param drop_last: set ``True`` to drop the last incomplete batch, | |||
| if the dataset size is not divisible by the batch size. If ``False`` and | |||
| the size of dataset is not divisible by the batch_size, then the last batch will | |||
| be smaller. Default: False | |||
| :param indices: indice of samples. | |||
| :param world_size: number of ranks. | |||
| :param rank: rank id, non-negative interger within 0 and ``world_size``. | |||
| :param seed: seed for random operators. | |||
| r"""Sample elements randomly without replacement. | |||
| Args: | |||
| dataset: dataset to sample from. | |||
| batch_size: batch size for batch method. | |||
| drop_last: set ``True`` to drop the last incomplete batch, | |||
| if the dataset size is not divisible by the batch size. If ``False`` and | |||
| the size of dataset is not divisible by the batch_size, then the last batch will | |||
| be smaller. Default: False | |||
| indices: indices of samples. | |||
| world_size: number of ranks. | |||
| rank: rank id, non-negative integer between 0 and ``world_size``. | |||
| seed: seed for random operators. | |||
| """ | |||
| def __init__( | |||
| @@ -258,20 +246,20 @@ class RandomSampler(MapSampler): | |||
| class ReplacementSampler(MapSampler): | |||
| r""" | |||
| Sample elements randomly with replacement. | |||
| :param dataset: dataset to sample from. | |||
| :param batch_size: batch size for batch method. | |||
| :param drop_last: set ``True`` to drop the last incomplete batch, | |||
| if the dataset size is not divisible by the batch size. If ``False`` and | |||
| the size of dataset is not divisible by the batch_size, then the last batch will | |||
| be smaller. Default: False | |||
| :param num_samples: number of samples assigned to one rank. | |||
| :param weights: weights for sampling indices, it could be unnormalized weights. | |||
| :param world_size: number of ranks. | |||
| :param rank: rank id, non-negative interger within 0 and ``world_size``. | |||
| :param seed: seed for random operators. | |||
| r"""Sample elements randomly with replacement. | |||
| Args: | |||
| dataset: dataset to sample from. | |||
| batch_size: batch size for batch method. | |||
| drop_last: set ``True`` to drop the last incomplete batch, | |||
| if the dataset size is not divisible by the batch size. If ``False`` and | |||
| the size of dataset is not divisible by the batch_size, then the last batch will | |||
| be smaller. Default: False | |||
| num_samples: number of samples assigned to one rank. | |||
| weights: weights for sampling indices; they could be unnormalized weights. | |||
| world_size: number of ranks. | |||
| rank: rank id, non-negative integer between 0 and ``world_size``. | |||
| seed: seed for random operators. | |||
| """ | |||
| def __init__( | |||
| @@ -59,15 +59,13 @@ class _PlasmaStoreManager: | |||
| class PlasmaShmQueue: | |||
| def __init__(self, maxsize: int = 0): | |||
| r""" | |||
| Use pyarrow in-memory plasma store to implement shared memory queue. | |||
| r"""Use pyarrow in-memory plasma store to implement shared memory queue. | |||
| Compared to native `multiprocess.Queue`, `PlasmaShmQueue` avoids pickle/unpickle | |||
| and communication overhead, leading to better performance in multi-process | |||
| applications. | |||
| :type maxsize: int | |||
| :param maxsize: maximum size of the queue, `None` means no limit. (default: ``None``) | |||
| Args: | |||
| maxsize: maximum size of the queue, `None` means no limit. (default: ``None``) | |||
| """ | |||
| # Lazy start the plasma store manager | |||
| @@ -11,9 +11,7 @@ from typing import Sequence, Tuple | |||
| class Transform(ABC): | |||
| """ | |||
| Rewrite apply method in subclass. | |||
| """ | |||
| r"""Rewrite apply method in subclass.""" | |||
| def apply_batch(self, inputs: Sequence[Tuple]): | |||
| return tuple(self.apply(input) for input in inputs) | |||
| @@ -15,7 +15,7 @@ import numpy as np | |||
| def wrap_keepdims(func): | |||
| """Wraper to keep the dimension of input images unchanged.""" | |||
| r"""Wraper to keep the dimension of input images unchanged.""" | |||
| @functools.wraps(func) | |||
| def wrapper(image, *args, **kwargs): | |||
| @@ -33,41 +33,47 @@ def wrap_keepdims(func): | |||
| @wrap_keepdims | |||
| def to_gray(image): | |||
| r""" | |||
| Change BGR format image's color space to gray. | |||
| r"""Change BGR format image's color space to gray. | |||
| :param image: input BGR format image, with `(H, W, C)` shape. | |||
| :return: gray format image, with `(H, W, C)` shape. | |||
| Args: | |||
| image: input BGR format image, with `(H, W, C)` shape. | |||
| Returns: | |||
| gray format image, with `(H, W, C)` shape. | |||
| """ | |||
| return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) | |||
| @wrap_keepdims | |||
| def to_bgr(image): | |||
| r""" | |||
| Change gray format image's color space to BGR. | |||
| r"""Change gray format image's color space to BGR. | |||
| Args: | |||
| image: input Gray format image, with `(H, W, C)` shape. | |||
| :param image: input Gray format image, with `(H, W, C)` shape. | |||
| :return: BGR format image, with `(H, W, C)` shape. | |||
| Returns: | |||
| BGR format image, with `(H, W, C)` shape. | |||
| """ | |||
| return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) | |||
| @wrap_keepdims | |||
| def pad(input, size, value): | |||
| r""" | |||
| Pad input data with *value* and given *size*. | |||
| :param input: input data, with `(H, W, C)` shape. | |||
| :param size: padding size of input data, it could be integer or sequence. | |||
| If it is an integer, the input data will be padded in four directions. | |||
| If it is a sequence contains two integer, the bottom and right side | |||
| of input data will be padded. | |||
| If it is a sequence contains four integer, the top, bottom, left, right | |||
| side of input data will be padded with given size. | |||
| :param value: padding value of data, could be a sequence of int or float. | |||
| If it is float value, the dtype of image will be casted to float32 also. | |||
| :return: padded image. | |||
| r"""Pad input data with *value* and given *size*. | |||
| Args: | |||
| input: input data, with `(H, W, C)` shape. | |||
| size: padding size of input data; it could be an integer or a sequence. | |||
| If it is an integer, the input data will be padded in four directions. | |||
| If it is a sequence containing two integers, the bottom and right side | |||
| of the input data will be padded. | |||
| If it is a sequence containing four integers, the top, bottom, left and right | |||
| sides of the input data will be padded with the given sizes. | |||
| value: padding value of data, could be a sequence of int or float. | |||
| If it is a float value, the dtype of the image will also be cast to float32. | |||
| Returns: | |||
| padded image. | |||
| """ | |||
| if isinstance(size, int): | |||
| size = (size, size, size, size) | |||
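A quick sketch of the three accepted ``size`` forms; the resulting shapes assume the (top, bottom, left, right) ordering described in the docstring above:

.. code-block:: python

    import numpy as np

    img = np.zeros((4, 4, 3), dtype=np.uint8)

    pad(img, 2, 0).shape              # (8, 8, 3):  2 pixels on all four sides
    pad(img, (1, 2), 0).shape         # (5, 6, 3):  bottom by 1, right by 2
    pad(img, (1, 2, 3, 4), 0).shape   # (7, 11, 3): top 1, bottom 2, left 3, right 4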
| @@ -80,32 +86,33 @@ def pad(input, size, value): | |||
| @wrap_keepdims | |||
| def flip(image, flipCode): | |||
| r""" | |||
| Accordding to the flipCode (the type of flip), flip the input image. | |||
| :param image: input image, with `(H, W, C)` shape. | |||
| :param flipCode: code that indicates the type of flip. | |||
| r"""Accordding to the flipCode (the type of flip), flip the input image. | |||
| * 1 : Flip horizontally | |||
| Args: | |||
| image: input image, with `(H, W, C)` shape. | |||
| flipCode: code that indicates the type of flip. | |||
| * 0 : Flip vertically | |||
| * 1 : Flip horizontally | |||
| * 0 : Flip vertically | |||
| * -1: Flip horizontally and vertically | |||
| * -1: Flip horizontally and vertically | |||
| :return: BGR format image, with `(H, W, C)` shape. | |||
| Returns: | |||
| BGR format image, with `(H, W, C)` shape. | |||
| """ | |||
| return cv2.flip(image, flipCode=flipCode) | |||
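Since the function wraps ``cv2.flip``, the flip codes follow OpenCV's convention; a tiny sketch:

.. code-block:: python

    import numpy as np

    img = np.arange(2 * 2 * 3, dtype=np.uint8).reshape(2, 2, 3)

    flip(img, 1)    # flip horizontally (around the vertical axis)
    flip(img, 0)    # flip vertically (around the horizontal axis)
    flip(img, -1)   # flip both horizontally and vertically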
| @wrap_keepdims | |||
| def resize(input, size, interpolation=cv2.INTER_LINEAR): | |||
| r""" | |||
| Resize the input data to given size. | |||
| r"""Resize the input data to given size. | |||
| Args: | |||
| input: input data, could be image or masks, with `(H, W, C)` shape. | |||
| size: target size of input data, with (height, width) shape. | |||
| interpolation: interpolation method. | |||
| :param input: input data, could be image or masks, with `(H, W, C)` shape. | |||
| :param size: target size of input data, with (height, width) shape. | |||
| :param interpolation: interpolation method. | |||
| :return: resized data, with `(H, W, C)` shape. | |||
| Returns: | |||
| resized data, with `(H, W, C)` shape. | |||
| """ | |||
| if len(size) != 2: | |||
| raise ValueError("resize needs (h, w), but got {}".format(size)) | |||
| @@ -42,36 +42,36 @@ __all__ = [ | |||
| class VisionTransform(Transform): | |||
| r""" | |||
| Base class of all transforms used in computer vision. | |||
| r"""Base class of all transforms used in computer vision. | |||
| Calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*() | |||
| methods. If you want to implement a self-defined transform method for images, | |||
| rewrite the _apply_image method in a subclass. | |||
| :param order: input type order. Input is a tuple containing different structures, | |||
| order is used to specify the order of structures. For example, if your input | |||
| is (image, boxes) type, then the ``order`` should be ("image", "boxes"). | |||
| Current available strings and data type are describe below: | |||
| * "image": input image, with shape of `(H, W, C)`. | |||
| * "coords": coordinates, with shape of `(N, 2)`. | |||
| * "boxes": bounding boxes, with shape of `(N, 4)`, "xyxy" format, | |||
| the 1st "xy" represents top left point of a box, | |||
| the 2nd "xy" represents right bottom point. | |||
| * "mask": map used for segmentation, with shape of `(H, W, 1)`. | |||
| * "keypoints": keypoints with shape of `(N, K, 3)`, N for number of instances, | |||
| and K for number of keypoints in one instance. The first two dimensions | |||
| of last axis is coordinate of keypoints and the the 3rd dimension is | |||
| the label of keypoints. | |||
| * "polygons": a sequence containing numpy arrays, its length is the number of instances. | |||
| Each numpy array represents polygon coordinate of one instance. | |||
| * "category": categories for some data type. For example, "image_category" | |||
| means category of the input image and "boxes_category" means categories of | |||
| bounding boxes. | |||
| * "info": information for images such as image shapes and image path. | |||
| You can also customize your data types only if you implement the corresponding | |||
| _apply_*() methods, otherwise ``NotImplementedError`` will be raised. | |||
| Args: | |||
| order: input type order. Input is a tuple containing different structures, | |||
| order is used to specify the order of structures. For example, if your input | |||
| is (image, boxes) type, then the ``order`` should be ("image", "boxes"). | |||
| Currently available strings and data types are described below: | |||
| * "image": input image, with shape of `(H, W, C)`. | |||
| * "coords": coordinates, with shape of `(N, 2)`. | |||
| * "boxes": bounding boxes, with shape of `(N, 4)`, "xyxy" format, | |||
| the 1st "xy" represents top left point of a box, | |||
| the 2nd "xy" represents right bottom point. | |||
| * "mask": map used for segmentation, with shape of `(H, W, 1)`. | |||
| * "keypoints": keypoints with shape of `(N, K, 3)`, N for number of instances, | |||
| and K for number of keypoints in one instance. The first two dimensions | |||
| of the last axis are the coordinates of keypoints and the 3rd dimension is | |||
| the label of keypoints. | |||
| * "polygons": a sequence containing numpy arrays, its length is the number of instances. | |||
| Each numpy array represents polygon coordinate of one instance. | |||
| * "category": categories for some data type. For example, "image_category" | |||
| means category of the input image and "boxes_category" means categories of | |||
| bounding boxes. | |||
| * "info": information for images such as image shapes and image path. | |||
| You can also customize your data types only if you implement the corresponding | |||
| _apply_*() methods, otherwise ``NotImplementedError`` will be raised. | |||
| """ | |||
| def __init__(self, order=None): | |||
| @@ -154,13 +154,13 @@ class VisionTransform(Transform): | |||
| class ToMode(VisionTransform): | |||
| r""" | |||
| Change input data to a target mode. | |||
| r"""Change input data to a target mode. | |||
| For example, most transforms use HWC mode image, | |||
| while the neural network might use CHW mode input tensor. | |||
| :param mode: output mode of input. Default: "CHW" | |||
| :param order: the same with :class:`VisionTransform` | |||
| Args: | |||
| mode: output mode of input. Default: "CHW" | |||
| order: the same with :class:`VisionTransform` | |||
| """ | |||
| def __init__(self, mode="CHW", *, order=None): | |||
| @@ -183,32 +183,31 @@ class ToMode(VisionTransform): | |||
| class Compose(VisionTransform): | |||
| r""" | |||
| Composes several transforms together. | |||
| :param transforms: list of :class:`VisionTransform` to compose. | |||
| :param batch_compose: whether use shuffle_indices for batch data or not. | |||
| If True, use original input sequence. | |||
| Otherwise, the shuffle_indices will be used for transforms. | |||
| :param shuffle_indices: indices used for random shuffle, start at 1. | |||
| For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform | |||
| will be random shuffled, the 2nd and 4th transform will also be shuffled. | |||
| :param order: the same with :class:`VisionTransform` | |||
| r"""Composes several transforms together. | |||
| Args: | |||
| transforms: list of :class:`VisionTransform` to compose. | |||
| batch_compose: whether to use shuffle_indices for batch data or not. | |||
| If True, use original input sequence. | |||
| Otherwise, the shuffle_indices will be used for transforms. | |||
| shuffle_indices: indices used for random shuffle, start at 1. | |||
| For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform | |||
| will be randomly shuffled, and the 2nd and 4th transform will also be shuffled. | |||
| order: the same with :class:`VisionTransform` | |||
| Examples: | |||
| .. testcode:: | |||
| from megengine.data.transform import RandomHorizontalFlip, RandomVerticalFlip, CenterCrop, ToMode, Compose | |||
| transform_func = Compose([ | |||
| RandomHorizontalFlip(), | |||
| RandomVerticalFlip(), | |||
| CenterCrop(100), | |||
| ToMode("CHW"), | |||
| ], | |||
| shuffle_indices=[(1, 2, 3)] | |||
| ) | |||
| .. testcode:: | |||
| from megengine.data.transform import RandomHorizontalFlip, RandomVerticalFlip, CenterCrop, ToMode, Compose | |||
| transform_func = Compose([ | |||
| RandomHorizontalFlip(), | |||
| RandomVerticalFlip(), | |||
| CenterCrop(100), | |||
| ToMode("CHW"), | |||
| ], | |||
| shuffle_indices=[(1, 2, 3)] | |||
| ) | |||
| """ | |||
| def __init__( | |||
| @@ -260,13 +259,13 @@ class Compose(VisionTransform): | |||
| class TorchTransformCompose(VisionTransform): | |||
| r""" | |||
| Compose class used for transforms in torchvision, only support PIL image, | |||
| r"""Compose class used for transforms in torchvision, only support PIL image, | |||
| some transforms with tensor in torchvision are not supported, | |||
| such as Normalize and ToTensor in torchvision. | |||
| :param transforms: the same with ``Compose``. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| transforms: the same with ``Compose``. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, transforms, *, order=None): | |||
| @@ -302,19 +301,19 @@ class TorchTransformCompose(VisionTransform): | |||
| class Pad(VisionTransform): | |||
| r""" | |||
| Pad the input data. | |||
| :param size: padding size of input image, it could be integer or sequence. | |||
| If it is an integer, the input image will be padded in four directions. | |||
| If it is a sequence containing two integers, the bottom and right side | |||
| of image will be padded. | |||
| If it is a sequence containing four integers, the top, bottom, left, right | |||
| side of image will be padded with given size. | |||
| :param value: padding value of image, could be a sequence of int or float. | |||
| if it is float value, the dtype of image will be casted to float32 also. | |||
| :param mask_value: padding value of segmentation map. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| r"""Pad the input data. | |||
| Args: | |||
| size: padding size of input image; it could be an integer or a sequence. | |||
| If it is an integer, the input image will be padded in four directions. | |||
| If it is a sequence containing two integers, the bottom and right side | |||
| of image will be padded. | |||
| If it is a sequence containing four integers, the top, bottom, left, right | |||
| side of image will be padded with given size. | |||
| value: padding value of image, could be a sequence of int or float. | |||
| If it is a float value, the dtype of the image will also be cast to float32. | |||
| mask_value: padding value of segmentation map. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, size=0, value=0, mask_value=0, *, order=None): | |||
| @@ -350,18 +349,18 @@ class Pad(VisionTransform): | |||
| class Resize(VisionTransform): | |||
| r""" | |||
| Resize the input data. | |||
| :param output_size: target size of image, with (height, width) shape. | |||
| :param interpolation: interpolation method. All methods are listed below: | |||
| * cv2.INTER_NEAREST – a nearest-neighbor interpolation. | |||
| * cv2.INTER_LINEAR – a bilinear interpolation (used by default). | |||
| * cv2.INTER_AREA – resampling using pixel area relation. | |||
| * cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood. | |||
| * cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| r"""Resize the input data. | |||
| Args: | |||
| output_size: target size of image, with (height, width) shape. | |||
| interpolation: interpolation method. All methods are listed below: | |||
| * cv2.INTER_NEAREST – a nearest-neighbor interpolation. | |||
| * cv2.INTER_LINEAR – a bilinear interpolation (used by default). | |||
| * cv2.INTER_AREA – resampling using pixel area relation. | |||
| * cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood. | |||
| * cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None): | |||
| @@ -410,9 +409,7 @@ class Resize(VisionTransform): | |||
| class ShortestEdgeResize(VisionTransform): | |||
| r""" | |||
| Resize the input data with specified shortset edge. | |||
| """ | |||
| r"""Resize the input data with specified shortset edge.""" | |||
| def __init__( | |||
| self, | |||
| @@ -481,11 +478,11 @@ class ShortestEdgeResize(VisionTransform): | |||
| class RandomResize(VisionTransform): | |||
| r""" | |||
| Resize the input data randomly. | |||
| r"""Resize the input data randomly. | |||
| :param scale_range: range of scaling. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| scale_range: range of scaling. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None): | |||
| @@ -526,15 +523,15 @@ class RandomResize(VisionTransform): | |||
| class RandomCrop(VisionTransform): | |||
| r""" | |||
| Crop the input data randomly. Before applying the crop transform, | |||
| r"""Crop the input data randomly. Before applying the crop transform, | |||
| pad the image first. If target size is still bigger than the size of | |||
| padded image, pad the image size to target size. | |||
| :param output_size: target size of output image, with (height, width) shape. | |||
| :param padding_size: the same with `size` in ``Pad``. | |||
| :param padding_value: the same with `value` in ``Pad``. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| output_size: target size of output image, with (height, width) shape. | |||
| padding_size: the same with `size` in ``Pad``. | |||
| padding_value: the same with `value` in ``Pad``. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__( | |||
| @@ -584,16 +581,16 @@ class RandomCrop(VisionTransform): | |||
| class RandomResizedCrop(VisionTransform): | |||
| r""" | |||
| Crop the input data to random size and aspect ratio. | |||
| r"""Crop the input data to random size and aspect ratio. | |||
| A crop of random size (default: 0.08 to 1.0) of the original size and a random | |||
| aspect ratio (default: 3/4 to 1.33) of the original aspect ratio is made. | |||
| After applying the crop transform, the input data will be resized to the given size. | |||
| :param output_size: target size of output image, with (height, width) shape. | |||
| :param scale_range: range of size of the origin size cropped. Default: (0.08, 1.0) | |||
| :param ratio_range: range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| output_size: target size of output image, with (height, width) shape. | |||
| scale_range: range of size of the origin size cropped. Default: (0.08, 1.0) | |||
| ratio_range: range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__( | |||
| @@ -674,11 +671,11 @@ class RandomResizedCrop(VisionTransform): | |||
| class CenterCrop(VisionTransform): | |||
| r""" | |||
| Crops the given the input data at the center. | |||
| r"""Crops the given the input data at the center. | |||
| :param output_size: target size of output image, with (height, width) shape. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| output_size: target size of output image, with (height, width) shape. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, output_size, *, order=None): | |||
| @@ -718,11 +715,11 @@ class CenterCrop(VisionTransform): | |||
| class RandomHorizontalFlip(VisionTransform): | |||
| r""" | |||
| Horizontally flip the input data randomly with a given probability. | |||
| r"""Horizontally flip the input data randomly with a given probability. | |||
| :param p: probability of the input data being flipped. Default: 0.5 | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| prob: probability of the input data being flipped. Default: 0.5 | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, prob: float = 0.5, *, order=None): | |||
| @@ -751,11 +748,11 @@ class RandomHorizontalFlip(VisionTransform): | |||
| class RandomVerticalFlip(VisionTransform): | |||
| r""" | |||
| Vertically flip the input data randomly with a given probability. | |||
| r"""Vertically flip the input data randomly with a given probability. | |||
| :param p: probability of the input data being flipped. Default: 0.5 | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| prob: probability of the input data being flipped. Default: 0.5 | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, prob: float = 0.5, *, order=None): | |||
| @@ -784,15 +781,15 @@ class RandomVerticalFlip(VisionTransform): | |||
| class Normalize(VisionTransform): | |||
| r""" | |||
| Normalize the input data with mean and standard deviation. | |||
| r"""Normalize the input data with mean and standard deviation. | |||
| Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, | |||
| this transform will normalize each channel of the input data. | |||
| ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` | |||
| :param mean: sequence of means for each channel. | |||
| :param std: sequence of standard deviations for each channel. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| mean: sequence of means for each channel. | |||
| std: sequence of standard deviations for each channel. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, mean=0.0, std=1.0, *, order=None): | |||
| @@ -811,13 +808,13 @@ class Normalize(VisionTransform): | |||
| class GaussianNoise(VisionTransform): | |||
| r""" | |||
| Add random gaussian noise to the input data. | |||
| r"""Add random gaussian noise to the input data. | |||
| Gaussian noise is generated with given mean and std. | |||
| :param mean: Gaussian mean used to generate noise. | |||
| :param std: Gaussian standard deviation used to generate noise. | |||
| :param order: the same with :class:`VisionTransform` | |||
| Args: | |||
| mean: Gaussian mean used to generate noise. | |||
| std: Gaussian standard deviation used to generate noise. | |||
| order: the same with :class:`VisionTransform` | |||
| """ | |||
| def __init__(self, mean=0.0, std=1.0, *, order=None): | |||
| @@ -839,12 +836,12 @@ class GaussianNoise(VisionTransform): | |||
| class BrightnessTransform(VisionTransform): | |||
| r""" | |||
| Adjust brightness of the input data. | |||
| r"""Adjust brightness of the input data. | |||
| :param value: how much to adjust the brightness. Can be any | |||
| non negative number. 0 gives the original image. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| value: how much to adjust the brightness. Can be any | |||
| non-negative number. 0 gives the original image. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, value, *, order=None): | |||
| @@ -871,12 +868,12 @@ class BrightnessTransform(VisionTransform): | |||
| class ContrastTransform(VisionTransform): | |||
| r""" | |||
| Adjust contrast of the input data. | |||
| r"""Adjust contrast of the input data. | |||
| :param value: how much to adjust the contrast. Can be any | |||
| non negative number. 0 gives the original image. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| value: how much to adjust the contrast. Can be any | |||
| non-negative number. 0 gives the original image. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, value, *, order=None): | |||
| @@ -903,12 +900,12 @@ class ContrastTransform(VisionTransform): | |||
| class SaturationTransform(VisionTransform): | |||
| r""" | |||
| Adjust saturation of the input data. | |||
| r"""Adjust saturation of the input data. | |||
| :param value: how much to adjust the saturation. Can be any | |||
| non negative number. 0 gives the original image. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| value: how much to adjust the saturation. Can be any | |||
| non-negative number. 0 gives the original image. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, value, *, order=None): | |||
| @@ -935,12 +932,12 @@ class SaturationTransform(VisionTransform): | |||
| class HueTransform(VisionTransform): | |||
| r""" | |||
| Adjust hue of the input data. | |||
| r"""Adjust hue of the input data. | |||
| :param value: how much to adjust the hue. Can be any number | |||
| between 0 and 0.5, 0 gives the original image. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| Args: | |||
| value: how much to adjust the hue. Can be any number | |||
| between 0 and 0.5, 0 gives the original image. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, value, *, order=None): | |||
| @@ -974,22 +971,22 @@ class HueTransform(VisionTransform): | |||
| class ColorJitter(VisionTransform): | |||
| r""" | |||
| Randomly change the brightness, contrast, saturation and hue of an image. | |||
| :param brightness: how much to jitter brightness. | |||
| Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] | |||
| or the given [min, max]. Should be non negative numbers. | |||
| :param contrast: how much to jitter contrast. | |||
| Chosen uniformly from [max(0, 1 - contrast), 1 + contrast] | |||
| or the given [min, max]. Should be non negative numbers. | |||
| :param saturation: how much to jitter saturation. | |||
| Chosen uniformly from [max(0, 1 - saturation), 1 + saturation] | |||
| or the given [min, max]. Should be non negative numbers. | |||
| :param hue: how much to jitter hue. | |||
| Chosen uniformly from [-hue, hue] or the given [min, max]. | |||
| Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| r"""Randomly change the brightness, contrast, saturation and hue of an image. | |||
| Args: | |||
| brightness: how much to jitter brightness. | |||
| Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] | |||
| or the given [min, max]. Should be non-negative numbers. | |||
| contrast: how much to jitter contrast. | |||
| Chosen uniformly from [max(0, 1 - contrast), 1 + contrast] | |||
| or the given [min, max]. Should be non-negative numbers. | |||
| saturation: how much to jitter saturation. | |||
| Chosen uniformly from [max(0, 1 - saturation), 1 + saturation] | |||
| or the given [min, max]. Should be non-negative numbers. | |||
| hue: how much to jitter hue. | |||
| Chosen uniformly from [-hue, hue] or the given [min, max]. | |||
| Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5. | |||
| order: the same with :class:`VisionTransform`. | |||
| """ | |||
| def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None): | |||
| @@ -1014,11 +1011,10 @@ class ColorJitter(VisionTransform): | |||
| class Lighting(VisionTransform): | |||
| r""" | |||
| Apply AlexNet-Style "lighting" augmentation to input data. | |||
| r"""Apply AlexNet-Style "lighting" augmentation to input data. | |||
| Input images are assumed to have 'RGB' channel order. | |||
| The degree of color jittering is randomly sampled via a normal distribution, | |||
| with standard deviation given by the scale parameter. | |||
| """ | |||
| @@ -54,10 +54,10 @@ _device_type_set = {"cpu", "gpu", "xpu", "rocm"} | |||
| def get_device_count(device_type: str) -> int: | |||
| """ | |||
| Gets number of devices installed on this system. | |||
| r"""Gets number of devices installed on this system. | |||
| :param device_type: device type, one of 'gpu' or 'cpu' | |||
| Args: | |||
| device_type: device type, one of 'gpu' or 'cpu' | |||
| """ | |||
| assert device_type in _device_type_set, "device must be one of {}".format( | |||
| _device_type_set | |||
| @@ -67,73 +67,59 @@ def get_device_count(device_type: str) -> int: | |||
| def is_cuda_available() -> bool: | |||
| """ | |||
| Returns whether cuda device is available on this system. | |||
| """ | |||
| r"""Returns whether cuda device is available on this system.""" | |||
| t = _str2device_type("gpu") | |||
| return CompNode._get_device_count(t, False) > 0 | |||
| def is_cambricon_available() -> bool: | |||
| """ | |||
| Returns whether cambricon device is available on this system. | |||
| """ | |||
| r"""Returns whether cambricon device is available on this system.""" | |||
| t = _str2device_type("cambricon") | |||
| return CompNode._get_device_count(t, False) > 0 | |||
| def is_atlas_available() -> bool: | |||
| """ | |||
| Returns whether atlas device is available on this system. | |||
| """ | |||
| r"""Returns whether atlas device is available on this system.""" | |||
| t = _str2device_type("atlas") | |||
| return CompNode._get_device_count(t, False) > 0 | |||
| def is_rocm_available() -> bool: | |||
| """Returns whether rocm device is available on this system. | |||
| """ | |||
| r"""Returns whether rocm device is available on this system.""" | |||
| t = _str2device_type("rocm") | |||
| return CompNode._get_device_count(t, False) > 0 | |||
| def set_default_device(device: str = "xpux"): | |||
| r""" | |||
| Sets default computing node. | |||
| :param device: default device type. The type can be 'cpu0', 'cpu1', etc., | |||
| or 'gpu0', 'gpu1', etc., to specify the particular cpu or gpu to use. | |||
| 'cpux' and 'gpux' can also be used to specify any number of cpu or gpu devices. | |||
| 'multithread' device type is avaliable when inference, which implements | |||
| multi-threading parallelism at the operator level. For example, | |||
| 'multithread4' will compute with 4 threads. | |||
| The default value is 'xpux' to specify any device available. The priority of using gpu is higher when both gpu and cpu are available. | |||
| It can also be set by environment variable `MGE_DEFAULT_DEVICE`. | |||
| r"""Sets default computing node. | |||
| Args: | |||
| device: default device type. | |||
| Note: | |||
| * The type can be 'cpu0', 'cpu1', etc., or 'gpu0', 'gpu1', etc., | |||
| to specify the particular CPU or GPU to use. | |||
| * 'cpux' and 'gpux' can also be used to specify any number of CPU or GPU devices. | |||
| * The default value is 'xpux' to specify any device available. | |||
| * The priority of using GPU is higher when both GPU and CPU are available. | |||
| * 'multithread' device type is available for inference, | |||
| which implements multi-threading parallelism at the operator level. | |||
| For example, 'multithread4' will compute with 4 threads. | |||
| * It can also be set by environment variable ``MGE_DEFAULT_DEVICE``. | |||
| """ | |||
| assert _valid_device(device), "Invalid device name {}".format(device) | |||
| CompNode._set_default_device(device) | |||
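A short usage sketch; the device names are examples and only work when the corresponding hardware is present:

.. code-block:: python

    import megengine as mge

    mge.set_default_device("gpu0")      # pin computation to the first GPU
    print(mge.get_default_device())     # -> "gpu0"
    mge.set_default_device("cpu0")      # or fall back to the first CPU

The same effect can be had without touching code, e.g. ``MGE_DEFAULT_DEVICE=cpu0 python train.py``.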
| def get_default_device() -> str: | |||
| r""" | |||
| Gets default computing node. | |||
| r"""Gets default computing node. | |||
| It returns the value set by :func:`~.set_default_device`. | |||
| """ | |||
| return CompNode._get_default_device() | |||
| def get_mem_status_bytes(device: Optional[str] = None): | |||
| r""" | |||
| Get total and free memory on the computing device in bytes. | |||
| """ | |||
| r"""Get total and free memory on the computing device in bytes.""" | |||
| if device is None: | |||
| device = get_default_device() | |||
| tot, free = CompNode(device).get_mem_status_bytes | |||
| @@ -150,15 +136,17 @@ def set_prealloc_config( | |||
| growth_factor=2.0, | |||
| device_type=DeviceType.CUDA, | |||
| ): | |||
| """ | |||
| Specifies how to pre-allocate from raw device allocator. | |||
| :param alignment: specifies the alignment in bytes. | |||
| :param min_req: min request size in bytes. | |||
| :param max_overhead: max overhead above required size in bytes. | |||
| :param growth_factor: `request size / cur allocated` | |||
| :param device_type: the device type | |||
| r"""Specifies how to pre-allocate from raw device allocator. | |||
| Args: | |||
| alignment (int): specifies the alignment in bytes. | |||
| min_req (int): min request size in bytes. | |||
| max_overhead (int): max overhead above required size in bytes. | |||
| growth_factor: ``request size / cur allocated``. | |||
| device_type: the device type. | |||
| """ | |||
| assert alignment > 0 | |||
| assert min_req > 0 | |||
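A hedged sketch with illustrative values (the sizes below are not recommendations):

.. code-block:: python

    from megengine.device import set_prealloc_config

    # 512-byte alignment, 1 MiB minimum request, at most 4 MiB overhead,
    # and 2x growth of the pre-allocated pool
    set_prealloc_config(
        alignment=512,
        min_req=1 << 20,
        max_overhead=1 << 22,
        growth_factor=2.0,
    )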
| @@ -31,17 +31,15 @@ from .server import Client, Server | |||
| @mproperty | |||
| def backend(mod): | |||
| r""" | |||
| Get or set backend of collective communication. | |||
| r"""Get or set backend of collective communication. | |||
| Available backends are ['nccl', 'shm', 'rccl'] | |||
| Examples: | |||
| .. code-block:: | |||
| import megengine.distributed as dist | |||
| dist.backend = "nccl" | |||
| .. code-block:: | |||
| import megengine.distributed as dist | |||
| dist.backend = "nccl" | |||
| """ | |||
| assert group._sd, "please call init_process_group first" | |||
| return group._sd.backend | |||
| @@ -50,7 +50,7 @@ def _backend(): | |||
| def collective_comm(inp, mode, group, device): | |||
| """Helper function for applying collective communication functions.""" | |||
| r"""Helper function for applying collective communication functions.""" | |||
| assert isinstance(group, Group) | |||
| if group is None: | |||
| return inp | |||
| @@ -158,8 +158,7 @@ class _ReduceSum(Function): | |||
| def reduce_sum( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
| ) -> Tensor: | |||
| r""" | |||
| Reduce tensor data across the specified group by sum. | |||
| r"""Reduce tensor data across the specified group by sum. | |||
| Only root process will receive the final result. | |||
| Args: | |||
| @@ -176,22 +175,20 @@ def reduce_sum( | |||
| Reduced tensor if in root process, None in other processes. | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor([rank]) | |||
| # Rank 0 # input: Tensor([0]) | |||
| # Rank 1 # input: Tensor([1]) | |||
| output = reduce_sum(input) | |||
| # Rank 0 # output: Tensor([1]) | |||
| # Rank 1 # output: None | |||
| input = Tensor([rank]) | |||
| group = Group([1, 0]) # first rank is root | |||
| output = reduce_sum(input, group) | |||
| # Rank 0 # output: None | |||
| # Rank 1 # output: Tensor([1]) | |||
| .. code-block:: | |||
| input = Tensor([rank]) | |||
| # Rank 0 # input: Tensor([0]) | |||
| # Rank 1 # input: Tensor([1]) | |||
| output = reduce_sum(input) | |||
| # Rank 0 # output: Tensor([1]) | |||
| # Rank 1 # output: None | |||
| input = Tensor([rank]) | |||
| group = Group([1, 0]) # first rank is root | |||
| output = reduce_sum(input, group) | |||
| # Rank 0 # output: None | |||
| # Rank 1 # output: Tensor([1]) | |||
| """ | |||
| op = _ReduceSum(group, device) | |||
| (out,) = apply(op, inp) | |||
| @@ -222,8 +219,7 @@ class _Broadcast(Function): | |||
| def broadcast( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
| ) -> Tensor: | |||
| r""" | |||
| Broadcast tensor data from root process to others. | |||
| r"""Broadcast tensor data from root process to others. | |||
| Args: | |||
| inp: Input tensor. | |||
| @@ -240,21 +236,20 @@ def broadcast( | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor([rank]) | |||
| # Rank 0 # input: Tensor([0]) | |||
| # Rank 1 # input: Tensor([1]) | |||
| output = broadcast(input) | |||
| # Rank 0 # output: Tensor([0]) | |||
| # Rank 1 # output: Tensor([0]) | |||
| .. code-block:: | |||
| input = Tensor([rank]) | |||
| group = Group([1, 0]) # first rank is root | |||
| output = broadcast(input, group) | |||
| # Rank 0 # output: Tensor([1]) | |||
| # Rank 1 # output: Tensor([1]) | |||
| input = Tensor([rank]) | |||
| # Rank 0 # input: Tensor([0]) | |||
| # Rank 1 # input: Tensor([1]) | |||
| output = broadcast(input) | |||
| # Rank 0 # output: Tensor([0]) | |||
| # Rank 1 # output: Tensor([0]) | |||
| input = Tensor([rank]) | |||
| group = Group([1, 0]) # first rank is root | |||
| output = broadcast(input, group) | |||
| # Rank 0 # output: Tensor([1]) | |||
| # Rank 1 # output: Tensor([1]) | |||
| """ | |||
| shape, dtype = _bcast_shape_dtype(group, inp) | |||
| if group.rank != 0: | |||
| @@ -278,8 +273,7 @@ def _bcast_param( | |||
| def all_gather( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, axis=0, | |||
| ) -> Tensor: | |||
| r""" | |||
| Gather tensors across the specified group and concat them at first dimension. | |||
| r"""Gather tensors across the specified group and concat them at first dimension. | |||
| Args: | |||
| inp: Input tensor. | |||
| @@ -298,21 +292,20 @@ def all_gather( | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor([rank]) | |||
| # Rank 0 # input: Tensor([0]) | |||
| # Rank 1 # input: Tensor([1]) | |||
| output = all_gather(input) | |||
| # Rank 0 # output: Tensor([0 1]) | |||
| # Rank 1 # output: Tensor([0 1]) | |||
| .. code-block:: | |||
| input = Tensor([rank]) | |||
| group = Group([1, 0]) | |||
| output = all_gather(input, group) | |||
| # Rank 0 # output: Tensor([1 0]) | |||
| # Rank 1 # output: Tensor([1 0]) | |||
| input = Tensor([rank]) | |||
| # Rank 0 # input: Tensor([0]) | |||
| # Rank 1 # input: Tensor([1]) | |||
| output = all_gather(input) | |||
| # Rank 0 # output: Tensor([0 1]) | |||
| # Rank 1 # output: Tensor([0 1]) | |||
| input = Tensor([rank]) | |||
| group = Group([1, 0]) | |||
| output = all_gather(input, group) | |||
| # Rank 0 # output: Tensor([1 0]) | |||
| # Rank 1 # output: Tensor([1 0]) | |||
| """ | |||
| mode = CollectiveComm.Mode.ALL_GATHER | |||
| out = collective_comm(inp, mode, group, device) | |||
| @@ -338,8 +331,7 @@ def all_gather( | |||
| def reduce_scatter_sum( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, axis=0 | |||
| ) -> Tensor: | |||
| r""" | |||
| Reduce tensors across the specified group by sum and split them at first dimension. | |||
| r"""Reduce tensors across the specified group by sum and split them at first dimension. | |||
| Args: | |||
| inp: Input tensor. | |||
| @@ -358,21 +350,20 @@ def reduce_scatter_sum( | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor([0 1]) | |||
| # Rank 0 # input: Tensor([0 1]) | |||
| # Rank 1 # input: Tensor([0 1]) | |||
| output = reduce_scatter_sum(input) | |||
| # Rank 0 # output: Tensor([0]) | |||
| # Rank 1 # output: Tensor([2]) | |||
| .. code-block:: | |||
| input = Tensor([0 1]) | |||
| group = Group([1, 0]) | |||
| output = reduce_scatter_sum(input, group) | |||
| # Rank 0 # output: Tensor([2]) | |||
| # Rank 1 # output: Tensor([0]) | |||
| input = Tensor([0 1]) | |||
| # Rank 0 # input: Tensor([0 1]) | |||
| # Rank 1 # input: Tensor([0 1]) | |||
| output = reduce_scatter_sum(input) | |||
| # Rank 0 # output: Tensor([0]) | |||
| # Rank 1 # output: Tensor([2]) | |||
| input = Tensor([0 1]) | |||
| group = Group([1, 0]) | |||
| output = reduce_scatter_sum(input, group) | |||
| # Rank 0 # output: Tensor([2]) | |||
| # Rank 1 # output: Tensor([0]) | |||
| """ | |||
| group_size = group.size if group is not None else 1 | |||
| assert ( | |||
| @@ -398,8 +389,7 @@ def reduce_scatter_sum( | |||
| def all_reduce_sum( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
| ) -> Tensor: | |||
| r""" | |||
| Reduce tensors across the specified group by sum. | |||
| r"""Reduce tensors across the specified group by sum. | |||
| Args: | |||
| inp: Input tensor. | |||
| @@ -416,15 +406,14 @@ def all_reduce_sum( | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor(rank) | |||
| # Rank 0 # input: Tensor(0) | |||
| # Rank 1 # input: Tensor(1) | |||
| output = all_reduce_sum(input) | |||
| # Rank 0 # output: Tensor(1) | |||
| # Rank 1 # output: Tensor(1) | |||
| .. code-block:: | |||
| input = Tensor(rank) | |||
| # Rank 0 # input: Tensor(0) | |||
| # Rank 1 # input: Tensor(1) | |||
| output = all_reduce_sum(input) | |||
| # Rank 0 # output: Tensor(1) | |||
| # Rank 1 # output: Tensor(1) | |||
| """ | |||
| mode = CollectiveComm.Mode.ALL_REDUCE_SUM | |||
| return collective_comm(inp, mode, group, device) | |||
| @@ -433,8 +422,7 @@ def all_reduce_sum( | |||
| def all_reduce_max( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
| ) -> Tensor: | |||
| r""" | |||
| Reduce tensors across the specified group by max. | |||
| r"""Reduce tensors across the specified group by max. | |||
| Args: | |||
| inp: Input tensor. | |||
| @@ -451,15 +439,14 @@ def all_reduce_max( | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor(rank) | |||
| # Rank 0 # input: Tensor(0) | |||
| # Rank 1 # input: Tensor(1) | |||
| output = all_reduce_max(input) | |||
| # Rank 0 # output: Tensor(1) | |||
| # Rank 1 # output: Tensor(1) | |||
| .. code-block:: | |||
| input = Tensor(rank) | |||
| # Rank 0 # input: Tensor(0) | |||
| # Rank 1 # input: Tensor(1) | |||
| output = all_reduce_max(input) | |||
| # Rank 0 # output: Tensor(1) | |||
| # Rank 1 # output: Tensor(1) | |||
| """ | |||
| mode = CollectiveComm.Mode.ALL_REDUCE_MAX | |||
| return collective_comm(inp, mode, group, device) | |||
| @@ -468,8 +455,7 @@ def all_reduce_max( | |||
| def all_reduce_min( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
| ) -> Tensor: | |||
| r""" | |||
| Reduce tensors across the specified group by min. | |||
| r"""Reduce tensors across the specified group by min. | |||
| Args: | |||
| inp: Input tensor. | |||
| @@ -486,15 +472,14 @@ def all_reduce_min( | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor(rank) | |||
| # Rank 0 # input: Tensor(0) | |||
| # Rank 1 # input: Tensor(1) | |||
| output = all_reduce_min(input) | |||
| # Rank 0 # output: Tensor(0) | |||
| # Rank 1 # output: Tensor(0) | |||
| .. code-block:: | |||
| input = Tensor(rank) | |||
| # Rank 0 # input: Tensor(0) | |||
| # Rank 1 # input: Tensor(1) | |||
| output = all_reduce_min(input) | |||
| # Rank 0 # output: Tensor(0) | |||
| # Rank 1 # output: Tensor(0) | |||
| """ | |||
| mode = CollectiveComm.Mode.ALL_REDUCE_MIN | |||
| return collective_comm(inp, mode, group, device) | |||
| @@ -520,8 +505,7 @@ class _Gather(Function): | |||
| def gather( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, axis=0, | |||
| ) -> Tensor: | |||
| r""" | |||
| Gather tensors across the specified group. | |||
| r"""Gather tensors across the specified group. | |||
| Only root process will receive the final result. | |||
| Args: | |||
| @@ -534,27 +518,23 @@ def gather( | |||
| Specify "gpu0:1" to execute this operator on diffrent cuda stream, | |||
| 1 is stream id, and default stream id is 0. | |||
| axis: The concat axis for collective_comm result | |||
| The default axis is 0 | |||
| Returns: | |||
| Result tensor if in root process, None if in other process | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor([rank]) | |||
| # Rank 0 # input: Tensor([0]) | |||
| # Rank 1 # input: Tensor([1]) | |||
| output = gather(input) | |||
| # Rank 0 # output: Tensor([0 1]) | |||
| # Rank 1 # output: None | |||
| .. code-block:: | |||
| input = Tensor([rank]) | |||
| group = Group([1, 0]) # first rank is root | |||
| output = gather(input, group) | |||
| # Rank 0 # output: None | |||
| # Rank 1 # output: Tensor([1 0]) | |||
| input = Tensor([rank]) | |||
| # Rank 0 # input: Tensor([0]) | |||
| # Rank 1 # input: Tensor([1]) | |||
| output = gather(input) | |||
| # Rank 0 # output: Tensor([0 1]) | |||
| # Rank 1 # output: None | |||
| input = Tensor([rank]) | |||
| group = Group([1, 0]) # first rank is root | |||
| output = gather(input, group) | |||
| # Rank 0 # output: None | |||
| # Rank 1 # output: Tensor([1 0]) | |||
| """ | |||
| assert ( | |||
| axis < inp.ndim | |||
| @@ -607,8 +587,7 @@ class _Scatter(Function): | |||
| def scatter( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, axis=0, | |||
| ) -> Tensor: | |||
| r""" | |||
| Split tensor in root process at first dimension. | |||
| r"""Split tensor in root process at first dimension. | |||
| Args: | |||
| inp: Input tensor. | |||
| @@ -627,21 +606,20 @@ def scatter( | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor([0 1]) + rank*2 | |||
| # Rank 0 # input: Tensor([0 1]) | |||
| # Rank 1 # input: Tensor([2 3]) | |||
| output = scatter(input) | |||
| # Rank 0 # output: Tensor([0]) | |||
| # Rank 1 # output: Tensor([1]) | |||
| .. code-block:: | |||
| input = Tensor([0 1]) + rank*2 | |||
| group = Group([1, 0]) # first rank is root | |||
| output = scatter(input, group) | |||
| # Rank 0 # output: Tensor([3]) | |||
| # Rank 1 # output: Tensor([2]) | |||
| input = Tensor([0 1]) + rank*2 | |||
| # Rank 0 # input: Tensor([0 1]) | |||
| # Rank 1 # input: Tensor([2 3]) | |||
| output = scatter(input) | |||
| # Rank 0 # output: Tensor([0]) | |||
| # Rank 1 # output: Tensor([1]) | |||
| input = Tensor([0 1]) + rank*2 | |||
| group = Group([1, 0]) # first rank is root | |||
| output = scatter(input, group) | |||
| # Rank 0 # output: Tensor([3]) | |||
| # Rank 1 # output: Tensor([2]) | |||
| """ | |||
| shape, dtype = _bcast_shape_dtype(group, inp) | |||
| if group.rank != 0: | |||
| @@ -680,8 +658,7 @@ def all_to_all( | |||
| split_axis: int = 0, | |||
| concat_axis: int = 0, | |||
| ) -> Tensor: | |||
| r""" | |||
| Each process scatter input tensor to all processes and return gathered tensor. | |||
| r"""Each process scatter input tensor to all processes and return gathered tensor. | |||
| Args: | |||
| inp: Input tensor. | |||
| @@ -694,29 +671,26 @@ def all_to_all( | |||
| 1 is stream id, and default stream id is 0. | |||
| split_axis: The axis along which collective_comm will split data; | |||
| the default axis is 0. | |||
| concat_axis: The axis along which collective_comm will concat data; | |||
| the default axis is 0. | |||
| Returns: | |||
| Result tensor. | |||
| Examples: | |||
| .. code-block:: | |||
| input = Tensor([0 1]) + rank*2 | |||
| # Rank 0 # input: Tensor([0 1]) | |||
| # Rank 1 # input: Tensor([2 3]) | |||
| output = all_to_all(input) | |||
| # Rank 0 # output: Tensor([0 2]) | |||
| # Rank 1 # output: Tensor([1 3]) | |||
| .. code-block:: | |||
| input = Tensor([0 1]) + rank*2 | |||
| group = Group([1, 0]) | |||
| output = all_to_all(input, group) | |||
| # Rank 0 # output: Tensor([0 3]) | |||
| # Rank 1 # output: Tensor([2 1]) | |||
| input = Tensor([0 1]) + rank*2 | |||
| # Rank 0 # input: Tensor([0 1]) | |||
| # Rank 1 # input: Tensor([2 3]) | |||
| output = all_to_all(input) | |||
| # Rank 0 # output: Tensor([0 2]) | |||
| # Rank 1 # output: Tensor([1 3]) | |||
| input = Tensor([0 1]) + rank*2 | |||
| group = Group([1, 0]) | |||
| output = all_to_all(input, group) | |||
| # Rank 0 # output: Tensor([0 3]) | |||
| # Rank 1 # output: Tensor([2 1]) | |||
| """ | |||
| group_size = group.size if group is not None else 1 | |||
| assert ( | |||
| @@ -805,8 +779,7 @@ class _RemoteRecv(Function): | |||
| def remote_send(inp: Tensor, dest_rank: int): | |||
| r""" | |||
| Send tensor to another process. | |||
| r"""Send tensor to another process. | |||
| Args: | |||
| inp: Tensor to send. | |||
| @@ -816,17 +789,15 @@ def remote_send(inp: Tensor, dest_rank: int): | |||
| None. | |||
| Examples: | |||
| .. code-block:: | |||
| if rank == 0: | |||
| data = mge.tensor(1) | |||
| # Tensor(1) | |||
| F.distributed.remote_send(data, 1) # return None | |||
| else: | |||
| data = F.distributed.remote_recv(0) | |||
| # Tensor(1) | |||
| .. code-block:: | |||
| if rank == 0: | |||
| data = mge.tensor(1) | |||
| # Tensor(1) | |||
| F.distributed.remote_send(data, 1) # return None | |||
| else: | |||
| data = F.distributed.remote_recv(0) | |||
| # Tensor(1) | |||
| """ | |||
| group = _SendRecvGroup(get_rank(), dest_rank) | |||
| _bcast_shape_dtype(group, inp) | |||
| @@ -844,8 +815,7 @@ def remote_send(inp: Tensor, dest_rank: int): | |||
| def remote_recv(src_rank: int, device: Optional[str] = None, inp=None) -> Tensor: | |||
| r""" | |||
| Receive a tensor from another process. | |||
| r"""Receive a tensor from another process. | |||
| Args: | |||
| src_rank: Rank of source process. | |||
| @@ -862,14 +832,13 @@ def remote_recv(src_rank: int, device: Optional[str] = None, inp=None) -> Tensor | |||
| .. code-block:: | |||
| if rank == 0: | |||
| data = mge.tensor(1) | |||
| # Tensor(1) | |||
| F.distributed.remote_send(data, 1) # return None | |||
| else: | |||
| data = F.distributed.remote_recv(0) | |||
| # Tensor(1) | |||
| if rank == 0: | |||
| data = mge.tensor(1) | |||
| # Tensor(1) | |||
| F.distributed.remote_send(data, 1) # return None | |||
| else: | |||
| data = F.distributed.remote_recv(0) | |||
| # Tensor(1) | |||
| """ | |||
| group = _SendRecvGroup(src_rank, get_rank()) | |||
| shape, dtype = _bcast_shape_dtype(group, None) | |||
| @@ -36,15 +36,13 @@ _sd = None | |||
| class Group: | |||
| r""" | |||
| Include ranked nodes running collective communication (See :mod:`~.functional.distributed`). | |||
| r"""Include ranked nodes running collective communication (See :mod:`~.functional.distributed`). | |||
| By default collectives operate on the default group (also called ``WORLD``) | |||
| and require all processes to enter the distributed function call. | |||
| By default collectives operate on the default group (also called ``WORLD``) | |||
| and require all processes to enter the distributed function call. | |||
| :param proc_ranks: rank list of the group, the first one is root rank. | |||
| Args: | |||
| proc_ranks: rank list of the group, the first one is root rank. | |||
| """ | |||
| def __init__(self, proc_ranks): | |||
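| A brief usage sketch may help here: subgroups are built from rank lists, with the first rank acting as root. The tensor ``x`` and the rank order below are illustrative only. | |||
| .. code-block:: | |||
| import megengine.distributed as dist | |||
| import megengine.functional as F | |||
| # hypothetical: build a subgroup with rank 1 as root and gather into it | |||
| sub = dist.new_group([1, 0]) | |||
| out = F.distributed.gather(x, group=sub)  # `x` is a per-rank tensor assumed to exist | |||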
| @@ -116,15 +114,15 @@ def init_process_group( | |||
| backend: Optional[str] = "auto", | |||
| device_type: str = "xpu", | |||
| ) -> None: | |||
| """ | |||
| Initialize the distributed process group and specify the device used in the current process | |||
| :param master_ip: ip address of the master node. | |||
| :param port: port available for all processes to communicate. | |||
| :param world_size: total number of processes participating in the job. | |||
| :param rank: rank of the current process. | |||
| :param device: the GPU device id to bind this process to. | |||
| :param backend: communicator backend, currently support 'nccl' and 'shm'. | |||
| r"""Initialize the distributed process group and specify the device used in the current process | |||
| Args: | |||
| master_ip: ip address of the master node. | |||
| port: port available for all processes to communicate. | |||
| world_size: total number of processes participating in the job. | |||
| rank: rank of the current process. | |||
| device: the GPU device id to bind this process to. | |||
| backend: communicator backend, currently supports 'nccl' and 'shm'. | |||
| """ | |||
| physical_device_type = what_is_xpu() if device_type == "xpu" else device_type | |||
| if not isinstance(master_ip, str): | |||
| @@ -180,10 +178,10 @@ def _set_machine_ranks(ranks) -> None: | |||
| @contextmanager | |||
| def override_backend(new_backend: str): | |||
| """ | |||
| Override distributed backend | |||
| r"""Override distributed backend | |||
| :param new_backend: communicator backend set in this context. | |||
| Args: | |||
| new_backend: communicator backend set in this context. | |||
| """ | |||
| global _sd | |||
| assert _sd, "please call init_process_group first" | |||
| @@ -196,51 +194,51 @@ def override_backend(new_backend: str): | |||
| def is_distributed() -> bool: | |||
| """Return True if the distributed process group has been initialized.""" | |||
| r"""Return True if the distributed process group has been initialized.""" | |||
| return _sd is not None | |||
| def get_rank() -> int: | |||
| """Get the rank of the current process.""" | |||
| r"""Get the rank of the current process.""" | |||
| return _sd.proc_rank if _sd is not None else 0 | |||
| def get_world_size() -> int: | |||
| """Get the total number of processes participating in the job.""" | |||
| r"""Get the total number of processes participating in the job.""" | |||
| return _sd.world_size if _sd is not None else 1 | |||
| def get_backend() -> str: | |||
| """Get the backend str.""" | |||
| r"""Get the backend str.""" | |||
| assert _sd is not None, "please call init_process_group first" | |||
| return _sd.backend if _sd is not None else None | |||
| def get_py_server_addr() -> Tuple[str, int]: | |||
| """Get master_ip and port of python XML RPC server.""" | |||
| r"""Get master_ip and port of python XML RPC server.""" | |||
| assert _sd is not None, "please call init_process_group first" | |||
| return _sd.master_ip, _sd.py_server_port | |||
| def get_mm_server_addr() -> Tuple[str, int]: | |||
| """Get master_ip and port of C++ mm_server.""" | |||
| r"""Get master_ip and port of C++ mm_server.""" | |||
| assert _sd is not None, "please call init_process_group first" | |||
| return _sd.master_ip, _sd.mm_server_port | |||
| def get_client() -> Client: | |||
| """Get client of python XML RPC server.""" | |||
| r"""Get client of python XML RPC server.""" | |||
| assert _sd is not None, "please call init_process_group first" | |||
| return _sd.client | |||
| def new_group(proc_ranks: List[int]) -> Group: | |||
| """Build a subgroup containing certain ranks.""" | |||
| r"""Build a subgroup containing certain ranks.""" | |||
| return Group(proc_ranks) | |||
| def group_barrier(group: Group = WORLD) -> None: | |||
| """Block until all ranks in the group reach this barrier.""" | |||
| r"""Block until all ranks in the group reach this barrier.""" | |||
| # if running with single node, skip it | |||
| if _sd is None: | |||
| return | |||
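| A typical use of the barrier is to keep non-root ranks from racing ahead of a rank-0-only side effect; a sketch in which ``model`` and the file name are hypothetical: | |||
| .. code-block:: | |||
| import megengine as mge | |||
| import megengine.distributed as dist | |||
| if dist.get_rank() == 0: | |||
|     mge.save(model.state_dict(), "checkpoint.pkl") | |||
| dist.group_barrier()  # every rank waits until rank 0 has finished writing | |||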
| @@ -28,39 +28,40 @@ from .group import WORLD, Group, group_barrier, is_distributed, override_backend | |||
| def param_pack_split(inp: Tensor, offsets: list, shapes: list): | |||
| r""" | |||
| Returns split tensor to tensor list as offsets and shapes described, | |||
| only used for ``parampack``. | |||
| r"""Returns split tensor to tensor list as offsets and shapes described, | |||
| only used for ``parampack``. | |||
| :param inp: input tensor. | |||
| :param offsets: offsets of outputs, length of `2 * n`, | |||
| Args: | |||
| inp: input tensor. | |||
| offsets: offsets of outputs, length of `2 * n`, | |||
| where n is the number of tensors you want to split, | |||
| format `[begin0, end0, begin1, end1]`. | |||
| :param shapes: tensor shapes of outputs. | |||
| :return: splitted tensors. | |||
| shapes: tensor shapes of outputs. | |||
| Examples: | |||
| Returns: | |||
| split tensors. | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| from megengine.distributed.helper import param_pack_split | |||
| .. testcode:: | |||
| a = tensor(np.ones((10,), np.int32)) | |||
| b, c = param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)]) | |||
| print(b.numpy()) | |||
| print(c.numpy()) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| from megengine.distributed.helper import param_pack_split | |||
| Outputs: | |||
| a = tensor(np.ones((10,), np.int32)) | |||
| b, c = param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)]) | |||
| print(b.numpy()) | |||
| print(c.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [1] | |||
| [[1 1 1] | |||
| [1 1 1] | |||
| [1 1 1]] | |||
| .. testoutput:: | |||
| [1] | |||
| [[1 1 1] | |||
| [1 1 1] | |||
| [1 1 1]] | |||
| """ | |||
| op = ParamPackSplit() | |||
| op.offsets = offsets | |||
| @@ -73,36 +74,37 @@ def param_pack_split(inp: Tensor, offsets: list, shapes: list): | |||
| def param_pack_concat(inps: list, offsets: Tensor, offsets_val: list): | |||
| r""" | |||
| Returns concated tensor, only used for ``parampack``. | |||
| r"""Returns concated tensor, only used for ``parampack``. | |||
| :param inps: input tensors. | |||
| :param offsets: device value of offsets. | |||
| :param offsets_val: offsets of inputs, length of `2 * n`, | |||
| Args: | |||
| inps: input tensors. | |||
| offsets: device value of offsets. | |||
| offsets_val: offsets of inputs, length of `2 * n`, | |||
| format `[begin0, end0, begin1, end1]`. | |||
| :return: concated tensor. | |||
| Examples: | |||
| Returns: | |||
| concatenated tensor. | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| from megengine.distributed.helper import param_pack_concat | |||
| .. testcode:: | |||
| a = tensor(np.ones((1,), np.int32)) | |||
| b = tensor(np.ones((3, 3), np.int32)) | |||
| offsets_val = [0, 1, 1, 10] | |||
| offsets = tensor(offsets_val, np.int32) | |||
| c = param_pack_concat([a, b], offsets, offsets_val) | |||
| print(c.numpy()) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| from megengine.distributed.helper import param_pack_concat | |||
| Outputs: | |||
| a = tensor(np.ones((1,), np.int32)) | |||
| b = tensor(np.ones((3, 3), np.int32)) | |||
| offsets_val = [0, 1, 1, 10] | |||
| offsets = tensor(offsets_val, np.int32) | |||
| c = param_pack_concat([a, b], offsets, offsets_val) | |||
| print(c.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [1 1 1 1 1 1 1 1 1 1] | |||
| .. testoutput:: | |||
| [1 1 1 1 1 1 1 1 1 1] | |||
| """ | |||
| op = ParamPackConcat() | |||
| op.offsets = offsets_val | |||
| @@ -165,9 +167,9 @@ class TensorFuture(Future): | |||
| def synchronized(func: Callable): | |||
| r"""Decorator. Decorated function will synchronize when finished. | |||
| Specifically, we use this to prevent data race during hub.load | |||
| """ | |||
| Decorator. Decorated function will synchronize when finished. | |||
| Specifically, we use this to prevent a data race during hub.load.""" | |||
| @functools.wraps(func) | |||
| def wrapper(*args, **kwargs): | |||
| @@ -199,23 +201,23 @@ get_device_count_by_fork = deprecated_func( | |||
| def bcast_list_(inps: list, group: Group = WORLD): | |||
| """ | |||
| Broadcast tensors between given group. | |||
| r"""Broadcast tensors between given group. | |||
| :param inps: input tensors. | |||
| :param group: communication group. | |||
| Args: | |||
| inps: input tensors. | |||
| group: communication group. | |||
| """ | |||
| for inp in inps: | |||
| inp._reset(_bcast_param(inp, group)) | |||
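| A common pattern, assuming ``bcast_list_`` is re-exported as ``megengine.distributed.bcast_list_`` and that ``model`` was constructed identically on every rank: | |||
| .. code-block:: | |||
| import megengine.distributed as dist | |||
| # sync the freshly initialized parameters so every rank starts from rank 0's values | |||
| dist.bcast_list_(list(model.parameters())) | |||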
| class AllreduceCallback: | |||
| """ | |||
| Allreduce Callback with tensor fusion optimization. | |||
| r"""Allreduce Callback with tensor fusion optimization. | |||
| :param reduce_method: the method to reduce gradiants. | |||
| :param group: communication group. | |||
| :param backend: override distributed backend in allreduce | |||
| Args: | |||
| reduce_method: the method to reduce gradients. | |||
| group: communication group. | |||
| backend: override the distributed backend in allreduce. | |||
| """ | |||
| def __init__(self, reduce_method: str, group: Group = WORLD, backend: str = None): | |||
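| A sketch of how this callback is usually attached, assuming ``make_allreduce_cb`` is the conventional alias for this class and ``model`` is a hypothetical module: | |||
| .. code-block:: | |||
| import megengine.distributed as dist | |||
| from megengine.autodiff import GradManager | |||
| gm = GradManager() | |||
| # average gradients across ranks during backward via a fused allreduce | |||
| gm.attach(model.parameters(), callbacks=[dist.make_allreduce_cb("mean")]) | |||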
| @@ -39,7 +39,7 @@ def _run_wrapped( | |||
| queue: mp.Queue, | |||
| machine_ranks: list, | |||
| ): | |||
| """Init distributed process group and run wrapped function.""" | |||
| r"""Init distributed process group and run wrapped function.""" | |||
| _check_device_initialized(device_type, dev) | |||
| init_process_group( | |||
| master_ip=master_ip, | |||
| @@ -64,15 +64,16 @@ def _run_wrapped( | |||
| class launcher: | |||
| """Decorator for launching multiple processes in single-machine multi-gpu training. | |||
| :param func: the function you want to launch in distributed mode. | |||
| :param n_gpus: how many devices each node. | |||
| :param world_size: how many devices totally. | |||
| :param rank_start: start number for rank. | |||
| :param master_ip: ip address for master node (where the rank 0 is). | |||
| :param port: server port for distributed server. | |||
| :param backend: set default collective communication backend. | |||
| r"""Decorator for launching multiple processes in single-machine multi-gpu training. | |||
| Args: | |||
| func: the function you want to launch in distributed mode. | |||
| n_gpus: number of devices on each node. | |||
| world_size: total number of devices. | |||
| rank_start: starting number for rank. | |||
| master_ip: ip address of the master node (where rank 0 is). | |||
| port: server port for distributed server. | |||
| backend: set default collective communication backend. | |||
| """ | |||
| def __new__(cls, *args, **kwargs): | |||
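| A minimal sketch of the decorator form, assuming a single machine with two GPUs; ``main`` is a hypothetical entry point: | |||
| .. code-block:: | |||
| import megengine.distributed as dist | |||
| @dist.launcher(n_gpus=2) | |||
| def main(): | |||
|     print("worker", dist.get_rank(), "of", dist.get_world_size(), "started") | |||
| main() | |||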
| @@ -20,11 +20,11 @@ from ..utils.future import Future | |||
| class Methods: | |||
| """ | |||
| Distributed Server Method. | |||
| r"""Distributed Server Method. | |||
| Used for exchanging information between distributed nodes. | |||
| :param mm_server_port: multiple machine rpc server port. | |||
| Args: | |||
| mm_server_port: multiple machine rpc server port. | |||
| """ | |||
| def __init__(self, mm_server_port): | |||
| @@ -39,19 +39,19 @@ class Methods: | |||
| self.bcast_dict = {} | |||
| def connect(self): | |||
| """Method for checking connection success.""" | |||
| r"""Method for checking connection success.""" | |||
| return True | |||
| def get_mm_server_port(self): | |||
| """Get multiple machine rpc server port.""" | |||
| r"""Get multiple machine rpc server port.""" | |||
| return self.mm_server_port | |||
| def set_is_grad(self, key, is_grad): | |||
| """ | |||
| Mark send/recv need gradiants by key. | |||
| r"""Mark send/recv need gradiants by key. | |||
| :param key: key to match send/recv op. | |||
| :param is_grad: whether this op need grad. | |||
| Args: | |||
| key: key to match send/recv op. | |||
| is_grad: whether this op needs grad. | |||
| """ | |||
| with self.lock: | |||
| future = self.dict_is_grad[key] | |||
| @@ -59,10 +59,10 @@ class Methods: | |||
| return True | |||
| def check_is_grad(self, key): | |||
| """ | |||
| Check whether send/recv need gradiants. | |||
| r"""Check whether send/recv need gradiants. | |||
| :param key: key to match send/recv op. | |||
| Args: | |||
| key: key to match send/recv op. | |||
| """ | |||
| with self.lock: | |||
| future = self.dict_is_grad[key] | |||
| @@ -72,11 +72,11 @@ class Methods: | |||
| return ret | |||
| def set_remote_tracer(self, key, tracer_set): | |||
| """ | |||
| Set tracer dict for tracing send/recv op. | |||
| r"""Set tracer dict for tracing send/recv op. | |||
| :param key: key to match send/recv op. | |||
| :param tracer_set: valid tracer set. | |||
| Args: | |||
| key: key to match send/recv op. | |||
| tracer_set: valid tracer set. | |||
| """ | |||
| with self.lock: | |||
| future = self.dict_remote_tracer[key] | |||
| @@ -84,10 +84,10 @@ class Methods: | |||
| return True | |||
| def check_remote_tracer(self, key): | |||
| """ | |||
| Get tracer dict for send/recv op. | |||
| r"""Get tracer dict for send/recv op. | |||
| :param key: key to match send/recv op. | |||
| Args: | |||
| key: key to match send/recv op. | |||
| """ | |||
| with self.lock: | |||
| future = self.dict_remote_tracer[key] | |||
| @@ -97,11 +97,11 @@ class Methods: | |||
| return ret | |||
| def group_barrier(self, key, size): | |||
| """ | |||
| A barrier wait for all group member. | |||
| r"""A barrier wait for all group member. | |||
| :param key: group key to match each other. | |||
| :param size: group size. | |||
| Args: | |||
| key: group key to match each other. | |||
| size: group size. | |||
| """ | |||
| with self.lock: | |||
| self.dict_barrier_counter[key] += 1 | |||
| @@ -116,14 +116,14 @@ class Methods: | |||
| return True | |||
| def user_set(self, key, val): | |||
| """Set user defined key-value pairs across processes.""" | |||
| r"""Set user defined key-value pairs across processes.""" | |||
| with self.lock: | |||
| future = self.user_dict[key] | |||
| future.set(val) | |||
| return True | |||
| def user_get(self, key): | |||
| """Get user defined key-value pairs across processes.""" | |||
| r"""Get user defined key-value pairs across processes.""" | |||
| with self.lock: | |||
| future = self.user_dict[key] | |||
| return future.get() | |||
| @@ -161,12 +161,12 @@ class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer): | |||
| def _start_server(py_server_port, queue): | |||
| """ | |||
| Start python distributed server and multiple machine server. | |||
| r"""Start python distributed server and multiple machine server. | |||
| :param py_server_port: python server port. | |||
| :param mm_server_port: multiple machine server port. | |||
| :param queue: server port will put in this queue, puts exception when process fails. | |||
| Args: | |||
| py_server_port: python server port. | |||
| mm_server_port: multiple machine server port. | |||
| queue: server port will put in this queue, puts exception when process fails. | |||
| """ | |||
| try: | |||
| mm_server_port = create_mm_server("0.0.0.0", 0) | |||
| @@ -182,11 +182,11 @@ def _start_server(py_server_port, queue): | |||
| class Server: | |||
| """ | |||
| Distributed Server for distributed training. | |||
| r"""Distributed Server for distributed training. | |||
| Should be running at master node. | |||
| :param port: python server port. | |||
| Args: | |||
| port: python server port. | |||
| """ | |||
| def __init__(self, port=0): | |||
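| A sketch of bringing the server up on the master node before any worker connects, assuming the class is re-exported as ``megengine.distributed.Server``; the port is illustrative: | |||
| .. code-block:: | |||
| import megengine.distributed as dist | |||
| server = dist.Server(port=23456)  # keep a reference so the server stays alive | |||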
| @@ -204,11 +204,11 @@ class Server: | |||
| class Client: | |||
| """ | |||
| Distributed Client for distributed training. | |||
| r"""Distributed Client for distributed training. | |||
| :param master_ip: ip address of master node. | |||
| :param port: port of server at master node. | |||
| Args: | |||
| master_ip: ip address of master node. | |||
| port: port of server at master node. | |||
| """ | |||
| def __init__(self, master_ip, port): | |||
| @@ -218,7 +218,7 @@ class Client: | |||
| self.bcast_dict = defaultdict(lambda: 0) | |||
| def connect(self): | |||
| """Check connection success.""" | |||
| r"""Check connection success.""" | |||
| while True: | |||
| try: | |||
| self.proxy = ServerProxy( | |||
| @@ -230,62 +230,62 @@ class Client: | |||
| time.sleep(1) | |||
| def get_mm_server_port(self): | |||
| """Get multiple machine server port.""" | |||
| r"""Get multiple machine server port.""" | |||
| return self.proxy.get_mm_server_port() | |||
| def set_is_grad(self, key, is_grad): | |||
| """ | |||
| Mark send/recv need gradiants by key. | |||
| r"""Mark send/recv need gradiants by key. | |||
| :param key: key to match send/recv op. | |||
| :param is_grad: whether this op need grad. | |||
| Args: | |||
| key: key to match send/recv op. | |||
| is_grad: whether this op needs grad. | |||
| """ | |||
| self.proxy.set_is_grad(key, is_grad) | |||
| def check_is_grad(self, key): | |||
| """ | |||
| Check whether send/recv need gradiants. | |||
| r"""Check whether send/recv need gradiants. | |||
| :param key: key to match send/recv op. | |||
| Args: | |||
| key: key to match send/recv op. | |||
| """ | |||
| return self.proxy.check_is_grad(key) | |||
| def set_remote_tracer(self, key, tracer_set): | |||
| """ | |||
| Set tracer dict for tracing send/recv op. | |||
| r"""Set tracer dict for tracing send/recv op. | |||
| :param key: key to match send/recv op. | |||
| :param tracer_set: valid tracer set. | |||
| Args: | |||
| key: key to match send/recv op. | |||
| tracer_set: valid tracer set. | |||
| """ | |||
| self.proxy.set_remote_tracer(key, tracer_set) | |||
| def check_remote_tracer(self, key): | |||
| """ | |||
| Get tracer dict for send/recv op. | |||
| r"""Get tracer dict for send/recv op. | |||
| :param key: key to match send/recv op. | |||
| Args: | |||
| key: key to match send/recv op. | |||
| """ | |||
| return self.proxy.check_remote_tracer(key) | |||
| def group_barrier(self, key, size): | |||
| """ | |||
| A barrier wait for all group member. | |||
| r"""A barrier wait for all group member. | |||
| :param key: group key to match each other. | |||
| :param size: group size. | |||
| Args: | |||
| key: group key to match each other. | |||
| size: group size. | |||
| """ | |||
| self.proxy.group_barrier(key, size) | |||
| def user_set(self, key, val): | |||
| """Set user defined key-value pairs across processes.""" | |||
| r"""Set user defined key-value pairs across processes.""" | |||
| return self.proxy.user_set(key, val) | |||
| def user_get(self, key): | |||
| """Get user defined key-value pairs across processes.""" | |||
| r"""Get user defined key-value pairs across processes.""" | |||
| return self.proxy.user_get(key) | |||
| def user_pop(self, key): | |||
| """Get user defined key-value pairs and delete the resources when the get is done""" | |||
| r"""Get user defined key-value pairs and delete the resources when the get is done""" | |||
| return self.proxy.user_pop(key) | |||
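| A sketch of exchanging a user-defined value between ranks through the client returned by ``get_client()``; the key and value are illustrative: | |||
| .. code-block:: | |||
| import megengine.distributed as dist | |||
| client = dist.get_client() | |||
| if dist.get_rank() == 0: | |||
|     client.user_set("base_lr", 0.0125) | |||
| else: | |||
|     base_lr = client.user_get("base_lr")  # blocks until rank 0 has set it | |||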
| def bcast_val(self, val, key, size): | |||
| @@ -30,24 +30,20 @@ def _str2bytes(text: str) -> int: | |||
| @property | |||
| def eviction_threshold(mod): | |||
| r""" | |||
| Get or set the eviction threshold in bytes. It can also be set to a string, | |||
| r"""Get or set the eviction threshold in bytes. It can also be set to a string, | |||
| whose formatting supports byte(B), kilobyte(KB), megabyte(MB) and | |||
| gigabyte(GB) units. | |||
| .. note:: | |||
| Note: | |||
| When GPU memory usage exceeds this value, DTR will heuristically select | |||
| and evict resident tensors until the amount of used memory falls below | |||
| this threshold. | |||
| Examples: | |||
| .. code-block:: | |||
| .. code-block:: | |||
| import megengine as mge | |||
| mge.dtr.eviction_threshold = "2GB" | |||
| import megengine as mge | |||
| mge.dtr.eviction_threshold = "2GB" | |||
| """ | |||
| return _eviction_threshold | |||
| @@ -66,24 +62,21 @@ def eviction_threshold(mod, value: Union[int, str]): | |||
| @property | |||
| def evictee_minimum_size(mod): | |||
| r""" | |||
| Get or set the memory threshold of tensors in bytes. It can also be set to a | |||
| r"""Get or set the memory threshold of tensors in bytes. It can also be set to a | |||
| string, whose formatting supports byte(B), kilobyte(KB), megabyte(MB) and | |||
| gigabyte(GB) units. | |||
| .. note:: | |||
| Note: | |||
| Only tensors whose size exceeds this threshold will be added to the | |||
| candidate set. A tensor that is not added to the candidate set will | |||
| never be evicted during its lifetime. | |||
| Examples: | |||
| .. code-block:: | |||
| .. code-block:: | |||
| import megengine as mge | |||
| mge.dtr.evictee_minimum_size = "2MB" | |||
| import megengine as mge | |||
| mge.dtr.evictee_minimum_size = "2MB" | |||
| """ | |||
| return _evictee_minimum_size | |||
| @@ -102,19 +95,16 @@ def evictee_minimum_size(mod, value: Union[int, str]): | |||
| @property | |||
| def enable_sqrt_sampling(mod): | |||
| r""" | |||
| Get or set whether sqrt sampling is allowed. Sqrt sampling means that given | |||
| r"""Get or set whether sqrt sampling is allowed. Sqrt sampling means that given | |||
| the size of the candidate set is N, only enumerate sqrt(N) tensors. When | |||
| the number of tensors is very high, enabling this optimization will speed | |||
| up the training. | |||
| Examples: | |||
| .. code-block:: | |||
| Examples: | |||
| .. code-block:: | |||
| import megengine as mge | |||
| mge.dtr.enable_sqrt_sampling = True | |||
| import megengine as mge | |||
| mge.dtr.enable_sqrt_sampling = True | |||
| """ | |||
| return _enable_sqrt_sampling | |||
| @@ -127,9 +117,7 @@ def enable_sqrt_sampling(mod, value: bool): | |||
| def enable(): | |||
| r""" | |||
| Enable to record computing path of tensors and to perform DTR policy. | |||
| """ | |||
| r"""Enable to record computing path of tensors and to perform DTR policy.""" | |||
| _set_defrag(True) | |||
| _set_option("enable_dtr_auto_drop", 1) | |||
| _set_option("enable_drop", 1) | |||
| @@ -138,9 +126,7 @@ def enable(): | |||
| def disable(): | |||
| r""" | |||
| Stop recording computing path of tensors and performing DTR policy. | |||
| """ | |||
| r"""Stop recording computing path of tensors and performing DTR policy.""" | |||
| _set_defrag(False) | |||
| _set_option("enable_dtr_auto_drop", 0) | |||
| _set_option("enable_drop", 0) | |||
| @@ -23,8 +23,7 @@ if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None: | |||
| def get_execution_strategy() -> Strategy: | |||
| """ | |||
| Returns the execution strategy of :class:`~module..Conv2d` and :func:`~.matmul` | |||
| r"""Returns the execution strategy of :class:`~module..Conv2d` and :func:`~.matmul` | |||
| See :func:`~.set_execution_strategy` for possible return values | |||
| """ | |||
| @@ -32,31 +31,32 @@ def get_execution_strategy() -> Strategy: | |||
| def set_execution_strategy(option): | |||
| """ | |||
| Sets the execution strategy of :class:`~module.Conv2d` and :func:`~.matmul` | |||
| r"""Sets the execution strategy of :class:`~module.Conv2d` and :func:`~.matmul` | |||
| Args: | |||
| option: Decides how :class:`~.module.Conv2d` and :func:`~.matmul` algorithms are chosen. | |||
| Available values of :class:`Strategy`: | |||
| :param option: Decides how :class:`~module.Conv2d`and :func:`~.matmul` algorithms are chosen. | |||
| Available value Strategy | |||
| * HEURISTIC uses heuristic to choose the fastest algorithm. | |||
| * PROFILE runs possible algorithms on real device to find the best one. | |||
| * REPRODUCIBLE uses the algorithms that is reproducible. | |||
| * OPTIMIZED uses the algorithms that is optimized. | |||
| * HEURISTIC uses heuristic to choose the fastest algorithm. | |||
| * PROFILE runs possible algorithms on the real device to find the best one. | |||
| * REPRODUCIBLE uses algorithms that are reproducible. | |||
| * OPTIMIZED uses algorithms that are optimized. | |||
| The default strategy is HEURISTIC, this options can be combined to | |||
| form a combination option, e.g. PROFILE | REPRODUCIBLE | |||
| can combined a option that uses the fastest of profiling result that is also reproducible. | |||
| The default strategy is HEURISTIC. These options can be combined to | |||
| form a combined option, e.g. PROFILE | REPRODUCIBLE | |||
| selects the fastest algorithm from the profiling results that is also reproducible. | |||
| Available values string: | |||
| Available values string: | |||
| * 'HEURISTIC' uses heuristic to choose the fastest algorithm. | |||
| * 'PROFILE' runs possible algorithms on real device to find the best one. | |||
| * 'PROFILE_HEURISTIC' uses profiling result and heuristic to choose the fastest algorithm. | |||
| * 'PROFILE_REPRODUCIBLE' uses the fastest of profiling result that is also reproducible. | |||
| * 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible. | |||
| * 'HEURISTIC' uses heuristic to choose the fastest algorithm. | |||
| * 'PROFILE' runs possible algorithms on real device to find the best one. | |||
| * 'PROFILE_HEURISTIC' uses profiling result and heuristic to choose the fastest algorithm. | |||
| * 'PROFILE_REPRODUCIBLE' uses the fastest of profiling result that is also reproducible. | |||
| * 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible. | |||
| The default strategy is 'HEURISTIC'. | |||
| The default strategy is 'HEURISTIC'. | |||
| It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'. | |||
| It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'. | |||
| """ | |||
| valid_string_option = { | |||
| "REPRODUCIBLE": Strategy.REPRODUCIBLE, | |||
| @@ -78,182 +78,163 @@ def _elemwise_multi_type(*args, mode, **kwargs): | |||
| def add(x, y): | |||
| """ | |||
| Element-wise `addition`. | |||
| At least one operand should be tensor. | |||
| Same for sub/mul/div/floor_div/pow/mod/atan2/equal/not_equal/less/less_equal/greater/greater_equal/maximum/minmium. | |||
| :param x: input tensor. | |||
| :return: computed tensor. | |||
| r"""Element-wise `addition`. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.add(x, y) | |||
| print(out.numpy()) | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.add(x, y) | |||
| print(out.numpy()) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| [[ 0. 2. 4.] | |||
| [ 6. 8. 10.]] | |||
| .. testoutput:: | |||
| [[ 0. 2. 4.] | |||
| [ 6. 8. 10.]] | |||
| """ | |||
| return _elwise(x, y, mode=Elemwise.Mode.ADD) | |||
| def sub(x, y): | |||
| """Element-wise `subtraction`.""" | |||
| r"""Element-wise `subtraction`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.SUB) | |||
| def mul(x, y): | |||
| """Element-wise `multiplication`.""" | |||
| r"""Element-wise `multiplication`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.MUL) | |||
| def div(x, y): | |||
| """Element-wise `(x / y)`.""" | |||
| r"""Element-wise `(x / y)`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.TRUE_DIV) | |||
| def floor_div(x, y): | |||
| """Element-wise `floor(x / y)`.""" | |||
| r"""Element-wise `floor(x / y)`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.FLOOR_DIV) | |||
| def neg(x): | |||
| """Element-wise `negation`.""" | |||
| r"""Element-wise `negation`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.NEGATE) | |||
| def pow(x, y): | |||
| """Element-wise `power`.""" | |||
| r"""Element-wise `power`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.POW) | |||
| def mod(x, y): | |||
| """Element-wise `remainder of division`.""" | |||
| r"""Element-wise `remainder of division`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.MOD) | |||
| def abs(x): | |||
| """Element-wise `absolute value`.""" | |||
| r"""Element-wise `absolute value`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.ABS) | |||
| def exp(x): | |||
| """Element-wise `exponential`.""" | |||
| r"""Element-wise `exponential`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.EXP) | |||
| def expm1(x): | |||
| """Element-wise `exp(x)-1`.""" | |||
| r"""Element-wise `exp(x)-1`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.EXPM1) | |||
| def log(x): | |||
| """Element-wise `logarithm (base e)`.""" | |||
| r"""Element-wise `logarithm (base e)`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.LOG) | |||
| def log1p(x): | |||
| """Element-wise `log(x+1) (base e)`.""" | |||
| r"""Element-wise `log(x+1) (base e)`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.LOG1P) | |||
| def sqrt(x: Tensor) -> Tensor: | |||
| """ | |||
| Element-wise `sqrt`. | |||
| Returns ``NaN`` for negative input value. | |||
| :param x: input tensor. | |||
| :return: computed tensor. | |||
| r"""Element-wise `sqrt`. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.sqrt(x) | |||
| print(out.numpy().round(decimals=4)) | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.sqrt(x) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| [[0. 1. 1.4142] | |||
| [1.7321 2. 2.2361]] | |||
| .. testoutput:: | |||
| [[0. 1. 1.4142] | |||
| [1.7321 2. 2.2361]] | |||
| """ | |||
| return x ** 0.5 | |||
| def square(x: Tensor) -> Tensor: | |||
| """ | |||
| Returns a new tensor with the square of the elements of input tensor. | |||
| :param inp: input tensor. | |||
| :return: computed tensor. | |||
| r"""Element-wise `square`. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.functional as F | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.functional as F | |||
| data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.square(data) | |||
| print(out.numpy().round(decimals=4)) | |||
| data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.square(data) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| [[ 0. 1. 4.] | |||
| [ 9. 16. 25.]] | |||
| .. testoutput:: | |||
| [[ 0. 1. 4.] | |||
| [ 9. 16. 25.]] | |||
| """ | |||
| return x ** 2 | |||
| def round(x): | |||
| """Element-wise `rounding to int`.""" | |||
| r"""Element-wise `rounding to int`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.ROUND) | |||
| def ceil(x): | |||
| """Element-wise `ceiling`.""" | |||
| r"""Element-wise `ceiling`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.CEIL) | |||
| def floor(x): | |||
| """Element-wise `floor`.""" | |||
| r"""Element-wise `floor`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.FLOOR) | |||
| def maximum(x, y): | |||
| """Element-wise `maximum of array elements`.""" | |||
| r"""Element-wise `maximum of array elements`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.MAX) | |||
| def minimum(x, y): | |||
| """Element-wise `minimum of array elements`.""" | |||
| r"""Element-wise `minimum of array elements`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.MIN) | |||
| @@ -261,62 +242,57 @@ def minimum(x, y): | |||
| def cos(x): | |||
| """ | |||
| Element-wise `cosine`. | |||
| :param x: input tensor. | |||
| :return: computed tensor. | |||
| r"""Element-wise `cosine`. | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.cos(x) | |||
| print(out.numpy().round(decimals=4)) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.cos(x) | |||
| print(out.numpy().round(decimals=4)) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[ 1. 0.5403 -0.4161] | |||
| [-0.99 -0.6536 0.2837]] | |||
| .. testoutput:: | |||
| [[ 1. 0.5403 -0.4161] | |||
| [-0.99 -0.6536 0.2837]] | |||
| """ | |||
| return _elwise(x, mode=Elemwise.Mode.COS) | |||
| def sin(x): | |||
| """Element-wise `sine`.""" | |||
| r"""Element-wise `sine`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.SIN) | |||
| def tan(x): | |||
| """Element-wise `tangent`.""" | |||
| r"""Element-wise `tangent`.""" | |||
| return sin(x) / cos(x) | |||
| def acos(x): | |||
| """Element-wise `inverse cosine`.""" | |||
| r"""Element-wise `inverse cosine`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.ACOS) | |||
| def asin(x): | |||
| """Element-wise `inverse sine`.""" | |||
| r"""Element-wise `inverse sine`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.ASIN) | |||
| def atan(x): | |||
| """Element-wise `inverse tangent`.""" | |||
| r"""Element-wise `inverse tangent`.""" | |||
| return _elwise(x, 1, mode=Elemwise.Mode.ATAN2) | |||
| def atan2(y, x): | |||
| """Element-wise `2-argument arctangent`.""" | |||
| r"""Element-wise `2-argument arctangent`.""" | |||
| return _elwise(y, x, mode=Elemwise.Mode.ATAN2) | |||
| @@ -355,38 +331,33 @@ def atanh(x): | |||
| def left_shift(x, y): | |||
| """ | |||
| Element-wise `bitwise binary: x << y`. | |||
| r"""Element-wise `bitwise binary: x << y`. | |||
| :param x: input tensor, should be int. | |||
| :param y: how many bits to be left-shifted. | |||
| :return: computed tensor. | |||
| Examples: | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| x = tensor(np.arange(0, 6, dtype=np.int32).reshape(2, 3)) | |||
| out = F.left_shift(x, 2) | |||
| print(out.numpy()) | |||
| x = tensor(np.arange(0, 6, dtype=np.int32).reshape(2, 3)) | |||
| out = F.left_shift(x, 2) | |||
| print(out.numpy()) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| .. testoutput:: | |||
| [[ 0 4 8] | |||
| [12 16 20]] | |||
| [[ 0 4 8] | |||
| [12 16 20]] | |||
| """ | |||
| return _elwise(x, y, mode=Elemwise.Mode.SHL) | |||
| def right_shift(x, y): | |||
| """Element-wise `bitwise binary: x >> y`.""" | |||
| r"""Element-wise `bitwise binary: x >> y`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.SHR) | |||
| @@ -394,22 +365,22 @@ def right_shift(x, y): | |||
| def logical_and(x, y): | |||
| """Element-wise `logical and: x && y`.""" | |||
| r"""Element-wise `logical and: x && y`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.AND) | |||
| def logical_not(x): | |||
| """Element-wise `logical not: ~x`.""" | |||
| r"""Element-wise `logical not: ~x`.""" | |||
| return _elwise(x, mode=Elemwise.Mode.NOT) | |||
| def logical_or(x, y): | |||
| """Element-wise `logical or: x || y`.""" | |||
| r"""Element-wise `logical or: x || y`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.OR) | |||
| def logical_xor(x, y): | |||
| """Element-wise `logical xor: x ^ y`.""" | |||
| r"""Element-wise `logical xor: x ^ y`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.XOR) | |||
| @@ -417,59 +388,53 @@ def logical_xor(x, y): | |||
| def equal(x, y): | |||
| """ | |||
| Element-wise `(x == y)`. | |||
| :param x: input tensor 1. | |||
| :param y: input tensor 2. | |||
| :return: computed tensor. | |||
| r"""Element-wise `(x == y)`. | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.equal(x, y) | |||
| print(out.numpy()) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.equal(x, y) | |||
| print(out.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[1. 1. 1.] | |||
| [1. 1. 1.]] | |||
| .. testoutput:: | |||
| [[1. 1. 1.] | |||
| [1. 1. 1.]] | |||
| """ | |||
| return _elwise(x, y, mode=Elemwise.Mode.EQ) | |||
| def not_equal(x, y): | |||
| """Element-wise `(x != y)`.""" | |||
| r"""Element-wise `(x != y)`.""" | |||
| return x != y | |||
| def less(x, y): | |||
| """Element-wise `(x < y)`.""" | |||
| r"""Element-wise `(x < y)`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.LT) | |||
| def less_equal(x, y): | |||
| """Element-wise `(x <= y)`.""" | |||
| r"""Element-wise `(x <= y)`.""" | |||
| return _elwise(x, y, mode=Elemwise.Mode.LEQ) | |||
| def greater(x, y): | |||
| """Element-wise `(x > y)`.""" | |||
| r"""Element-wise `(x > y)`.""" | |||
| return _elwise(y, x, mode=Elemwise.Mode.LT) | |||
| def greater_equal(x, y): | |||
| """Element-wise `(x >= y)`.""" | |||
| r"""Element-wise `(x >= y)`.""" | |||
| return _elwise(y, x, mode=Elemwise.Mode.LEQ) | |||
| @@ -477,43 +442,45 @@ def greater_equal(x, y): | |||
| def clip(x: Tensor, lower=None, upper=None) -> Tensor: | |||
| r""" | |||
| Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns | |||
| r"""Clamps all elements in input tensor into the range ``[ lower, upper ]`` and returns | |||
| a resulting tensor: | |||
| .. math:: | |||
| y_i = \begin{cases} | |||
| \text{lower} & \text{if } x_i < \text{lower} \\ | |||
| x_i & \text{if } \text{lower} \leq x_i \leq \text{upper} \\ | |||
| \text{upper} & \text{if } x_i > \text{upper} | |||
| \end{cases} | |||
| :param x: input tensor. | |||
| :param lower: lower-bound of the range to be clamped to. | |||
| :param upper: upper-bound of the range to be clamped to. | |||
| :return: output clamped tensor. | |||
| Args: | |||
| x: input tensor. | |||
| lower: lower-bound of the range to be clamped to. | |||
| upper: upper-bound of the range to be clamped to. | |||
| Examples: | |||
| Returns: | |||
| output clamped tensor. | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| a = tensor(np.arange(5).astype(np.int32)) | |||
| print(F.clip(a, 2, 4).numpy()) | |||
| print(F.clip(a, lower=3).numpy()) | |||
| print(F.clip(a, upper=3).numpy()) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| a = tensor(np.arange(5).astype(np.int32)) | |||
| print(F.clip(a, 2, 4).numpy()) | |||
| print(F.clip(a, lower=3).numpy()) | |||
| print(F.clip(a, upper=3).numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [2 2 2 3 4] | |||
| [3 3 3 3 4] | |||
| [0 1 2 3 3] | |||
| .. testoutput:: | |||
| [2 2 2 3 4] | |||
| [3 3 3 3 4] | |||
| [0 1 2 3 3] | |||
| """ | |||
| assert ( | |||
| lower is not None or upper is not None | |||
| @@ -23,14 +23,14 @@ def tensorrt_runtime_opr(inputs, *, data: bytes = None): | |||
| def cambricon_runtime_opr(inputs, data, symbol, tensor_dim_mutable): | |||
| r""" | |||
| Load a serialized Cambricon model as a runtime operator in MegEngine. | |||
| :param inputs: list of input tensors. | |||
| :param data: the serialized Cambricon model. | |||
| :param symbol: name of the function in Cambricon model. | |||
| :param tensor_dim_mutable: whether the input tensors' shapes are mutable | |||
| in ``cnrtModel_t``. | |||
| r"""Load a serialized Cambricon model as a runtime operator in MegEngine. | |||
| Args: | |||
| inputs: list of input tensors. | |||
| data: the serialized Cambricon model. | |||
| symbol: name of the function in Cambricon model. | |||
| tensor_dim_mutable: whether the input tensors' shapes are mutable | |||
| in ``cnrtModel_t``. | |||
| """ | |||
| op = builtin.CambriconRuntime(data, len(data), symbol, tensor_dim_mutable) | |||
| @@ -38,11 +38,11 @@ def cambricon_runtime_opr(inputs, data, symbol, tensor_dim_mutable): | |||
| def atlas_runtime_opr(inputs, data): | |||
| r""" | |||
| Load a serialized Atlas model as a runtime operator in MegEngine. | |||
| r"""Load a serialized Atlas model as a runtime operator in MegEngine. | |||
| :param inputs: list of input tensors. | |||
| :param data: the serialized Atlas model. | |||
| Args: | |||
| inputs: list of input tensors. | |||
| data: the serialized Atlas model. | |||
| """ | |||
| op = builtin.AtlasRuntime(data, len(data)) | |||
| @@ -26,9 +26,7 @@ __all__ = [ | |||
| def _reduce_output(loss_fn): | |||
| r""" | |||
| Wrapper to apply canonical reductions to loss outputs. | |||
| """ | |||
| r"""Wrapper to apply canonical reductions to loss outputs.""" | |||
| @functools.wraps(loss_fn) | |||
| def reduced_loss_fn(*args, reduction="mean", **kwargs): | |||
| @@ -45,13 +43,14 @@ def _reduce_output(loss_fn): | |||
| @_reduce_output | |||
| def l1_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: | |||
| r""" | |||
| Calculates the mean absolute error (MAE) between | |||
| r"""Calculates the mean absolute error (MAE) between | |||
| each element in the pred :math:`x` and label :math:`y`. | |||
| The mean absolute error can be described as: | |||
| .. math:: \ell(x,y) = mean\left(L \right) | |||
| .. math:: | |||
| \ell(x,y) = mean\left(L \right) | |||
| where | |||
| @@ -63,30 +62,32 @@ def l1_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: | |||
| :math:`x` and :math:`y` are tensors of arbitrary shapes with a total | |||
| of :math:`N` elements each. :math:`N` is the batch size. | |||
| :param pred: predicted result from model. | |||
| :param label: ground truth to compare. | |||
| :param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| :return: loss value. | |||
| Args: | |||
| pred: predicted result from model. | |||
| label: ground truth to compare. | |||
| reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| Examples: | |||
| Returns: | |||
| loss value. | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | |||
| tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | |||
| loss = F.nn.l1_loss(ipt, tgt) | |||
| print(loss.numpy()) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.functional as F | |||
| Outputs: | |||
| ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | |||
| tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | |||
| loss = F.nn.l1_loss(ipt, tgt) | |||
| print(loss.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| 2.75 | |||
| .. testoutput:: | |||
| 2.75 | |||
| """ | |||
| diff = pred - label | |||
| return abs(diff) | |||
| @@ -94,53 +95,56 @@ def l1_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: | |||
| @_reduce_output | |||
| def square_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: | |||
| r""" | |||
| Calculates the mean squared error (squared L2 norm) between | |||
| r"""Calculates the mean squared error (squared L2 norm) between | |||
| each element in the pred :math:`x` and label :math:`y`. | |||
| The mean squared error can be described as: | |||
| .. math:: \ell(x, y) = mean\left( L \right) | |||
| .. math:: | |||
| \ell(x, y) = mean\left( L \right) | |||
| where | |||
| .. math:: | |||
| L = \{l_1,\dots,l_N\}, \quad | |||
| l_n = \left( x_n - y_n \right)^2, | |||
| L = \{l_1,\dots,l_N\}, \quad | |||
| l_n = \left( x_n - y_n \right)^2, | |||
| :math:`x` and :math:`y` are tensors of arbitrary shapes with a total | |||
| of :math:`N` elements each. :math:`N` is the batch size. | |||
| :param pred: predicted result from model. | |||
| :param label: ground truth to compare. | |||
| :param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| :return: loss value. | |||
| Args: | |||
| pred: predicted result from model. | |||
| label: ground truth to compare. | |||
| reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| Returns: | |||
| loss value. | |||
| Shape: | |||
| - pred: :math:`(N, *)` where :math:`*` means any number of additional | |||
| dimensions. | |||
| - label: :math:`(N, *)`. Same shape as ``pred``. | |||
| * pred: :math:`(N, *)` where :math:`*` means any number of additional | |||
| dimensions. | |||
| * label: :math:`(N, *)`. Same shape as ``pred``. | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | |||
| tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | |||
| loss = F.nn.square_loss(ipt, tgt) | |||
| print(loss.numpy()) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.functional as F | |||
| Outputs: | |||
| ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | |||
| tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | |||
| loss = F.nn.square_loss(ipt, tgt) | |||
| print(loss.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| 9.75 | |||
| .. testoutput:: | |||
| 9.75 | |||
| """ | |||
| diff = pred - label | |||
| return diff ** 2 | |||
| @@ -155,8 +159,7 @@ def cross_entropy( | |||
| label_smooth: float = 0, | |||
| reduction: str = "mean", | |||
| ) -> Tensor: | |||
| r""" | |||
| Computes the multi-class cross entropy loss (using logits by default). | |||
| r"""Computes the multi-class cross entropy loss (using logits by default). | |||
| By default (``with_logits`` is True), ``pred`` is assumed to be logits; | |||
| class probabilities are given by softmax. | |||
| @@ -170,35 +173,37 @@ def cross_entropy( | |||
| where :math:`y^{LS}` and :math:`y` are new label distribution and origin label distribution respectively. | |||
| k is the index of label distribution. :math:`\alpha` is ``label_smooth`` and :math:`K` is the number of classes. | |||
| :param pred: input tensor representing the predicted probability. | |||
| :param label: input tensor representing the classification label. | |||
| :param axis: an axis along which softmax will be applied. Default: 1 | |||
| :param with_logits: whether to apply softmax first. Default: True | |||
| :param label_smooth: a label smoothing of parameter that can re-distribute target distribution. Default: 0 | |||
| :param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| :return: loss value. | |||
| Args: | |||
| pred: input tensor representing the predicted probability. | |||
| label: input tensor representing the classification label. | |||
| axis: an axis along which softmax will be applied. Default: 1 | |||
| with_logits: whether to apply softmax first. Default: True | |||
| label_smooth: a label smoothing parameter that re-distributes the target distribution. Default: 0 | |||
| reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| Examples: | |||
| Returns: | |||
| loss value. | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| data_shape = (1, 2) | |||
| label_shape = (1, ) | |||
| pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape)) | |||
| label = tensor(np.ones(label_shape, dtype=np.int32)) | |||
| loss = F.nn.cross_entropy(pred, label) | |||
| print(loss.numpy().round(decimals=4)) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| data_shape = (1, 2) | |||
| label_shape = (1, ) | |||
| pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape)) | |||
| label = tensor(np.ones(label_shape, dtype=np.int32)) | |||
| loss = F.nn.cross_entropy(pred, label) | |||
| print(loss.numpy().round(decimals=4)) | |||
| .. testoutput:: | |||
| Outputs: | |||
| 0.6931 | |||
| .. testoutput:: | |||
| 0.6931 | |||
| """ | |||
| n0 = pred.ndim | |||
| n1 = label.ndim | |||
| @@ -226,37 +231,38 @@ def cross_entropy( | |||
| def binary_cross_entropy( | |||
| pred: Tensor, label: Tensor, with_logits: bool = True, reduction: str = "mean", | |||
| ) -> Tensor: | |||
| r""" | |||
| Computes the binary cross entropy loss (using logits by default). | |||
| r"""Computes the binary cross entropy loss (using logits by default). | |||
| By default (``with_logits`` is True), ``pred`` is assumed to be logits; | |||
| class probabilities are given by sigmoid. | |||
| :param pred: `(N, *)`, where `*` means any number of additional dimensions. | |||
| :param label: `(N, *)`, same shape as the input. | |||
| :param with_logits: bool, whether to apply sigmoid first. Default: True | |||
| :param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| :return: loss value. | |||
| Args: | |||
| pred: `(N, *)`, where `*` means any number of additional dimensions. | |||
| label: `(N, *)`, same shape as the input. | |||
| with_logits: bool, whether to apply sigmoid first. Default: True | |||
| reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| Examples: | |||
| Returns: | |||
| loss value. | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2)) | |||
| label = tensor(np.ones((1, 2), dtype=np.float32)) | |||
| loss = F.nn.binary_cross_entropy(pred, label) | |||
| print(loss.numpy().round(decimals=4)) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2)) | |||
| label = tensor(np.ones((1, 2), dtype=np.float32)) | |||
| loss = F.nn.binary_cross_entropy(pred, label) | |||
| print(loss.numpy().round(decimals=4)) | |||
| .. testoutput:: | |||
| Outputs: | |||
| 0.6931 | |||
| .. testoutput:: | |||
| 0.6931 | |||
| """ | |||
| if not with_logits: | |||
| return -(label * log(pred) + (1 - label) * log(1 - pred)) | |||
| @@ -269,37 +275,38 @@ def binary_cross_entropy( | |||
| def hinge_loss( | |||
| pred: Tensor, label: Tensor, norm: str = "L1", reduction: str = "mean" | |||
| ) -> Tensor: | |||
| r""" | |||
| Caculates the hinge loss which is often used in SVM. | |||
| r"""Caculates the hinge loss which is often used in SVM. | |||
| The hinge loss can be described as: | |||
| .. math:: loss(x, y) = \frac{1}{N}\sum_i\sum_j(max(0, 1 - x_{ij}*y_{ij})) | |||
| :param pred: input tensor representing the predicted probability, shape is `(N, C)`. | |||
| :param label: input tensor representing the binary classification label, shape is `(N, C)`. | |||
| :param norm: specify the norm to caculate the loss, should be "L1" or "L2". | |||
| :param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| :return: loss value. | |||
| Args: | |||
| pred: input tensor representing the predicted probability, shape is `(N, C)`. | |||
| label: input tensor representing the binary classification label, shape is `(N, C)`. | |||
| norm: specify the norm to calculate the loss, should be "L1" or "L2". | |||
| reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
| Examples: | |||
| Returns: | |||
| loss value. | |||
| .. testcode:: | |||
| Examples: | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32") | |||
| label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32") | |||
| loss = F.nn.hinge_loss(pred, label) | |||
| print(loss.numpy()) | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32") | |||
| label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32") | |||
| loss = F.nn.hinge_loss(pred, label) | |||
| print(loss.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| 1.5 | |||
| .. testoutput:: | |||
| 1.5 | |||
| """ | |||
| norm = norm.upper() | |||
| assert norm in ["L1", "L2"], "norm must be L1 or L2" | |||
| @@ -19,33 +19,16 @@ from .tensor import broadcast_to, transpose | |||
| def topk_accuracy( | |||
| logits: Tensor, target: Tensor, topk: Union[int, Iterable[int]] = 1 | |||
| ) -> Union[Tensor, Iterable[Tensor]]: | |||
| r""" | |||
| Calculates the classification accuracy given predicted logits and ground-truth labels. | |||
| r"""Calculates the classification accuracy given predicted logits and ground-truth labels. | |||
| :param logits: model predictions of shape `[batch_size, num_classes]`, | |||
| representing the probability (likelyhood) of each class. | |||
| :param target: ground-truth labels, 1d tensor of int32. | |||
| :param topk: specifies the topk values, could be an int or tuple of ints. Default: 1 | |||
| :return: tensor(s) of classification accuracy between 0.0 and 1.0. | |||
| Args: | |||
| logits: model predictions of shape `[batch_size, num_classes]`, | |||
| representing the probability (likelihood) of each class. | |||
| target: ground-truth labels, 1d tensor of int32. | |||
| topk: specifies the topk values, could be an int or tuple of ints. Default: 1 | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| logits = tensor(np.arange(80, dtype=np.int32).reshape(8,10)) | |||
| target = tensor(np.arange(8, dtype=np.int32)) | |||
| top1, top5 = F.metric.topk_accuracy(logits, target, (1, 5)) | |||
| print(top1.numpy(), top5.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| 0.0 0.375 | |||
| Returns: | |||
| tensor(s) of classification accuracy between 0.0 and 1.0. | |||
| """ | |||
| if isinstance(topk, int): | |||
| topk = (topk,) | |||
| @@ -28,32 +28,28 @@ def conv_bias_activation( | |||
| conv_mode="cross_correlation", | |||
| compute_mode="default", | |||
| ) -> Tensor: | |||
| """ | |||
| Convolution bias with activation operation, only for inference. | |||
| :param inp: feature map of the convolution operation. | |||
| :param weight: convolution kernel. | |||
| :param bias: bias added to the result of convolution | |||
| :param stride: stride of the 2D convolution operation. Default: 1 | |||
| :param padding: size of the paddings added to the input on both sides | |||
| of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | |||
| :param groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and the shape of weight should be `(groups, out_channel // groups, | |||
| in_channels // groups, height, width)`. | |||
| :type conv_mode: string or :class:`Convolution.Mode`. | |||
| :param conv_mode: supports 'cross_correlation' or 'convolution'. Default: | |||
| 'cross_correlation' | |||
| :param dtype: support for ``np.dtype``, Default: np.int8 | |||
| :type compute_mode: string or | |||
| :class:`Convolution.ComputeMode`. | |||
| :param compute_mode: when set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, | |||
| but only effective when input and output are of float16 dtype. | |||
| r"""Convolution bias with activation operation, only for inference. | |||
| Args: | |||
| inp: feature map of the convolution operation. | |||
| weight: convolution kernel. | |||
| bias: bias added to the result of convolution | |||
| stride: stride of the 2D convolution operation. Default: 1 | |||
| padding: size of the paddings added to the input on both sides | |||
| of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| dilation: dilation of the 2D convolution operation. Default: 1 | |||
| groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and the shape of weight should be `(groups, out_channel // groups, | |||
| in_channels // groups, height, width)`. | |||
| conv_mode: supports 'cross_correlation' or 'convolution'. Default: | |||
| 'cross_correlation' | |||
| dtype: support for ``np.dtype``, Default: np.int8 | |||
| compute_mode: when set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, | |||
| but only effective when input and output are of float16 dtype. | |||
| """ | |||
| ph, pw = _pair(padding) | |||
| sh, sw = _pair_nonzero(stride) | |||
| @@ -91,32 +87,28 @@ def batch_conv_bias_activation( | |||
| conv_mode="cross_correlation", | |||
| compute_mode="default", | |||
| ) -> Tensor: | |||
| """ | |||
| Batch convolution bias with activation operation, only for inference. | |||
| :param inp: feature map of the convolution operation. | |||
| :param weight: convolution kernel in batched way. | |||
| :param bias: bias added to the result of convolution | |||
| :param stride: stride of the 2D convolution operation. Default: 1 | |||
| :param padding: size of the paddings added to the input on both sides | |||
| of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | |||
| :param groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and the shape of weight should be `(groups, out_channel // groups, | |||
| in_channels // groups, height, width)`. | |||
| :type conv_mode: string or :class:`Convolution.Mode`. | |||
| :param conv_mode: supports 'cross_correlation' or 'convolution'. Default: | |||
| 'cross_correlation' | |||
| :param dtype: support for ``np.dtype``, Default: np.int8 | |||
| :type compute_mode: string or | |||
| :class:`Convolution.ComputeMode`. | |||
| :param compute_mode: when set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, | |||
| but only effective when input and output are of float16 dtype. | |||
| r"""Batch convolution bias with activation operation, only for inference. | |||
| Args: | |||
| inp: feature map of the convolution operation. | |||
| weight: convolution kernel in batched way. | |||
| bias: bias added to the result of convolution | |||
| stride: stride of the 2D convolution operation. Default: 1 | |||
| padding: size of the paddings added to the input on both sides | |||
| of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| dilation: dilation of the 2D convolution operation. Default: 1 | |||
| groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and the shape of weight should be `(groups, out_channel // groups, | |||
| in_channels // groups, height, width)`. | |||
| conv_mode: supports 'cross_correlation' or 'convolution'. Default: | |||
| 'cross_correlation' | |||
| dtype: support for ``np.dtype``, Default: np.int8 | |||
| compute_mode: when set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, | |||
| but only effective when input and output are of float16 dtype. | |||
| """ | |||
| ph, pw = _pair(padding) | |||
| sh, sw = _pair_nonzero(stride) | |||
| @@ -19,37 +19,36 @@ __all__ = ["topk_accuracy"] | |||
| def _assert_equal( | |||
| expect: Tensor, actual: Tensor, *, maxerr: float = 0.0001, verbose: bool = False | |||
| ): | |||
| r""" | |||
| Asserts two tensors equal and returns expected value (first input). | |||
| r"""Asserts two tensors equal and returns expected value (first input). | |||
| It is a variant of python assert which is symbolically traceable (similar to ``numpy.testing.assert_equal``). | |||
| If we want to verify the correctness of a model, we can just ``assert`` its states and outputs. | |||
| But sometimes we need to verify the correctness at different backends for a *dumped* model | |||
| (or in a :class:`~jit.trace` context), where no python code can be executed. | |||
| Thus we have to use :func:`~functional.utils._assert_equal` instead. | |||
| :param expect: expected tensor value | |||
| :param actual: tensor to check value | |||
| :param maxerr: max allowed error; error is defined as the minimal of absolute and relative error | |||
| :param verbose: whether to print maxerr to stdout during opr exec | |||
| :return: expected tensor | |||
| Args: | |||
| expect: expected tensor value | |||
| actual: tensor to check value | |||
| maxerr: max allowed error; error is defined as the minimal of absolute and relative error | |||
| verbose: whether to print maxerr to stdout during opr exec | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| x = tensor([1, 2, 3], np.float32) | |||
| y = tensor([1, 2, 3], np.float32) | |||
| print(F.utils._assert_equal(x, y, maxerr=0).numpy()) | |||
| x = tensor([1, 2, 3], np.float32) | |||
| y = tensor([1, 2, 3], np.float32) | |||
| print(F.utils._assert_equal(x, y, maxerr=0).numpy()) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| .. testoutput:: | |||
| [1. 2. 3.] | |||
| [1. 2. 3.] | |||
| """ | |||
| err = ( | |||
| abs(expect - actual) | |||
| @@ -21,31 +21,32 @@ from .tensor import broadcast_to, concat, expand_dims, reshape, transpose | |||
| def cvt_color(inp: Tensor, mode: str = ""): | |||
| r""" | |||
| Convert images from one format to another | |||
| r"""Convert images from one format to another | |||
| :param inp: input images. | |||
| :param mode: format mode. | |||
| :return: convert result. | |||
| Args: | |||
| inp: input images. | |||
| mode: format mode. | |||
| Examples: | |||
| Returns: | |||
| convert result. | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| x = mge.tensor(np.array([[[[-0.58675045, 1.7526233, 0.10702174]]]]).astype(np.float32)) | |||
| y = F.vision.cvt_color(x, mode="RGB2GRAY") | |||
| print(y.numpy()) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.functional as F | |||
| Outputs: | |||
| x = mge.tensor(np.array([[[[-0.58675045, 1.7526233, 0.10702174]]]]).astype(np.float32)) | |||
| y = F.vision.cvt_color(x, mode="RGB2GRAY") | |||
| print(y.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[[0.86555195]]]] | |||
| .. testoutput:: | |||
| [[[[0.86555195]]]] | |||
| """ | |||
| mode = mode.upper() | |||
| assert mode in builtin.CvtColor.Mode.__dict__, "unsupported mode for cvt_color" | |||
| @@ -63,37 +64,38 @@ def roi_pooling( | |||
| mode: str = "max", | |||
| scale: float = 1.0, | |||
| ) -> Tensor: | |||
| """ | |||
| Applies roi pooling on input feature. | |||
| r"""Applies roi pooling on input feature. | |||
| :param inp: tensor that represents the input feature, `(N, C, H, W)` images. | |||
| :param rois: `(K, 5)` boxes. First column is the index into N. The other 4 columns are xyxy. | |||
| :param output_shape: `(height, width)` of output rois feature. | |||
| :param mode: "max" or "average", use max/average align just like max/average pooling. Default: "max" | |||
| :param scale: scale the input boxes by this number. Default: 1.0 | |||
| :return: `(K, C, output_shape[0], output_shape[1])` feature of rois. | |||
| Args: | |||
| inp: tensor that represents the input feature, `(N, C, H, W)` images. | |||
| rois: `(K, 5)` boxes. First column is the index into N. The other 4 columns are xyxy. | |||
| output_shape: `(height, width)` of output rois feature. | |||
| mode: "max" or "average", use max/average align just like max/average pooling. Default: "max" | |||
| scale: scale the input boxes by this number. Default: 1.0 | |||
| Examples: | |||
| Returns: | |||
| ``(K, C, output_shape[0], output_shape[1])`` feature of rois. | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| np.random.seed(42) | |||
| inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
| rois = tensor(np.random.random((4, 5))) | |||
| y = F.vision.roi_pooling(inp, rois, (2, 2)) | |||
| print(y.numpy()[0].round(decimals=4)) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| np.random.seed(42) | |||
| inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
| rois = tensor(np.random.random((4, 5))) | |||
| y = F.vision.roi_pooling(inp, rois, (2, 2)) | |||
| print(y.numpy()[0].round(decimals=4)) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[-0.1383 -0.1383] | |||
| [-0.5035 -0.5035]]] | |||
| .. testoutput:: | |||
| [[[-0.1383 -0.1383] | |||
| [-0.5035 -0.5035]]] | |||
| """ | |||
| assert mode.lower() in ["max", "average"], "only max/average mode is supported" | |||
| if isinstance(output_shape, int): | |||
| @@ -116,17 +118,17 @@ def correlation( | |||
| pad_size: int = 0, | |||
| is_multiply: bool = True, | |||
| ) -> Tensor: | |||
| """ Applies correlation to inputs. | |||
| :param data1: Input data1 to the correlation. format must be nchw | |||
| :param data2: Input data2 to the correlation. format must be nchw | |||
| :param kernel_size: (int (non-negative), optional, default=1) – kernel size for Correlation must be an odd number | |||
| :param max_displacement: (int (non-negative), optional, default=1) – Max displacement of Correlation | |||
| :param stride1: (int (non-negative), optional, default=1) – stride1 quantize data1 globally | |||
| :param stride2: (int (non-negative), optional, default=1) – stride2 quantize data2 within the neighborhood centered around data1 | |||
| :param pad_size: (int (non-negative), optional, default=0) – pad for Correlation | |||
| :param is_multiply: (boolean, optional, default=True) – operation type is either multiplication or absolute difference | |||
| r"""Applies correlation to inputs. | |||
| Args: | |||
| data1: Input data1 to the correlation. Format must be NCHW. | |||
| data2: Input data2 to the correlation. Format must be NCHW. | |||
| kernel_size: (int (non-negative), optional, default=1) – kernel size for Correlation; must be an odd number | |||
| max_displacement: (int (non-negative), optional, default=1) – Max displacement of Correlation | |||
| stride1: (int (non-negative), optional, default=1) – stride1 quantizes data1 globally | |||
| stride2: (int (non-negative), optional, default=1) – stride2 quantizes data2 within the neighborhood centered around data1 | |||
| pad_size: (int (non-negative), optional, default=0) – pad for Correlation | |||
| is_multiply: (boolean, optional, default=True) – operation type is either multiplication or absolute difference | |||
| """ | |||
| op = builtin.Correlation( | |||
| @@ -152,41 +154,42 @@ def roi_align( | |||
| sample_points: Union[int, tuple, list] = 2, | |||
| aligned: bool = True, | |||
| ) -> Tensor: | |||
| """ | |||
| Applies roi align on input feature. | |||
| :param inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | |||
| :param rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``. | |||
| :param output_shape: `(height, width)` shape of output rois feature. | |||
| :param mode: "max" or "average", use max/average align just like max/average pooling. Default: "average" | |||
| :param spatial_scale: scale the input boxes by this number. Default: 1.0 | |||
| :param sample_points: number of inputs samples to take for each output sample. | |||
| 0 to take samples densely. Default: 2 | |||
| :param aligned: wheather to align the input feature, with `aligned=True`, | |||
| we first appropriately scale the ROI and then shift it by -0.5. Default: True | |||
| :return: output tensor. | |||
| r"""Applies roi align on input feature. | |||
| Args: | |||
| inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | |||
| rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``. | |||
| output_shape: `(height, width)` shape of output rois feature. | |||
| mode: "max" or "average", use max/average align just like max/average pooling. Default: "average" | |||
| spatial_scale: scale the input boxes by this number. Default: 1.0 | |||
| sample_points: number of input samples to take for each output sample. | |||
| 0 to take samples densely. Default: 2 | |||
| aligned: whether to align the input feature, with `aligned=True`, | |||
| we first appropriately scale the ROI and then shift it by -0.5. Default: True | |||
| Returns: | |||
| output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| np.random.seed(42) | |||
| inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
| rois = tensor(np.random.random((4, 5))) | |||
| y = F.vision.roi_align(inp, rois, (2, 2)) | |||
| print(y.numpy()[0].round(decimals=4)) | |||
| Outputs: | |||
| np.random.seed(42) | |||
| inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
| rois = tensor(np.random.random((4, 5))) | |||
| y = F.vision.roi_align(inp, rois, (2, 2)) | |||
| print(y.numpy()[0].round(decimals=4)) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[0.175 0.175 ] | |||
| [0.1359 0.1359]]] | |||
| .. testoutput:: | |||
| [[[0.175 0.175 ] | |||
| [0.1359 0.1359]]] | |||
| """ | |||
| if inp.dtype != np.float32: | |||
| inp = inp.astype(np.float32) | |||
| @@ -217,43 +220,43 @@ def roi_align( | |||
| def nms( | |||
| boxes: Tensor, scores: Tensor, iou_thresh: float, max_output: Optional[int] = None | |||
| ) -> Tensor: | |||
| r""" | |||
| Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union(IoU). | |||
| r"""Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union(IoU). | |||
| :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format. | |||
| :param iou_thresh: IoU threshold for overlapping. | |||
| :param scores: tensor of shape `(N,)`, the score of boxes. | |||
| :param max_output: the maximum number of boxes to keep; it is optional if this operator is not traced | |||
| otherwise it required to be specified; if it is not specified, all boxes are kept. | |||
| :return: indices of the elements that have been kept by NMS, sorted by scores. | |||
| Args: | |||
| boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format. | |||
| iou_thresh: IoU threshold for overlapping. | |||
| scores: tensor of shape `(N,)`, the score of boxes. | |||
| max_output: the maximum number of boxes to keep; it is optional if this operator is not traced | |||
| otherwise it is required to be specified; if it is not specified, all boxes are kept. | |||
| .. note:: | |||
| Returns: | |||
| indices of the elements that have been kept by NMS, sorted by scores. | |||
| max_output should be specified and should have valid positive value under tracing | |||
| Note: | |||
| max_output should be specified and should have valid positive value under tracing. | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| x = np.zeros((100,4)) | |||
| np.random.seed(42) | |||
| x[:,:2] = np.random.rand(100,2)*20 | |||
| x[:,2:] = np.random.rand(100,2)*20 + 100 | |||
| scores = tensor(np.random.rand(100)) | |||
| inp = tensor(x) | |||
| result = F.vision.nms(inp, scores, iou_thresh=0.7) | |||
| print(result.numpy()) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| x = np.zeros((100,4)) | |||
| np.random.seed(42) | |||
| x[:,:2] = np.random.rand(100,2)*20 | |||
| x[:,2:] = np.random.rand(100,2)*20 + 100 | |||
| scores = tensor(np.random.rand(100)) | |||
| inp = tensor(x) | |||
| result = F.vision.nms(inp, scores, iou_thresh=0.7) | |||
| print(result.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [75 69] | |||
| .. testoutput:: | |||
| [75 69] | |||
| """ | |||
| assert ( | |||
| boxes.ndim == 2 and boxes.shape[1] == 4 | |||
| @@ -286,45 +289,46 @@ def remap( | |||
| scalar: float = 0.0, | |||
| interp_mode: str = "linear", | |||
| ) -> Tensor: | |||
| r""" | |||
| Applies remap transformation to batched 2D images. | |||
| r"""Applies remap transformation to batched 2D images. | |||
| The input images are transformed to the output images by the tensor map_xy. | |||
| The output's H and W are the same as map_xy's H and W. | |||
| :param inp: input image | |||
| :param map_xy: (batch, oh, ow, 2) transformation matrix | |||
| :param border_mode: pixel extrapolation method. | |||
| Default: "replicate". Currently also support "constant", "reflect", | |||
| "reflect_101", "wrap". | |||
| :param scalar: value used in case of a constant border. Default: 0 | |||
| :param interp_mode: interpolation methods. | |||
| Default: "linear". Currently only support "linear" mode. | |||
| :return: output tensor. | |||
| Args: | |||
| inp: input image | |||
| map_xy: (batch, oh, ow, 2) transformation matrix | |||
| border_mode: pixel extrapolation method. | |||
| Default: "replicate". Currently also support "constant", "reflect", | |||
| "reflect_101", "wrap". | |||
| scalar: value used in case of a constant border. Default: 0 | |||
| interp_mode: interpolation methods. | |||
| Default: "linear". Currently only support "linear" mode. | |||
| Returns: | |||
| output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| inp_shape = (1, 1, 4, 4) | |||
| inp = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape)) | |||
| map_xy_shape = (1, 2, 2, 2) | |||
| map_xy = tensor(np.array([[[1., 0.],[0., 1.]], | |||
| [[0., 1.],[0., 1.]]], | |||
| dtype=np.float32).reshape(map_xy_shape)) | |||
| out = F.vision.remap(inp, map_xy) | |||
| print(out.numpy()) | |||
| Outputs: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| inp_shape = (1, 1, 4, 4) | |||
| inp = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape)) | |||
| map_xy_shape = (1, 2, 2, 2) | |||
| map_xy = tensor(np.array([[[1., 0.],[0., 1.]], | |||
| [[0., 1.],[0., 1.]]], | |||
| dtype=np.float32).reshape(map_xy_shape)) | |||
| out = F.vision.remap(inp, map_xy) | |||
| print(out.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[[1. 4.] | |||
| [4. 4.]]]] | |||
| .. testoutput:: | |||
| [[[[1. 4.] | |||
| [4. 4.]]]] | |||
| """ | |||
| op = builtin.Remap( | |||
| @@ -344,27 +348,28 @@ def warp_affine( | |||
| format: str = "NHWC", | |||
| interp_mode: str = "linear", | |||
| ) -> Tensor: | |||
| """ | |||
| Batched affine transform on 2D images. | |||
| :param inp: input image. | |||
| :param mat: `(batch, 2, 3)` transformation matrix. | |||
| :param out_shape: output tensor shape. | |||
| :param border_mode: pixel extrapolation method. | |||
| Default: "wrap". Currently "constant", "reflect", | |||
| "reflect_101", "isolated", "wrap", "replicate", "transparent" are supported. | |||
| :param border_val: value used in case of a constant border. Default: 0 | |||
| :param format: "NHWC" as default based on historical concerns, | |||
| "NCHW" is also supported. Default: "NHWC". | |||
| :param interp_mode: interpolation methods. Could be "linear", "nearest", "cubic", "area". | |||
| Default: "linear". | |||
| :return: output tensor. | |||
| .. note:: | |||
| Here all available options for params are listed, | |||
| however it does not mean that you can use all the combinations. | |||
| On different platforms, different combinations are supported. | |||
| r"""Batched affine transform on 2D images. | |||
| Args: | |||
| inp: input image. | |||
| mat: `(batch, 2, 3)` transformation matrix. | |||
| out_shape: output tensor shape. | |||
| border_mode: pixel extrapolation method. | |||
| Default: "wrap". Currently "constant", "reflect", | |||
| "reflect_101", "isolated", "wrap", "replicate", "transparent" are supported. | |||
| border_val: value used in case of a constant border. Default: 0 | |||
| format: "NHWC" as default based on historical concerns, | |||
| "NCHW" is also supported. Default: "NHWC". | |||
| interp_mode: interpolation methods. Could be "linear", "nearest", "cubic", "area". | |||
| Default: "linear". | |||
| Returns: | |||
| output tensor. | |||
| Note: | |||
| Here all available options for params are listed, | |||
| however it does not mean that you can use all the combinations. | |||
| On different platforms, different combinations are supported. | |||
| """ | |||
| op = builtin.WarpAffine( | |||
| border_mode=border_mode, | |||
| @@ -387,8 +392,7 @@ def warp_perspective( | |||
| format: str = "NCHW", | |||
| interp_mode: str = "linear", | |||
| ) -> Tensor: | |||
| r""" | |||
| Applies perspective transformation to batched 2D images. | |||
| r"""Applies perspective transformation to batched 2D images. | |||
| The input images are transformed to the output images by the transformation matrix: | |||
| @@ -401,48 +405,49 @@ def warp_perspective( | |||
| Optionally, we can set `mat_idx` to assign different transformations to the same image, | |||
| otherwise the input images and transformations should be in one-to-one correspondence. | |||
| :param inp: input image. | |||
| :param mat: `(batch, 3, 3)` transformation matrix. | |||
| :param out_shape: `(h, w)` size of the output image. | |||
| :param mat_idx: `(batch, )` image batch idx assigned to each matrix. Default: None | |||
| :param border_mode: pixel extrapolation method. | |||
| Default: "replicate". Currently also support "constant", "reflect", | |||
| "reflect_101", "wrap". | |||
| :param border_val: value used in case of a constant border. Default: 0 | |||
| :param format: "NHWC" is also supported. Default: "NCHW". | |||
| :param interp_mode: interpolation methods. | |||
| Default: "linear". Currently only support "linear" mode. | |||
| :return: output tensor. | |||
| .. note:: | |||
| The transformation matrix is the inverse of that used by `cv2.warpPerspective`. | |||
| Args: | |||
| inp: input image. | |||
| mat: `(batch, 3, 3)` transformation matrix. | |||
| out_shape: `(h, w)` size of the output image. | |||
| mat_idx: `(batch, )` image batch idx assigned to each matrix. Default: None | |||
| border_mode: pixel extrapolation method. | |||
| Default: "replicate". Currently also support "constant", "reflect", | |||
| "reflect_101", "wrap". | |||
| border_val: value used in case of a constant border. Default: 0 | |||
| format: "NHWC" is also supported. Default: "NCHW". | |||
| interp_mode: interpolation methods. | |||
| Default: "linear". Currently only support "linear" mode. | |||
| Returns: | |||
| output tensor. | |||
| Note: | |||
| The transformation matrix is the inverse of that used by `cv2.warpPerspective`. | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| inp_shape = (1, 1, 4, 4) | |||
| x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape)) | |||
| M_shape = (1, 3, 3) | |||
| # M defines a translation: dst(1, 1, h, w) = rst(1, 1, h+1, w+1) | |||
| M = tensor(np.array([[1., 0., 1.], | |||
| [0., 1., 1.], | |||
| [0., 0., 1.]], dtype=np.float32).reshape(M_shape)) | |||
| out = F.vision.warp_perspective(x, M, (2, 2)) | |||
| print(out.numpy()) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| inp_shape = (1, 1, 4, 4) | |||
| x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape)) | |||
| M_shape = (1, 3, 3) | |||
| # M defines a translation: dst(1, 1, h, w) = src(1, 1, h+1, w+1) | |||
| M = tensor(np.array([[1., 0., 1.], | |||
| [0., 1., 1.], | |||
| [0., 0., 1.]], dtype=np.float32).reshape(M_shape)) | |||
| out = F.vision.warp_perspective(x, M, (2, 2)) | |||
| print(out.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[[ 5. 6.] | |||
| [ 9. 10.]]]] | |||
| .. testoutput:: | |||
| [[[[ 5. 6.] | |||
| [ 9. 10.]]]] | |||
| """ | |||
| if inp.dtype == np.float32: | |||
| mat = mat.astype("float32") | |||
| @@ -467,48 +472,48 @@ def interpolate( | |||
| mode: str = "bilinear", | |||
| align_corners: Optional[bool] = None, | |||
| ) -> Tensor: | |||
| r""" | |||
| Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||
| :param inp: input tensor. | |||
| :param size: size of the output tensor. Default: None | |||
| :param scale_factor: scaling factor of the output tensor. Default: None | |||
| :param mode: interpolation methods, acceptable values are: | |||
| "bilinear", "linear", "bicubic" and "nearest". Default: "bilinear" | |||
| :param align_corners: This only has an effect when `mode` | |||
| is "bilinear" or "linear". Geometrically, we consider the pixels of the input | |||
| and output as squares rather than points. If set to ``True``, the input | |||
| and output tensors are aligned by the center points of their corner | |||
| pixels, preserving the values at the corner pixels. If set to ``False``, | |||
| the input and output tensors are aligned by the corner points of their | |||
| corner pixels, and the interpolation uses edge value padding for | |||
| out-of-boundary values, making this operation *independent* of input size | |||
| :return: output tensor. | |||
| r"""Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||
| Args: | |||
| inp: input tensor. | |||
| size: size of the output tensor. Default: None | |||
| scale_factor: scaling factor of the output tensor. Default: None | |||
| mode: interpolation methods, acceptable values are: | |||
| "bilinear", "linear", "bicubic" and "nearest". Default: "bilinear" | |||
| align_corners: This only has an effect when `mode` | |||
| is "bilinear" or "linear". Geometrically, we consider the pixels of the input | |||
| and output as squares rather than points. If set to ``True``, the input | |||
| and output tensors are aligned by the center points of their corner | |||
| pixels, preserving the values at the corner pixels. If set to ``False``, | |||
| the input and output tensors are aligned by the corner points of their | |||
| corner pixels, and the interpolation uses edge value padding for | |||
| out-of-boundary values, making this operation *independent* of input size. | |||
| Returns: | |||
| output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| .. testcode:: | |||
| x = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) | |||
| out = F.vision.interpolate(x, [4, 4], align_corners=False) | |||
| print(out.numpy()) | |||
| out2 = F.vision.interpolate(x, scale_factor=2.) | |||
| np.testing.assert_allclose(out.numpy(), out2.numpy()) | |||
| import numpy as np | |||
| from megengine import tensor | |||
| import megengine.functional as F | |||
| Outputs: | |||
| x = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) | |||
| out = F.vision.interpolate(x, [4, 4], align_corners=False) | |||
| print(out.numpy()) | |||
| out2 = F.vision.interpolate(x, scale_factor=2.) | |||
| np.testing.assert_allclose(out.numpy(), out2.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[[1. 1.25 1.75 2. ] | |||
| [1.5 1.75 2.25 2.5 ] | |||
| [2.5 2.75 3.25 3.5 ] | |||
| [3. 3.25 3.75 4. ]]]] | |||
| .. testoutput:: | |||
| [[[[1. 1.25 1.75 2. ] | |||
| [1.5 1.75 2.25 2.5 ] | |||
| [2.5 2.75 3.25 3.5 ] | |||
| [3. 3.25 3.75 4. ]]]] | |||
| """ | |||
| mode = mode.lower() | |||
| if mode not in ["bilinear", "linear", "bicubic", "nearest"]: | |||
| @@ -623,15 +628,15 @@ def interpolate( | |||
| def nvof(src: Tensor, precision: int = 1) -> Tensor: | |||
| r""" | |||
| Implements NVIDIA Optical Flow SDK. | |||
| r"""Implements NVIDIA Optical Flow SDK. | |||
| Args: | |||
| src: input tensor with shape (n, t, h, w, c4) and uint8 dtype. | |||
| precision: 0:NV_OF_PERF_LEVEL_SLOW 1:NV_OF_PERF_LEVEL_MEDIUM 2:NV_OF_PERF_LEVEL_FAST. | |||
| :src shape: input tensor with shape (n, t, h, w, c4). | |||
| :src dtype: uint8. | |||
| :param precision: 0:NV_OF_PERF_LEVEL_SLOW 1:NV_OF_PERF_LEVEL_MEDIUM 2:NV_OF_PERF_LEVEL_FAST. | |||
| :output shape: ``(n, t-1, (h+out_grid_size-1)//out_grid_size, (w+out_grid_size-1)//out_grid_size, c2)``. | |||
| By default, out_grid_size = 4. | |||
| :output dtype: int16. | |||
| Returns: | |||
| output tensor with shape ``(n, t-1, (h+out_grid_size-1)//out_grid_size, (w+out_grid_size-1)//out_grid_size, c2)`` and int16 dtype. | |||
| By default, out_grid_size = 4. | |||
| .. code-block:: python | |||
| @@ -643,7 +648,6 @@ def nvof(src: Tensor, precision: int = 1) -> Tensor: | |||
| src = tensor(x) | |||
| result = F.nn.nvof(src, precision=1) | |||
| print(result.numpy()) | |||
| """ | |||
| assert src.ndim == 5 and src.shape[4] == 4 | |||
| @@ -7,24 +7,24 @@ | |||
| # software distributed under the License is distributed on an | |||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| class FetcherError(Exception): | |||
| """Base class for fetch related error.""" | |||
| r"""Base class for fetch related error.""" | |||
| class InvalidRepo(FetcherError): | |||
| """The repo provided was somehow invalid.""" | |||
| r"""The repo provided was somehow invalid.""" | |||
| class InvalidGitHost(FetcherError): | |||
| """The git host provided was somehow invalid.""" | |||
| r"""The git host provided was somehow invalid.""" | |||
| class GitPullError(FetcherError): | |||
| """A git pull error occurred.""" | |||
| r"""A git pull error occurred.""" | |||
| class GitCheckoutError(FetcherError): | |||
| """A git checkout error occurred.""" | |||
| r"""A git checkout error occurred.""" | |||
| class InvalidProtocol(FetcherError): | |||
| """The protocol provided was somehow invalid.""" | |||
| r"""The protocol provided was somehow invalid.""" | |||
| @@ -102,24 +102,18 @@ class GitSSHFetcher(RepoFetcherBase): | |||
| commit: str = None, | |||
| silent: bool = True, | |||
| ) -> str: | |||
| """ | |||
| Fetches git repo by SSH protocol | |||
| :param git_host: | |||
| host address of git repo. | |||
| Example: github.com | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. | |||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
| :param use_cache: | |||
| whether to use locally fetched code or completely re-fetch. | |||
| :param commit: | |||
| commit id on github or gitlab. | |||
| :param silent: | |||
| whether to accept the stdout and stderr of the subprocess with PIPE, instead of | |||
| displaying on the screen. | |||
| :return: | |||
| """Fetches git repo by SSH protocol | |||
| Args: | |||
| git_host: host address of git repo. Eg: github.com | |||
| repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
| use_cache: whether to use locally fetched code or completely re-fetch. | |||
| commit: commit id on github or gitlab. | |||
| silent: whether to accept the stdout and stderr of the subprocess with PIPE, instead of | |||
| displaying on the screen. | |||
| Returns: | |||
| directory where the repo code is stored. | |||
| """ | |||
| if not cls._check_git_host(git_host): | |||
| @@ -217,24 +211,19 @@ class GitHTTPSFetcher(RepoFetcherBase): | |||
| commit: str = None, | |||
| silent: bool = True, | |||
| ) -> str: | |||
| """ | |||
| Fetches git repo by HTTPS protocol. | |||
| :param git_host: | |||
| host address of git repo. | |||
| Example: github.com | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. | |||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
| :param use_cache: | |||
| whether to use locally cached code or completely re-fetch. | |||
| :param commit: | |||
| commit id on github or gitlab. | |||
| :param silent: | |||
| whether to accept the stdout and stderr of the subprocess with PIPE, instead of | |||
| displaying on the screen. | |||
| :return: | |||
| """Fetches git repo by HTTPS protocol. | |||
| Args: | |||
| git_host: host address of git repo. Eg: github.com | |||
| repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
| use_cache: whether to use locally cached code or completely re-fetch. | |||
| commit: commit id on github or gitlab. | |||
| silent: whether to accept the stdout and stderr of the subprocess with PIPE, instead of | |||
| displaying on the screen. | |||
| Returns: | |||
| directory where the repo code is stored. | |||
| """ | |||
| if not cls._check_git_host(git_host): | |||
| @@ -43,9 +43,7 @@ PROTOCOLS = { | |||
| def _get_megengine_home() -> str: | |||
| """ | |||
| MGE_HOME setting complies with the XDG Base Directory Specification | |||
| """ | |||
| r"""MGE_HOME setting complies with the XDG Base Directory Specification""" | |||
| megengine_home = os.path.expanduser( | |||
| os.getenv( | |||
| ENV_MGE_HOME, | |||
| @@ -95,24 +93,18 @@ def _init_hub( | |||
| commit: str = None, | |||
| protocol: str = DEFAULT_PROTOCOL, | |||
| ): | |||
| """ | |||
| Imports hubmodule like python import. | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. | |||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
| :param git_host: | |||
| host address of git repo. | |||
| Example: github.com | |||
| :param use_cache: | |||
| whether to use locally cached code or completely re-fetch. | |||
| :param commit: | |||
| commit id on github or gitlab. | |||
| :param protocol: | |||
| which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
| The value should be one of HTTPS, SSH. | |||
| :return: | |||
| r"""Imports hubmodule like python import. | |||
| Args: | |||
| repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
| git_host: host address of git repo. Eg: github.com | |||
| use_cache: whether to use locally cached code or completely re-fetch. | |||
| commit: commit id on github or gitlab. | |||
| protocol: which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
| The value should be one of HTTPS, SSH. | |||
| Returns: | |||
| a python module. | |||
| """ | |||
| cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub")) | |||
| @@ -139,24 +131,18 @@ def list( | |||
| commit: str = None, | |||
| protocol: str = DEFAULT_PROTOCOL, | |||
| ) -> List[str]: | |||
| """ | |||
| Lists all entrypoints available in repo hubconf. | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. | |||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
| :param git_host: | |||
| host address of git repo. | |||
| Example: github.com | |||
| :param use_cache: | |||
| whether to use locally cached code or completely re-fetch. | |||
| :param commit: | |||
| commit id on github or gitlab. | |||
| :param protocol: | |||
| which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
| The value should be one of HTTPS, SSH. | |||
| :return: | |||
| r"""Lists all entrypoints available in repo hubconf. | |||
| Args: | |||
| repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
| git_host: host address of git repo. Eg: github.com | |||
| use_cache: whether to use locally cached code or completely re-fetch. | |||
| commit: commit id on github or gitlab. | |||
| protocol: which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
| The value should be one of HTTPS, SSH. | |||
| Returns: | |||
| all entrypoint names of the model. | |||
| """ | |||
| hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | |||
| @@ -178,26 +164,19 @@ def load( | |||
| protocol: str = DEFAULT_PROTOCOL, | |||
| **kwargs | |||
| ) -> Any: | |||
| """ | |||
| Loads model from github or gitlab repo, with pretrained weights. | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. | |||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
| :param entry: | |||
| an entrypoint defined in hubconf. | |||
| :param git_host: | |||
| host address of git repo. | |||
| Example: github.com | |||
| :param use_cache: | |||
| whether to use locally cached code or completely re-fetch. | |||
| :param commit: | |||
| commit id on github or gitlab. | |||
| :param protocol: | |||
| which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
| The value should be one of HTTPS, SSH. | |||
| :return: | |||
| r"""Loads model from github or gitlab repo, with pretrained weights. | |||
| Args: | |||
| repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
| entry: an entrypoint defined in hubconf. | |||
| git_host: host address of git repo. Eg: github.com | |||
| use_cache: whether to use locally cached code or completely re-fetch. | |||
| commit: commit id on github or gitlab. | |||
| protocol: which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
| The value should be one of HTTPS, SSH. | |||
| Returns: | |||
| a single model with corresponding pretrained weights. | |||
| """ | |||
| hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | |||
| @@ -219,30 +198,23 @@ def help( | |||
| commit: str = None, | |||
| protocol: str = DEFAULT_PROTOCOL, | |||
| ) -> str: | |||
| """ | |||
| This function returns docstring of entrypoint ``entry`` by following steps: | |||
| r"""This function returns docstring of entrypoint ``entry`` by following steps: | |||
| 1. Pull the repo code specified by git and repo_info. | |||
| 2. Load the entry defined in repo's hubconf.py | |||
| 3. Return docstring of function entry. | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. | |||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
| :param entry: | |||
| an entrypoint defined in hubconf.py | |||
| :param git_host: | |||
| host address of git repo. | |||
| Example: github.com | |||
| :param use_cache: | |||
| whether to use locally cached code or completely re-fetch. | |||
| :param commit: | |||
| commit id on github or gitlab. | |||
| :param protocol: | |||
| which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
| The value should be one of HTTPS, SSH. | |||
| :return: | |||
| Args: | |||
| repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
| entry: an entrypoint defined in hubconf.py | |||
| git_host: host address of git repo. Eg: github.com | |||
| use_cache: whether to use locally cached code or completely re-fetch. | |||
| commit: commit id on github or gitlab. | |||
| protocol: which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
| The value should be one of HTTPS, SSH. | |||
| Returns: | |||
| docstring of entrypoint ``entry``. | |||
| """ | |||
| hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | |||
| @@ -255,16 +227,17 @@ def help( | |||
| def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | |||
| """ | |||
| Loads MegEngine serialized object from the given URL. | |||
| """Loads MegEngine serialized object from the given URL. | |||
| If the object is already present in ``model_dir``, it's deserialized and | |||
| returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. | |||
| :param url: url to serialized object. | |||
| :param model_dir: dir to cache target serialized file. | |||
| Args: | |||
| url: url to serialized object. | |||
| model_dir: dir to cache target serialized file. | |||
| :return: loaded object. | |||
| Returns: | |||
| loaded object. | |||
| """ | |||
| if model_dir is None: | |||
| model_dir = os.path.join(_get_megengine_home(), "serialized") | |||
| @@ -297,8 +270,7 @@ def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | |||
| class pretrained: | |||
| r""" | |||
| Decorator which helps to download pretrained weights from the given url. | |||
| r"""Decorator which helps to download pretrained weights from the given url. | |||
| For example, we can decorate a resnet18 function as follows | |||
| @@ -306,10 +278,10 @@ class pretrained: | |||
| @hub.pretrained("https://url/to/pretrained_resnet18.pkl") | |||
| def resnet18(**kwargs): | |||
| return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) | |||
| When decorated function is called with ``pretrained=True``, MegEngine will automatically | |||
| download and fill the returned model with pretrained weights. | |||
| Returns: | |||
| When the decorated function is called with ``pretrained=True``, MegEngine will automatically | |||
| download and fill the returned model with pretrained weights. | |||
| """ | |||
| def __init__(self, url): | |||
| @@ -14,11 +14,11 @@ from typing import Iterator | |||
| def load_module(name: str, path: str) -> types.ModuleType: | |||
| """ | |||
| Loads module specified by name and path. | |||
| r"""Loads module specified by name and path. | |||
| :param name: module name. | |||
| :param path: module path. | |||
| Args: | |||
| name: module name. | |||
| path: module path. | |||
| """ | |||
| spec = importlib.util.spec_from_file_location(name, path) | |||
| module = importlib.util.module_from_spec(spec) | |||
| @@ -27,20 +27,20 @@ def load_module(name: str, path: str) -> types.ModuleType: | |||
| def check_module_exists(module: str) -> bool: | |||
| """ | |||
| Checks whether python module exists or not. | |||
| r"""Checks whether python module exists or not. | |||
| :param module: name of module. | |||
| Args: | |||
| module: name of module. | |||
| """ | |||
| return importlib.util.find_spec(module) is not None | |||
| @contextmanager | |||
| def cd(target: str) -> Iterator[None]: | |||
| """ | |||
| Changes current directory to target. | |||
| """Changes current directory to target. | |||
| :param target: target directory. | |||
| Args: | |||
| target: target directory. | |||
| """ | |||
| prev = os.getcwd() | |||
| os.chdir(os.path.expanduser(target)) | |||
| @@ -9,12 +9,12 @@ | |||
| class GraphOptimizationConfig: | |||
| r""" | |||
| Configuration for graph optimization: False for OFF, True for ON. The default value | |||
| r"""Configuration for graph optimization: False for OFF, True for ON. The default value | |||
| None means that opt_level will decide whether this optimization will be applied or not. | |||
| :param jit_fuse_dimshuffle: whether to fuse dimshuffle in JIT optimization | |||
| :param jit_fuse_reduce: whether to fuse reduce in JIT optimization | |||
| Args: | |||
| jit_fuse_dimshuffle: whether to fuse dimshuffle in JIT optimization | |||
| jit_fuse_reduce: whether to fuse reduce in JIT optimization | |||
| """ | |||
| def __init__(self): | |||
| @@ -10,26 +10,26 @@ from ..device import get_device_count | |||
| class SublinearMemoryConfig: | |||
| r""" | |||
| Configuration for sublinear memory optimization. | |||
| :param thresh_nr_try: number of samples both for searching in linear space | |||
| and around current thresh in sublinear memory optimization. Default: 10. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_THRESH_NR_TRY'. | |||
| :param genetic_nr_iter: number of iterations to find the best checkpoints in genetic algorithm. | |||
| Default: 0. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER'. | |||
| :param genetic_pool_size: number of samples for the crossover random selection | |||
| during genetic optimization. Default: 20. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'. | |||
| :param lb_memory_mb: memory lower bound of bottleneck size in MB for sublinear memory optimization. | |||
| It can be used to perform manual tradeoff between memory and speed. Default: 0. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'. | |||
| :param num_worker: number of thread workers to search the optimum checkpoints | |||
| in sublinear memory optimization. Default: half of cpu number in the system. | |||
| Note: the value must be greater or equal to one. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_WORKERS'. | |||
| r"""Configuration for sublinear memory optimization. | |||
| Args: | |||
| thresh_nr_try: number of samples both for searching in linear space | |||
| and around current thresh in sublinear memory optimization. Default: 10. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_THRESH_NR_TRY'. | |||
| genetic_nr_iter: number of iterations to find the best checkpoints in genetic algorithm. | |||
| Default: 0. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER'. | |||
| genetic_pool_size: number of samples for the crossover random selection | |||
| during genetic optimization. Default: 20. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'. | |||
| lb_memory_mb: memory lower bound of bottleneck size in MB for sublinear memory optimization. | |||
| It can be used to perform manual tradeoff between memory and speed. Default: 0. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'. | |||
| num_worker: number of thread workers to search the optimum checkpoints | |||
| in sublinear memory optimization. Default: half of cpu number in the system. | |||
| Note: the value must be greater than or equal to one. | |||
| It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_WORKERS'. | |||
| Note that the environmental variable MGB_COMP_GRAPH_OPT must be set to 'enable_sublinear_memory_opt=1' | |||
| in order for the above environmental variable to be effective. | |||
| """ | |||
| @@ -120,21 +120,21 @@ _io_op_types = {AssertEqual, CollectiveComm, RemoteSend, RemoteRecv} | |||
| class trace: | |||
| """ | |||
| Wraps a callable and provide: | |||
| """Wraps a callable and provide: | |||
| * tracing via :meth:`.trace` and :meth:`.dump` | |||
| * accelerated evalutaion via :meth:`.__call__` | |||
| :param function: the function will be traced. | |||
| :param symbolic: whether to apply symbolic execution for tracing. Default: False | |||
| :param capture_as_const: capture global vars or closures as const value. Default: False | |||
| :param sublinear_memory_config: configuration for sublinear memory optimization. | |||
| If not None, it enables sublinear memory optimization with given setting. | |||
| :param profiling: whether to profile compiled trace. Default: False | |||
| :param opt_level: optimization level for compiling trace. Default: 2 | |||
| :param graph_opt_config: configuration for graph optimization. Default: None | |||
| :param symbolic_shape: whether to use symbolic shape for tracing. Default: True | |||
| Args: | |||
| function: the function to be traced. | |||
| symbolic: whether to apply symbolic execution for tracing. Default: False | |||
| capture_as_const: capture global vars or closures as const value. Default: False | |||
| sublinear_memory_config: configuration for sublinear memory optimization. | |||
| If not None, it enables sublinear memory optimization with given setting. | |||
| profiling: whether to profile compiled trace. Default: False | |||
| opt_level: optimization level for compiling trace. Default: 2 | |||
| graph_opt_config: configuration for graph optimization. Default: None | |||
| symbolic_shape: whether to use symbolic shape for tracing. Default: True | |||
| """ | |||
| def __new__(cls, *args, **kwargs): | |||
| @@ -696,75 +696,74 @@ class trace: | |||
| enable_metadata: bool = True, | |||
| **kwargs | |||
| ): | |||
| r""" | |||
| Serializes trace to file system. | |||
| :param file: output file, could be file object or filename. | |||
| :param arg_names: names of the input tensors in the traced function. | |||
| :param output_names: names of the output tensors in the traced function, | |||
| use the default name if not specified. | |||
| :param append: whether output is appended to ``file``. | |||
| Only works when ``file`` is str. | |||
| :param keep_var_name: level for keeping variable names: | |||
| * 0: none of the names are kept | |||
| * 1: (default)keep names of output vars | |||
| * 2: keep names of all (output and internal) vars | |||
| :param keep_opr_name: whether to keep operator names. | |||
| :param keep_param_name: whether to keep param names, so param values can be | |||
| easily manipulated after loading model | |||
| :param keep_opr_priority: whether to keep priority setting for operators | |||
| :param strip_info_file: a string for path or a file handler. if is not None, | |||
| then the dump information for code strip would be written to ``strip_info_file`` | |||
| :param append_json: will be check when `strip_info_file` is not None. if set | |||
| true, the information for code strip will be append to strip_info_file. | |||
| if set false, will rewrite strip_info_file | |||
| :param optimize_for_inference: enbale optmizations, | |||
| will skip all optimize options if this is False. Default: True | |||
| :param user_info: any type object, which will be pickled to bytes. | |||
| :param enable_metadata: whether to save metadata into output file. | |||
| :Keyword Arguments: | |||
| * enable_io16xc32 -- | |||
| whether to use float16 for I/O between oprs and use | |||
| float32 as internal computation precision. Note the output var would be | |||
| changed to float16. | |||
| * enable_ioc16 -- | |||
| whether to use float16 for both I/O and computation | |||
| precision. | |||
| * enable_hwcd4 -- | |||
| whether to use NHWCD4 data layout. This is faster on some | |||
| OpenCL backend. | |||
| * enable_nchw88 -- | |||
| whether to use NCHW88 data layout, currently | |||
| used in X86 AVX backend. | |||
| * enable_nchw44 -- | |||
| whether to use NCHW44 data layout, currently | |||
| used in arm backend. | |||
| * enable_nchw44_dot -- | |||
| whether to use NCHW44_dot data layout, currently | |||
| used in armv8.2+dotprod backend. | |||
| * enable_nchw4 -- | |||
| whether to use NCHW4 data layout, currently | |||
| used in nvidia backend(based on cudnn). | |||
| * enable_nchw32 -- | |||
| whether to use NCHW32 data layout, currently | |||
| used in nvidia backend with tensorcore(based on cudnn). | |||
| * enable_chwn4 -- | |||
| whether to use CHWN4 data layout, currently | |||
| used in nvidia backend with tensorcore. | |||
| * enable_nchw64 -- | |||
| whether to use NCHW64 data layout, used for fast int4 | |||
| support on Nvidia GPU. | |||
| * enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearty | |||
| into one opr. | |||
| * enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z | |||
| input for inference on nvidia backend(this optimization pass will | |||
| result in mismatch of the precision of output of training and | |||
| inference) | |||
| r"""Serializes trace to file system. | |||
| Args: | |||
| file: output file, could be file object or filename. | |||
| arg_names: names of the input tensors in the traced function. | |||
| output_names: names of the output tensors in the traced function, | |||
| use the default name if not specified. | |||
| append: whether output is appended to ``file``. | |||
| Only works when ``file`` is str. | |||
| keep_var_name: level for keeping variable names: | |||
| * 0: none of the names are kept | |||
| * 1: (default) keep names of output vars | |||
| * 2: keep names of all (output and internal) vars | |||
| keep_opr_name: whether to keep operator names. | |||
| keep_param_name: whether to keep param names, so param values can be | |||
| easily manipulated after loading model | |||
| keep_opr_priority: whether to keep priority setting for operators | |||
| strip_info_file: a string for a path or a file handler. If not None, | |||
| the dump information for code strip will be written to ``strip_info_file``. | |||
| append_json: only checked when ``strip_info_file`` is not None. If set to | |||
| True, the code-strip information will be appended to ``strip_info_file``; | |||
| if set to False, ``strip_info_file`` will be rewritten. | |||
| optimize_for_inference: enable optimizations; | |||
| all optimize options are skipped if this is False. Default: True | |||
| user_info: any type object, which will be pickled to bytes. | |||
| enable_metadata: whether to save metadata into output file. | |||
| Keyword Arguments: | |||
| * enable_io16xc32 -- | |||
| whether to use float16 for I/O between oprs and use | |||
| float32 as internal computation precision. Note the output var would be | |||
| changed to float16. | |||
| * enable_ioc16 -- | |||
| whether to use float16 for both I/O and computation | |||
| precision. | |||
| * enable_hwcd4 -- | |||
| whether to use NHWCD4 data layout. This is faster on some | |||
| OpenCL backend. | |||
| * enable_nchw88 -- | |||
| whether to use NCHW88 data layout, currently | |||
| used in X86 AVX backend. | |||
| * enable_nchw44 -- | |||
| whether to use NCHW44 data layout, currently | |||
| used in arm backend. | |||
| * enable_nchw44_dot -- | |||
| whether to use NCHW44_dot data layout, currently | |||
| used in armv8.2+dotprod backend. | |||
| * enable_nchw4 -- | |||
| whether to use NCHW4 data layout, currently | |||
| used in nvidia backend(based on cudnn). | |||
| * enable_nchw32 -- | |||
| whether to use NCHW32 data layout, currently | |||
| used in nvidia backend with tensorcore(based on cudnn). | |||
| * enable_chwn4 -- | |||
| whether to use CHWN4 data layout, currently | |||
| used in nvidia backend with tensorcore. | |||
| * enable_nchw64 -- | |||
| whether to use NCHW64 data layout, used for fast int4 | |||
| support on Nvidia GPU. | |||
| * enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearity | |||
| into one opr. | |||
| * enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z | |||
| input for inference on nvidia backend(this optimization pass will | |||
| result in mismatch of the precision of output of training and | |||
| inference) | |||
| """ | |||
| if not self._capture_as_const: | |||
| raise ValueError( | |||
| @@ -1033,10 +1032,10 @@ class trace: | |||
| ) | |||
| def get_profile(self): | |||
| """ | |||
| Get profiling result for compiled trace. | |||
| r"""Get profiling result for compiled trace. | |||
| :return: a json compatible object. | |||
| Returns: | |||
| a json compatible object. | |||
| """ | |||
| if not self._profiler: | |||
| raise RuntimeError("trace is not set with profiling=True") | |||
| @@ -1050,9 +1049,7 @@ class trace: | |||
| class CompiledTensorProxy: | |||
| """ | |||
| Duck-typed RawTensor | |||
| """ | |||
| r"""Duck-typed RawTensor""" | |||
| def __init__(self, handle): | |||
| self.__handle = handle | |||
| @@ -17,14 +17,11 @@ _default_level = logging.getLevelName(_default_level_name.upper()) | |||
| def set_log_file(fout, mode="a"): | |||
| r""" | |||
| Sets log output file. | |||
| :type fout: str or file-like | |||
| :param fout: file-like object that supports write and flush, or string for | |||
| the filename | |||
| :type mode: str | |||
| :param mode: specify the mode to open log file if *fout* is a string | |||
| r"""Sets log output file. | |||
| Args: | |||
| fout: file-like object that supports write and flush, or string for the filename | |||
| mode: specify the mode to open log file if *fout* is a string | |||
| """ | |||
| if isinstance(fout, str): | |||
| fout = open(fout, mode) | |||
| @@ -39,45 +36,31 @@ class MegEngineLogFormatter(logging.Formatter): | |||
| max_lines = 256 | |||
| def _color_exc(self, msg): | |||
| r""" | |||
| Sets the color of message as the execution type. | |||
| """ | |||
| r"""Sets the color of message as the execution type.""" | |||
| return "\x1b[34m{}\x1b[0m".format(msg) | |||
| def _color_dbg(self, msg): | |||
| r""" | |||
| Sets the color of message as the debugging type. | |||
| """ | |||
| r"""Sets the color of message as the debugging type.""" | |||
| return "\x1b[36m{}\x1b[0m".format(msg) | |||
| def _color_warn(self, msg): | |||
| r""" | |||
| Sets the color of message as the warning type. | |||
| """ | |||
| r"""Sets the color of message as the warning type.""" | |||
| return "\x1b[1;31m{}\x1b[0m".format(msg) | |||
| def _color_err(self, msg): | |||
| r""" | |||
| Sets the color of message as the error type. | |||
| """ | |||
| r"""Sets the color of message as the error type.""" | |||
| return "\x1b[1;4;31m{}\x1b[0m".format(msg) | |||
| def _color_omitted(self, msg): | |||
| r""" | |||
| Sets the color of message as the omitted type. | |||
| """ | |||
| r"""Sets the color of message as the omitted type.""" | |||
| return "\x1b[35m{}\x1b[0m".format(msg) | |||
| def _color_normal(self, msg): | |||
| r""" | |||
| Sets the color of message as the normal type. | |||
| """ | |||
| r"""Sets the color of message as the normal type.""" | |||
| return msg | |||
| def _color_date(self, msg): | |||
| r""" | |||
| Sets the color of message the same as date. | |||
| """ | |||
| r"""Sets the color of message the same as date.""" | |||
| return "\x1b[32m{}\x1b[0m".format(msg) | |||
| def format(self, record): | |||
| @@ -150,9 +133,7 @@ class MegEngineLogFormatter(logging.Formatter): | |||
| def get_logger(name=None, formatter=MegEngineLogFormatter): | |||
| r""" | |||
| Gets megengine logger with given name. | |||
| """ | |||
| r"""Gets megengine logger with given name.""" | |||
| logger = logging.getLogger(name) | |||
| if getattr(logger, "_init_done__", None): | |||
| @@ -170,12 +151,11 @@ def get_logger(name=None, formatter=MegEngineLogFormatter): | |||
| def set_log_level(level, update_existing=True): | |||
| """ | |||
| Sets default logging level. | |||
| r"""Sets default logging level. | |||
| :type level: int e.g. logging.INFO | |||
| :param level: loggin level given by python :mod:`logging` module | |||
| :param update_existing: whether to update existing loggers | |||
| Args: | |||
| level: logging level given by python :mod:`logging` module | |||
| update_existing: whether to update existing loggers | |||
| """ | |||
| global _default_level # pylint: disable=global-statement | |||
| _default_level = level | |||
| @@ -202,12 +182,13 @@ try: | |||
| _imperative_rt_logger.set_log_handler(_megbrain_logger) | |||
| def set_mgb_log_level(level): | |||
| r""" | |||
| Sets megbrain log level | |||
| r"""Sets megbrain log level | |||
| Args: | |||
| level: new log level | |||
| :type level: int e.g. logging.INFO | |||
| :param level: new log level | |||
| :return: original log level | |||
| Returns: | |||
| original log level | |||
| """ | |||
| _megbrain_logger.setLevel(level) | |||
| if level == logging.getLevelName("ERROR"): | |||
| @@ -235,11 +216,10 @@ except ImportError as exc: | |||
| @contextlib.contextmanager | |||
| def replace_mgb_log_level(level): | |||
| r""" | |||
| Replaces megbrain log level in a block and restore after exiting. | |||
| r"""Replaces megbrain log level in a block and restore after exiting. | |||
| :type level: int e.g. logging.INFO | |||
| :param level: new log level | |||
| Args: | |||
| level: new log level | |||
| """ | |||
| old = set_mgb_log_level(level) | |||
| try: | |||
| @@ -249,8 +229,6 @@ def replace_mgb_log_level(level): | |||
| def enable_debug_log(): | |||
| r""" | |||
| Sets logging level to debug for all components. | |||
| """ | |||
| r"""Sets logging level to debug for all components.""" | |||
| set_log_level(logging.DEBUG) | |||
| set_mgb_log_level(logging.DEBUG) | |||
| @@ -14,8 +14,7 @@ from .module import Module | |||
| class Softmax(Module): | |||
| r""" | |||
| Applies a softmax function. Softmax is defined as: | |||
| r"""Applies a softmax function. Softmax is defined as: | |||
| .. math:: | |||
| \text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)} | |||
| @@ -23,29 +22,29 @@ class Softmax(Module): | |||
| It is applied to all elements along axis, and rescales elements so that | |||
| they stay in the range `[0, 1]` and sum to 1. | |||
| :param axis: Along which axis softmax will be applied. By default, | |||
| softmax will apply along the highest ranked axis. | |||
| Args: | |||
| axis: Along which axis softmax will be applied. By default, | |||
| softmax will apply along the highest ranked axis. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-2,-1,0,1,2]).astype(np.float32)) | |||
| softmax = M.Softmax() | |||
| output = softmax(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| Outputs: | |||
| data = mge.tensor(np.array([-2,-1,0,1,2]).astype(np.float32)) | |||
| softmax = M.Softmax() | |||
| output = softmax(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [0.011656 0.031685 0.086129 0.234122 0.636409] | |||
| .. testoutput:: | |||
| [0.011656 0.031685 0.086129 0.234122 0.636409] | |||
| """ | |||
| def __init__(self, axis=None, **kwargs): | |||
| @@ -60,32 +59,31 @@ class Softmax(Module): | |||
| class Sigmoid(Module): | |||
| r""" | |||
| Applies the element-wise function: | |||
| r"""Applies the element-wise function: | |||
| .. math:: | |||
| \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)} | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| .. testcode:: | |||
| data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
| sigmoid = M.Sigmoid() | |||
| output = sigmoid(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| Outputs: | |||
| data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
| sigmoid = M.Sigmoid() | |||
| output = sigmoid(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [0.119203 0.268941 0.5 0.731059 0.880797] | |||
| .. testoutput:: | |||
| [0.119203 0.268941 0.5 0.731059 0.880797] | |||
| """ | |||
| def forward(self, inputs): | |||
| @@ -93,32 +91,31 @@ class Sigmoid(Module): | |||
| class SiLU(Module): | |||
| r""" | |||
| Applies the element-wise function: | |||
| r"""Applies the element-wise function: | |||
| .. math:: | |||
| \text{SiLU}(x) = \frac{x}{1 + \exp(-x)} | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
| silu = M.SiLU() | |||
| output = silu(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| Outputs: | |||
| data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
| silu = M.SiLU() | |||
| output = silu(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [-0.238406 -0.268941 0. 0.731059 1.761594] | |||
| .. testoutput:: | |||
| [-0.238406 -0.268941 0. 0.731059 1.761594] | |||
| """ | |||
| def forward(self, inputs): | |||
| @@ -126,8 +123,7 @@ class SiLU(Module): | |||
| class GELU(Module): | |||
| r""" | |||
| Applies the element-wise function: | |||
| r"""Applies the element-wise function: | |||
| .. math:: | |||
| \text{GELU}(x) = x\Phi(x) | |||
| @@ -136,24 +132,23 @@ class GELU(Module): | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
| gelu = M.GELU() | |||
| output = gelu(data) | |||
| with np.printoptions(precision=4): | |||
| print(output.numpy()) | |||
| Outputs: | |||
| data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
| gelu = M.GELU() | |||
| output = gelu(data) | |||
| with np.printoptions(precision=4): | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [-0.0455 -0.1587 0. 0.8413 1.9545] | |||
| .. testoutput:: | |||
| [-0.0455 -0.1587 0. 0.8413 1.9545] | |||
| """ | |||
| def forward(self, inputs): | |||
| @@ -161,31 +156,29 @@ class GELU(Module): | |||
| class ReLU(Module): | |||
| r""" | |||
| Applies the element-wise function: | |||
| r"""Applies the element-wise function: | |||
| .. math:: | |||
| \text{ReLU}(x) = \max(x, 0) | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
| relu = M.ReLU() | |||
| output = relu(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| .. testcode:: | |||
| Outputs: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
| relu = M.ReLU() | |||
| output = relu(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [0. 0. 0. 1. 2.] | |||
| .. testoutput:: | |||
| [0. 0. 0. 1. 2.] | |||
| """ | |||
| def forward(self, x): | |||
| @@ -193,8 +186,7 @@ class ReLU(Module): | |||
| class PReLU(Module): | |||
| r""" | |||
| Applies the element-wise function: | |||
| r"""Applies the element-wise function: | |||
| .. math:: | |||
| \text{PReLU}(x) = \max(0,x) + a * \min(0,x) | |||
| @@ -211,28 +203,28 @@ class PReLU(Module): | |||
| Here :math:`a` is a learnable parameter. When called without arguments, `PReLU()` uses | |||
| a single parameter :math:`a` across all input channels. If called with `PReLU(num_of_channels)`, each input channel will have its own :math:`a`. | |||
| :param num_parameters: number of :math:`a` to learn, there is only two | |||
| values are legitimate: 1, or the number of channels at input. Default: 1 | |||
| :param init: the initial value of :math:`a`. Default: 0.25 | |||
| Args: | |||
| num_parameters: number of :math:`a` to learn; only two | |||
| values are legitimate: 1, or the number of channels of the input. Default: 1 | |||
| init: the initial value of :math:`a`. Default: 0.25 | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-1.2, -3.7, 2.7]).astype(np.float32)) | |||
| prelu = M.PReLU() | |||
| output = prelu(data) | |||
| print(output.numpy()) | |||
| Outputs: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-1.2, -3.7, 2.7]).astype(np.float32)) | |||
| prelu = M.PReLU() | |||
| output = prelu(data) | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [-0.3 -0.925 2.7 ] | |||
| .. testoutput:: | |||
| [-0.3 -0.925 2.7 ] | |||
| """ | |||
| def __init__(self, num_parameters: int = 1, init: float = 0.25, **kwargs): | |||
| @@ -257,8 +249,7 @@ class PReLU(Module): | |||
| class LeakyReLU(Module): | |||
| r""" | |||
| Applies the element-wise function: | |||
| r"""Applies the element-wise function: | |||
| .. math:: | |||
| \text{LeakyReLU}(x) = \max(0,x) + negative\_slope \times \min(0,x) | |||
| @@ -274,23 +265,22 @@ class LeakyReLU(Module): | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32)) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32)) | |||
| leakyrelu = M.LeakyReLU(0.01) | |||
| output = leakyrelu(data) | |||
| print(output.numpy()) | |||
| Outputs: | |||
| leakyrelu = M.LeakyReLU(0.01) | |||
| output = leakyrelu(data) | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [-0.08 -0.12 6. 10. ] | |||
| .. testoutput:: | |||
| [-0.08 -0.12 6. 10. ] | |||
| """ | |||
| def __init__(self, negative_slope: float = 0.01, **kwargs): | |||
| @@ -25,8 +25,7 @@ class _AdaptivePoolNd(Module): | |||
| class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
| r""" | |||
| Applies a 2D max adaptive pooling over an input. | |||
| r"""Applies a 2D max adaptive pooling over an input. | |||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||
| an output shape :math:`(OH, OW)`, this layer generates the output of | |||
| @@ -40,29 +39,30 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
| \end{aligned} | |||
| ``kernel_size`` and ``stride`` can be inferred from input shape and out shape: | |||
| * padding: (0, 0) | |||
| * stride: (floor(IH / OH), floor(IW / OW)) | |||
| * kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| m = M.AdaptiveMaxPool2d((2, 2)) | |||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy()) | |||
| m = M.AdaptiveMaxPool2d((2, 2)) | |||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy()) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| .. testoutput:: | |||
| [[[[ 5. 7.] | |||
| [13. 15.]]]] | |||
| [[[[ 5. 7.] | |||
| [13. 15.]]]] | |||
| """ | |||
| @@ -71,8 +71,7 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
| class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||
| r""" | |||
| Applies a 2D average pooling over an input. | |||
| r"""Applies a 2D average pooling over an input. | |||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||
| an output shape :math:`(OH, OW)`, this layer generates the output of | |||
| @@ -84,29 +83,30 @@ class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||
| input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) | |||
| ``kernel_size`` and ``stride`` can be inferred from input shape and out shape: | |||
| * padding: (0, 0) | |||
| * stride: (floor(IH / OH), floor(IW / OW)) | |||
| * kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| m = M.AdaptiveAvgPool2d((2, 2)) | |||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy()) | |||
| m = M.AdaptiveAvgPool2d((2, 2)) | |||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy()) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| .. testoutput:: | |||
| [[[[ 2.5 4.5] | |||
| [10.5 12.5]]]] | |||
| [[[[ 2.5 4.5] | |||
| [10.5 12.5]]]] | |||
| """ | |||
| @@ -14,9 +14,7 @@ from .module import Module | |||
| class BatchMatMulActivation(Module): | |||
| r""" | |||
| Batched :func:`~.matmul` with activation(only :func:`~.relu` supported), no transpose anywhere. | |||
| """ | |||
| r"""Batched :func:`~.matmul` with activation(only :func:`~.relu` supported), no transpose anywhere.""" | |||
| def __init__( | |||
| self, | |||
| @@ -141,37 +141,29 @@ class _BatchNorm(Module): | |||
| class SyncBatchNorm(_BatchNorm): | |||
| r""" | |||
| Applies Synchronized Batch Normalization for distributed training. | |||
| :type num_features: int | |||
| :param num_features: usually :math:`C` from an input of shape | |||
| :math:`(N, C, H, W)` or the highest ranked dimension of an input | |||
| less than 4D. | |||
| :type eps: float | |||
| :param eps: a value added to the denominator for numerical stability. | |||
| Default: 1e-5 | |||
| :type momentum: float | |||
| :param momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||
| Default: 0.9 | |||
| :type affine: bool | |||
| :param affine: a boolean value that when set to True, this module has | |||
| learnable affine parameters. Default: True | |||
| :type track_running_stats: bool | |||
| :param track_running_stats: when set to True, this module tracks the | |||
| running mean and variance. When set to False, this module does not | |||
| track such statistics and always uses batch statistics in both training | |||
| and eval modes. Default: True | |||
| :type freeze: bool | |||
| :param freeze: when set to True, this module does not update the | |||
| running mean and variance, and uses the running mean and variance instead of | |||
| the batch mean and batch variance to normalize the input. The parameter takes effect | |||
| only when the module is initilized with track_running_stats as True. | |||
| Default: False | |||
| :type group: :class:`~megengine.distributed.Group` | |||
| :param group: communication group, caculate mean and variance between this group. | |||
| Default: :obj:`~megengine.distributed.WORLD` | |||
| :return: output tensor. | |||
| r"""Applies Synchronized Batch Normalization for distributed training. | |||
| Args: | |||
| num_features: usually :math:`C` from an input of shape | |||
| :math:`(N, C, H, W)` or the highest ranked dimension of an input | |||
| less than 4D. | |||
| eps: a value added to the denominator for numerical stability. | |||
| Default: 1e-5 | |||
| momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||
| Default: 0.9 | |||
| affine: a boolean value that when set to True, this module has | |||
| learnable affine parameters. Default: True | |||
| track_running_stats: when set to True, this module tracks the | |||
| running mean and variance. When set to False, this module does not | |||
| track such statistics and always uses batch statistics in both training | |||
| and eval modes. Default: True | |||
| freeze: when set to True, this module does not update the | |||
| running mean and variance, and uses the running mean and variance instead of | |||
| the batch mean and batch variance to normalize the input. The parameter takes effect | |||
| only when the module is initialized with track_running_stats as True. | |||
| Default: False | |||
| group: communication group; mean and variance are calculated within this group. | |||
| Default: :obj:`~.distributed.WORLD` | |||
| """ | |||
| def __init__( | |||
| @@ -249,8 +241,7 @@ class SyncBatchNorm(_BatchNorm): | |||
| class BatchNorm1d(_BatchNorm): | |||
| r""" | |||
| Applies Batch Normalization over a 2D/3D tensor. | |||
| r"""Applies Batch Normalization over a 2D/3D tensor. | |||
| Refer to :class:`~.BatchNorm2d` for more information. | |||
| """ | |||
| @@ -263,8 +254,7 @@ class BatchNorm1d(_BatchNorm): | |||
| class BatchNorm2d(_BatchNorm): | |||
| r""" | |||
| Applies Batch Normalization over a 4D tensor. | |||
| r"""Applies Batch Normalization over a 4D tensor. | |||
| .. math:: | |||
| @@ -287,56 +277,50 @@ class BatchNorm2d(_BatchNorm): | |||
| statistics on `(N, H, W)` slices, it's common terminology to call this | |||
| Spatial Batch Normalization. | |||
| :type num_features: int | |||
| :param num_features: usually :math:`C` from an input of shape | |||
| :math:`(N, C, H, W)` or the highest ranked dimension of an input | |||
| less than 4D. | |||
| :type eps: float | |||
| :param eps: a value added to the denominator for numerical stability. | |||
| Default: 1e-5 | |||
| :type momentum: float | |||
| :param momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||
| Default: 0.9 | |||
| :type affine: bool | |||
| :param affine: a boolean value that when set to True, this module has | |||
| learnable affine parameters. Default: True | |||
| :type track_running_stats: bool | |||
| :param track_running_stats: when set to True, this module tracks the | |||
| running mean and variance. When set to False, this module does not | |||
| track such statistics and always uses batch statistics in both training | |||
| and eval modes. Default: True | |||
| :type freeze: bool | |||
| :param freeze: when set to True, this module does not update the | |||
| running mean and variance, and uses the running mean and variance instead of | |||
| the batch mean and batch variance to normalize the input. The parameter takes effect | |||
| only when the module is initilized with track_running_stats as True. | |||
| Default: False | |||
| Args: | |||
| num_features: usually :math:`C` from an input of shape | |||
| :math:`(N, C, H, W)` or the highest ranked dimension of an input | |||
| less than 4D. | |||
| eps: a value added to the denominator for numerical stability. | |||
| Default: 1e-5 | |||
| momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||
| Default: 0.9 | |||
| affine: a boolean value that when set to True, this module has | |||
| learnable affine parameters. Default: True | |||
| track_running_stats: when set to True, this module tracks the | |||
| running mean and variance. When set to False, this module does not | |||
| track such statistics and always uses batch statistics in both training | |||
| and eval modes. Default: True | |||
| freeze: when set to True, this module does not update the | |||
| running mean and variance, and uses the running mean and variance instead of | |||
| the batch mean and batch variance to normalize the input. The parameter takes effect | |||
| only when the module is initialized with track_running_stats as True. | |||
| Default: False | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| # With Learnable Parameters | |||
| m = M.BatchNorm2d(4) | |||
| inp = mge.tensor(np.random.rand(1, 4, 3, 3).astype("float32")) | |||
| oup = m(inp) | |||
| print(m.weight.numpy().flatten(), m.bias.numpy().flatten()) | |||
| # Without Learnable Parameters | |||
| m = M.BatchNorm2d(4, affine=False) | |||
| oup = m(inp) | |||
| print(m.weight, m.bias) | |||
| # With Learnable Parameters | |||
| m = M.BatchNorm2d(4) | |||
| inp = mge.tensor(np.random.rand(1, 4, 3, 3).astype("float32")) | |||
| oup = m(inp) | |||
| print(m.weight.numpy().flatten(), m.bias.numpy().flatten()) | |||
| # Without Learnable Parameters | |||
| m = M.BatchNorm2d(4, affine=False) | |||
| oup = m(inp) | |||
| print(m.weight, m.bias) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| .. testoutput:: | |||
| [1. 1. 1. 1.] [0. 0. 0. 0.] | |||
| None None | |||
| [1. 1. 1. 1.] [0. 0. 0. 0.] | |||
| None None | |||
| """ | |||
| def _check_input_ndim(self, inp): | |||
| @@ -13,8 +13,7 @@ from .module import Module | |||
| class Concat(Module): | |||
| r""" | |||
| A :class:`~.Module` to do functional :func:`~.concat`. Could be replaced with :class:`~.QATModule` | |||
| r"""A :class:`~.Module` to do functional :func:`~.concat`. Could be replaced with :class:`~.QATModule` | |||
| version :class:`~.qat.Concat` using :func:`~.quantize.quantize_qat`. | |||
| """ | |||
| @@ -97,8 +97,7 @@ class _ConvNd(Module): | |||
| class Conv1d(_ConvNd): | |||
| r""" | |||
| Applies a 1D convolution over an input tensor. | |||
| r"""Applies a 1D convolution over an input tensor. | |||
| For instance, given an input of the size :math:`(N, C_{\text{in}}, H)`, | |||
| this layer generates an output of the size | |||
| @@ -121,52 +120,49 @@ class Conv1d(_ConvNd): | |||
| a depthwise convolution with a depthwise multiplier `K`, can be constructed | |||
| by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. | |||
| :param in_channels: number of input channels. | |||
| :param out_channels: number of output channels. | |||
| :param kernel_size: size of weight on spatial dimensions. | |||
| :param stride: stride of the 1D convolution operation. | |||
| :param padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param dilation: dilation of the 1D convolution operation. Default: 1 | |||
| :param groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| :param bias: whether to add a bias onto the result of convolution. Default: | |||
| True | |||
| :param conv_mode: Supports `cross_correlation`. Default: | |||
| `cross_correlation` | |||
| :param compute_mode: When set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, but only | |||
| effective when input and output are of float16 dtype. | |||
| .. note:: | |||
| * ``weight`` usually has shape ``(out_channels, in_channels, kernel_size)`` , | |||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, kernel_size)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, 1)`` | |||
| Args: | |||
| in_channels: number of input channels. | |||
| out_channels: number of output channels. | |||
| kernel_size: size of weight on spatial dimensions. | |||
| stride: stride of the 1D convolution operation. | |||
| padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| dilation: dilation of the 1D convolution operation. Default: 1 | |||
| groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| bias: whether to add a bias onto the result of convolution. Default: True | |||
| conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
| compute_mode: When set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, but only | |||
| effective when input and output are of float16 dtype. | |||
| Note: | |||
| * ``weight`` usually has shape ``(out_channels, in_channels, kernel_size)`` , | |||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, kernel_size)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, 1)`` | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| m = M.Conv1d(in_channels=3, out_channels=1, kernel_size=3) | |||
| inp = mge.tensor(np.arange(0, 24).astype("float32").reshape(2, 3, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy().shape) | |||
| m = M.Conv1d(in_channels=3, out_channels=1, kernel_size=3) | |||
| inp = mge.tensor(np.arange(0, 24).astype("float32").reshape(2, 3, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy().shape) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| (2, 1, 2) | |||
| .. testoutput:: | |||
| (2, 1, 2) | |||
| """ | |||
| def __init__( | |||
| @@ -245,8 +241,7 @@ class Conv1d(_ConvNd): | |||
| class Conv2d(_ConvNd): | |||
| r""" | |||
| Applies a 2D convolution over an input tensor. | |||
| r"""Applies a 2D convolution over an input tensor. | |||
| For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`, | |||
| this layer generates an output of the size | |||
| @@ -284,54 +279,51 @@ class Conv2d(_ConvNd): | |||
| a depthwise convolution with a depthwise multiplier `K`, can be constructed | |||
| by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. | |||
| :param in_channels: number of input channels. | |||
| :param out_channels: number of output channels. | |||
| :param kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size)``. | |||
| :param stride: stride of the 2D convolution operation. Default: 1 | |||
| :param padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | |||
| :param groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| :param bias: whether to add a bias onto the result of convolution. Default: | |||
| True | |||
| :param conv_mode: Supports `cross_correlation`. Default: | |||
| `cross_correlation` | |||
| :param compute_mode: When set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, but only | |||
| effective when input and output are of float16 dtype. | |||
| .. note:: | |||
| * ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| Args: | |||
| in_channels: number of input channels. | |||
| out_channels: number of output channels. | |||
| kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size)``. | |||
| stride: stride of the 2D convolution operation. Default: 1 | |||
| padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| dilation: dilation of the 2D convolution operation. Default: 1 | |||
| groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| bias: whether to add a bias onto the result of convolution. Default: True | |||
| conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
| compute_mode: When set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, but only | |||
| effective when input and output are of float16 dtype. | |||
| Note: | |||
| * ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| .. testcode:: | |||
| m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3) | |||
| inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy().shape) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| Outputs: | |||
| m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3) | |||
| inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy().shape) | |||
| .. testoutput:: | |||
| Outputs: | |||
| (2, 1, 2, 2) | |||
| .. testoutput:: | |||
| (2, 1, 2, 2) | |||
| """ | |||
| def __init__( | |||
| @@ -411,8 +403,7 @@ class Conv2d(_ConvNd): | |||
| class Conv3d(_ConvNd): | |||
| r""" | |||
| Applies a 3D convolution over an input tensor. | |||
| r"""Applies a 3D convolution over an input tensor. | |||
| For instance, given an input of the size :math:`(N, C_{\text{in}}, T, H, W)`, | |||
| this layer generates an output of the size | |||
| @@ -434,50 +425,47 @@ class Conv3d(_ConvNd): | |||
| a depthwise convolution with a depthwise multiplier `K`, can be constructed | |||
| by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. | |||
| :param in_channels: number of input channels. | |||
| :param out_channels: number of output channels. | |||
| :param kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
| an :class:`int`, the actual kernel size would be | |||
| `(kernel_size, kernel_size, kernel_size)`. | |||
| :param stride: stride of the 3D convolution operation. Default: 1 | |||
| :param padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param dilation: dilation of the 3D convolution operation. Default: 1 | |||
| :param groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| :param bias: whether to add a bias onto the result of convolution. Default: | |||
| True | |||
| :param conv_mode: Supports `cross_correlation`. Default: | |||
| `cross_correlation` | |||
| .. note:: | |||
| * ``weight`` usually has shape ``(out_channels, in_channels, depth, height, width)`` , | |||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, depth, height, width)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| Args: | |||
| in_channels: number of input channels. | |||
| out_channels: number of output channels. | |||
| kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
| an :class:`int`, the actual kernel size would be | |||
| `(kernel_size, kernel_size, kernel_size)`. | |||
| stride: stride of the 3D convolution operation. Default: 1 | |||
| padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| dilation: dilation of the 3D convolution operation. Default: 1 | |||
| groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| bias: whether to add a bias onto the result of convolution. Default: True | |||
| conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
| Note: | |||
| * ``weight`` usually has shape ``(out_channels, in_channels, depth, height, width)`` , | |||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, depth, height, width)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| .. testcode:: | |||
| m = M.Conv3d(in_channels=3, out_channels=1, kernel_size=3) | |||
| inp = mge.tensor(np.arange(0, 384).astype("float32").reshape(2, 3, 4, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy().shape) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| Outputs: | |||
| m = M.Conv3d(in_channels=3, out_channels=1, kernel_size=3) | |||
| inp = mge.tensor(np.arange(0, 384).astype("float32").reshape(2, 3, 4, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy().shape) | |||
| .. testoutput:: | |||
| Outputs: | |||
| (2, 1, 2, 2, 2) | |||
| .. testoutput:: | |||
| (2, 1, 2, 2, 2) | |||
| """ | |||
| def __init__( | |||
| @@ -551,8 +539,7 @@ class Conv3d(_ConvNd): | |||
| class ConvTranspose2d(_ConvNd): | |||
| r""" | |||
| Applies a 2D transposed convolution over an input tensor. | |||
| r"""Applies a 2D transposed convolution over an input tensor. | |||
| This module is also known as a deconvolution or a fractionally-strided convolution. | |||
| :class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation | |||
| @@ -562,35 +549,32 @@ class ConvTranspose2d(_ConvNd): | |||
| the opposite way, transforming a smaller input to a larger output while preserving the | |||
| connectivity pattern. | |||
| :param in_channels: number of input channels. | |||
| :param out_channels: number of output channels. | |||
| :param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size)``. | |||
| :param stride: stride of the 2D convolution operation. Default: 1 | |||
| :param padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | |||
| :param groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| :param bias: wether to add a bias onto the result of convolution. Default: | |||
| True | |||
| :param conv_mode: Supports `cross_correlation`. Default: | |||
| `cross_correlation` | |||
| :param compute_mode: When set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, but only | |||
| effective when input and output are of float16 dtype. | |||
| .. note:: | |||
| * ``weight`` usually has shape ``(in_channels, out_channels, height, width)`` , | |||
| if groups is not 1, shape will be ``(groups, in_channels // groups, out_channels // groups, height, width)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| Args: | |||
| in_channels: number of input channels. | |||
| out_channels: number of output channels. | |||
| kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size)``. | |||
| stride: stride of the 2D convolution operation. Default: 1 | |||
| padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| dilation: dilation of the 2D convolution operation. Default: 1 | |||
| groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| bias: whether to add a bias onto the result of convolution. Default: True | |||
| conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
| compute_mode: When set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, but only | |||
| effective when input and output are of float16 dtype. | |||
| Note: | |||
| * ``weight`` usually has shape ``(in_channels, out_channels, height, width)`` , | |||
| if groups is not 1, shape will be ``(groups, in_channels // groups, out_channels // groups, height, width)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| """ | |||
| def __init__( | |||
| @@ -669,30 +653,28 @@ class ConvTranspose2d(_ConvNd): | |||
| class LocalConv2d(Conv2d): | |||
| r""" | |||
| Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor. | |||
| r"""Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor. | |||
| It is also known as the locally connected layer. | |||
| :param in_channels: number of input channels. | |||
| :param out_channels: number of output channels. | |||
| :param input_height: the height of the input images. | |||
| :param input_width: the width of the input images. | |||
| :param kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size)``. | |||
| :param stride: stride of the 2D convolution operation. Default: 1 | |||
| :param padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``. Default: 1 | |||
| .. note:: | |||
| * ``weight`` usually has shape ``(out_height, out_width, in_channels, height, width, in_channels)`` , | |||
| if groups is not 1, shape will be ``(groups, out_height, out_width, in_channels // groups, height, width, out_channels // groups)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| Args: | |||
| in_channels: number of input channels. | |||
| out_channels: number of output channels. | |||
| input_height: the height of the input images. | |||
| input_width: the width of the input images. | |||
| kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size)``. | |||
| stride: stride of the 2D convolution operation. Default: 1 | |||
| padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``. Default: 1 | |||
| Note: | |||
| * ``weight`` usually has shape ``(out_height, out_width, in_channels, height, width, in_channels)`` , | |||
| if groups is not 1, shape will be ``(groups, out_height, out_width, in_channels // groups, height, width, out_channels // groups)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| """ | |||
| def __init__( | |||
| @@ -755,8 +737,7 @@ class LocalConv2d(Conv2d): | |||
| class ConvRelu2d(Conv2d): | |||
| r""" | |||
| A fused :class:`~.Module` including :class:`~.module.Conv2d` and :func:`~.relu`. | |||
| r"""A fused :class:`~.Module` including :class:`~.module.Conv2d` and :func:`~.relu`. | |||
| Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvRelu2d` using :func:`~.quantize.quantize_qat`. | |||
| """ | |||
| @@ -765,38 +746,34 @@ class ConvRelu2d(Conv2d): | |||
| class DeformableConv2d(_ConvNd): | |||
| """ | |||
| Deformable Convolution. | |||
| :param in_channels: number of input channels. | |||
| :param out_channels: number of output channels. | |||
| :param kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size)``. | |||
| :param stride: stride of the 2D convolution operation. Default: 1 | |||
| :param padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | |||
| :param groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| :param bias: whether to add a bias onto the result of convolution. Default: | |||
| True | |||
| :param conv_mode: Supports `cross_correlation`. Default: | |||
| `cross_correlation` | |||
| :param compute_mode: When set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, but only | |||
| effective when input and output are of float16 dtype. | |||
| .. note:: | |||
| * ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| r"""Deformable Convolution. | |||
| Args: | |||
| in_channels: number of input channels. | |||
| out_channels: number of output channels. | |||
| kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size)``. | |||
| stride: stride of the 2D convolution operation. Default: 1 | |||
| padding: size of the paddings added to the input on both sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| dilation: dilation of the 2D convolution operation. Default: 1 | |||
| groups: number of groups into which the input and output channels are divided, | |||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
| and there would be an extra dimension at the beginning of the weight's | |||
| shape. Default: 1 | |||
| bias: whether to add a bias onto the result of convolution. Default: True | |||
| conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
| compute_mode: When set to "default", no special requirements will be | |||
| placed on the precision of intermediate results. When set to "float32", | |||
| "float32" would be used for accumulator and intermediate result, but only | |||
| effective when input and output are of float16 dtype. | |||
| Note: | |||
| * ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| """ | |||
| def __init__( | |||
| @@ -877,8 +854,7 @@ class DeformableConv2d(_ConvNd): | |||
| class ConvTranspose3d(_ConvNd): | |||
| r""" | |||
| Applies a 3D transposed convolution over an input tensor. | |||
| r"""Applies a 3D transposed convolution over an input tensor. | |||
| Only supports the case where groups = 1 and conv_mode = "cross_correlation". | |||
| @@ -889,23 +865,21 @@ class ConvTranspose3d(_ConvNd): | |||
| works the opposite way, transforming a smaller input to a larger output while | |||
| preserving the connectivity pattern. | |||
| :param in_channels: number of input channels. | |||
| :param out_channels: number of output channels. | |||
| :param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size, kernel_size)``. | |||
| :param stride: stride of the 3D convolution operation. Default: 1 | |||
| :param padding: size of the paddings added to the input on all sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| :param dilation: dilation of the 3D convolution operation. Default: 1 | |||
| :param bias: wether to add a bias onto the result of convolution. Default: | |||
| True | |||
| .. note:: | |||
| * ``weight`` usually has shape ``(in_channels, out_channels, depth, height, width)`` . | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| Args: | |||
| in_channels: number of input channels. | |||
| out_channels: number of output channels. | |||
| kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is | |||
| an :class:`int`, the actual kernel size would be | |||
| ``(kernel_size, kernel_size, kernel_size)``. | |||
| stride: stride of the 3D convolution operation. Default: 1 | |||
| padding: size of the paddings added to the input on all sides of its | |||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||
| dilation: dilation of the 3D convolution operation. Default: 1 | |||
| bias: whether to add a bias onto the result of convolution. Default: True | |||
| Note: | |||
| * ``weight`` usually has shape ``(in_channels, out_channels, depth, height, width)`` . | |||
| * ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
| """ | |||
| def __init__( | |||
| @@ -50,8 +50,7 @@ class _ConvBnActivation2d(Module): | |||
| class ConvBn2d(_ConvBnActivation2d): | |||
| r""" | |||
| A fused :class:`~.Module` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d`. | |||
| r"""A fused :class:`~.Module` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d`. | |||
| Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvBn2d` using | |||
| :func:`~.quantize.quantize_qat`. | |||
| """ | |||
| @@ -61,8 +60,7 @@ class ConvBn2d(_ConvBnActivation2d): | |||
| class ConvBnRelu2d(_ConvBnActivation2d): | |||
| r""" | |||
| A fused :class:`~.Module` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu`. | |||
| r"""A fused :class:`~.Module` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu`. | |||
| Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvBnRelu2d` using :func:`~.quantize.quantize_qat`. | |||
| """ | |||
| @@ -11,13 +11,13 @@ from .module import Module | |||
| class Dropout(Module): | |||
| r""" | |||
| Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||
| r"""Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||
| Commonly used in large networks to prevent overfitting. | |||
| Note that we perform dropout only during training; we also rescale (multiply) the output tensor | |||
| by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. | |||
| :param drop_prob: The probability to drop (set to zero) each single element | |||
| Args: | |||
| drop_prob: The probability to drop (set to zero) each single element | |||
| """ | |||
| def __init__(self, drop_prob=0.0, **kwargs): | |||
| @@ -11,67 +11,12 @@ from .module import Module | |||
| class Elemwise(Module): | |||
| r""" | |||
| A :class:`~.Module` to do :mod:`~.functional.elemwise` operator. Could be replaced with :class:`~.QATModule` | |||
| r"""A :class:`~.Module` to do :mod:`~.functional.elemwise` operator. Could be replaced with :class:`~.QATModule` | |||
| version :class:`~.qat.Elemwise` using :func:`~.quantize.quantize_qat`. | |||
| :param method: the elemwise method, supports the following strings. | |||
| It will do the normal elemwise operator for float. | |||
| * "add": a + b | |||
| * "fuse_add_relu": max(x+y, 0) | |||
| * "mul": x * y | |||
| * "min": min(x, y) | |||
| * "max": max(x, y) | |||
| * "sub": x - y | |||
| * "true_div": x / y | |||
| * "fuse_add_sigmoid": sigmoid(x + y) | |||
| * "fuse_add_tanh": tanh(x + y) | |||
| * "relu": x > 0 ? x : 0 | |||
| * "silu": silu(x) | |||
| * "gelu": gelu(x) | |||
| * "abs": x > 0 ? x : -x | |||
| * "sigmoid": sigmoid(x) | |||
| * "exp": exp(x) | |||
| * "tanh": tanh(x) | |||
| * "fuse_mul_add3": x * y + z | |||
| * "fast_tanh": x * (27. + x * x) / (27. + 9. * x * x) | |||
| * "negate": -x | |||
| * "acos": acos(x) | |||
| * "asin": asin(x) | |||
| * "ceil": ceil(x) | |||
| * "cos": cos(x) | |||
| * "expm1": expm1(x) | |||
| * "floor": floor(x) | |||
| * "log": log(x) | |||
| * "log1p": log1p(x) | |||
| * "sin": sin(x) | |||
| * "round": round(x) | |||
| * "erf": erf(x) | |||
| * "erfinv": erfinv(x) | |||
| * "erfc": erfc(x) | |||
| * "erfcinv": erfcinv(x) | |||
| * "abs_grad": abs_grad | |||
| * "floor_div": floor_div | |||
| * "mod": mod | |||
| * "sigmoid_grad": sigmoid_grad | |||
| * "switch_gt0": switch_gt0 | |||
| * "tanh_grad": tanh_grad | |||
| * "lt": less | |||
| * "leq": leq | |||
| * "eq": equal | |||
| * "pow": pow | |||
| * "log_sum_exp": log_sum_exp | |||
| * "fast_tanh_grad": fast_tanh_grad | |||
| * "atan2": atan2 | |||
| * "cond_leq_mov": cond_leq_mov | |||
| * "h_swish": h_swish | |||
| * "fuse_add_h_swish": h_swish(x+y) | |||
| * "h_swish_grad": h_swish_grad | |||
| * "and": bool binary: x && y | |||
| * "or": bool binary: x || y | |||
| * "xor": bool binary: x ^ y | |||
| * "not": bool unary: ~x | |||
| Args: | |||
| method: the elemwise method, supports the following strings. | |||
| It performs the normal elemwise operation for float inputs. | |||
| """ | |||
| def __init__(self, method, **kwargs): | |||
| @@ -17,42 +17,41 @@ from .module import Module | |||
| class Embedding(Module): | |||
| r""" | |||
| A simple lookup table that stores embeddings of a fixed dictionary and size. | |||
| r"""A simple lookup table that stores embeddings of a fixed dictionary and size. | |||
| This module is often used to store word embeddings and retrieve them using indices. | |||
| The input to the module is a list of indices, and the output is the corresponding word embeddings. | |||
| The indices should be less than num_embeddings. | |||
| :param num_embeddings: size of embedding dictionary. | |||
| :param embedding_dim: size of each embedding vector. | |||
| :param padding_idx: should be set to None, not supported now. | |||
| :param max_norm: should be set to None, not supported now. | |||
| :param norm_type: should be set to None, not supported now. | |||
| :param initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim). | |||
| Args: | |||
| num_embeddings: size of embedding dictionary. | |||
| embedding_dim: size of each embedding vector. | |||
| padding_idx: should be set to None, not supported now. | |||
| max_norm: should be set to None, not supported now. | |||
| norm_type: should be set to None, not supported now. | |||
| initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim). | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6)], dtype=np.float32)) | |||
| data = mge.tensor(np.array([(0,0)], dtype=np.int32)) | |||
| .. testcode:: | |||
| embedding = M.Embedding(1, 5, initial_weight=weight) | |||
| output = embedding(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6)], dtype=np.float32)) | |||
| data = mge.tensor(np.array([(0,0)], dtype=np.int32)) | |||
| Outputs: | |||
| embedding = M.Embedding(1, 5, initial_weight=weight) | |||
| output = embedding(data) | |||
| with np.printoptions(precision=6): | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[1.2 2.3 3.4 4.5 5.6] | |||
| [1.2 2.3 3.4 4.5 5.6]]] | |||
| .. testoutput:: | |||
| [[[1.2 2.3 3.4 4.5 5.6] | |||
| [1.2 2.3 3.4 4.5 5.6]]] | |||
| """ | |||
| def __init__( | |||
| @@ -110,36 +109,35 @@ class Embedding(Module): | |||
| max_norm: Optional[float] = None, | |||
| norm_type: Optional[float] = None, | |||
| ): | |||
| r""" | |||
| Creates Embedding instance from given 2-dimensional FloatTensor. | |||
| r"""Creates Embedding instance from given 2-dimensional FloatTensor. | |||
| :param embeddings: tensor containing the weight for the embedding. | |||
| :param freeze: if ``True``, the weight does not get updated during the learning process. Default: True. | |||
| :param padding_idx: should be set to None, not supported now. | |||
| :param max_norm: should be set to None, not supported now. | |||
| :param norm_type: should be set to None, not supported now. | |||
| Args: | |||
| embeddings: tensor containing the weight for the embedding. | |||
| freeze: if ``True``, the weight does not get updated during the learning process. Default: True. | |||
| padding_idx: should be set to None, not supported now. | |||
| max_norm: should be set to None, not supported now. | |||
| norm_type: should be set to None, not supported now. | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6)], dtype=np.float32)) | |||
| data = mge.tensor(np.array([(0,0)], dtype=np.int32)) | |||
| .. testcode:: | |||
| embedding = M.Embedding.from_pretrained(weight, freeze=False) | |||
| output = embedding(data) | |||
| print(output.numpy()) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6)], dtype=np.float32)) | |||
| data = mge.tensor(np.array([(0,0)], dtype=np.int32)) | |||
| Outputs: | |||
| embedding = M.Embedding.from_pretrained(weight, freeze=False) | |||
| output = embedding(data) | |||
| print(output.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[1.2 2.3 3.4 4.5 5.6] | |||
| [1.2 2.3 3.4 4.5 5.6]]] | |||
| .. testoutput:: | |||
| [[[1.2 2.3 3.4 4.5 5.6] | |||
| [1.2 2.3 3.4 4.5 5.6]]] | |||
| """ | |||
| embeddings_shape = embeddings.shape | |||
| embeddings_dim = len(embeddings_shape) | |||
| @@ -19,7 +19,7 @@ from .module import Module | |||
| class TensorrtRuntimeSubgraph(Module): | |||
| r"""Load a serialized TensorrtRuntime subgraph. | |||
| See :func:`~.tensorrt_runtime_opr` for more details. | |||
| """ | |||
| @@ -41,7 +41,7 @@ class TensorrtRuntimeSubgraph(Module): | |||
| class CambriconRuntimeSubgraph(Module): | |||
| r"""Load a serialized CambriconRuntime subgraph. | |||
| See :func:`~.cambricon_runtime_opr` for more details. | |||
| """ | |||
| @@ -68,7 +68,7 @@ class CambriconRuntimeSubgraph(Module): | |||
| class AtlasRuntimeSubgraph(Module): | |||
| r"""Load a serialized AtlasRuntime subgraph. | |||
| See :func:`~.atlas_runtime_opr` for more details. | |||
| """ | |||
| @@ -18,53 +18,53 @@ from ..tensor import Tensor | |||
| def fill_(tensor: Tensor, val: Union[float, int]) -> None: | |||
| """ | |||
| Fills the given ``tensor`` with value ``val``. | |||
| """Fills the given ``tensor`` with value ``val``. | |||
| :param tensor: tensor to be initialized. | |||
| :param val: value to be filled throughout the tensor. | |||
| Args: | |||
| tensor: tensor to be initialized. | |||
| val: value to be filled throughout the tensor. | |||
| """ | |||
| tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype)) | |||
| def zeros_(tensor: Tensor) -> None: | |||
| """ | |||
| Fills the given ``tensor`` with scalar value `0`. | |||
| """Fills the given ``tensor`` with scalar value `0`. | |||
| :param tensor: tensor to be initialized. | |||
| Args: | |||
| tensor: tensor to be initialized. | |||
| """ | |||
| fill_(tensor, 0) | |||
| def ones_(tensor: Tensor) -> None: | |||
| """ | |||
| Fills the given ``tensor`` with the scalar value `1`. | |||
| """Fills the given ``tensor`` with the scalar value `1`. | |||
| :param tensor: tensor to be initialized. | |||
| Args: | |||
| tensor: tensor to be initialized. | |||
| """ | |||
| fill_(tensor, 1) | |||
| def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | |||
| r""" | |||
| Fills the given ``tensor`` with random value sampled from uniform distribution | |||
| r"""Fills the given ``tensor`` with random value sampled from uniform distribution | |||
| :math:`\mathcal{U}(\text{a}, \text{b})`. | |||
| :param tensor: tensor to be initialized. | |||
| :param a: lower bound of the sampling interval. | |||
| :param b: upper bound of the sampling interval. | |||
| Args: | |||
| tensor: tensor to be initialized. | |||
| a: lower bound of the sampling interval. | |||
| b: upper bound of the sampling interval. | |||
| """ | |||
| tensor._reset(uniform(size=tensor.shape, low=a, high=b).astype(tensor.dtype)) | |||
| def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||
| r""" | |||
| Fills the given ``tensor`` with random value sampled from normal distribution | |||
| r"""Fills the given ``tensor`` with random value sampled from normal distribution | |||
| :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. | |||
| :param tensor: tensor to be initialized. | |||
| :param mean: mean of the normal distribution. | |||
| :param std: standard deviation of the normal distribution. | |||
| Args: | |||
| tensor: tensor to be initialized. | |||
| mean: mean of the normal distribution. | |||
| std: standard deviation of the normal distribution. | |||
| """ | |||
| tensor._reset(normal(size=tensor.shape, mean=mean, std=std).astype(tensor.dtype)) | |||
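| These initializers overwrite the tensor in place; a short sketch: | |||
|     import numpy as np | |||
|     import megengine as mge | |||
|     import megengine.module as M | |||
|     w = mge.tensor(np.empty((2, 3), dtype="float32")) | |||
|     M.init.fill_(w, 0.5)                    # every entry becomes 0.5 | |||
|     M.init.uniform_(w, a=-0.1, b=0.1)       # samples from U(-0.1, 0.1) | |||
|     M.init.normal_(w, mean=0.0, std=0.02)   # samples from N(0, 0.02^2) | |||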
| @@ -72,10 +72,9 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||
| def calculate_gain( | |||
| nonlinearity: str, param: Optional[Union[int, float]] = None | |||
| ) -> float: | |||
| r""" | |||
| Returns a recommended gain value (see the table below) for the given nonlinearity | |||
| r"""Returns a recommended gain value (see the table below) for the given nonlinearity | |||
| function. | |||
| ================= ==================================================== | |||
| nonlinearity gain | |||
| ================= ==================================================== | |||
| @@ -87,10 +86,10 @@ def calculate_gain( | |||
| Leaky Relu :math:`\sqrt{\frac{2}{1 + {\text{negative}_\text{slope}}^2}}` | |||
| ================= ==================================================== | |||
| :param nonlinearity: name of the non-linear function. | |||
| :param param: optional parameter for leaky_relu. Only effective when | |||
| ``nonlinearity`` is "leaky_relu". | |||
| Args: | |||
| nonlinearity: name of the non-linear function. | |||
| param: optional parameter for leaky_relu. Only effective when | |||
| ``nonlinearity`` is "leaky_relu". | |||
| """ | |||
| linear_fns = [ | |||
| "linear", | |||
| @@ -124,11 +123,11 @@ def calculate_gain( | |||
| def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | |||
| """ | |||
| Calculates fan_in / fan_out value for given weight tensor. This function assumes | |||
| r"""Calculates fan_in / fan_out value for given weight tensor. This function assumes | |||
| input tensor is stored in ``NCHW`` format. | |||
| :param tensor: weight tensor in ``NCHW`` format. | |||
| Args: | |||
| tensor: weight tensor in ``NCHW`` format. | |||
| """ | |||
| shape = tensor.shape | |||
| ndim = len(shape) | |||
| @@ -153,14 +152,14 @@ def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | |||
| def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||
| """ | |||
| Calculates fan_in / fan_out value for given weight tensor, depending on given | |||
| r"""Calculates fan_in / fan_out value for given weight tensor, depending on given | |||
| ``mode``. | |||
| See :func:`calculate_fan_in_and_fan_out` for details. | |||
| :param tensor: weight tensor in ``NCHW`` format. | |||
| :param mode: "fan_in" or "fan_out". | |||
| Args: | |||
| tensor: weight tensor in ``NCHW`` format. | |||
| mode: fan_in" or "fan_out". | |||
| """ | |||
| mode = mode.lower() | |||
| valid_modes = ["fan_in", "fan_out"] | |||
| @@ -174,19 +173,20 @@ def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||
| def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
| r""" | |||
| Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
| r"""Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
| where | |||
| .. math:: | |||
| a = \text{gain} \times \sqrt{\frac{6}{\text{fan_in} + \text{fan_out}}} | |||
| a = \text{gain} \times \sqrt{\frac{6}{\text{fan_in} + \text{fan_out}}} | |||
| Also known as Glorot initialization. Detailed information can be retrieved from | |||
| `Understanding the difficulty of training deep feedforward neural networks` - | |||
| Glorot, X. & Bengio, Y. (2010). | |||
| :param tensor: tensor to be initialized. | |||
| :param gain: scaling factor for :math:`a`. | |||
| Args: | |||
| tensor: tensor to be initialized. | |||
| gain: scaling factor for :math:`a`. | |||
| """ | |||
| fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) | |||
| std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) | |||
| @@ -195,19 +195,20 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
| def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
| r""" | |||
| Fills tensor with random values sampled from | |||
| r"""Fills tensor with random values sampled from | |||
| :math:`\mathcal{N}(0, \text{std}^2)` where | |||
| .. math:: | |||
| \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan_in} + \text{fan_out}}} | |||
| \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan_in} + \text{fan_out}}} | |||
| Also known as Glorot initialization. Detailed information can be retrieved from | |||
| `Understanding the difficulty of training deep feedforward neural networks` - | |||
| Glorot, X. & Bengio, Y. (2010). | |||
| :param tensor: tensor to be initialized. | |||
| :param gain: scaling factor for :math:`std`. | |||
| Args: | |||
| tensor: tensor to be initialized. | |||
| gain: scaling factor for :math:`std`. | |||
| """ | |||
| fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) | |||
| std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) | |||
| @@ -217,25 +218,26 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
| def msra_uniform_( | |||
| tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | |||
| ) -> None: | |||
| r""" | |||
| Fills tensor with random values sampled from | |||
| r"""Fills tensor with random values sampled from | |||
| :math:`\mathcal{U}(-\text{bound}, \text{bound})` where | |||
| .. math:: | |||
| \text{bound} = \sqrt{\frac{6}{(1 + a^2) \times \text{fan_in}}} | |||
| \text{bound} = \sqrt{\frac{6}{(1 + a^2) \times \text{fan_in}}} | |||
| Detailed information can be retrieved from | |||
| `Delving deep into rectifiers: Surpassing human-level performance on ImageNet | |||
| classification` | |||
| :param tensor: tensor to be initialized. | |||
| :param a: optional parameter for calculating gain for leaky_relu. See | |||
| :func:`calculate_gain` for details. | |||
| :param mode: "fan_in" or "fan_out", used to calculate :math:`gain`, the | |||
| scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for | |||
| details. | |||
| :param nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||
| See :func:`calculate_gain` for details. | |||
| Args: | |||
| tensor: tensor to be initialized. | |||
| a: optional parameter for calculating gain for leaky_relu. See | |||
| :func:`calculate_gain` for details. | |||
| mode: fan_in" or "fan_out", used to calculate :math:`gain`, the | |||
| scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for | |||
| details. | |||
| nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||
| See :func:`calculate_gain` for details. | |||
| """ | |||
| fan = calculate_correct_fan(tensor, mode) | |||
| gain = calculate_gain(nonlinearity, a) | |||
| @@ -247,25 +249,26 @@ def msra_uniform_( | |||
| def msra_normal_( | |||
| tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | |||
| ) -> None: | |||
| r""" | |||
| Fills tensor with random values sampled from | |||
| r"""Fills tensor with random values sampled from | |||
| :math:`\mathcal{N}(0, \text{std}^2)` where | |||
| .. math:: | |||
| \text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan_in}}} | |||
| \text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan_in}}} | |||
| Detailed information can be retrieved from | |||
| `Delving deep into rectifiers: Surpassing human-level performance on ImageNet | |||
| classification` | |||
| :param tensor: tensor to be initialized | |||
| :param a: optional parameter for calculating gain for leaky_relu. See | |||
| :func:`calculate_gain` for details. | |||
| :param mode: "fan_in" or "fan_out", used to calculate :math:`gain`, the | |||
| scaling factor for :math:`gain`. See :func:`calculate_fan_in_and_fan_out` for | |||
| details. | |||
| :param nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||
| See :func:`calculate_gain` for details. | |||
| Args: | |||
| tensor: tensor to be initialized | |||
| a: optional parameter for calculating gain for leaky_relu. See | |||
| :func:`calculate_gain` for details. | |||
| mode: fan_in" or "fan_out", used to calculate :math:`gain`, the | |||
| scaling factor for :math:`gain`. See :func:`calculate_fan_in_and_fan_out` for | |||
| details. | |||
| nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||
| See :func:`calculate_gain` for details. | |||
| """ | |||
| fan = calculate_correct_fan(tensor, mode) | |||
| gain = calculate_gain(nonlinearity, a) | |||
| @@ -14,8 +14,7 @@ from .module import Module | |||
| class Linear(Module): | |||
| r""" | |||
| Applies a linear transformation to the input. For instance, if input | |||
| r"""Applies a linear transformation to the input. For instance, if input | |||
| is x, then output y is: | |||
| .. math:: | |||
| @@ -24,30 +23,31 @@ class Linear(Module): | |||
| where :math:`y_i= \sum_j W_{ij} x_j + b_i` | |||
| :param in_features: size of each input sample. | |||
| :param out_features: size of each output sample. | |||
| :param bias: if it's ``False``, the layer will not learn an additional ``bias``. | |||
| Default: ``True`` | |||
| Args: | |||
| in_features: size of each input sample. | |||
| out_features: size of each output sample. | |||
| bias: if it's ``False``, the layer will not learn an additional ``bias``. | |||
| Default: ``True`` | |||
| Examples: | |||
| .. testcode:: | |||
| Examples: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| .. testcode:: | |||
| m = M.Linear(in_features=3, out_features=1) | |||
| inp = mge.tensor(np.arange(0, 6).astype("float32").reshape(2, 3)) | |||
| oup = m(inp) | |||
| print(oup.numpy().shape) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| Outputs: | |||
| m = M.Linear(in_features=3, out_features=1) | |||
| inp = mge.tensor(np.arange(0, 6).astype("float32").reshape(2, 3)) | |||
| oup = m(inp) | |||
| print(oup.numpy().shape) | |||
| .. testoutput:: | |||
| Outputs: | |||
| (2, 1) | |||
| .. testoutput:: | |||
| (2, 1) | |||
| """ | |||
| def __init__( | |||
| @@ -84,15 +84,14 @@ def _get_XNorm_typeclass(): | |||
| class Module(metaclass=ABCMeta): | |||
| """ | |||
| Base Module class. | |||
| r"""Base Module class. | |||
| Args: | |||
| name: module's name, can be initialized by the ``kwargs`` parameter | |||
| of child class. | |||
| """ | |||
| def __init__(self, name=None): | |||
| """ | |||
| :param name: module's name, can be initialized by the ``kwargs`` parameter | |||
| of child class. | |||
| """ | |||
| self._modules = [] | |||
| if name is not None: | |||
| @@ -118,18 +117,19 @@ class Module(metaclass=ABCMeta): | |||
| pass | |||
| def register_forward_pre_hook(self, hook: Callable) -> HookHandler: | |||
| """ | |||
| Registers a hook to handle forward inputs. `hook` should be a function. | |||
| """Registers a hook to handle forward inputs. `hook` should be a function. | |||
| :param hook: a function that receives `module` and `inputs`, then returns | |||
| a modified `inputs` or `None`. | |||
| :return: a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | |||
| Args: | |||
| hook: a function that receives `module` and `inputs`, then returns | |||
| a modified `inputs` or `None`. | |||
| Returns: | |||
| a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | |||
| """ | |||
| return HookHandler(self._forward_pre_hooks, hook) | |||
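| A minimal sketch of a pre-hook that rewrites the inputs before ``forward`` runs: | |||
|     import megengine.module as M | |||
|     m = M.Linear(4, 2) | |||
|     def scale_inputs(module, inputs): | |||
|         # returning a tuple replaces the inputs; returning None keeps them unchanged | |||
|         return tuple(x * 2 for x in inputs) | |||
|     handle = m.register_forward_pre_hook(scale_inputs) | |||
|     # ... run the module ... | |||
|     handle.remove()  # detach the hook once it is no longer needed | |||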
| def register_forward_hook(self, hook: Callable) -> HookHandler: | |||
| """ | |||
| Registers a hook to handle forward results. `hook` should be a function that | |||
| """Registers a hook to handle forward results. `hook` should be a function that | |||
| receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. | |||
| This method return a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | |||
| @@ -164,19 +164,19 @@ class Module(metaclass=ABCMeta): | |||
| predicate: Callable[[Any], bool] = lambda _: True, | |||
| seen: Optional[Set[int]] = None | |||
| ) -> Union[Iterable[Any], Iterable[Tuple[str, Any]]]: | |||
| """ | |||
| Scans the module object and returns an iterable for the :class:`~.Tensor` | |||
| """Scans the module object and returns an iterable for the :class:`~.Tensor` | |||
| and :class:`~.Module` attributes that agree with the ``predicate``. For multiple | |||
| calls of this function with same arguments, the order of objects within the | |||
| returned iterable is guaranteed to be identical, as long as all the involved | |||
| module objects' ``__dict__`` does not change throughout those calls. | |||
| :param recursive: whether to recursively scan all the submodules. | |||
| :param with_key: whether to yield keys along with yielded objects. | |||
| :param with_parent: whether to yield ``self`` along with yielded objects. | |||
| :param prefix: prefix appended to the yielded keys. | |||
| :param predicate: the predicate function applied to scanned objects. | |||
| :param seen: a set that records which modules have already been traversed. | |||
| Args: | |||
| recursive: whether to recursively scan all the submodules. | |||
| with_key: whether to yield keys along with yielded objects. | |||
| with_parent: whether to yield ``self`` along with yielded objects. | |||
| prefix: prefix appended to the yielded keys. | |||
| predicate: the predicate function applied to scanned objects. | |||
| seen: a set that records which modules have already been traversed. | |||
| """ | |||
| if seen is None: | |||
| seen = set([id(self)]) | |||
| @@ -212,12 +212,12 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]: | |||
| r""" | |||
| Returns an iterable for the :class:`~.Parameter` of the module. | |||
| r"""Returns an iterable for the :class:`~.Parameter` of the module. | |||
| :param recursive: If ``True``, returns all :class:`~.Parameter` within this | |||
| module, else only returns :class:`~.Parameter` that are direct attributes | |||
| of this module. | |||
| Args: | |||
| recursive: If ``True``, returns all :class:`~.Parameter` within this | |||
| module, else only returns :class:`~.Parameter` that are direct attributes | |||
| of this module. | |||
| """ | |||
| if "requires_grad" in kwargs: | |||
| @@ -237,14 +237,14 @@ class Module(metaclass=ABCMeta): | |||
| def named_parameters( | |||
| self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | |||
| ) -> Iterable[Tuple[str, Parameter]]: | |||
| """ | |||
| Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||
| r"""Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||
| ``key`` is the dotted path from this module to the :class:`~.Parameter`. | |||
| :param prefix: prefix prepended to the keys. | |||
| :param recursive: if ``True``, returns all :class:`~.Parameter` within this | |||
| module, else only returns :class:`~.Parameter` that are direct attributes | |||
| of this module. | |||
| Args: | |||
| prefix: prefix prepended to the keys. | |||
| recursive: if ``True``, returns all :class:`~.Parameter` within this | |||
| module, else only returns :class:`~.Parameter` that are direct attributes | |||
| of this module. | |||
| """ | |||
| if "requires_grad" in kwargs: | |||
| @@ -266,14 +266,13 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]: | |||
| """ | |||
| Returns an iterable for the buffers of the module. | |||
| r"""Returns an iterable for the buffers of the module. | |||
| Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | |||
| :param recursive: if ``True``, returns all buffers within this | |||
| module, else only returns buffers that are direct attributes | |||
| of this module. | |||
| Args: | |||
| recursive: if ``True``, returns all buffers within this | |||
| module, else only returns buffers that are direct attributes of this module. | |||
| """ | |||
| yield from self._flatten( | |||
| with_key=False, predicate=_is_buffer, recursive=recursive, **kwargs | |||
| @@ -282,16 +281,17 @@ class Module(metaclass=ABCMeta): | |||
| def named_buffers( | |||
| self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | |||
| ) -> Iterable[Tuple[str, Tensor]]: | |||
| """ | |||
| Returns an iterable for key buffer pairs of the module, where | |||
| r"""Returns an iterable for key buffer pairs of the module, where | |||
| ``key`` is the dotted path from this module to the buffer. | |||
| Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | |||
| :param prefix: prefix prepended to the keys. | |||
| :param recursive: if ``True``, returns all buffers within this | |||
| module, else only returns buffers that are direct attributes | |||
| of this module. | |||
| Args: | |||
| prefix: prefix prepended to the keys. | |||
| recursive: if ``True``, returns all buffers within this | |||
| module, else only returns buffers that are direct attributes | |||
| of this module. | |||
| """ | |||
| yield from self._flatten( | |||
| with_key=True, | |||
| @@ -302,8 +302,7 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def children(self, **kwargs) -> "Iterable[Module]": | |||
| """ | |||
| Returns an iterable for all the submodules that are direct attributes of this | |||
| r"""Returns an iterable for all the submodules that are direct attributes of this | |||
| module. | |||
| """ | |||
| yield from self._flatten( | |||
| @@ -311,8 +310,7 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def named_children(self, **kwargs) -> "Iterable[Tuple[str, Module]]": | |||
| """ | |||
| Returns an iterable of key-submodule pairs for all the submodules that are | |||
| r"""Returns an iterable of key-submodule pairs for all the submodules that are | |||
| direct attributes of this module, where 'key' is the attribute name of | |||
| submodules. | |||
| """ | |||
| @@ -321,9 +319,7 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def modules(self, **kwargs) -> "Iterable[Module]": | |||
| """ | |||
| Returns an iterable for all the modules within this module, including itself. | |||
| """ | |||
| r"""Returns an iterable for all the modules within this module, including itself.""" | |||
| if "with_parent" in kwargs and kwargs["with_parent"]: | |||
| yield self, None | |||
| else: | |||
| @@ -333,12 +329,12 @@ class Module(metaclass=ABCMeta): | |||
| def named_modules( | |||
| self, prefix: Optional[str] = None, **kwargs | |||
| ) -> "Iterable[Tuple[str, Module]]": | |||
| """ | |||
| Returns an iterable of key-module pairs for all the modules within this | |||
| r"""Returns an iterable of key-module pairs for all the modules within this | |||
| module, including itself, where 'key' is the dotted path from this module to the | |||
| submodules. | |||
| :param prefix: prefix prepended to the path. | |||
| Args: | |||
| prefix: prefix prepended to the path. | |||
| """ | |||
| if "with_parent" in kwargs and kwargs["with_parent"]: | |||
| yield ("" if prefix is None else prefix), self, None | |||
| @@ -349,33 +345,31 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def apply(self, fn: "Callable[[Module], Any]") -> None: | |||
| """ | |||
| Applies function ``fn`` to all the modules within this module, including | |||
| r"""Applies function ``fn`` to all the modules within this module, including | |||
| itself. | |||
| :param fn: the function to be applied on modules. | |||
| Args: | |||
| fn: the function to be applied on modules. | |||
| """ | |||
| for it in self.modules(): | |||
| fn(it) | |||
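| A common pattern is re-initializing weights over the whole module tree, assuming ``net`` is any :class:`~.Module`: | |||
|     import megengine.module as M | |||
|     def init_weights(m): | |||
|         if isinstance(m, M.Linear): | |||
|             M.init.msra_uniform_(m.weight) | |||
|             if m.bias is not None: | |||
|                 M.init.zeros_(m.bias) | |||
|     net.apply(init_weights) | |||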
| @deprecated(version="1.0") | |||
| def zero_grad(self) -> None: | |||
| """ | |||
| Sets all parameters' grads to zero | |||
| """ | |||
| r"""Sets all parameters' grads to zero""" | |||
| for param in self.parameters(): | |||
| if param.grad is not None: | |||
| param.grad.reset_zero() | |||
| def train(self, mode: bool = True, recursive: bool = True) -> None: | |||
| """ | |||
| Sets training mode of all the modules within this module (including itself) to | |||
| r"""Sets training mode of all the modules within this module (including itself) to | |||
| ``mode``. This effectively sets the ``training`` attributes of those modules | |||
| to ``mode``, but only has effect on certain modules (e.g. | |||
| :class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) | |||
| :param mode: the training mode to be set on modules. | |||
| :param recursive: whether to recursively call submodules' ``train()``. | |||
| Args: | |||
| mode: the training mode to be set on modules. | |||
| recursive: whether to recursively call submodules' ``train()``. | |||
| """ | |||
| if not recursive: | |||
| self.training = mode | |||
| @@ -387,15 +381,13 @@ class Module(metaclass=ABCMeta): | |||
| self.apply(fn) | |||
| def eval(self) -> None: | |||
| """ | |||
| Sets training mode of all the modules within this module (including itself) to | |||
| r"""Sets training mode of all the modules within this module (including itself) to | |||
| ``False``. See :meth:`~.Module.train` for details. | |||
| """ | |||
| self.train(False) | |||
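| The usual round trip between the two modes, assuming ``model`` contains e.g. :class:`~.BatchNorm2d` or :class:`~.Dropout`: | |||
|     model.eval()   # inference: batch norm uses running stats, dropout becomes identity | |||
|     # ... run validation ... | |||
|     model.train()  # restore training behaviour for all submodules | |||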
| def disable_quantize(self, value=True): | |||
| r""" | |||
| Sets ``module``'s ``quantize_disabled`` attribute and return ``module``. | |||
| r"""Sets ``module``'s ``quantize_disabled`` attribute and return ``module``. | |||
| Could be used as a decorator. | |||
| """ | |||
| @@ -408,8 +400,7 @@ class Module(metaclass=ABCMeta): | |||
| def replace_param( | |||
| self, params: dict, start_pos: int, seen: Optional[Set[int]] = None | |||
| ): | |||
| """ | |||
| Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||
| r"""Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||
| speed up multi-machine training. | |||
| """ | |||
| offset = 0 | |||
| @@ -447,9 +438,7 @@ class Module(metaclass=ABCMeta): | |||
| return rst | |||
| def _state_dict(self, rst=None, prefix="", keep_var=False): | |||
| r""" | |||
| Returns a dictionary containing whole states of the module. | |||
| """ | |||
| r"""Returns a dictionary containing whole states of the module.""" | |||
| def is_state(obj): | |||
| return _is_parameter(obj) or _is_buffer(obj) | |||
| @@ -479,8 +468,7 @@ class Module(metaclass=ABCMeta): | |||
| state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], | |||
| strict=True, | |||
| ): | |||
| r""" | |||
| Loads a given dictionary created by :func:`state_dict` into this module. | |||
| r"""Loads a given dictionary created by :func:`state_dict` into this module. | |||
| If ``strict`` is ``True``, the keys of ``state_dict`` must exactly match the keys | |||
| returned by this module's :func:`state_dict`. | |||
| @@ -515,8 +503,7 @@ class Module(metaclass=ABCMeta): | |||
| if 'bias' in k: | |||
| M.init.zeros_(v) | |||
| if 'conv' in k: | |||
| return v.numpy() * (np.abs(v.numpy()) > 1e-3).astype("float32") | |||
| model.load_state_dict(reinit_and_pruning, strict=False) | |||
| """ | |||
| unused = [] | |||
| if isinstance(state_dict, dict): | |||
| @@ -558,8 +545,7 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def _load_state_dict_with_closure(self, closure): | |||
| """ | |||
| Advanced state_dict loading through a callable ``closure`` whose signature is | |||
| r"""Advanced state_dict loading through a callable ``closure`` whose signature is | |||
| ``closure(key: str, var: Tensor) -> Union[np.ndarray, None]`` | |||
| """ | |||
| XNorm_typeclass = _get_XNorm_typeclass() | |||
| @@ -642,9 +628,7 @@ class Module(metaclass=ABCMeta): | |||
| super().__delattr__(name) | |||
| def _module_info_string(self) -> str: | |||
| r""" | |||
| Set the extra representation of the module. | |||
| """ | |||
| r"""Set the extra representation of the module.""" | |||
| return "" | |||
| def __repr__(self): | |||
| @@ -15,8 +15,7 @@ from .module import Module | |||
| class GroupNorm(Module): | |||
| """ | |||
| Simple implementation of GroupNorm. Only supports 4d tensors now. | |||
| """Simple implementation of GroupNorm. Only supports 4d tensors now. | |||
| Reference: https://arxiv.org/pdf/1803.08494.pdf. | |||
| """ | |||
| @@ -64,8 +63,7 @@ class GroupNorm(Module): | |||
| class InstanceNorm(Module): | |||
| """ | |||
| Simple implementation of InstanceNorm. Only supports 4d tensors now. | |||
| """Simple implementation of InstanceNorm. Only supports 4d tensors now. | |||
| Reference: https://arxiv.org/abs/1607.08022. | |||
| Note that InstanceNorm is equivalent to GroupNorm with num_groups=num_channels. | |||
| """ | |||
| @@ -108,8 +106,7 @@ class InstanceNorm(Module): | |||
| class LayerNorm(Module): | |||
| """ | |||
| Simple implementation of LayerNorm. Supports tensors of any shape as input. | |||
| """Simple implementation of LayerNorm. Supports tensors of any shape as input. | |||
| Reference: https://arxiv.org/pdf/1803.08494.pdf. | |||
| """ | |||
| @@ -37,14 +37,14 @@ class _PoolNd(Module): | |||
| class MaxPool2d(_PoolNd): | |||
| r""" | |||
| Applies a 2D max pooling over an input. | |||
| r"""Applies a 2D max pooling over an input. | |||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||
| :attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | |||
| the size :math:`(N, C, H_{out}, W_{out})` through a process described as: | |||
| .. math:: | |||
| \begin{aligned} | |||
| out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} | |||
| \text{input}(N_i, C_j, \text{stride[0]} \times h + m, | |||
| @@ -54,30 +54,30 @@ class MaxPool2d(_PoolNd): | |||
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on | |||
| both sides for :attr:`padding` number of points. | |||
| :param kernel_size: the size of the window to take a max over. | |||
| :param stride: the stride of the window. Default value is kernel_size. | |||
| :param padding: implicit zero padding to be added on both sides. | |||
| Args: | |||
| kernel_size: the size of the window to take a max over. | |||
| stride: the stride of the window. Default value is kernel_size. | |||
| padding: implicit zero padding to be added on both sides. | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| .. testcode:: | |||
| m = M.MaxPool2d(kernel_size=3, stride=1, padding=0) | |||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy()) | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| Outputs: | |||
| m = M.MaxPool2d(kernel_size=3, stride=1, padding=0) | |||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[[10. 11.] | |||
| [14. 15.]]]] | |||
| .. testoutput:: | |||
| [[[[10. 11.] | |||
| [14. 15.]]]] | |||
| """ | |||
| def forward(self, inp): | |||
| @@ -85,8 +85,7 @@ class MaxPool2d(_PoolNd): | |||
| class AvgPool2d(_PoolNd): | |||
| r""" | |||
| Applies a 2D average pooling over an input. | |||
| r"""Applies a 2D average pooling over an input. | |||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||
| :attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | |||
| @@ -100,33 +99,13 @@ class AvgPool2d(_PoolNd): | |||
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on | |||
| both sides for :attr:`padding` number of points. | |||
| :param kernel_size: the size of the window. | |||
| :param stride: the stride of the window. Default value is kernel_size. | |||
| :param padding: implicit zero padding to be added on both sides. | |||
| :param mode: whether to count padding values. "average" mode will do counting and | |||
| "average_count_exclude_padding" mode won't do counting. | |||
| Default: "average_count_exclude_padding" | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| m = M.AvgPool2d(kernel_size=3, stride=1, padding=0) | |||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| [[[[ 5. 6.] | |||
| [ 9. 10.]]]] | |||
| Args: | |||
| kernel_size: the size of the window. | |||
| stride: the stride of the window. Default value is kernel_size. | |||
| padding: implicit zero padding to be added on both sides. | |||
| mode: whether to count padding values. "average" mode includes padding values | |||
| in the divisor; "average_count_exclude_padding" mode does not. | |||
| Default: "average_count_exclude_padding" | |||
| """ | |||
| def __init__( | |||
| @@ -10,9 +10,7 @@ from .module import QATModule | |||
| class BatchMatMulActivation(Float.BatchMatMulActivation, QATModule): | |||
| r""" | |||
| A :class:`~.QATModule` :class:`~.module.BatchMatMulActivation` with QAT support. | |||
| """ | |||
| r"""A :class:`~.QATModule` :class:`~.module.BatchMatMulActivation` with QAT support.""" | |||
| def forward(self, inp): | |||
| w_qat = self.apply_quant_weight(self.weight) | |||
| @@ -13,8 +13,7 @@ from .module import QATModule | |||
| class Concat(Float.Concat, QATModule): | |||
| r""" | |||
| A :class:`~.QATModule` to do functional :func:`~.concat` with QAT support. | |||
| r"""A :class:`~.QATModule` to do functional :func:`~.concat` with QAT support. | |||
| Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
| """ | |||
| @@ -23,8 +22,4 @@ class Concat(Float.Concat, QATModule): | |||
| @classmethod | |||
| def from_float_module(cls, float_module): | |||
| r""" | |||
| Return a :class:`~.QATModule` instance converted from | |||
| a float :class:`~.Module` instance. | |||
| """ | |||
| return cls(name=float_module.name) | |||
| @@ -11,8 +11,7 @@ from .module import QATModule | |||
| class Conv2d(Float.Conv2d, QATModule): | |||
| r""" | |||
| A :class:`~.QATModule` :class:`~.module.Conv2d` with QAT support. | |||
| r"""A :class:`~.QATModule` :class:`~.module.Conv2d` with QAT support. | |||
| Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
| """ | |||
| @@ -50,8 +49,7 @@ class Conv2d(Float.Conv2d, QATModule): | |||
| class ConvRelu2d(Conv2d): | |||
| r""" | |||
| A :class:`~.QATModule` include :class:`~.module.Conv2d` and :func:`~.relu` with QAT support. | |||
| r"""A :class:`~.QATModule` include :class:`~.module.Conv2d` and :func:`~.relu` with QAT support. | |||
| Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
| """ | |||
| @@ -60,8 +58,7 @@ class ConvRelu2d(Conv2d): | |||
| class ConvTranspose2d(Float.ConvTranspose2d, QATModule): | |||
| r""" | |||
| A :class:`~.QATModule` :class:`~.module.ConvTranspose2d` with QAT support. | |||
| r"""A :class:`~.QATModule` :class:`~.module.ConvTranspose2d` with QAT support. | |||
| Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
| """ | |||
| @@ -136,10 +136,6 @@ class _ConvBnActivation2d(Float._ConvBnActivation2d, QATModule): | |||
| @classmethod | |||
| def from_float_module(cls, float_module: Float._ConvBnActivation2d): | |||
| r""" | |||
| Return a :class:`~.QATModule` instance converted from | |||
| a float :class:`~.Module` instance. | |||
| """ | |||
| qat_module = cls( | |||
| float_module.conv.in_channels, | |||
| float_module.conv.out_channels, | |||
| @@ -160,8 +156,7 @@ class _ConvBnActivation2d(Float._ConvBnActivation2d, QATModule): | |||
| class ConvBn2d(_ConvBnActivation2d): | |||
| r""" | |||
| A fused :class:`~.QATModule` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d` with QAT support. | |||
| r"""A fused :class:`~.QATModule` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d` with QAT support. | |||
| Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
| """ | |||
| @@ -170,8 +165,7 @@ class ConvBn2d(_ConvBnActivation2d): | |||
| class ConvBnRelu2d(_ConvBnActivation2d): | |||
| r""" | |||
| A fused :class:`~.QATModule` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu` with QAT support. | |||
| r"""A fused :class:`~.QATModule` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu` with QAT support. | |||
| Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
| """ | |||
| @@ -10,11 +10,8 @@ from .module import QATModule | |||
| class Elemwise(Float.Elemwise, QATModule): | |||
| r""" | |||
| A :class:`~.QATModule` to do :mod:`~.functional.elemwise` operator with QAT support. | |||
| r"""A :class:`~.QATModule` to do :mod:`~.functional.elemwise` operator with QAT support. | |||
| Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
| :param method: the elemwise method, see :class:`~.module.Elemwise` for detail. | |||
| """ | |||
| with_weight = False | |||
| @@ -10,15 +10,14 @@ from .module import QATModule | |||
| class Linear(Float.Linear, QATModule): | |||
| r""" | |||
| A :class:`~.QATModule` version of :class:`~.module.Linear`. | |||
| r"""A :class:`~.QATModule` version of :class:`~.module.Linear`. | |||
| Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
| :param in_features: size of each input sample. | |||
| :param out_features: size of each output sample. | |||
| :param bias: If set to ``False``, the layer will not learn an additive bias. | |||
| Default: True | |||
| Args: | |||
| in_features: size of each input sample. | |||
| out_features: size of each output sample. | |||
| bias: If set to ``False``, the layer will not learn an additive bias. | |||
| Default: True | |||
| """ | |||
| def forward(self, inp): | |||
| @@ -17,12 +17,11 @@ from ..module import Module | |||
| class QATModule(Module): | |||
| r""" | |||
| Base class of quantized-float related :class:`~.Module`, basically for QAT and Calibration. | |||
| r"""Base class of quantized-float related :class:`~.Module`, basically for QAT and Calibration. | |||
| Use :meth:`from_float_module` to generate an instance from a float :class:`~.Module`. | |||
| Or use :func:`~.quantize.quantize_qat` to do it recursively and automatically. | |||
| Can also be converted to :class:`~.QuantizedModule` for deployment using | |||
| :func:`~.quantize.quantize` further. | |||
| """ | |||
| @@ -43,8 +42,7 @@ class QATModule(Module): | |||
| return "QAT." + super().__repr__() | |||
| def set_qconfig(self, qconfig: QConfig): | |||
| r""" | |||
| Set quantization related configs with ``qconfig``, including | |||
| r"""Set quantization related configs with ``qconfig``, including | |||
| observer and fake_quant for weight and activation. | |||
| """ | |||
| @@ -96,24 +94,19 @@ class QATModule(Module): | |||
| return oup | |||
| def apply_quant_weight(self, target: Tensor): | |||
| r""" | |||
| Apply weight's observer and fake_quant from ``qconfig`` on ``target``. | |||
| """ | |||
| r"""Apply weight's observer and fake_quant from ``qconfig`` on ``target``.""" | |||
| return self._apply_fakequant_with_observer( | |||
| target, self.weight_fake_quant, self.weight_observer | |||
| ) | |||
| def apply_quant_activation(self, target: Tensor): | |||
| r""" | |||
| Apply activation's observer and fake_quant from ``qconfig`` on ``target``. | |||
| """ | |||
| r"""Apply activation's observer and fake_quant from ``qconfig`` on ``target``.""" | |||
| return self._apply_fakequant_with_observer( | |||
| target, self.act_fake_quant, self.act_observer | |||
| ) | |||
| def apply_quant_bias(self, target: Tensor, inp: Tensor, w_qat: Tensor): | |||
| r""" | |||
| Use :func:`~.fake_quant_bias` to process ``target``. Only valid when | |||
| r"""Use :func:`~.fake_quant_bias` to process ``target``. Only valid when | |||
| ``act_fake_quant`` and ``weight_fake_quant`` are both enabled. | |||
| """ | |||
| # bias should have the same dtype as activation, so act_fake_quant can also | |||
| @@ -139,33 +132,25 @@ class QATModule(Module): | |||
| return None | |||
| def get_weight_dtype(self): | |||
| r""" | |||
| Get weight's quantization dtype as the method from ``qconfig``. | |||
| """ | |||
| r"""Get weight's quantization dtype as the method from ``qconfig``.""" | |||
| return self._get_method_result( | |||
| "get_quantized_dtype", self.weight_fake_quant, self.weight_observer | |||
| ) | |||
| def get_activation_dtype(self): | |||
| r""" | |||
| Get activation's quantization dtype as the method from ``qconfig``. | |||
| """ | |||
| r"""Get activation's quantization dtype as the method from ``qconfig``.""" | |||
| return self._get_method_result( | |||
| "get_quantized_dtype", self.act_fake_quant, self.act_observer | |||
| ) | |||
| def get_weight_qparams(self): | |||
| r""" | |||
| Get weight's quantization parameters. | |||
| """ | |||
| r"""Get weight's quantization parameters.""" | |||
| return self._get_method_result( | |||
| "get_qparams", self.weight_fake_quant, self.weight_observer | |||
| ) | |||
| def get_activation_qparams(self): | |||
| r""" | |||
| Get activation's quantization parameters. | |||
| """ | |||
| r"""Get activation's quantization parameters.""" | |||
| return self._get_method_result( | |||
| "get_qparams", self.act_fake_quant, self.act_observer | |||
| ) | |||
| @@ -173,7 +158,6 @@ class QATModule(Module): | |||
| @classmethod | |||
| @abstractmethod | |||
| def from_float_module(cls, float_module: Module): | |||
| r""" | |||
| Return a :class:`~.QATModule` instance converted from | |||
| r"""Return a :class:`~.QATModule` instance converted from | |||
| a float :class:`~.Module` instance. | |||
| """ | |||
| @@ -10,8 +10,7 @@ from .module import QATModule | |||
| class QuantStub(Float.QuantStub, QATModule): | |||
| r""" | |||
| A helper :class:`~.QATModule` that simply returns the input, but will quantize | |||
| r"""A helper :class:`~.QATModule` that simply returns the input, but will quantize | |||
| the input after being converted to :class:`~.QuantizedModule`. | |||
| """ | |||
| @@ -30,8 +29,7 @@ class QuantStub(Float.QuantStub, QATModule): | |||
| class DequantStub(Float.DequantStub, QATModule): | |||
| r""" | |||
| A helper :class:`~.QATModule` that simply returns the input, but will de-quantize | |||
| r"""A helper :class:`~.QATModule` that simply returns the input, but will de-quantize | |||
| the input after being converted to :class:`~.QuantizedModule`. | |||
| """ | |||
| @@ -9,8 +9,7 @@ from .module import Module | |||
| class QuantStub(Module): | |||
| r""" | |||
| A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` | |||
| r"""A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` | |||
| version :class:`~.qat.QuantStub` using :func:`~.quantize.quantize_qat`. | |||
| """ | |||
| @@ -19,8 +18,7 @@ class QuantStub(Module): | |||
| class DequantStub(Module): | |||
| r""" | |||
| A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` | |||
| r"""A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` | |||
| version :class:`~.qat.DequantStub` using :func:`~.quantize.quantize_qat`. | |||
| """ | |||
| @@ -14,9 +14,7 @@ from .module import QuantizedModule | |||
| class Concat(QuantizedModule): | |||
| r""" | |||
| A :class:`~.QuantizedModule` to do quantized :func:`~.concat`, used for inference only. | |||
| """ | |||
| r"""A :class:`~.QuantizedModule` to do quantized :func:`~.concat`, used for inference only.""" | |||
| def __init__(self, dtype=None, **kwargs): | |||
| super().__init__(**kwargs) | |||
| @@ -75,7 +75,7 @@ class Conv2d(Float.Conv2d, QuantizedModule): | |||
| @classmethod | |||
| def from_qat_module(cls, qat_module: QAT.Conv2d): | |||
| r""" | |||
| return a :class:`~.QuantizedModule` instance converted from a | |||
| Return a :class:`~.QuantizedModule` instance converted from a | |||
| :class:`~.QATModule` instance. | |||
| """ | |||
| output_dtype = qat_module.get_activation_dtype() | |||
| @@ -119,7 +119,8 @@ class ConvTranspose2d(Float.ConvTranspose2d, QuantizedModule): | |||
| The parameters are the same as :class:`~.module.ConvTranspose2d` except ``dtype``. | |||
| :param dtype: data type of the output, should be qint8. | |||
| Args: | |||
| dtype: data type of the output, should be qint8. | |||
| """ | |||
| def __init__( | |||
| @@ -11,10 +11,7 @@ from .conv import Conv2d | |||
| class _ConvBnActivation2d(Conv2d): | |||
| r""" | |||
| Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
| The parameter is same with :class: `~.module.Conv2d`. | |||
| r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
| """ | |||
| @classmethod | |||
| @@ -12,8 +12,7 @@ from ..qat import QATModule | |||
| class QuantizedModule(Module): | |||
| r""" | |||
| Base class of quantized :class:`~.Module`, | |||
| r"""Base class of quantized :class:`~.Module`, | |||
| which should be converted from :class:`~.QATModule` and does not support training. | |||
| """ | |||
| @@ -29,6 +28,6 @@ class QuantizedModule(Module): | |||
| @abstractmethod | |||
| def from_qat_module(cls, qat_module: QATModule): | |||
| r""" | |||
| Return a :class:`~.QuantizedModule` instance converted from a | |||
| :class:`~.QATModule` instance. | |||
| Return a :class:`~.QuantizedModule` instance converted from | |||
| a :class:`~.QATModule` instance. | |||
| """ | |||
| @@ -10,8 +10,7 @@ from .module import QuantizedModule | |||
| class QuantStub(QuantizedModule): | |||
| r""" | |||
| Quantized version of :class:`~.qat.QuantStub`, | |||
| r"""Quantized version of :class:`~.qat.QuantStub`, | |||
| will convert input to quantized dtype. | |||
| """ | |||
| @@ -24,16 +23,11 @@ class QuantStub(QuantizedModule): | |||
| @classmethod | |||
| def from_qat_module(cls, qat_module: QAT.QuantStub): | |||
| r""" | |||
| Return a :class:`~.QuantizedModule` instance converted from a | |||
| :class:`~.QATModule` instance. | |||
| """ | |||
| return cls(qat_module.get_activation_dtype(), name=qat_module.name) | |||
| class DequantStub(QuantizedModule): | |||
| r""" | |||
| Quantized version of :class:`~.qat.DequantStub`, | |||
| r"""Quantized version of :class:`~.qat.DequantStub`, | |||
| will restore quantized input to float32 dtype. | |||
| """ | |||
| @@ -42,8 +36,4 @@ class DequantStub(QuantizedModule): | |||
| @classmethod | |||
| def from_qat_module(cls, qat_module: QAT.DequantStub): | |||
| r""" | |||
| Return a :class:`~.QuantizedModule` instance converted from a | |||
| :class:`~.QATModule` instance. | |||
| """ | |||
| return cls(name=qat_module.name) | |||
| @@ -12,38 +12,35 @@ from .module import Module | |||
| class Sequential(Module): | |||
| r""" | |||
| A sequential container. | |||
| r"""A sequential container. | |||
| Modules will be added to it in the order they are passed in the constructor. | |||
| Alternatively, an ordered dict of modules can also be passed in. | |||
| To make it easier to understand, here is a small example: | |||
| Examples: | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| import megengine.functional as F | |||
| from collections import OrderedDict | |||
| batch_size = 64 | |||
| data = mge.tensor(np.zeros((batch_size, 28 * 28)), dtype=np.float32) | |||
| label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | |||
| net0 = M.Sequential( | |||
| M.Linear(28 * 28, 320), | |||
| M.Linear(320, 10) | |||
| ) | |||
| pred0 = net0(data) | |||
| modules = OrderedDict() | |||
| modules["fc0"] = M.Linear(28 * 28, 320) | |||
| modules["fc1"] = M.Linear(320, 10) | |||
| net1 = M.Sequential(modules) | |||
| pred1 = net1(data) | |||
| .. testcode:: | |||
| import numpy as np | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| import megengine.functional as F | |||
| from collections import OrderedDict | |||
| batch_size = 64 | |||
| data = mge.tensor(np.zeros((batch_size, 28 * 28)), dtype=np.float32) | |||
| label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | |||
| net0 = M.Sequential( | |||
| M.Linear(28 * 28, 320), | |||
| M.Linear(320, 10) | |||
| ) | |||
| pred0 = net0(data) | |||
| modules = OrderedDict() | |||
| modules["fc0"] = M.Linear(28 * 28, 320) | |||
| modules["fc1"] = M.Linear(320, 10) | |||
| net1 = M.Sequential(modules) | |||
| pred1 = net1(data) | |||
| """ | |||
| def __init__(self, *args, **kwargs): | |||
| @@ -13,8 +13,7 @@ from .module import Module | |||
| class SlidingWindow(Module): | |||
| r""" | |||
| Apply a sliding window to input tensor and copy content in the window to | |||
| r"""Apply a sliding window to input tensor and copy content in the window to | |||
| corresponding output location. Assume input shape is :math:`(N, C, IH, IW)`, | |||
| then output shape would be :math:`(N, C, OH, OW, window_h, window_w)` where | |||
| :math:`(OH, OW)` would be computed from padding, stride, window and | |||
| @@ -26,46 +25,45 @@ class SlidingWindow(Module): | |||
| \text{where } & ih=-pad_h+oh \times stride_h + (wh-1) \times (dilation_h-1) \\ | |||
| & iw=-pad_w+ow \times stride_w + (ww-1) \times (dilation_w-1) | |||
| :param kernel_size: the size of the window to take a max over. | |||
| :param padding: implicit zero padding to be added on both sides. Default: 0 | |||
| :param stride: the stride of the window. Default: 1 | |||
| :param dilation: the dilation of the window. Default: 1 | |||
| Args: | |||
| kernel_size: the size of the window to take a max over. | |||
| padding: implicit zero padding to be added on both sides. Default: 0 | |||
| stride: the stride of the window. Default: 1 | |||
| dilation: the dilation of the window. Default: 1 | |||
| Example: | |||
| .. testcode:: | |||
| from megengine import tensor | |||
| import megengine.module as M | |||
| import numpy as np | |||
| .. testcode:: | |||
| inp = tensor(np.arange(30).reshape(1,1,5,6)) | |||
| op = M.SlidingWindow(kernel_size=3, padding=1, stride=2, dilation=2) | |||
| out = op(inp) | |||
| print(out.numpy()) | |||
| from megengine import tensor | |||
| import megengine.module as M | |||
| import numpy as np | |||
| Outputs: | |||
| inp = tensor(np.arange(30).reshape(1,1,5,6)) | |||
| op = M.SlidingWindow(kernel_size=3, padding=1, stride=2, dilation=2) | |||
| out = op(inp) | |||
| print(out.numpy()) | |||
| .. testoutput:: | |||
| Outputs: | |||
| [[[[[[ 0 0 0] | |||
| [ 0 7 9] | |||
| [ 0 19 21]] | |||
| .. testoutput:: | |||
| [[ 0 0 0] | |||
| [ 7 9 11] | |||
| [19 21 23]]] | |||
| [[[[[[ 0 0 0] | |||
| [ 0 7 9] | |||
| [ 0 19 21]] | |||
| [[ 0 0 0] | |||
| [ 7 9 11] | |||
| [19 21 23]]] | |||
| [[[ 0 7 9] | |||
| [ 0 19 21] | |||
| [ 0 0 0]] | |||
| [[ 7 9 11] | |||
| [19 21 23] | |||
| [ 0 0 0]]]]]] | |||
| [[[ 0 7 9] | |||
| [ 0 19 21] | |||
| [ 0 0 0]] | |||
| [[ 7 9 11] | |||
| [19 21 23] | |||
| [ 0 0 0]]]]]] | |||
| """ | |||
| def __init__( | |||
| @@ -89,21 +87,20 @@ class SlidingWindow(Module): | |||
| class SlidingWindowTranspose(Module): | |||
| r""" | |||
| Opposite opration of SlidingWindow, sum over the sliding windows on the | |||
| corresponding input location. Given an input of the size | |||
| :math:`(N, C, IH, IW, window_h, window_w)` and :attr:`output_size`, the | |||
| r"""Opposite opration of SlidingWindow, sum over the sliding windows on the | |||
| corresponding input location. Given an input of the size | |||
| :math:`(N, C, IH, IW, window_h, window_w)` and :attr:`output_size`, the | |||
| output shape would be :math:`(N, C, output\_size_{h}, output\_size_{w})` and the | |||
| arguments must satisfy | |||
| .. math:: | |||
| \text{IH} = \lfloor \frac{\text{output_size}_{h} + 2 * \text{padding}_{h} - | |||
| \text{IH} = \lfloor \frac{\text{output_size}_{h} + 2 * \text{padding}_{h} - | |||
| \text{dilation}_{h} * (\text{kernel_size}_{h} - 1) - 1}{\text{stride}_{h}} + 1 \rfloor | |||
| .. math:: | |||
| \text{IW} = \lfloor \frac{\text{output_size}_{w} + 2 * \text{padding}_{w} - | |||
| \text{IW} = \lfloor \frac{\text{output_size}_{w} + 2 * \text{padding}_{w} - | |||
| \text{dilation}_{w} * (\text{kernel_size}_{w} - 1) - 1}{\text{stride}_{w}} + 1 \rfloor | |||
| For each output location, we have: | |||
| .. math:: | |||
| @@ -113,36 +110,13 @@ class SlidingWindowTranspose(Module): | |||
| \text{location}(n, c, ih, iw, wh, ww) &= (n, c, oh+wh, ow+ww) \\ | |||
| \text{where } & oh=-pad_h+ih \times stride_h + (wh-1) \times (dilation_h-1) \\ | |||
| & ow=-pad_w+iw \times stride_w + (ww-1) \times (dilation_w-1) | |||
| :param output_size: the size of the output tensor. | |||
| :param kernel_size: the size of the window to take a max over. | |||
| :param padding: implicit zero padding to be added on both sides. Default: 0 | |||
| :param stride: the stride of the window. Default: 1 | |||
| :param dilation: the dilation of the window. Default: 1 | |||
| Example: | |||
| .. testcode:: | |||
| from megengine import tensor | |||
| import megengine.module as M | |||
| import numpy as np | |||
| inp = tensor(np.arange(20).reshape(1,1,4,5)) | |||
| unfold = M.SlidingWindow(kernel_size=3, padding=0, stride=1, dilation=1) | |||
| fold = M.SlidingWindowTranspose((4,5), kernel_size=3, padding=0, stride=1, dilation=1) | |||
| out = fold(unfold(inp)) | |||
| print(out.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| [[[[ 0 2 6 6 4] | |||
| [10 24 42 32 18] | |||
| [20 44 72 52 28] | |||
| [15 32 51 36 19]]]] | |||
| Args: | |||
| output_size: the size of the output tensor. | |||
| kernel_size: the size of the window to take a max over. | |||
| padding: implicit zero padding to be added on both sides. Default: 0 | |||
| stride: the stride of the window. Default: 1 | |||
| dilation: the dilation of the window. Default: 1 | |||
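| Example (carried over from the previous version of this docstring; an illustrative sketch rather than a tested block): | |||
| .. code-block:: | |||
| from megengine import tensor | |||
| import megengine.module as M | |||
| import numpy as np | |||
| # unfold then fold back: overlapping windows are summed per output location | |||
| inp = tensor(np.arange(20).reshape(1,1,4,5)) | |||
| unfold = M.SlidingWindow(kernel_size=3, padding=0, stride=1, dilation=1) | |||
| fold = M.SlidingWindowTranspose((4,5), kernel_size=3, padding=0, stride=1, dilation=1) | |||
| out = fold(unfold(inp)) | |||
| print(out.numpy()) | |||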
| """ | |||
| def __init__( | |||
| @@ -15,20 +15,20 @@ from .optimizer import Optimizer | |||
| class Adadelta(Optimizer): | |||
| r""" | |||
| Implements Adadelta algorithm. | |||
| r"""Implements Adadelta algorithm. | |||
| It has been proposed in `"ADADELTA: An Adaptive Learning Rate Method" <https://arxiv.org/abs/1212.5701>`_. | |||
| :param params: iterable of parameters to optimize or dicts defining | |||
| parameter groups. | |||
| :param lr: coefficient that scales delta before it is applied | |||
| to the parameters. Default: 1.0 | |||
| :param rho: coefficient used for computing a running average | |||
| of squared gradients. Default: 0.9 | |||
| :param eps: term added to the denominator to improve | |||
| numerical stability. Default: 1e-6 | |||
| :param weight_decay: weight decay (L2 penalty). Default: 0 | |||
| Args: | |||
| params: iterable of parameters to optimize or dicts defining | |||
| parameter groups. | |||
| lr: coefficient that scales delta before it is applied | |||
| to the parameters. Default: 1.0 | |||
| rho: coefficient used for computing a running average | |||
| of squared gradients. Default: 0.9 | |||
| eps: term added to the denominator to improve | |||
| numerical stability. Default: 1e-6 | |||
| weight_decay: weight decay (L2 penalty). Default: 0 | |||
| """ | |||
| def __init__( | |||
| @@ -15,20 +15,20 @@ from .optimizer import Optimizer | |||
| class Adagrad(Optimizer): | |||
| r""" | |||
| Implements Adagrad algorithm. | |||
| r"""Implements Adagrad algorithm. | |||
| It has been proposed in `"Adaptive Subgradient Methods for Online Learning | |||
| and Stochastic Optimization" <http://jmlr.org/papers/v12/duchi11a.html>`_. | |||
| :param params: iterable of parameters to optimize or dicts defining | |||
| parameter groups. | |||
| :param lr: coefficient that scales delta before it is applied | |||
| to the parameters. Default: 1e-2 | |||
| :param lr_decay: learning rate decay. Default: 0 | |||
| :param eps: term added to the denominator to improve | |||
| numerical stability. Default: 1e-10 | |||
| :param weight_decay: weight decay (L2 penalty). Default: 0 | |||
| Args: | |||
| params: iterable of parameters to optimize or dicts defining | |||
| parameter groups. | |||
| lr: coefficient that scales delta before it is applied | |||
| to the parameters. Default: 1e-2 | |||
| lr_decay: learning rate decay. Default: 0 | |||
| eps: term added to the denominator to improve | |||
| numerical stability. Default: 1e-10 | |||
| weight_decay: weight decay (L2 penalty). Default: 0 | |||
| """ | |||
| def __init__( | |||
| @@ -15,17 +15,16 @@ from .optimizer import Optimizer | |||
| class Adam(Optimizer): | |||
| r""" | |||
| Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||
| r"""Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||
| :param params: iterable of parameters to optimize or dicts defining | |||
| Args: | |||
| params: iterable of parameters to optimize or dicts defining | |||
| parameter groups. | |||
| :param lr: learning rate. | |||
| :param betas: coefficients used for computing running averages of gradient | |||
| and its square. Default: (0.9, 0.999) | |||
| :param eps: term added to the denominator to improve numerical stability | |||
| Default: 1e-8 | |||
| :param weight_decay: weight decay (L2 penalty). Default: 0 | |||
| lr: learning rate. | |||
| betas: coefficients used for computing running averages of gradient | |||
| and its square. Default: (0.9, 0.999) | |||
| eps: term added to the denominator to improve numerical stability. Default: 1e-8 | |||
| weight_decay: weight decay (L2 penalty). Default: 0 | |||
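| A minimal usage sketch (illustrative only; ``model``, ``gm`` and the loss computation are assumed): | |||
| .. code-block:: | |||
| opt = Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999)) | |||
| with gm: | |||
| loss = ...  # forward pass and loss computation | |||
| gm.backward(loss) | |||
| opt.step().clear_grad() | |||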
| """ | |||
| def __init__( | |||
| @@ -15,17 +15,16 @@ from .optimizer import Optimizer | |||
| class AdamW(Optimizer): | |||
| r""" | |||
| Implements AdamW algorithm proposed in `"Decoupled Weight Decay Regularization" <https://arxiv.org/abs/1711.05101>`_. | |||
| r"""Implements AdamW algorithm proposed in `"Decoupled Weight Decay Regularization" <https://arxiv.org/abs/1711.05101>`_. | |||
| :param params: iterable of parameters to optimize or dicts defining | |||
| Args: | |||
| params: iterable of parameters to optimize or dicts defining | |||
| parameter groups. | |||
| :param lr: learning rate. | |||
| :param betas: coefficients used for computing running averages of gradient | |||
| and its square. Default: (0.9, 0.999) | |||
| :param eps: term added to the denominator to improve numerical stability | |||
| Default: 1e-8 | |||
| :param weight_decay: weight decay (L2 penalty). Default: 1e-2 | |||
| lr: learning rate. | |||
| betas: coefficients used for computing running averages of gradient | |||
| and its square. Default: (0.9, 0.999) | |||
| eps: term added to the denominator to improve numerical stability. Default: 1e-8 | |||
| weight_decay: weight decay (L2 penalty). Default: 1e-2 | |||
| """ | |||
| def __init__( | |||
| @@ -23,10 +23,13 @@ def clip_grad_norm( | |||
| The norm is computed over all gradients together, as if they were | |||
| concatenated into a single vector. Gradients are modified in-place. | |||
| :param tensors: an iterable of Tensors or a single Tensor. | |||
| :param max_norm: max norm of the gradients. | |||
| :param ord: type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
| :return: total norm of the parameters (viewed as a single vector). | |||
| Args: | |||
| tensors: an iterable of Tensors or a single Tensor. | |||
| max_norm: max norm of the gradients. | |||
| ord: type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
| Returns: | |||
| total norm of the parameters (viewed as a single vector). | |||
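| A minimal usage sketch (illustrative only; ``model``, ``gm``, ``opt`` and ``loss`` are assumed; typically called after backward and before the optimizer step): | |||
| .. code-block:: | |||
| gm.backward(loss) | |||
| total_norm = clip_grad_norm(model.parameters(), max_norm=1.0, ord=2) | |||
| opt.step().clear_grad() | |||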
| """ | |||
| push_scope("clip_grad_norm") | |||
| if isinstance(tensors, Tensor): | |||
| @@ -53,14 +56,15 @@ def clip_grad_value( | |||
| ): | |||
| r"""Clips gradient of an iterable of parameters to a specified lower and | |||
| upper. Gradients are modified in-place. | |||
| The gradients are clipped in the range: | |||
| .. math:: \left[\text{lower}, \text{upper}\right] | |||
| :param tensors: an iterable of Tensors or a single Tensor. | |||
| :param lower: minimum allowed value of the gradients. | |||
| :param upper: maximum allowed value of the gradients. | |||
| Args: | |||
| tensors: an iterable of Tensors or a single Tensor. | |||
| lower: minimum allowed value of the gradients. | |||
| upper: maximum allowed value of the gradients. | |||
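| A minimal usage sketch (illustrative only; same assumed setup as above): | |||
| .. code-block:: | |||
| gm.backward(loss) | |||
| clip_grad_value(model.parameters(), lower=-0.1, upper=0.1) | |||
| opt.step().clear_grad() | |||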
| """ | |||
| push_scope("clip_grad_value") | |||
| if isinstance(tensors, Tensor): | |||
| @@ -12,11 +12,11 @@ from .optimizer import Optimizer | |||
| class LRScheduler(metaclass=ABCMeta): | |||
| r""" | |||
| Base class for all learning rate based schedulers. | |||
| r"""Base class for all learning rate based schedulers. | |||
| :param optimizer: wrapped optimizer. | |||
| :param current_epoch: the index of current epoch. Default: -1 | |||
| Args: | |||
| optimizer: wrapped optimizer. | |||
| current_epoch: the index of current epoch. Default: -1 | |||
| """ | |||
| def __init__( # pylint: disable=too-many-branches | |||
| @@ -45,25 +45,22 @@ class LRScheduler(metaclass=ABCMeta): | |||
| self.step() | |||
| def state_dict(self): | |||
| r""" | |||
| Returns the state of the scheduler as a :class:`dict`. | |||
| r"""Returns the state of the scheduler as a :class:`dict`. | |||
| It contains an entry for every variable in self.__dict__ which | |||
| is not the optimizer. | |||
| """ | |||
| raise NotImplementedError | |||
| def load_state_dict(self, state_dict): | |||
| r""" | |||
| Loads the schedulers state. | |||
| r"""Loads the schedulers state. | |||
| :type state_dict: dict | |||
| :param state_dict: scheduler state. | |||
| Args: | |||
| state_dict: scheduler state. | |||
| """ | |||
| raise NotImplementedError | |||
| def get_lr(self): | |||
| r""" Compute current learning rate for the scheduler. | |||
| """ | |||
| r"""Compute current learning rate for the scheduler.""" | |||
| raise NotImplementedError | |||
| def step(self, epoch=None): | |||
| @@ -14,16 +14,14 @@ from .optimizer import Optimizer | |||
| class MultiStepLR(LRScheduler): | |||
| r""" | |||
| Decays the learning rate of each parameter group by gamma once the | |||
| r"""Decays the learning rate of each parameter group by gamma once the | |||
| number of epochs reaches one of the milestones. | |||
| :param optimizer: wrapped optimizer. | |||
| :type milestones: list | |||
| :param milestones: list of epoch indices which should be increasing. | |||
| :type gamma: float | |||
| :param gamma: multiplicative factor of learning rate decay. Default: 0.1 | |||
| :param current_epoch: the index of current epoch. Default: -1 | |||
| Args: | |||
| optimizer: wrapped optimizer. | |||
| milestones: list of epoch indices which should be increasing. | |||
| gamma: multiplicative factor of learning rate decay. Default: 0.1 | |||
| current_epoch: the index of current epoch. Default: -1 | |||
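| A minimal usage sketch (illustrative only; ``opt`` is a wrapped optimizer and ``train_one_epoch`` is a hypothetical helper): | |||
| .. code-block:: | |||
| scheduler = MultiStepLR(opt, milestones=[30, 80], gamma=0.1) | |||
| for epoch in range(100): | |||
| train_one_epoch() | |||
| scheduler.step()  # decays lr by gamma once epoch 30 and 80 are reached | |||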
| """ | |||
| def __init__( | |||
| @@ -45,8 +43,7 @@ class MultiStepLR(LRScheduler): | |||
| super().__init__(optimizer, current_epoch) | |||
| def state_dict(self): | |||
| r""" | |||
| Returns the state of the scheduler as a :class:`dict`. | |||
| r"""Returns the state of the scheduler as a :class:`dict`. | |||
| It contains an entry for every variable in self.__dict__ which | |||
| is not the optimizer. | |||
| """ | |||
| @@ -57,11 +54,10 @@ class MultiStepLR(LRScheduler): | |||
| } | |||
| def load_state_dict(self, state_dict): | |||
| r""" | |||
| Loads the schedulers state. | |||
| r"""Loads the schedulers state. | |||
| :type state_dict: dict | |||
| :param state_dict: scheduler state. | |||
| Args: | |||
| state_dict: scheduler state. | |||
| """ | |||
| tmp_dict = {} | |||
| for key in ["milestones", "gamma", "current_epoch"]: | |||
| @@ -30,11 +30,11 @@ required = _RequiredParameter() | |||
| class Optimizer(metaclass=ABCMeta): | |||
| r""" | |||
| Base class for all optimizers. | |||
| r"""Base class for all optimizers. | |||
| :param params: specifies what Tensors should be optimized. | |||
| :param defaults: a dict of default parameters of Optimizer, like learning rate or momentum. | |||
| Args: | |||
| params: specifies what Tensors should be optimized. | |||
| defaults: a dict of default parameters of Optimizer, like learning rate or momentum. | |||
| """ | |||
| def __init__( # pylint: disable=too-many-branches | |||
| @@ -76,14 +76,13 @@ class Optimizer(metaclass=ABCMeta): | |||
| self._create_state(group) | |||
| def add_param_group(self, param_group: dict): | |||
| r""" | |||
| Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||
| r"""Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||
| This can be useful when fine tuning a pre-trained network as frozen layers can be made | |||
| trainable and added to the :class:`~megengine.optim.optimizer.Optimizer` as training progresses. | |||
| :param param_group: specifies what tensors should be optimized along with group. | |||
| Args: | |||
| param_group: specifies what tensors should be optimized along with group. | |||
| """ | |||
| assert isinstance(param_group, dict), "param group must be a dict" | |||
| @@ -143,10 +142,7 @@ class Optimizer(metaclass=ABCMeta): | |||
| return params | |||
| def step(self): | |||
| r""" | |||
| Performs a single optimization step. | |||
| """ | |||
| r"""Performs a single optimization step.""" | |||
| # set the global state `_enable_convert_inputs` to `False` to disable | |||
| # the `convert_inputs` for param updates | |||
| set_option("record_computing_path", 0) | |||
| @@ -176,9 +172,7 @@ class Optimizer(metaclass=ABCMeta): | |||
| param.grad.reset_zero() | |||
| def clear_grad(self): | |||
| r""" | |||
| Set the grad attribute to None for all parameters. | |||
| """ | |||
| r"""Set the grad attribute to None for all parameters.""" | |||
| for param_group in self.param_groups: | |||
| push_scope("clear_grad") | |||
| for param in param_group["params"]: | |||
| @@ -186,10 +180,10 @@ class Optimizer(metaclass=ABCMeta): | |||
| pop_scope("clear_grad") | |||
| def state_dict(self, keep_var=False) -> Dict: | |||
| r""" | |||
| Export the optimizer state. | |||
| r"""Export the optimizer state. | |||
| :return: optimizer state. Can be loaded by :meth:`load_state_dict`. | |||
| Returns: | |||
| optimizer state. Can be loaded by :meth:`load_state_dict`. | |||
| """ | |||
| param_groups = [] | |||
| state = dict() | |||
| @@ -217,10 +211,10 @@ class Optimizer(metaclass=ABCMeta): | |||
| return {"param_groups": param_groups, "state": state} | |||
| def load_state_dict(self, state: dict): | |||
| r""" | |||
| Loads the optimizer state. | |||
| r"""Loads the optimizer state. | |||
| :param state: optimizer state. Should be an object returned | |||
| Args: | |||
| state: optimizer state. Should be an object returned | |||
| from a call to :meth:`state_dict`. | |||
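| A minimal checkpointing sketch (illustrative only; assumes ``megengine.save``/``megengine.load`` are used for serialization): | |||
| .. code-block:: | |||
| mge.save(opt.state_dict(), "optimizer.pkl") | |||
| opt.load_state_dict(mge.load("optimizer.pkl")) | |||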
| """ | |||
| if len(self.param_groups) != len(state["param_groups"]): | |||
| @@ -15,17 +15,17 @@ from .optimizer import Optimizer | |||
| class SGD(Optimizer): | |||
| r""" | |||
| Implements stochastic gradient descent. | |||
| r"""Implements stochastic gradient descent. | |||
| Nesterov momentum is based on the formula from | |||
| `"On the importance of initialization and momentum in deep learning" <http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf>`_ . | |||
| :param params: iterable of parameters to optimize or dicts defining | |||
| Args: | |||
| params: iterable of parameters to optimize or dicts defining | |||
| parameter groups. | |||
| :param lr: learning rate. | |||
| :param momentum: momentum factor. Default: 0.0 | |||
| :param weight_decay: weight decay (L2 penalty). Default: 0.0 | |||
| lr: learning rate. | |||
| momentum: momentum factor. Default: 0.0 | |||
| weight_decay: weight decay (L2 penalty). Default: 0.0 | |||
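| A minimal construction sketch (illustrative only; ``model`` is assumed): | |||
| .. code-block:: | |||
| opt = SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4) | |||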
| """ | |||
| def __init__( | |||
| @@ -72,13 +72,13 @@ class _FakeQuantize(Module): | |||
| class TQT(_FakeQuantize, QParamsModuleMixin): | |||
| r""" | |||
| TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds | |||
| r"""TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds | |||
| for Accurate and Efficient Fixed-Point Inference of Deep Neural Networks. | |||
| :param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
| quantization dtype of input. | |||
| :param enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
| Args: | |||
| dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
| quantization dtype of input. | |||
| enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
| """ | |||
| def __init__( | |||
| @@ -104,12 +104,12 @@ class TQT(_FakeQuantize, QParamsModuleMixin): | |||
| class FakeQuantize(_FakeQuantize): | |||
| r""" | |||
| A module to do quant and dequant according to observer's scale and zero_point. | |||
| r"""A module to do quant and dequant according to observer's scale and zero_point. | |||
| :param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
| quantization dtype of input. | |||
| :param enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
| Args: | |||
| dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
| quantization dtype of input. | |||
| enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
| """ | |||
| def fake_quant_forward(self, inp, qparams: QParams = None): | |||
| @@ -122,14 +122,14 @@ class FakeQuantize(_FakeQuantize): | |||
| class LSQ(_FakeQuantize, QParamsModuleMixin): | |||
| r""" | |||
| LSQ: https://arxiv.org/pdf/1902.08153.pdf Estimating and scaling the | |||
| r"""LSQ: https://arxiv.org/pdf/1902.08153.pdf Estimating and scaling the | |||
| task loss gradient at each weight and activation layer's quantizer step size | |||
| :param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
| quantization dtype of input. | |||
| :param enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
| :param eps:a small value to avoid division by zero. Default: 1e-5 | |||
| Args: | |||
| dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
| quantization dtype of input. | |||
| enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
| eps: a small value to avoid division by zero. Default: 1e-5 | |||
| """ | |||
| def __init__( | |||
| @@ -25,11 +25,11 @@ logger = get_logger(__name__) | |||
| class Observer(Module, QParamsModuleMixin): | |||
| r""" | |||
| A base class for Observer Module. Used to record input tensor's statistics for | |||
| r"""A base class for Observer Module. Used to record input tensor's statistics for | |||
| quantization. | |||
| :param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| Args: | |||
| dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| """ | |||
| def __init__(self, dtype: Union[str, QuantDtypeMeta], **kwargs): | |||
| @@ -73,12 +73,12 @@ class Observer(Module, QParamsModuleMixin): | |||
| class MinMaxObserver(Observer): | |||
| r""" | |||
| A Observer Module records input tensor's running min and max values to calc scale. | |||
| r"""A Observer Module records input tensor's running min and max values to calc scale. | |||
| :param mode: set quantization mode. | |||
| :param eps: a initial maximum value to avoid division by zero problem. | |||
| :param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| Args: | |||
| mode: set quantization mode. | |||
| eps: an initial maximum value to avoid the division-by-zero problem. | |||
| dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| """ | |||
| def __init__( | |||
| @@ -128,12 +128,12 @@ class MinMaxObserver(Observer): | |||
| class SyncMinMaxObserver(MinMaxObserver): | |||
| r""" | |||
| A distributed version of :class:`~.MinMaxObserver`. | |||
| r"""A distributed version of :class:`~.MinMaxObserver`. | |||
| :param mode: set quantization mode. | |||
| :param eps: a initial maximum value to avoid division by zero problem. | |||
| :param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| Args: | |||
| mode: set quantization mode. | |||
| eps: an initial maximum value to avoid the division-by-zero problem. | |||
| dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| """ | |||
| def forward(self, x_orig): | |||
| @@ -151,13 +151,13 @@ class SyncMinMaxObserver(MinMaxObserver): | |||
| class ExponentialMovingAverageObserver(MinMaxObserver): | |||
| r""" | |||
| A :class:`~.MinMaxObserver` with momentum support for min/max updating. | |||
| r"""A :class:`~.MinMaxObserver` with momentum support for min/max updating. | |||
| :param momentum: momentum ratio for min/max updating. | |||
| :param mode: set quantization mode. | |||
| :param eps: a initial maximum value to avoid division by zero problem. | |||
| :param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| Args: | |||
| momentum: momentum ratio for min/max updating. | |||
| mode: set quantization mode. | |||
| eps: an initial maximum value to avoid the division-by-zero problem. | |||
| dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| """ | |||
| def __init__( | |||
| @@ -196,13 +196,13 @@ class ExponentialMovingAverageObserver(MinMaxObserver): | |||
| class SyncExponentialMovingAverageObserver(ExponentialMovingAverageObserver): | |||
| r""" | |||
| A distributed version of :class:`~.ExponentialMovingAverageObserver`. | |||
| r"""A distributed version of :class:`~.ExponentialMovingAverageObserver`. | |||
| :param momentum: momentum ratio for min/max updating. | |||
| :param mode: set quantization mode. | |||
| :param eps: a initial maximum value to avoid division by zero problem. | |||
| :param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| Args: | |||
| momentum: momentum ratio for min/max updating. | |||
| mode: set quantization mode. | |||
| eps: an initial maximum value to avoid the division-by-zero problem. | |||
| dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| """ | |||
| def forward(self, x_orig): | |||
| @@ -227,15 +227,15 @@ class SyncExponentialMovingAverageObserver(ExponentialMovingAverageObserver): | |||
| class HistogramObserver(MinMaxObserver): | |||
| r""" | |||
| A :class:`~.MinMaxObserver` using running histogram of tensor values | |||
| r"""A :class:`~.MinMaxObserver` using running histogram of tensor values | |||
| for min/max updating. Usually used for calibration quantization. | |||
| :param bins: number of bins to use for the histogram. | |||
| :param upsample_rate: which ratio to interpolate histograms in. | |||
| :param mode: set quantization mode. | |||
| :param eps: a initial maximum value to avoid division by zero problem. | |||
| :param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| Args: | |||
| bins: number of bins to use for the histogram. | |||
| upsample_rate: the ratio by which the histogram is upsampled during interpolation. | |||
| mode: set quantization mode. | |||
| eps: an initial maximum value to avoid the division-by-zero problem. | |||
| dtype: a string indicating which dtype to collect scale and zero_point of. | |||
| """ | |||
| def __init__( | |||
| @@ -256,8 +256,7 @@ class HistogramObserver(MinMaxObserver): | |||
| self.histogram = Tensor([-1] + [0.0] * (bins - 1), dtype="float32") | |||
| def _non_linear_param_search(self): | |||
| r""" | |||
| Non-linear parameter search. | |||
| r"""Non-linear parameter search. | |||
| An approximation for L2 error minimization for selecting min/max. | |||
| By selecting new min/max, we filter out outliers in input distribution. | |||
| """ | |||
| @@ -269,8 +268,7 @@ class HistogramObserver(MinMaxObserver): | |||
| bin_width = (np_max_val - np_min_val) / self.bins | |||
| def _get_norm(delta_begin, delta_end, density, norm_type): | |||
| r""" | |||
| Compute the norm of the values uniformaly distributed between | |||
| r"""Compute the norm of the values uniformaly distributed between | |||
| delta_begin and delta_end. | |||
| norm = density * (integral_{begin, end} x^2) | |||
| = density * (end^3 - begin^3) / 3 | |||
| @@ -285,8 +283,7 @@ class HistogramObserver(MinMaxObserver): | |||
| return density * norm | |||
| def _compute_quantization_error(next_start_bin, next_end_bin, norm_type): | |||
| r""" | |||
| Compute the quantization error if we use start_bin to end_bin as the | |||
| r"""Compute the quantization error if we use start_bin to end_bin as the | |||
| min and max to do the quantization. | |||
| """ | |||
| @@ -488,9 +485,7 @@ class HistogramObserver(MinMaxObserver): | |||
| class PassiveObserver(Observer): | |||
| r""" | |||
| An Observer that supports setting :attr:`scale` directly. | |||
| """ | |||
| r"""An Observer that supports setting :attr:`scale` directly.""" | |||
| def __init__(self, dtype: Union[str, QuantDtypeMeta], **kwargs): | |||
| super().__init__(dtype, **kwargs) | |||
| @@ -510,8 +505,10 @@ class PassiveObserver(Observer): | |||
| return self.qparams | |||
| def set_qparams(self, qparams: QParams): | |||
| """ | |||
| :param qparams: used to set initial scale. | |||
| r"""set the ``qparams``. | |||
| Args: | |||
| qparams: used to set initial scale. | |||
| """ | |||
| self.qparams = deepcopy(qparams) | |||
| if qparams.scale is None: | |||
| @@ -527,7 +524,5 @@ class PassiveObserver(Observer): | |||
| self.orig_scale = qparams.scale.numpy() | |||
| def forward(self, x): | |||
| r""" | |||
| Just return input because :attr:`qparams` is set by :func:`~.apply_easy_quant`. | |||
| """ | |||
| r"""Just return input because :attr:`qparams` is set by :func:`~.apply_easy_quant`.""" | |||
| return x | |||
| @@ -27,33 +27,33 @@ class QConfig( | |||
| ["weight_observer", "act_observer", "weight_fake_quant", "act_fake_quant"], | |||
| ) | |||
| ): | |||
| r""" | |||
| A config class indicating how to do quantize toward :class:`~.QATModule` 's | |||
| r"""A config class indicating how to do quantize toward :class:`~.QATModule` 's | |||
| ``activation`` and ``weight``. See :meth:`~.QATModule.set_qconfig` for detail usage. | |||
| :param weight_observer: interface to instantiate an :class:`~.Observer` indicating | |||
| how to collect scales and zero_point of wegiht. | |||
| :param act_observer: similar to ``weight_observer`` but toward activation. | |||
| :param weight_fake_quant: interface to instantiate a :class:`~.FakeQuantize` indicating | |||
| how to do fake_quant calculation. | |||
| :param act_observer: similar to ``weight_fake_quant`` but toward activation. | |||
| Args: | |||
| weight_observer: interface to instantiate an :class:`~.Observer` indicating | |||
| how to collect scales and zero_point of weight. | |||
| act_observer: similar to ``weight_observer`` but toward activation. | |||
| weight_fake_quant: interface to instantiate a :class:`~.FakeQuantize` indicating | |||
| how to do fake_quant calculation. | |||
| act_fake_quant: similar to ``weight_fake_quant`` but toward activation. | |||
| Examples: | |||
| .. code-block:: | |||
| # Default EMA QConfig for QAT. | |||
| ema_fakequant_qconfig = QConfig( | |||
| weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"), | |||
| act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8"), | |||
| weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"), | |||
| act_fake_quant=partial(FakeQuantize, dtype="qint8"), | |||
| ) | |||
| .. code-block:: | |||
| # Default EMA QConfig for QAT. | |||
| ema_fakequant_qconfig = QConfig( | |||
| weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"), | |||
| act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8"), | |||
| weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"), | |||
| act_fake_quant=partial(FakeQuantize, dtype="qint8"), | |||
| ) | |||
| Each parameter is a ``class`` rather than an instance. We recommend using ``functools.partial`` | |||
| to add initialization parameters of the ``class``, so that you don't need to provide parameters in | |||
| :meth:`~.QATModule.set_qconfig`. | |||
| Usually we choose the narrow-version dtype (like ``qint8_narrow``) for weight-related | |||
| parameters and the normal version for activation-related ones. For the result of | |||
| multiplication and addition as ``a * b + c * d``, if four variables are all -128 of | |||
| @@ -57,14 +57,14 @@ qat_modules = tuple(_qat2quantized_dict.keys()) | |||
| def quantize(module: Module, inplace: bool = True, mapping: dict = None): | |||
| r""" | |||
| Recursively convert :class:`~.QATModule` to :class:`~.QuantizedModule` | |||
| r"""Recursively convert :class:`~.QATModule` to :class:`~.QuantizedModule` | |||
| through :meth:`~.Module.apply`. | |||
| :param module: root module to do convert recursively. | |||
| :param inplace: whether to convert submodules in-place. | |||
| :param mapping: a dict indicating how to convert custom modules from QATModule to | |||
| QuantizedModule. Will be combined with internal default convert mapping dict. | |||
| Args: | |||
| module: root module to do convert recursively. | |||
| inplace: whether to convert submodules in-place. | |||
| mapping: a dict indicating how to convert custom modules from QATModule to | |||
| QuantizedModule. Will be combined with internal default convert mapping dict. | |||
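| A minimal usage sketch (illustrative only; ``qat_net`` is assumed to be a network already converted by ``quantize_qat``): | |||
| .. code-block:: | |||
| quantized_net = quantize(qat_net, inplace=False) | |||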
| """ | |||
| if not inplace: | |||
| @@ -94,16 +94,16 @@ def quantize_qat( | |||
| qconfig: QConfig = ema_fakequant_qconfig, | |||
| mapping: dict = None, | |||
| ): | |||
| r""" | |||
| Recursively convert float :class:`~.Module` to :class:`~.QATModule` | |||
| r"""Recursively convert float :class:`~.Module` to :class:`~.QATModule` | |||
| through :meth:`~.Module.apply` and set qconfig relatively. | |||
| :param module: root module to do convert recursively. | |||
| :param inplace: whether to convert submodules in-place. | |||
| :param qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
| default is ``ema_fakequant_qconfig``. | |||
| :param mapping: a dict indicating how to convert custom modules from Module to QATModule. | |||
| Will be combined with internal default convert mapping dict. | |||
| Args: | |||
| module: root module to do convert recursively. | |||
| inplace: whether to convert submodules in-place. | |||
| qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
| Default: ``ema_fakequant_qconfig``. | |||
| mapping: a dict indicating how to convert custom modules from Module to QATModule. | |||
| Will be combined with internal default convert mapping dict. | |||
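| A minimal usage sketch (illustrative only; ``net`` is an assumed float :class:`~.Module`): | |||
| .. code-block:: | |||
| qat_net = quantize_qat(net, qconfig=ema_fakequant_qconfig) | |||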
| """ | |||
| if not inplace: | |||
| @@ -133,12 +133,12 @@ def quantize_qat( | |||
| def reset_qconfig(module: Module, qconfig: QConfig, inplace: bool = True): | |||
| r""" | |||
| Reset :class:`~._FakeQuantize` and :class:`~.Observer` according to ``qconfig`` | |||
| r"""Reset :class:`~._FakeQuantize` and :class:`~.Observer` according to ``qconfig`` | |||
| :param module: root module to reset recursively. | |||
| :param qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
| :param inplace: whether to reset submodules in-place. | |||
| Args: | |||
| module: root module to reset recursively. | |||
| qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
| inplace: whether to reset submodules in-place. | |||
| """ | |||
| if not inplace: | |||
| @@ -175,19 +175,17 @@ def _propagate(module: Module, func_str: str, *args, **kargs): | |||
| def propagate_qconfig(module: QATModule, qconfig: QConfig): | |||
| r""" | |||
| Recursively set ``module``'s qconfig through :meth:`~.Module.apply`. | |||
| r"""Recursively set ``module``'s qconfig through :meth:`~.Module.apply`. | |||
| :param module: root module to traverse recursively. | |||
| :param qconfig: a instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
| Args: | |||
| module: root module to traverse recursively. | |||
| qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
| """ | |||
| _propagate(module, "set_qconfig", qconfig) | |||
| def hook_qat_module(module: Module, func: Callable): | |||
| r""" | |||
| Add hooks for all :class:`~.QATModule` submodule | |||
| """ | |||
| r"""Add hooks for all :class:`~.QATModule` submodule""" | |||
| def is_qat(mod: Module): | |||
| return isinstance(mod, QATModule) | |||
| @@ -202,15 +200,16 @@ def hook_qat_module(module: Module, func: Callable): | |||
| def apply_easy_quant( | |||
| module: Module, data: Tensor, start: float = 0.8, stop: float = 1.2, num: int = 40 | |||
| ): | |||
| r""" | |||
| Implementation of ``EasyQuant``: https://arxiv.org/pdf/2006.16669. | |||
| r"""Implementation of ``EasyQuant``: https://arxiv.org/pdf/2006.16669. | |||
| Search for optimal scales. | |||
| :param module: root module. | |||
| :param data: input tensor used to search optimal scale. | |||
| :param start: lower bound of the search interval. | |||
| :param stop: upper bound of the search interval. | |||
| :param num: number of samples to search. | |||
| Args: | |||
| module: root module. | |||
| data: input tensor used to search optimal scale. | |||
| start: lower bound of the search interval. | |||
| stop: upper bound of the search interval. | |||
| num: number of samples to search. | |||
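| A minimal usage sketch (illustrative only; ``qat_net`` and a representative calibration batch ``calib_data`` are assumed): | |||
| .. code-block:: | |||
| apply_easy_quant(qat_net, calib_data, start=0.8, stop=1.2, num=40) | |||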
| """ | |||
| batch_size = data.shape[0] | |||
| @@ -267,40 +266,40 @@ def apply_easy_quant( | |||
| def disable_fake_quant(module: Module): | |||
| r""" | |||
| Recursively disable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` | |||
| r"""Recursively disable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` | |||
| :param module: root module to do disable fake quantization recursively. | |||
| Args: | |||
| module: root module to do disable fake quantization recursively. | |||
| """ | |||
| _propagate(module, "set_fake_quant", False) | |||
| def disable_observer(module: Module): | |||
| r""" | |||
| Recursively disable ``module`` observer in QATModule through :meth:`~.Module.apply` | |||
| r"""Recursively disable ``module`` observer in QATModule through :meth:`~.Module.apply` | |||
| :param module: root module to do disable observer recursively. | |||
| Args: | |||
| module: root module to do disable observer recursively. | |||
| """ | |||
| _propagate(module, "set_observer", False) | |||
| def enable_fake_quant(module: Module): | |||
| r""" | |||
| Recursively enable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` | |||
| r"""Recursively enable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` | |||
| :param module: root module to do enable fake quantization recursively. | |||
| Args: | |||
| module: root module to do enable fake quantization recursively. | |||
| """ | |||
| _propagate(module, "set_fake_quant", True) | |||
| def enable_observer(module: Module): | |||
| r""" | |||
| Recursively enable ``module`` observer in QATModule through :meth:`~.Module.apply` | |||
| r"""Recursively enable ``module`` observer in QATModule through :meth:`~.Module.apply` | |||
| :param module: root module to do enable observer recursively. | |||
| Args: | |||
| module: root module to do enable observer recursively. | |||
| """ | |||
| _propagate(module, "set_observer", True) | |||
| @@ -25,8 +25,7 @@ from ..tensor import Tensor | |||
| class Round(Function): | |||
| """ | |||
| The functional round have no grad and can not use for quantization-aware-training. | |||
| r"""The functional round have no grad and can not use for quantization-aware-training. | |||
| We use Function and STE(Straight-Through Estimator) to implement backward propagation. | |||
| """ | |||
| @@ -68,17 +67,14 @@ def register_method_to_class(cls): | |||
| class QuantMode(Enum): | |||
| """ | |||
| Quantization mode enumerate class. | |||
| """ | |||
| r"""Quantization mode enumerate class.""" | |||
| SYMMERTIC = 1 | |||
| ASYMMERTIC = 2 | |||
| class QParams: | |||
| """ | |||
| To standardize FakeQuant, Observer and Tensor's qparams format. If custom | |||
| r"""To standardize FakeQuant, Observer and Tensor's qparams format. If custom | |||
| qparams is needed, inherit this class and add custom ``__slots__``. | |||
| """ | |||
| @@ -116,8 +112,7 @@ class QParams: | |||
| class LSQParams: | |||
| """ | |||
| To standardize LSQ's qparams format. If custom | |||
| r"""To standardize LSQ's qparams format. If custom | |||
| qparams is needed, inherit this class and add custom ``__slots__``. | |||
| """ | |||
| @@ -183,8 +178,14 @@ def create_qparams( | |||
| scale: Tensor = None, | |||
| zero_point: Tensor = None, | |||
| ): | |||
| """ | |||
| Return :class:`~.QParams` according to the mode. | |||
| r""" | |||
| Args: | |||
| mode: QuantMode: | |||
| dtype_meta: Union[str: | |||
| QuantDtypeMeta]: | |||
| scale: Tensor: | |||
| zero_point: Tensor: | |||
| """ | |||
| if isinstance(dtype_meta, str): | |||
| dtype_meta = _builtin_quant_dtypes[dtype_meta] | |||
| @@ -197,12 +198,11 @@ def create_qparams( | |||
| def fake_quant_tensor(inp: Tensor, qparams: QParams) -> Tensor: | |||
| """ | |||
| Apply fake quantization to the inp tensor. | |||
| :param inp: the input tensor which need to be faked. | |||
| :param qparams: to get mode, qmin, qmax, scale and zero_point from. | |||
| """Apply fake quantization to the inp tensor. | |||
| Args: | |||
| inp: the input tensor which needs to be faked. | |||
| qparams: to get mode, qmin, qmax, scale and zero_point from. | |||
| """ | |||
| scale = qparams.scale | |||
| if qparams.mode == QuantMode.ASYMMERTIC: | |||
| @@ -217,17 +217,16 @@ def fake_quant_tensor(inp: Tensor, qparams: QParams) -> Tensor: | |||
| def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor: | |||
| """ | |||
| Apply fake quantization to bias, with the special scale from input tensor | |||
| """Apply fake quantization to bias, with the special scale from input tensor | |||
| and weight tensor, the quantized type set to qint32 also. | |||
| :param bias: the bias tensor which need to be faked. | |||
| :param inp: the input tensor which contain the quantization parameters. | |||
| :param w_qat: the weight tensor which contain the quantization parameters. | |||
| Args: | |||
| bias: the bias tensor which needs to be faked. | |||
| inp: the input tensor which contains the quantization parameters. | |||
| w_qat: the weight tensor which contains the quantization parameters. | |||
| .. warning:: | |||
| Warning: | |||
| Only work for symmetric quantization method now. | |||
| """ | |||
| b_qat = bias | |||
| if ( | |||
| @@ -220,29 +220,29 @@ def _permutation(n: int, seed: int, device: str, handle: int, dtype: str) -> Ten | |||
| class RNG: | |||
| r""" | |||
| :class:`RNG` exposes a number of methods for generating random numbers. | |||
| r""":class:`RNG` exposes a number of methods for generating random numbers. | |||
| Args: | |||
| seed: random seed used to initialize the pseudo-random number generator. Default: None | |||
| device: the device of generated tensor. Default: None | |||
| :param seed: random seed used to initialize the pseudo-random number generator. | |||
| Default: None | |||
| :param device: the device of generated tensor. Default: None | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import megengine.random as rand | |||
| rng = rand.RNG(seed=100) | |||
| x = rng.uniform(size=(2, 2)) | |||
| print(x.numpy()) | |||
| import megengine.random as rand | |||
| rng = rand.RNG(seed=100) | |||
| x = rng.uniform(size=(2, 2)) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [[0.84811664 0.6147553 ] | |||
| [0.59429836 0.64727545]] | |||
| [[0.84811664 0.6147553 ] | |||
| [0.59429836 0.64727545]] | |||
| """ | |||
| @@ -259,32 +259,33 @@ class RNG: | |||
| def uniform( | |||
| self, low: float = 0, high: float = 1, size: Optional[Iterable[int]] = None | |||
| ): | |||
| r""" | |||
| Random variable with uniform distribution $U(0, 1)$. | |||
| r"""Random variable with uniform distribution $U(0, 1)$. | |||
| Args: | |||
| low: lower range. Default: 0 | |||
| high: upper range. Default: 1 | |||
| size: the size of output tensor. Default: None | |||
| :param low: lower range. Default: 0 | |||
| :param high: upper range. Default: 1 | |||
| :param size: the size of output tensor. Default: None | |||
| :return: the output tensor. | |||
| Returns: | |||
| the output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| x = rand.uniform(size=(2, 2)) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| x = rand.uniform(size=(2, 2)) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| [[0.91600335 0.6680226 ] | |||
| [0.2046729 0.2769141 ]] | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [[0.91600335 0.6680226 ] | |||
| [0.2046729 0.2769141 ]] | |||
| """ | |||
| _seed = self._seed() if callable(self._seed) else self._seed | |||
| return _uniform( | |||
| @@ -299,33 +300,34 @@ class RNG: | |||
| def normal( | |||
| self, mean: float = 0, std: float = 1, size: Optional[Iterable[int]] = None | |||
| ): | |||
| r""" | |||
| Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||
| r"""Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||
| :param mean: the mean or expectation of the distribution. Default: 0 | |||
| :param std: the standard deviation of the distribution (variance = :math:`\sigma ^ 2`). | |||
| Default: 1 | |||
| :param size: the size of output tensor. Default: None | |||
| :return: the output tensor. | |||
| Args: | |||
| mean: the mean or expectation of the distribution. Default: 0 | |||
| std: the standard deviation of the distribution (variance = :math:`\sigma ^ 2`). | |||
| Default: 1 | |||
| size: the size of output tensor. Default: None | |||
| Returns: | |||
| the output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| x = rand.normal(mean=0, std=1, size=(2, 2)) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| x = rand.normal(mean=0, std=1, size=(2, 2)) | |||
| print(x.numpy()) | |||
| [[-1.4010863 -0.9874344 ] | |||
| [ 0.56373274 0.79656655]] | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [[-1.4010863 -0.9874344 ] | |||
| [ 0.56373274 0.79656655]] | |||
| """ | |||
| _seed = self._seed() if callable(self._seed) else self._seed | |||
| return _normal( | |||
| @@ -343,12 +345,12 @@ class RNG: | |||
| scale: Union[Tensor, float] = 1, | |||
| size: Optional[Iterable[int]] = None, | |||
| ): | |||
| r""" | |||
| Random variable with Gamma distribution :math:`\Gamma(k, \theta)`. | |||
| r"""Random variable with Gamma distribution :math:`\Gamma(k, \theta)`. | |||
| The corresponding probability density function is | |||
| .. math:: | |||
| p(x)=x^{k-1} \frac{e^{-x / \theta}}{\theta^{k} \Gamma(k)} | |||
| \quad \text { for } x>0 \quad k, \theta>0, | |||
| @@ -357,52 +359,54 @@ class RNG: | |||
| .. math:: | |||
| \Gamma(k)=(k-1) ! \quad \text { for } \quad k>0. | |||
| :param shape: the shape parameter (sometimes designated "k") of the distribution. | |||
| Must be non-negative. | |||
| :param scale: the scale parameter (sometimes designated "theta") of the distribution. | |||
| Must be non-negative. Default: 1 | |||
| :param size: the size of output tensor. If shape and scale are scalars and given size is, e.g., | |||
| `(m, n)`, then the output shape is `(m, n)`. If shape or scale is a Tensor and given size | |||
| is, e.g., `(m, n)`, then the output shape is `(m, n) + broadcast(shape, scale).shape`. | |||
| The broadcast rules are consistent with `numpy.broadcast`. Default: None | |||
| :return: the output tensor. | |||
| Args: | |||
| shape: the shape parameter (sometimes designated "k") of the distribution. | |||
| Must be non-negative. | |||
| scale: the scale parameter (sometimes designated "theta") of the distribution. | |||
| Must be non-negative. Default: 1 | |||
| size: the size of output tensor. If shape and scale are scalars and given size is, e.g., | |||
| `(m, n)`, then the output shape is `(m, n)`. If shape or scale is a Tensor and given size | |||
| is, e.g., `(m, n)`, then the output shape is `(m, n) + broadcast(shape, scale).shape`. | |||
| The broadcast rules are consistent with `numpy.broadcast`. Default: None | |||
| Returns: | |||
| the output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| x = rand.gamma(shape=2, scale=1, size=(2, 2)) | |||
| print(x.numpy()) | |||
| x = rand.gamma(shape=2, scale=1, size=(2, 2)) | |||
| print(x.numpy()) | |||
| shape = mge.Tensor([[ 1], | |||
| [10]], dtype="float32") | |||
| scale = mge.Tensor([1,5], dtype="float32") | |||
| shape = mge.Tensor([[ 1], | |||
| [10]], dtype="float32") | |||
| scale = mge.Tensor([1,5], dtype="float32") | |||
| x = rand.gamma(shape=shape, scale=scale) | |||
| print(x.numpy()) | |||
| x = rand.gamma(shape=shape, scale=scale) | |||
| print(x.numpy()) | |||
| x = rand.gamma(shape=shape, scale=scale, size=2) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [[1.5064533 4.0689363 ] | |||
| [0.71639484 1.4551026 ]] | |||
| x = rand.gamma(shape=shape, scale=scale, size=2) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| [[ 0.4352188 11.399335 ] | |||
| [ 9.1888 52.009277 ]] | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [[[ 1.1726005 3.9654975 ] | |||
| [13.656933 36.559006 ]] | |||
| [[ 0.25848487 2.5540342 ] | |||
| [11.960409 21.031536 ]]] | |||
| [[1.5064533 4.0689363 ] | |||
| [0.71639484 1.4551026 ]] | |||
| [[ 0.4352188 11.399335 ] | |||
| [ 9.1888 52.009277 ]] | |||
| [[[ 1.1726005 3.9654975 ] | |||
| [13.656933 36.559006 ]] | |||
| [[ 0.25848487 2.5540342 ] | |||
| [11.960409 21.031536 ]]] | |||
| """ | |||
| _seed = self._seed() if callable(self._seed) else self._seed | |||
| return _gamma( | |||
| @@ -415,155 +419,161 @@ class RNG: | |||
| beta: Union[Tensor, float], | |||
| size: Optional[Iterable[int]] = None, | |||
| ): | |||
| r""" | |||
| Random variable with Beta distribution :math:`\operatorname{Beta}(\alpha, \beta)`. | |||
| r"""Random variable with Beta distribution :math:`\operatorname{Beta}(\alpha, \beta)`. | |||
| The corresponding probability density function is | |||
| .. math:: | |||
| p(x)=\frac{1}{\mathrm{~B}(\alpha, \beta)} x^{\alpha-1}(1-x)^{\beta-1} | |||
| p(x)=\frac{1}{\mathrm{~B}(\alpha, \beta)} x^{\alpha-1}(1-x)^{\beta-1} | |||
| \quad \text { for } \alpha, \beta>0, | |||
| where :math:`\mathrm{~B}(\alpha, \beta)` is the beta function, | |||
| .. math:: | |||
| \mathrm{~B}(\alpha, \beta)=\int_{0}^{1} t^{\alpha-1}(1-t)^{\beta-1} d t. | |||
| :param alpha: the alpha parameter of the distribution. Must be non-negative. | |||
| :param beta: the beta parameter of the distribution. Must be non-negative. | |||
| :param size: the size of output tensor. If alpha and beta are scalars and given size is, e.g., | |||
| `(m, n)`, then the output shape is `(m, n)`. If alpha or beta is a Tensor and given size | |||
| is, e.g., `(m, n)`, then the output shape is `(m, n) + broadcast(alpha, beta).shape`. | |||
| The broadcast rules are consistent with `numpy.broadcast`. Default: None | |||
| :return: the output tensor. | |||
| Args: | |||
| alpha: the alpha parameter of the distribution. Must be non-negative. | |||
| beta: the beta parameter of the distribution. Must be non-negative. | |||
| size: the size of output tensor. If alpha and beta are scalars and given size is, e.g., | |||
| `(m, n)`, then the output shape is `(m, n)`. If alpha or beta is a Tensor and given size | |||
| is, e.g., `(m, n)`, then the output shape is `(m, n) + broadcast(alpha, beta).shape`. | |||
| The broadcast rules are consistent with `numpy.broadcast`. Default: None | |||
| Returns: | |||
| the output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| x = rand.beta(alpha=2, beta=1, size=(2, 2)) | |||
| print(x.numpy()) | |||
| x = rand.beta(alpha=2, beta=1, size=(2, 2)) | |||
| print(x.numpy()) | |||
| alpha = mge.Tensor([[0.5], | |||
| [ 3]], dtype="float32") | |||
| beta = mge.Tensor([0.5,5], dtype="float32") | |||
| alpha = mge.Tensor([[0.5], | |||
| [ 3]], dtype="float32") | |||
| beta = mge.Tensor([0.5,5], dtype="float32") | |||
| x = rand.beta(alpha=alpha, beta=beta) | |||
| print(x.numpy()) | |||
| x = rand.beta(alpha=alpha, beta=beta) | |||
| print(x.numpy()) | |||
| x = rand.beta(alpha=alpha, beta=beta, size=2) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [[0.582565 0.91763186] | |||
| [0.86963767 0.6088103 ]] | |||
| [[0.41503012 0.16438372] | |||
| [0.90159506 0.47588003]] | |||
| [[[0.55195075 0.01111084] | |||
| [0.95298755 0.25048104]] | |||
| [[0.11680304 0.13859665] | |||
| [0.997879 0.43259275]]] | |||
| x = rand.beta(alpha=alpha, beta=beta, size=2) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [[0.582565 0.91763186] | |||
| [0.86963767 0.6088103 ]] | |||
| [[0.41503012 0.16438372] | |||
| [0.90159506 0.47588003]] | |||
| [[[0.55195075 0.01111084] | |||
| [0.95298755 0.25048104]] | |||
| [[0.11680304 0.13859665] | |||
| [0.997879 0.43259275]]] | |||
| """ | |||
| _seed = self._seed() if callable(self._seed) else self._seed | |||
| return _beta(alpha=alpha, beta=beta, size=size, seed=_seed, handle=self._handle) | |||
| def poisson(self, lam: Union[float, Tensor], size: Optional[Iterable[int]] = None): | |||
| r""" | |||
| Random variable with poisson distribution :math:`\operatorname{Poisson}(\lambda)`. | |||
| r"""Random variable with poisson distribution :math:`\operatorname{Poisson}(\lambda)`. | |||
| The corresponding probability density function is | |||
| .. math:: | |||
| f(k ; \lambda)=\frac{\lambda^{k} e^{-\lambda}}{k !}, | |||
| where k is the number of occurrences :math:`({\displaystyle k=0,1,2...})`. | |||
| :param lam: the lambda parameter of the distribution. Must be non-negative. | |||
| :param size: the size of output tensor. If lam is a scalar and given size is, e.g., `(m, n)`, | |||
| then the output shape is `(m, n)`. If lam is a Tensor with shape `(k, v)` and given | |||
| size is, e.g., `(m, n)`, then the output shape is `(m, n, k, v)`. Default: None. | |||
| :return: the output tensor. | |||
| Args: | |||
| lam: the lambda parameter of the distribution. Must be non-negative. | |||
| size: the size of output tensor. If lam is a scalar and given size is, e.g., `(m, n)`, | |||
| then the output shape is `(m, n)`. If lam is a Tensor with shape `(k, v)` and given | |||
| size is, e.g., `(m, n)`, then the output shape is `(m, n, k, v)`. Default: None. | |||
| Returns: | |||
| the output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| x = rand.poisson(lam=2., size=(1, 3)) | |||
| print(x.numpy()) | |||
| x = rand.poisson(lam=2., size=(1, 3)) | |||
| print(x.numpy()) | |||
| lam = mge.Tensor([[1.,1.], | |||
| [10,10]], dtype="float32") | |||
| lam = mge.Tensor([[1.,1.], | |||
| [10,10]], dtype="float32") | |||
| x = rand.poisson(lam=lam) | |||
| print(x.numpy()) | |||
| x = rand.poisson(lam=lam) | |||
| print(x.numpy()) | |||
| x = rand.poisson(lam=lam, size=(1,3)) | |||
| print(x.numpy()) | |||
| x = rand.poisson(lam=lam, size=(1,3)) | |||
| print(x.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| Outputs: | |||
| [[3. 1. 3.]] | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [[ 2. 2.] | |||
| [12. 11.]] | |||
| [[3. 1. 3.]] | |||
| [[[[ 1. 1.] | |||
| [11. 4.]] | |||
| [[ 0. 0.] | |||
| [ 9. 13.]] | |||
| [[ 0. 1.] | |||
| [ 7. 12.]]]] | |||
| [[ 2. 2.] | |||
| [12. 11.]] | |||
| [[[[ 1. 1.] | |||
| [11. 4.]] | |||
| [[ 0. 0.] | |||
| [ 9. 13.]] | |||
| [[ 0. 1.] | |||
| [ 7. 12.]]]] | |||
| """ | |||
| _seed = self._seed() if callable(self._seed) else self._seed | |||
| return _poisson(lam=lam, size=size, seed=_seed, handle=self._handle) | |||
| def permutation(self, n: int, *, dtype: str = "int32"): | |||
| r""" | |||
| Generates a random permutation of integers from :math:`0` to :math:`n - 1`. | |||
| r"""Generates a random permutation of integers from :math:`0` to :math:`n - 1`. | |||
| :param n: the upper bound. Must be larger than 0. | |||
| :param dtype: the output data type. int32, int16 and float32 are | |||
| supported. Default: int32 | |||
| :return: the output tensor. | |||
| Args: | |||
| n: the upper bound. Must be larger than 0. | |||
| dtype: the output data type. int32, int16 and float32 are supported. Default: int32 | |||
| Returns: | |||
| the output tensor. | |||
| Examples: | |||
| .. testcode:: | |||
| .. testcode:: | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| import megengine as mge | |||
| import megengine.random as rand | |||
| x = rand.permutation(n=10, dtype="int32") | |||
| print(x.numpy()) | |||
| x = rand.permutation(n=10, dtype="int32") | |||
| print(x.numpy()) | |||
| x = rand.permutation(n=10, dtype="float32") | |||
| print(x.numpy()) | |||
| Outputs: | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| x = rand.permutation(n=10, dtype="float32") | |||
| print(x.numpy()) | |||
| Outputs: | |||
| [4 5 0 7 3 8 6 1 9 2] | |||
| [3. 4. 9. 0. 6. 8. 7. 1. 5. 2.] | |||
| .. testoutput:: | |||
| :options: +SKIP | |||
| [4 5 0 7 3 8 6 1 9 2] | |||
| [3. 4. 9. 0. 6. 8. 7. 1. 5. 2.] | |||
| """ | |||
| _seed = self._seed() if callable(self._seed) else self._seed | |||
| return _permutation( | |||