| @@ -17,7 +17,8 @@ def get_backwarding_grad_manager(): | |||
| class GradManager: | |||
| r"""GradManager manages auto differentiation and all resources required to perform it. | |||
| r""" | |||
| GradManager manages auto differentiation and all resources required to perform it. | |||
| Our auto differentiation framework requires that the user explicitly indicates when | |||
| the forward operations start and when all resources should be released. A typical usage of | |||
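For orientation, here is a minimal training-step sketch that uses only the methods touched in this diff (``attach``/``record``/``backward``; ``release`` stops recording and frees resources without computing gradients). The names ``model``, ``loss_fn``, ``dataset`` and ``optimizer`` are placeholders, not part of this change.

.. code-block:: python

    from megengine.autodiff import GradManager

    gm = GradManager()
    gm.attach(model.parameters())          # parameters to differentiate with respect to

    for data, label in dataset:            # placeholder data source
        gm.record()                        # start recording forward operations
        loss = loss_fn(model(data), label)
        gm.backward(loss)                  # accumulate gradients into attached parameters
        optimizer.step()
        optimizer.clear_grad()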
| @@ -71,7 +72,8 @@ class GradManager: | |||
| self._gradients = dict() | |||
| def attach(self, params: list, callbacks=None): | |||
| r"""Registers parameters that gradients should be calculated with respect to. | |||
| r""" | |||
| Registers parameters that gradients should be calculated with respect to. | |||
| Callback Functions should have a signature like this: | |||
| .. code-block:: | |||
| @@ -99,7 +101,8 @@ class GradManager: | |||
| return self | |||
| def detach(self, params: list): | |||
| r"""Remove specific registered parameters and callback functions. | |||
| r""" | |||
| Remove specific registered parameters and callback functions. | |||
| :param params: registered parameters | |||
| """ | |||
| @@ -125,7 +128,8 @@ class GradManager: | |||
| return self | |||
| def backward(self, ys, dys=None): | |||
| r"""Performs back-propagation and computes gradients. | |||
| r""" | |||
| Performs back-propagation and computes gradients. | |||
| :param ys: outputs of forward operators, e.g., the loss tensor | |||
| :param dys: derivatives of ys | |||
| @@ -165,7 +169,8 @@ class GradManager: | |||
| backwarding_grad_manager = cache | |||
| def record(self): | |||
| r"""Starts recording forward operations. | |||
| r""" | |||
| Starts recording forward operations. | |||
| """ | |||
| if self._recording: | |||
| raise RuntimeError("already recording") | |||
| @@ -190,7 +195,8 @@ class GradManager: | |||
| self._grad.wrt(param_wrapper, callback=callback) | |||
| def release(self): | |||
| r"""Stops recording and releases resources for gradients calculation. | |||
| r""" | |||
| Stops recording and releases resources for gradients calculation. | |||
| """ | |||
| if self._grad is not None: | |||
| self._grad.__exit__(None, None, None) | |||
| @@ -15,7 +15,8 @@ if os.environ.get("MEGENGINE_USE_SYMBOLIC_SHAPE"): | |||
| def use_symbolic_shape() -> bool: | |||
| """Returns whether tensor.shape returns a tensor instead of a tuple | |||
| """ | |||
| Returns whether tensor.shape returns a tensor instead of a tuple | |||
| """ | |||
| return _use_symbolic_shape | |||
| @@ -78,7 +78,8 @@ class auto: | |||
| class _EnumDict(dict): | |||
| """Track enum member order and ensure member names are not reused. | |||
| """ | |||
| Track enum member order and ensure member names are not reused. | |||
| EnumMeta will use the names found in self._member_names as the | |||
| enumeration member names. | |||
| @@ -91,7 +92,8 @@ class _EnumDict(dict): | |||
| self._last_values = [] | |||
| def __setitem__(self, key, value): | |||
| """Changes anything not dundered or not a descriptor. | |||
| """ | |||
| Changes anything not dundered or not a descriptor. | |||
| If an enum member name is used twice, an error is raised; duplicate | |||
| values are not checked for. | |||
| @@ -303,7 +305,8 @@ class EnumMeta(type): | |||
| def __call__( | |||
| cls, value, names=None, *, module=None, qualname=None, type=None, start=1 | |||
| ): | |||
| """Either returns an existing member, or creates a new enum class. | |||
| """ | |||
| Either returns an existing member, or creates a new enum class. | |||
| This method is used both when an enum class is given a value to match | |||
| to an enumeration member (i.e. Color(3)) and for the functional API | |||
| @@ -353,7 +356,8 @@ class EnumMeta(type): | |||
| ] + self._member_names_ | |||
| def __getattr__(cls, name): | |||
| """Return the enum member matching `name` | |||
| """ | |||
| Return the enum member matching `name` | |||
| We use __getattr__ instead of descriptors or inserting into the enum | |||
| class' __dict__ in order to support `name` and `value` being both | |||
| @@ -379,7 +383,8 @@ class EnumMeta(type): | |||
| @property | |||
| def __members__(cls): | |||
| """Returns a mapping of member name->value. | |||
| """ | |||
| Returns a mapping of member name->value. | |||
| This mapping lists all enum members, including aliases. Note that this | |||
| is a read-only view of the internal mapping. | |||
| @@ -394,7 +399,8 @@ class EnumMeta(type): | |||
| return (cls._member_map_[name] for name in reversed(cls._member_names_)) | |||
| def __setattr__(cls, name, value): | |||
| """Block attempts to reassign Enum members. | |||
| """ | |||
| Block attempts to reassign Enum members. | |||
| A simple assignment to the class namespace only changes one of the | |||
| several possible ways to get an Enum member from the Enum class, | |||
| @@ -409,7 +415,8 @@ class EnumMeta(type): | |||
| def _create_( | |||
| cls, class_name, names=None, *, module=None, qualname=None, type=None, start=1 | |||
| ): | |||
| """Convenience method to create a new Enum class. | |||
| """ | |||
| Convenience method to create a new Enum class. | |||
| `names` can be: | |||
| @@ -465,7 +472,8 @@ class EnumMeta(type): | |||
| @staticmethod | |||
| def _get_mixins_(bases): | |||
| """Returns the type for creating enum members, and the first inherited | |||
| """ | |||
| Returns the type for creating enum members, and the first inherited | |||
| enum class. | |||
| bases: the tuple of bases that was given to __new__ | |||
| @@ -510,7 +518,8 @@ class EnumMeta(type): | |||
| @staticmethod | |||
| def _find_new_(classdict, member_type, first_enum): | |||
| """Returns the __new__ to be used for creating the enum members. | |||
| """ | |||
| Returns the __new__ to be used for creating the enum members. | |||
| classdict: the class dictionary given to __new__ | |||
| member_type: the data type whose __new__ will be used by default | |||
| @@ -556,7 +565,8 @@ class EnumMeta(type): | |||
| class Enum(metaclass=EnumMeta): | |||
| """Generic enumeration. | |||
| """ | |||
| Generic enumeration. | |||
| Derive from this class to define new enumerations. | |||
| @@ -188,7 +188,8 @@ class OpNode: | |||
| def optimize_for_inference(dest_vars, **kwargs): | |||
| r"""Applies optimize_for_inference pass for computing graph. | |||
| r""" | |||
| Applies optimize_for_inference pass for computing graph. | |||
| :param dest_vars: list of output vars in the computing graph | |||
| @@ -287,7 +288,8 @@ def dump_graph( | |||
| strip_info_file=None, | |||
| append_json=False | |||
| ): | |||
| """serialize the computing graph of `output_vars` and get byte result. | |||
| """ | |||
| Serialize the computing graph of `output_vars` and get the byte result. | |||
| :param output_vars: output variables which are the graph's end point. | |||
| @@ -385,7 +387,8 @@ CompGraphLoadResult = collections.namedtuple( | |||
| def load_graph(fpath): | |||
| """Load a serialized computing graph from file. | |||
| """ | |||
| Load a serialized computing graph from file. | |||
| :param fpath: Path or Handle of the input file | |||
| :return: An instance of namedtuple :class:`CompGraphLoadResult`, | |||
| @@ -69,7 +69,8 @@ def ambiguity_warn(dispatcher, ambiguities): | |||
| def variadic_signature_matches_iter(types, full_signature): | |||
| """Check if a set of input types matches a variadic signature. | |||
| """ | |||
| Check if a set of input types matches a variadic signature. | |||
| Notes | |||
| ----- | |||
| @@ -288,7 +289,8 @@ class Dispatcher(CDispatcher): | |||
| __repr__ = __str__ | |||
| def dispatch(self, *types): | |||
| """Deterimine appropriate implementation for this type signature | |||
| """ | |||
| Determine the appropriate implementation for this type signature | |||
| This method is internal. Users should call this object as a function. | |||
| Implementation resolution occurs within the ``__call__`` method. | |||
| @@ -110,7 +110,8 @@ def _toposort(edges): | |||
| def reverse_dict(d): | |||
| """Reverses direction of dependence dict | |||
| """ | |||
| Reverses direction of dependence dict | |||
| >>> d = {'a': (1, 2), 'b': (2, 3), 'c':()} | |||
| >>> reverse_dict(d) # doctest: +SKIP | |||
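For reference, a behaviour-equivalent sketch of such a reversal (not the library's implementation), matching the skipped doctest above:

.. code-block:: python

    def reverse_dict_sketch(d):
        """Map each dependency to the tuple of keys that depend on it."""
        result = {}
        for key, vals in d.items():
            for val in vals:
                result[val] = result.get(val, ()) + (key,)
        return result

    # reverse_dict_sketch({'a': (1, 2), 'b': (2, 3), 'c': ()})
    # -> {1: ('a',), 2: ('a', 'b'), 3: ('b',)}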
| @@ -156,7 +157,8 @@ def groupby(func, seq): | |||
| def typename(type): | |||
| """Get the name of `type`. | |||
| """ | |||
| Get the name of `type`. | |||
| Parameters | |||
| ---------- | |||
| @@ -72,7 +72,8 @@ class VariadicSignatureType(type): | |||
| def isvariadic(obj): | |||
| """Check whether the type `obj` is variadic. | |||
| """ | |||
| Check whether the type `obj` is variadic. | |||
| Parameters | |||
| ---------- | |||
| @@ -95,7 +96,8 @@ def isvariadic(obj): | |||
| class VariadicSignatureMeta(type): | |||
| """A metaclass that overrides ``__getitem__`` on the class. This is used to | |||
| """ | |||
| A metaclass that overrides ``__getitem__`` on the class. This is used to | |||
| generate a new type for Variadic signatures. See the Variadic class for | |||
| examples of how this behaves. | |||
| """ | |||
| @@ -117,7 +119,8 @@ class VariadicSignatureMeta(type): | |||
| class Variadic(metaclass=VariadicSignatureMeta): | |||
| """A class whose getitem method can be used to generate a new type | |||
| """ | |||
| A class whose getitem method can be used to generate a new type | |||
| representing a specific variadic signature. | |||
| Examples | |||
| @@ -389,7 +389,8 @@ class ArrayMethodMixin(abc.ABC): | |||
| return self.reshape(-1) | |||
| def sum(self, axis=None, keepdims: bool = False): | |||
| r"""Returns the sum of each row of the input tensor in the given dimension ``axis``. | |||
| r""" | |||
| Returns the sum of each row of the input tensor in the given dimension ``axis``. | |||
| If ``axis`` is a list of axes, reduce over all of them. | |||
| If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, except in the dimension(s) ``axis`` where it is of size 1. Otherwise, ``axis`` is squeezed (see :meth:`~.functional.tensor.squeeze`). | |||
| @@ -59,7 +59,8 @@ class _PlasmaStoreManager: | |||
| class PlasmaShmQueue: | |||
| def __init__(self, maxsize: int = 0): | |||
| r"""Use pyarrow in-memory plasma store to implement shared memory queue. | |||
| r""" | |||
| Use pyarrow in-memory plasma store to implement shared memory queue. | |||
| Compared to native `multiprocess.Queue`, `PlasmaShmQueue` avoids pickle/unpickle | |||
| and communication overhead, leading to better performance in multi-process | |||
| @@ -42,7 +42,8 @@ class DataLoader: | |||
| timeout: int = 0, | |||
| divide: bool = False, | |||
| ): | |||
| r"""Provides a convenient way to iterate on a given dataset. | |||
| r""" | |||
| Provides a convenient way to iterate on a given dataset. | |||
| `DataLoader` combines a dataset with `sampler`, `transform` and `collator`, | |||
| making it flexible to get minibatches continually from a dataset. | |||
| @@ -23,7 +23,8 @@ from .meta_vision import VisionDataset | |||
| class Cityscapes(VisionDataset): | |||
| r"""`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset. | |||
| r""" | |||
| `Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset. | |||
| """ | |||
| supported_order = ( | |||
| @@ -46,7 +46,8 @@ def has_valid_annotation(anno, order): | |||
| class COCO(VisionDataset): | |||
| r"""`MS COCO <http://cocodataset.org/#home>`_ Dataset. | |||
| r""" | |||
| `MS COCO <http://cocodataset.org/#home>`_ Dataset. | |||
| """ | |||
| supported_order = ( | |||
| @@ -23,7 +23,8 @@ from .meta_vision import VisionDataset | |||
| class Objects365(VisionDataset): | |||
| r"""`Objects365 <https://www.objects365.org/overview.html>`_ Dataset. | |||
| r""" | |||
| `Objects365 <https://www.objects365.org/overview.html>`_ Dataset. | |||
| """ | |||
| supported_order = ( | |||
| @@ -24,7 +24,8 @@ from .meta_vision import VisionDataset | |||
| class PascalVOC(VisionDataset): | |||
| r"""`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset. | |||
| r""" | |||
| `Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset. | |||
| """ | |||
| supported_order = ( | |||
| @@ -154,7 +154,8 @@ class VisionTransform(Transform): | |||
| class ToMode(VisionTransform): | |||
| r"""Change input data to a target mode. | |||
| r""" | |||
| Change input data to a target mode. | |||
| For example, most transforms use HWC mode image, | |||
| while the neural network might use CHW mode input tensor. | |||
| @@ -301,7 +302,8 @@ class TorchTransformCompose(VisionTransform): | |||
| class Pad(VisionTransform): | |||
| r"""Pad the input data. | |||
| r""" | |||
| Pad the input data. | |||
| :param size: padding size of the input image; it could be an integer or a sequence. | |||
| If it is an integer, the input image will be padded in four directions. | |||
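A rough numpy equivalent of the integer case, assuming an HWC image and constant padding (the real transform supports more options):

.. code-block:: python

    import numpy as np

    def pad_image_sketch(image: np.ndarray, size: int, value: float = 0.0) -> np.ndarray:
        # pad an HWC image by `size` pixels on each of the four sides with a constant value
        return np.pad(image, ((size, size), (size, size), (0, 0)), constant_values=value)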
| @@ -348,7 +350,8 @@ class Pad(VisionTransform): | |||
| class Resize(VisionTransform): | |||
| r"""Resize the input data. | |||
| r""" | |||
| Resize the input data. | |||
| :param output_size: target size of image, with (height, width) shape. | |||
| :param interpolation: interpolation method. All methods are listed below: | |||
| @@ -474,7 +477,8 @@ class ShortestEdgeResize(VisionTransform): | |||
| class RandomResize(VisionTransform): | |||
| r"""Resize the input data randomly. | |||
| r""" | |||
| Resize the input data randomly. | |||
| :param scale_range: range of scaling. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| @@ -518,7 +522,8 @@ class RandomResize(VisionTransform): | |||
| class RandomCrop(VisionTransform): | |||
| r"""Crop the input data randomly. Before applying the crop transform, | |||
| r""" | |||
| Crop the input data randomly. Before applying the crop transform, | |||
| pad the image first. If the target size is still bigger than the size of the | |||
| padded image, pad the image to the target size. | |||
| @@ -575,7 +580,8 @@ class RandomCrop(VisionTransform): | |||
| class RandomResizedCrop(VisionTransform): | |||
| r"""Crop the input data to random size and aspect ratio. | |||
| r""" | |||
| Crop the input data to random size and aspect ratio. | |||
| A crop of random size (default: 0.08 to 1.0 of the original size) and a random | |||
| aspect ratio (default: 3/4 to 1.33 of the original aspect ratio) is made. | |||
| After applying the crop transform, the input data will be resized to the given size. | |||
| @@ -664,7 +670,8 @@ class RandomResizedCrop(VisionTransform): | |||
| class CenterCrop(VisionTransform): | |||
| r"""Crops the given the input data at the center. | |||
| r""" | |||
| Crops the given the input data at the center. | |||
| :param output_size: target size of output image, with (height, width) shape. | |||
| :param order: the same with :class:`VisionTransform`. | |||
| @@ -707,7 +714,8 @@ class CenterCrop(VisionTransform): | |||
| class RandomHorizontalFlip(VisionTransform): | |||
| r"""Horizontally flip the input data randomly with a given probability. | |||
| r""" | |||
| Horizontally flip the input data randomly with a given probability. | |||
| :param p: probability of the input data being flipped. Default: 0.5 | |||
| :param order: the same with :class:`VisionTransform`. | |||
| @@ -739,7 +747,8 @@ class RandomHorizontalFlip(VisionTransform): | |||
| class RandomVerticalFlip(VisionTransform): | |||
| r"""Vertically flip the input data randomly with a given probability. | |||
| r""" | |||
| Vertically flip the input data randomly with a given probability. | |||
| :param p: probability of the input data being flipped. Default: 0.5 | |||
| :param order: the same with :class:`VisionTransform`. | |||
| @@ -771,7 +780,8 @@ class RandomVerticalFlip(VisionTransform): | |||
| class Normalize(VisionTransform): | |||
| r"""Normalize the input data with mean and standard deviation. | |||
| r""" | |||
| Normalize the input data with mean and standard deviation. | |||
| Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, | |||
| this transform will normalize each channel of the input data. | |||
| ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` | |||
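In plain numpy, the per-channel formula above amounts to the following sketch, assuming an HWC float image (the mode most transforms in this module operate on):

.. code-block:: python

    import numpy as np

    def normalize_image_sketch(image: np.ndarray, mean, std) -> np.ndarray:
        # output[channel] = (input[channel] - mean[channel]) / std[channel]
        mean = np.asarray(mean, dtype=np.float32).reshape(1, 1, -1)
        std = np.asarray(std, dtype=np.float32).reshape(1, 1, -1)
        return (image - mean) / std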
| @@ -797,7 +807,8 @@ class Normalize(VisionTransform): | |||
| class GaussianNoise(VisionTransform): | |||
| r"""Add random gaussian noise to the input data. | |||
| r""" | |||
| Add random gaussian noise to the input data. | |||
| Gaussian noise is generated with given mean and std. | |||
| :param mean: Gaussian mean used to generate noise. | |||
| @@ -824,7 +835,8 @@ class GaussianNoise(VisionTransform): | |||
| class BrightnessTransform(VisionTransform): | |||
| r"""Adjust brightness of the input data. | |||
| r""" | |||
| Adjust brightness of the input data. | |||
| :param value: how much to adjust the brightness. Can be any | |||
| non-negative number; 0 gives the original image. | |||
| @@ -855,7 +867,8 @@ class BrightnessTransform(VisionTransform): | |||
| class ContrastTransform(VisionTransform): | |||
| r"""Adjust contrast of the input data. | |||
| r""" | |||
| Adjust contrast of the input data. | |||
| :param value: how much to adjust the contrast. Can be any | |||
| non-negative number; 0 gives the original image. | |||
| @@ -886,7 +899,8 @@ class ContrastTransform(VisionTransform): | |||
| class SaturationTransform(VisionTransform): | |||
| r"""Adjust saturation of the input data. | |||
| r""" | |||
| Adjust saturation of the input data. | |||
| :param value: how much to adjust the saturation. Can be any | |||
| non-negative number; 0 gives the original image. | |||
| @@ -917,7 +931,8 @@ class SaturationTransform(VisionTransform): | |||
| class HueTransform(VisionTransform): | |||
| r"""Adjust hue of the input data. | |||
| r""" | |||
| Adjust hue of the input data. | |||
| :param value: how much to adjust the hue. Can be any number | |||
| between 0 and 0.5; 0 gives the original image. | |||
| @@ -955,7 +970,8 @@ class HueTransform(VisionTransform): | |||
| class ColorJitter(VisionTransform): | |||
| r"""Randomly change the brightness, contrast, saturation and hue of an image. | |||
| r""" | |||
| Randomly change the brightness, contrast, saturation and hue of an image. | |||
| :param brightness: how much to jitter brightness. | |||
| Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] | |||
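The sampling rule described for ``brightness`` can be sketched as below; a similar rule typically applies to contrast and saturation:

.. code-block:: python

    import random

    def sample_brightness_factor(brightness: float) -> float:
        # factor drawn uniformly from [max(0, 1 - brightness), 1 + brightness]
        return random.uniform(max(0.0, 1.0 - brightness), 1.0 + brightness)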
| @@ -40,7 +40,8 @@ def _str2device_type(type_str: str, allow_unspec: bool = True): | |||
| def get_device_count(device_type: str) -> int: | |||
| """Gets number of devices installed on this system. | |||
| """ | |||
| Gets number of devices installed on this system. | |||
| :param device_type: device type, one of 'gpu' or 'cpu' | |||
| """ | |||
| @@ -54,7 +55,8 @@ def get_device_count(device_type: str) -> int: | |||
| def is_cuda_available() -> bool: | |||
| """Returns whether cuda device is available on this system. | |||
| """ | |||
| Returns whether cuda device is available on this system. | |||
| """ | |||
| t = _str2device_type("gpu") | |||
| @@ -62,7 +64,8 @@ def is_cuda_available() -> bool: | |||
| def set_default_device(device: str = "xpux"): | |||
| r"""Sets default computing node. | |||
| r""" | |||
| Sets default computing node. | |||
| :param device: default device type. The type can be 'cpu0', 'cpu1', etc., | |||
| or 'gpu0', 'gpu1', etc., to specify the particular cpu or gpu to use. | |||
| @@ -81,7 +84,8 @@ def set_default_device(device: str = "xpux"): | |||
| def get_default_device() -> str: | |||
| r"""Gets default computing node. | |||
| r""" | |||
| Gets default computing node. | |||
| It returns the value set by :func:`~.set_default_device`. | |||
| """ | |||
| @@ -98,7 +102,8 @@ def set_prealloc_config( | |||
| growth_factor=2.0, | |||
| device_type=DeviceType.CUDA, | |||
| ): | |||
| """Specifies how to pre-allocate from raw device allocator. | |||
| """ | |||
| Specifies how to pre-allocate from raw device allocator. | |||
| :param alignment: specifies the alignment in bytes. | |||
| :param min_req: min request size in bytes. | |||
| @@ -123,7 +123,8 @@ def collective_comm(inp, mode, group, device): | |||
| def reduce_sum( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create reduce_sum operator for collective communication. | |||
| """ | |||
| Create reduce_sum operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -136,7 +137,8 @@ def reduce_sum( | |||
| def broadcast( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create broadcast operator for collective communication. | |||
| """ | |||
| Create broadcast operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -149,7 +151,8 @@ def broadcast( | |||
| def all_gather( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create all_gather operator for collective communication. | |||
| """ | |||
| Create all_gather operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -162,7 +165,8 @@ def all_gather( | |||
| def reduce_scatter_sum( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create reduce_scatter_sum operator for collective communication. | |||
| """ | |||
| Create reduce_scatter_sum operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -175,7 +179,8 @@ def reduce_scatter_sum( | |||
| def all_reduce_sum( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create all_reduce_sum operator for collective communication. | |||
| """ | |||
| Create all_reduce_sum operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -188,7 +193,8 @@ def all_reduce_sum( | |||
| def all_reduce_max( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create all_reduce_max operator for collective communication. | |||
| """ | |||
| Create all_reduce_max operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -201,7 +207,8 @@ def all_reduce_max( | |||
| def all_reduce_min( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create all_reduce_min operator for collective communication. | |||
| """ | |||
| Create all_reduce_min operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -214,7 +221,8 @@ def all_reduce_min( | |||
| def gather( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create gather operator for collective communication. | |||
| """ | |||
| Create gather operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -227,7 +235,8 @@ def gather( | |||
| def scatter( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create scatter operator for collective communication. | |||
| """ | |||
| Create scatter operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -240,7 +249,8 @@ def scatter( | |||
| def all_to_all( | |||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
| ) -> Tensor: | |||
| """Create all_to_all operator for collective communication. | |||
| """ | |||
| Create all_to_all operator for collective communication. | |||
| :param inp: input tensor. | |||
| :param group: communication group. | |||
| @@ -251,7 +261,8 @@ def all_to_all( | |||
| def remote_send(inp: Tensor, dest_rank: int) -> Tensor: | |||
| """Send a Tensor to a remote process. | |||
| """ | |||
| Send a Tensor to a remote process. | |||
| :param inp: tensor to send. | |||
| :param dest_rank: destination process rank. | |||
| @@ -266,7 +277,8 @@ def remote_send(inp: Tensor, dest_rank: int) -> Tensor: | |||
| def remote_recv( | |||
| src_rank: int, shape: Tuple[int], dtype: type, device: Optional[str] = None | |||
| ) -> Tensor: | |||
| """Receive a Tensor from a remote process. | |||
| """ | |||
| Receive a Tensor from a remote process. | |||
| :param src_rank: source process rank. | |||
| :param shape: the shape of the tensor to receive. | |||
| @@ -81,7 +81,8 @@ def init_process_group( | |||
| device: int, | |||
| backend: Optional[str] = "nccl", | |||
| ) -> None: | |||
| """Initialize the distributed process group and specify the device used in the current process | |||
| """ | |||
| Initialize the distributed process group and specify the device used in the current process | |||
| :param master_ip: ip address of the master node. | |||
| :param port: port available for all processes to communicate. | |||
| @@ -140,7 +140,8 @@ class TensorFuture(Future): | |||
| def synchronized(func: Callable): | |||
| """Decorator. Decorated function will synchronize when finished. | |||
| """ | |||
| Decorator. Decorated function will synchronize when finished. | |||
| Specifically, we use this to prevent data races during hub.load.""" | |||
| @functools.wraps(func) | |||
| @@ -161,7 +162,8 @@ def _get_device_count_worker(queue, device_type): | |||
| def get_device_count_by_fork(device_type: str): | |||
| """Get device count in fork thread. | |||
| """ | |||
| Get device count in fork thread. | |||
| See https://stackoverflow.com/questions/22950047/cuda-initialization-error-after-fork | |||
| for more information. | |||
| """ | |||
| @@ -173,7 +175,8 @@ def get_device_count_by_fork(device_type: str): | |||
| def bcast_list_(inps: list, group: Group = WORLD): | |||
| """Broadcast tensors between given group. | |||
| """ | |||
| Broadcast tensors between given group. | |||
| :param inps: input tensors. | |||
| :param group: communication group. | |||
| @@ -183,7 +186,8 @@ def bcast_list_(inps: list, group: Group = WORLD): | |||
| class AllreduceCallback: | |||
| """Allreduce Callback with tensor fusion optimization. | |||
| """ | |||
| Allreduce Callback with tensor fusion optimization. | |||
| :param reduce_method: the method to reduce gradients. | |||
| :param group: communication group. | |||
| @@ -21,7 +21,8 @@ from .util import get_free_ports | |||
| class Methods: | |||
| """Distributed Server Method. | |||
| """ | |||
| Distributed Server Method. | |||
| Used to exchange information between distributed nodes. | |||
| :param mm_server_port: multiple machine rpc server port. | |||
| @@ -45,7 +46,8 @@ class Methods: | |||
| return self.mm_server_port | |||
| def set_is_grad(self, key, is_grad): | |||
| """Mark send/recv need gradiants by key. | |||
| """ | |||
| Mark whether send/recv needs gradients by key. | |||
| :param key: key to match send/recv op. | |||
| :param is_grad: whether this op needs grad. | |||
| @@ -56,7 +58,8 @@ class Methods: | |||
| return True | |||
| def check_is_grad(self, key): | |||
| """Check whether send/recv need gradiants. | |||
| """ | |||
| Check whether send/recv needs gradients. | |||
| :param key: key to match send/recv op. | |||
| """ | |||
| @@ -68,7 +71,8 @@ class Methods: | |||
| return ret | |||
| def set_remote_tracer(self, key, tracer_set): | |||
| """Set tracer dict for tracing send/recv op. | |||
| """ | |||
| Set tracer dict for tracing send/recv op. | |||
| :param key: key to match send/recv op. | |||
| :param tracer_set: valid tracer set. | |||
| @@ -79,7 +83,8 @@ class Methods: | |||
| return True | |||
| def check_remote_tracer(self, key): | |||
| """Get tracer dict for send/recv op. | |||
| """ | |||
| Get tracer dict for send/recv op. | |||
| :param key: key to match send/recv op. | |||
| """ | |||
| @@ -91,7 +96,8 @@ class Methods: | |||
| return ret | |||
| def group_barrier(self, key, size): | |||
| """A barrier wait for all group member. | |||
| """ | |||
| A barrier that waits for all group members. | |||
| :param key: group key to match each other. | |||
| :param size: group size. | |||
| @@ -114,7 +120,8 @@ class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer): | |||
| def start_server(py_server_port, mm_server_port): | |||
| """Start python distributed server and multiple machine server. | |||
| """ | |||
| Start python distributed server and multiple machine server. | |||
| :param py_server_port: python server port. | |||
| :param mm_server_port: multiple machine server port. | |||
| @@ -125,7 +132,8 @@ def start_server(py_server_port, mm_server_port): | |||
| class Server: | |||
| """Distributed Server for distributed training. | |||
| """ | |||
| Distributed Server for distributed training. | |||
| Should be running on the master node. | |||
| :param port: python server port. | |||
| @@ -143,7 +151,8 @@ class Server: | |||
| class Client: | |||
| """Distributed Client for distributed training. | |||
| """ | |||
| Distributed Client for distributed training. | |||
| :param master_ip: ip address of master node. | |||
| :param port: port of server at master node. | |||
| @@ -171,7 +180,8 @@ class Client: | |||
| return self.proxy.get_mm_server_port() | |||
| def set_is_grad(self, key, is_grad): | |||
| """Mark send/recv need gradiants by key. | |||
| """ | |||
| Mark whether send/recv needs gradients by key. | |||
| :param key: key to match send/recv op. | |||
| :param is_grad: whether this op needs grad. | |||
| @@ -179,14 +189,16 @@ class Client: | |||
| self.proxy.set_is_grad(key, is_grad) | |||
| def check_is_grad(self, key): | |||
| """Check whether send/recv need gradiants. | |||
| """ | |||
| Check whether send/recv needs gradients. | |||
| :param key: key to match send/recv op. | |||
| """ | |||
| return self.proxy.check_is_grad(key) | |||
| def set_remote_tracer(self, key, tracer_set): | |||
| """Set tracer dict for tracing send/recv op. | |||
| """ | |||
| Set tracer dict for tracing send/recv op. | |||
| :param key: key to match send/recv op. | |||
| :param tracer_set: valid tracer set. | |||
| @@ -194,14 +206,16 @@ class Client: | |||
| self.proxy.set_remote_tracer(key, tracer_set) | |||
| def check_remote_tracer(self, key): | |||
| """Get tracer dict for send/recv op. | |||
| """ | |||
| Get tracer dict for send/recv op. | |||
| :param key: key to match send/recv op. | |||
| """ | |||
| return self.proxy.check_remote_tracer(key) | |||
| def group_barrier(self, key, size): | |||
| """A barrier wait for all group member. | |||
| """ | |||
| A barrier that waits for all group members. | |||
| :param key: group key to match each other. | |||
| :param size: group size. | |||
| @@ -12,7 +12,8 @@ from typing import List | |||
| def get_free_ports(num: int) -> List[int]: | |||
| """Get one or more free ports. | |||
| """ | |||
| Get one or more free ports. | |||
| """ | |||
| socks, ports = [], [] | |||
| for i in range(num): | |||
| @@ -12,7 +12,8 @@ _conv_execution_strategy = os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY", "HEURI | |||
| def get_conv_execution_strategy() -> str: | |||
| """Returns the execuation strategy of :class:`~.Conv2d`. | |||
| """ | |||
| Returns the execution strategy of :class:`~.Conv2d`. | |||
| See :func:`~.set_conv_execution_strategy` for possible return values | |||
| """ | |||
| @@ -20,7 +21,8 @@ def get_conv_execution_strategy() -> str: | |||
| def set_conv_execution_strategy(option: str): | |||
| """Sets the execuation strategy of :class:`~.Conv2d`. | |||
| """ | |||
| Sets the execution strategy of :class:`~.Conv2d`. | |||
| :param option: Decides how :class:`~.Conv2d` algorithm is chosen. | |||
| Available values: | |||
| @@ -100,7 +100,8 @@ def _elemwise_multi_type(*args, mode, **kwargs): | |||
| def add(x, y): | |||
| """Element-wise `addition`. | |||
| """ | |||
| Element-wise `addition`. | |||
| At least one operand should be tensor. | |||
| Same for sub/mul/div/floor_div/pow/mod/atan2/equal/not_equal/less/less_equal/greater/greater_equal/maximum/minimum. | |||
| @@ -193,7 +194,8 @@ def log1p(x): | |||
| def sqrt(x: Tensor) -> Tensor: | |||
| """Element-wise `sqrt`. | |||
| """ | |||
| Element-wise `sqrt`. | |||
| Returns ``NaN`` for negative input values. | |||
| :param x: input tensor. | |||
| @@ -209,7 +211,7 @@ def sqrt(x: Tensor) -> Tensor: | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.sqrt(x) | |||
| print(out.numpy()) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -239,7 +241,7 @@ def square(x: Tensor) -> Tensor: | |||
| data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.square(data) | |||
| print(out.numpy()) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -281,7 +283,8 @@ def minimum(x, y): | |||
| def cos(x): | |||
| """Element-wise `cosine`. | |||
| """ | |||
| Element-wise `cosine`. | |||
| :param x: input tensor. | |||
| :return: computed tensor. | |||
| @@ -296,7 +299,7 @@ def cos(x): | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
| out = F.cos(x) | |||
| print(out.numpy()) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -374,7 +377,8 @@ def atanh(x): | |||
| def left_shift(x, y): | |||
| """Element-wise `bitwise binary: x << y`. | |||
| """ | |||
| Element-wise `bitwise binary: x << y`. | |||
| :param x: input tensor, should be int. | |||
| :param y: how many bits to be left-shifted. | |||
| @@ -435,7 +439,8 @@ def logical_xor(x, y): | |||
| def equal(x, y): | |||
| """Element-wise `(x == y)`. | |||
| """ | |||
| Element-wise `(x == y)`. | |||
| :param x: input tensor 1. | |||
| :param y: input tensor 2. | |||
| @@ -494,7 +499,8 @@ def greater_equal(x, y): | |||
| def hswish(x): | |||
| """Element-wise `x * relu6(x + 3) / 6`. | |||
| """ | |||
| Element-wise `x * relu6(x + 3) / 6`. | |||
| :param x: input tensor. | |||
| :return: computed tensor. | |||
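The formula in the summary line is easy to check against a small numpy sketch:

.. code-block:: python

    import numpy as np

    def hswish_sketch(x: np.ndarray) -> np.ndarray:
        # x * relu6(x + 3) / 6, where relu6(t) = clip(t, 0, 6)
        return x * np.clip(x + 3.0, 0.0, 6.0) / 6.0

    # hswish_sketch(np.arange(5, dtype=np.float32)) -> [0., 0.6667, 1.6667, 3., 4.]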
| @@ -509,7 +515,7 @@ def hswish(x): | |||
| x = tensor(np.arange(5).astype(np.float32)) | |||
| out = F.hswish(x) | |||
| print(out.numpy()) | |||
| print(out.numpy().round(decimals=4)) | |||
| .. testoutput:: | |||
| @@ -540,7 +546,8 @@ def sigmoid(x): | |||
| def clip(x: Tensor, lower=None, upper=None) -> Tensor: | |||
| r"""Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns | |||
| r""" | |||
| Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns | |||
| a resulting tensor: | |||
| .. math:: | |||
| @@ -24,7 +24,8 @@ __all__ = [ | |||
| def l1_loss(pred: Tensor, label: Tensor) -> Tensor: | |||
| r"""Calculates the mean absolute error (MAE) between | |||
| r""" | |||
| Calculates the mean absolute error (MAE) between | |||
| each element in the pred :math:`x` and label :math:`y`. | |||
| The mean absolute error can be described as: | |||
| @@ -70,7 +71,8 @@ def l1_loss(pred: Tensor, label: Tensor) -> Tensor: | |||
| def square_loss(pred: Tensor, label: Tensor) -> Tensor: | |||
| r"""Calculates the mean squared error (squared L2 norm) between | |||
| r""" | |||
| Calculates the mean squared error (squared L2 norm) between | |||
| each element in the pred :math:`x` and label :math:`y`. | |||
| The mean squared error can be described as: | |||
| @@ -127,7 +129,8 @@ def cross_entropy( | |||
| with_logits: bool = True, | |||
| label_smooth: float = 0, | |||
| ) -> Tensor: | |||
| r"""Computes the multi-class cross entropy loss (using logits by default). | |||
| r""" | |||
| Computes the multi-class cross entropy loss (using logits by default). | |||
| By default (``with_logits`` is True), ``pred`` is assumed to be logits, | |||
| class probabilities are given by softmax. | |||
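For reference, an illustrative numpy sketch of the with-logits path without label smoothing (not the library code):

.. code-block:: python

    import numpy as np

    def cross_entropy_sketch(logits: np.ndarray, label: np.ndarray) -> float:
        # logits: (N, C) raw scores; label: (N,) integer class indices
        shifted = logits - logits.max(axis=1, keepdims=True)    # numerical stability
        log_prob = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
        return float(-log_prob[np.arange(label.shape[0]), label].mean())

    # cross_entropy_sketch(np.zeros((2, 2), np.float32), np.ones(2, np.int64))  # ~0.6931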
| @@ -161,7 +164,7 @@ def cross_entropy( | |||
| pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape)) | |||
| label = tensor(np.ones(label_shape, dtype=np.int32)) | |||
| loss = F.nn.cross_entropy(pred, label) | |||
| print(loss.numpy()) | |||
| print(loss.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -195,7 +198,8 @@ def cross_entropy( | |||
| def binary_cross_entropy( | |||
| pred: Tensor, label: Tensor, with_logits: bool = True | |||
| ) -> Tensor: | |||
| r"""Computes the binary cross entropy loss (using logits by default). | |||
| r""" | |||
| Computes the binary cross entropy loss (using logits by default). | |||
| By default (``with_logits`` is True), ``pred`` is assumed to be logits, | |||
| class probabilities are given by sigmoid. | |||
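And the corresponding illustrative sketch for the binary, with-logits case:

.. code-block:: python

    import numpy as np

    def binary_cross_entropy_sketch(logits: np.ndarray, label: np.ndarray) -> float:
        prob = 1.0 / (1.0 + np.exp(-logits))                    # sigmoid
        loss = -(label * np.log(prob) + (1.0 - label) * np.log(1.0 - prob))
        return float(loss.mean())

    # binary_cross_entropy_sketch(np.zeros((1, 2)), np.ones((1, 2)))  # ~0.6931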
| @@ -216,7 +220,7 @@ def binary_cross_entropy( | |||
| pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2)) | |||
| label = tensor(np.ones((1, 2), dtype=np.float32)) | |||
| loss = F.nn.binary_cross_entropy(pred, label) | |||
| print(loss.numpy()) | |||
| print(loss.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -233,7 +237,8 @@ def binary_cross_entropy( | |||
| def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor: | |||
| r"""Caculates the hinge loss which is often used in SVM. | |||
| r""" | |||
| Calculates the hinge loss, which is often used in SVM. | |||
| The hinge loss can be described as: | |||
| @@ -43,7 +43,8 @@ __all__ = [ | |||
| def isnan(inp: Tensor) -> Tensor: | |||
| r"""Returns a new tensor representing if each element is ``NaN`` or not. | |||
| r""" | |||
| Returns a new tensor representing if each element is ``NaN`` or not. | |||
| :param inp: input tensor. | |||
| :return: result tensor. | |||
| @@ -69,7 +70,8 @@ def isnan(inp: Tensor) -> Tensor: | |||
| def isinf(inp: Tensor) -> Tensor: | |||
| r"""Returns a new tensor representing if each element is ``Inf`` or not. | |||
| r""" | |||
| Returns a new tensor representing if each element is ``Inf`` or not. | |||
| :param inp: input tensor. | |||
| :return: result tensor. | |||
| @@ -95,7 +97,8 @@ def isinf(inp: Tensor) -> Tensor: | |||
| def sign(inp: Tensor): | |||
| r"""Returns a new tensor representing the sign of each element in input tensor. | |||
| r""" | |||
| Returns a new tensor representing the sign of each element in input tensor. | |||
| :param inp: input tensor. | |||
| :return: the sign of input tensor. | |||
| @@ -125,7 +128,8 @@ def sum( | |||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||
| keepdims: bool = False, | |||
| ) -> Tensor: | |||
| r"""Returns the sum of input tensor along given axis. If axis is a list of dimensions, | |||
| r""" | |||
| Returns the sum of input tensor along given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| :param inp: input tensor. | |||
| @@ -160,7 +164,8 @@ def sum( | |||
| def prod( | |||
| inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None, keepdims=False | |||
| ) -> Tensor: | |||
| r"""Returns the product of input tensor along given axis. If axis is a list of dimensions, | |||
| r""" | |||
| Returns the product of input tensor along given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| :param inp: input tensor. | |||
| @@ -195,7 +200,8 @@ def mean( | |||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||
| keepdims: bool = False, | |||
| ) -> Tensor: | |||
| """Returns the mean value of input tensor along | |||
| """ | |||
| Returns the mean value of input tensor along | |||
| given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| @@ -231,7 +237,8 @@ def var( | |||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||
| keepdims: bool = False, | |||
| ) -> Tensor: | |||
| """Returns the variance value of input tensor along | |||
| """ | |||
| Returns the variance value of input tensor along | |||
| given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| @@ -250,7 +257,7 @@ def var( | |||
| data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3)) | |||
| out = F.var(data) | |||
| print(out.numpy()) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -271,7 +278,8 @@ def std( | |||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||
| keepdims: bool = False, | |||
| ) -> Tensor: | |||
| """Returns the standard deviation of input tensor along | |||
| """ | |||
| Returns the standard deviation of input tensor along | |||
| given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| @@ -290,7 +298,7 @@ def std( | |||
| data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3)) | |||
| out = F.std(data, axis=1) | |||
| print(out.numpy()) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -306,7 +314,8 @@ def min( | |||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||
| keepdims: bool = False, | |||
| ) -> Tensor: | |||
| r"""Returns the min value of input tensor along | |||
| r""" | |||
| Returns the min value of input tensor along | |||
| given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| @@ -342,7 +351,8 @@ def max( | |||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||
| keepdims: bool = False, | |||
| ) -> Tensor: | |||
| r"""Returns the max value of the input tensor along | |||
| r""" | |||
| Returns the max value of the input tensor along | |||
| given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| @@ -376,7 +386,8 @@ def max( | |||
| def norm( | |||
| inp: Tensor, ord: float = None, axis: int = None, keepdims=False, | |||
| ): | |||
| """Calculates ``p``-norm of input tensor along | |||
| """ | |||
| Calculates ``p``-norm of input tensor along | |||
| given axis. | |||
| :param inp: input tensor. | |||
| @@ -395,7 +406,7 @@ def norm( | |||
| x = tensor(np.arange(-3, 3, dtype=np.float32)) | |||
| out = F.norm(x) | |||
| print(out.numpy()) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -423,7 +434,8 @@ def argmin( | |||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||
| keepdims: bool = False, | |||
| ) -> Tensor: | |||
| r"""Returns the indices of the minimum values along | |||
| r""" | |||
| Returns the indices of the minimum values along | |||
| given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| @@ -481,7 +493,8 @@ def argmax( | |||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||
| keepdims: bool = False, | |||
| ) -> Tensor: | |||
| r"""Returns the indices of the maximum values along | |||
| r""" | |||
| Returns the indices of the maximum values along | |||
| given axis. If axis is a list of dimensions, | |||
| reduce over all of them. | |||
| @@ -537,7 +550,8 @@ def argmax( | |||
| def normalize( | |||
| inp: Tensor, ord: float = None, axis: int = None, eps: float = 1e-12, | |||
| ) -> Tensor: | |||
| r"""Performs :math:`L_p` normalization of input tensor along | |||
| r""" | |||
| Performs :math:`L_p` normalization of input tensor along | |||
| given axis. | |||
| For a tensor of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each | |||
| @@ -559,7 +573,8 @@ def normalize( | |||
| def argsort(inp: Tensor, descending: bool = False) -> Tensor: | |||
| r"""Returns the indices that would sort the input tensor. | |||
| r""" | |||
| Returns the indices that would sort the input tensor. | |||
| :param inp: input tensor. If it's 2d, the result would be an array of indices showing how to sort each row of the input tensor. | |||
| :param descending: sort in descending order, where the largest comes first. Default: False | |||
| @@ -600,7 +615,8 @@ def argsort(inp: Tensor, descending: bool = False) -> Tensor: | |||
| def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]: | |||
| r"""Returns sorted tensor and the indices would sort the input tensor. | |||
| r""" | |||
| Returns the sorted tensor and the indices that would sort the input tensor. | |||
| :param inp: input tensor. If it's 2d, the result would be sorted by row. | |||
| :param descending: sort in descending order, where the largest comes first. Default: False | |||
| @@ -647,7 +663,8 @@ def topk( | |||
| kth_only: bool = False, | |||
| no_sort: bool = False, | |||
| ) -> Tuple[Tensor, Tensor]: | |||
| r"""Selects the ``Top-K``(by default) smallest elements of 2d matrix by row. | |||
| r""" | |||
| Selects the ``Top-K`` (by default) smallest elements of a 2d matrix by row. | |||
| :param inp: input tensor. If input tensor is 2d, each row will be sorted. | |||
| :param k: number of elements needed. | |||
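For the default mode (the k smallest entries per row, in ascending order), the expected behaviour can be sketched in numpy as:

.. code-block:: python

    import numpy as np

    def topk_smallest_sketch(x: np.ndarray, k: int):
        # per-row k smallest values and their indices
        idx = np.argsort(x, axis=1)[:, :k]
        vals = np.take_along_axis(x, idx, axis=1)
        return vals, idx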
| @@ -75,7 +75,8 @@ def expand_hw(x): | |||
| def linear(inp: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor: | |||
| """Applies a linear transformation to the input tensor. | |||
| """ | |||
| Applies a linear transformation to the input tensor. | |||
| Refer to :class:`~.module.linear.Linear` for more information. | |||
| @@ -101,7 +102,8 @@ def conv2d( | |||
| conv_mode="CROSS_CORRELATION", | |||
| compute_mode="DEFAULT", | |||
| ) -> Tensor: | |||
| """2D convolution operation. | |||
| """ | |||
| 2D convolution operation. | |||
| Refer to :class:`~.Conv2d` for more information. | |||
| @@ -166,7 +168,8 @@ def conv_transpose2d( | |||
| conv_mode="CROSS_CORRELATION", | |||
| compute_mode="DEFAULT", | |||
| ) -> Tensor: | |||
| """2D transposed convolution operation. | |||
| """ | |||
| 2D transposed convolution operation. | |||
| Refer to :class:`~.ConvTranspose2d` for more information. | |||
| @@ -227,7 +230,8 @@ def local_conv2d( | |||
| dilation: Union[int, Tuple[int, int]] = 1, | |||
| conv_mode="CROSS_CORRELATION", | |||
| ): | |||
| """Applies spatial 2D convolution over an groupped channeled image with untied kernels. | |||
| """ | |||
| Applies spatial 2D convolution over a grouped, channeled image with untied kernels. | |||
| """ | |||
| assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION" | |||
| @@ -261,7 +265,8 @@ def max_pool2d( | |||
| stride: Optional[Union[int, Tuple[int, int]]] = None, | |||
| padding: Union[int, Tuple[int, int]] = 0, | |||
| ) -> Tensor: | |||
| """Applies a 2D max pooling over an input tensor. | |||
| """ | |||
| Applies a 2D max pooling over an input tensor. | |||
| Refer to :class:`~.MaxPool2d` for more information. | |||
| @@ -298,7 +303,8 @@ def avg_pool2d( | |||
| padding: Union[int, Tuple[int, int]] = 0, | |||
| mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING", | |||
| ) -> Tensor: | |||
| """Applies 2D average pooling over an input tensor. | |||
| """ | |||
| Applies 2D average pooling over an input tensor. | |||
| Refer to :class:`~.AvgPool2d` for more information. | |||
| @@ -332,7 +338,8 @@ def avg_pool2d( | |||
| def adaptive_max_pool2d( | |||
| inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | |||
| ) -> Tensor: | |||
| """Applies a 2D max adaptive pooling over an input. | |||
| """ | |||
| Applies a 2D max adaptive pooling over an input. | |||
| Refer to :class:`~.MaxAdaptivePool2d` for more information. | |||
| @@ -353,7 +360,8 @@ def adaptive_max_pool2d( | |||
| def adaptive_avg_pool2d( | |||
| inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | |||
| ) -> Tensor: | |||
| """Applies a 2D average adaptive pooling over an input. | |||
| """ | |||
| Applies a 2D average adaptive pooling over an input. | |||
| Refer to :class:`~.AvgAdaptivePool2d` for more information. | |||
| @@ -390,7 +398,8 @@ def leaky_relu(inp: Tensor, negative_slope: float = 0.01) -> Tensor: | |||
| def softplus(inp: Tensor) -> Tensor: | |||
| r"""Applies the element-wise function: | |||
| r""" | |||
| Applies the element-wise function: | |||
| .. math:: | |||
| \text{softplus}(x) = \log(1 + \exp(x)) | |||
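For reference, a numerically stable way to evaluate this expression (mathematically identical to the formula above):

.. code-block:: python

    import numpy as np

    def softplus_sketch(x: np.ndarray) -> np.ndarray:
        # log(1 + exp(x)) == max(x, 0) + log1p(exp(-|x|)), which avoids overflow for large x
        return np.maximum(x, 0.0) + np.log1p(np.exp(-np.abs(x)))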
| @@ -416,7 +425,7 @@ def softplus(inp: Tensor) -> Tensor: | |||
| x = tensor(np.arange(-3, 3, dtype=np.float32)) | |||
| y = F.softplus(x) | |||
| print(y.numpy()) | |||
| print(y.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -429,7 +438,8 @@ def softplus(inp: Tensor) -> Tensor: | |||
| def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||
| r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional | |||
| r""" | |||
| Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional | |||
| input Tensor. The LogSoftmax formulation can be simplified as: | |||
| .. math:: | |||
| @@ -456,7 +466,7 @@ def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||
| x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | |||
| y = F.logsoftmax(x, axis=1) | |||
| print(y.numpy()) | |||
| print(y.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -470,7 +480,8 @@ def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||
| def logsigmoid(inp: Tensor) -> Tensor: | |||
| r"""Applies the element-wise function: | |||
| r""" | |||
| Applies the element-wise function: | |||
| .. math:: | |||
| \text{logsigmoid}(x) = \log(\frac{ 1 }{ 1 + \exp(-x)}) | |||
| @@ -490,13 +501,13 @@ def logsigmoid(inp: Tensor) -> Tensor: | |||
| x = tensor(np.arange(-5, 5, dtype=np.float32)) | |||
| y = F.logsigmoid(x) | |||
| print(y.numpy()) | |||
| print(y.numpy().round(decimals=4)) | |||
| Outputs: | |||
| .. testoutput:: | |||
| [-5.0067 -4.0181 -3.0486 -2.1269 -1.3133 -0.6931 -0.3133 -0.1269 -0.0486 | |||
| [-5.0067 -4.0182 -3.0486 -2.1269 -1.3133 -0.6931 -0.3133 -0.1269 -0.0486 | |||
| -0.0181] | |||
| """ | |||
| @@ -539,7 +550,7 @@ def logsumexp( | |||
| x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | |||
| y = F.logsumexp(x, axis=1, keepdims=False) | |||
| print(y.numpy()) | |||
| print(y.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -589,7 +600,7 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor: | |||
| x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | |||
| out = F.softmax(x) | |||
| print(out.numpy()) | |||
| print(out.numpy().round(decimals=4)) | |||
| Outputs: | |||
| @@ -619,7 +630,8 @@ def batch_norm( | |||
| eps: float = 1e-5, | |||
| inplace: bool = True | |||
| ): | |||
| r"""Applies batch normalization to the input. | |||
| r""" | |||
| Applies batch normalization to the input. | |||
| Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | |||
| @@ -734,7 +746,8 @@ def sync_batch_norm( | |||
| eps_mode="ADDITIVE", | |||
| group=WORLD, | |||
| ) -> Tensor: | |||
| r"""Applies synchronized batch normalization to the input. | |||
| r""" | |||
| Applies synchronized batch normalization to the input. | |||
| Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | |||
| @@ -835,7 +848,8 @@ def sync_batch_norm( | |||
| def one_hot(inp: Tensor, num_classes: int) -> Tensor: | |||
| r"""Performs one-hot encoding for the input tensor. | |||
| r""" | |||
| Performs one-hot encoding for the input tensor. | |||
| :param inp: input tensor. | |||
| :param num_classes: number of classes; it determines the last dimension of the output tensor. | |||
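An illustrative numpy sketch of the encoding:

.. code-block:: python

    import numpy as np

    def one_hot_sketch(inp: np.ndarray, num_classes: int) -> np.ndarray:
        # append a length-num_classes axis and place a 1 at each index taken from `inp`
        out = np.zeros(inp.shape + (num_classes,), dtype=np.int32)
        np.put_along_axis(out, inp[..., None], 1, axis=-1)
        return out

    # one_hot_sketch(np.array([1, 0, 2]), 3) -> [[0, 1, 0], [1, 0, 0], [0, 0, 1]]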
| @@ -878,7 +892,8 @@ def warp_perspective( | |||
| border_val: float = 0.0, | |||
| interp_mode: str = "LINEAR", | |||
| ): | |||
| r"""Applies perspective transformation to batched 2D images. | |||
| r""" | |||
| Applies perspective transformation to batched 2D images. | |||
| The input images are transformed to the output images by the transformation matrix: | |||
| @@ -1094,13 +1109,13 @@ def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor: | |||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2,3)) | |||
| _, y, _ = F.svd(x) | |||
| print(y.numpy()) | |||
| print(y.numpy().round(decimals=3)) | |||
| Outputs: | |||
| .. testoutput:: | |||
| [7.3485 1. ] | |||
| [7.348 1. ] | |||
| """ | |||
| op = builtin.SVD(full_matrices=full_matrices, compute_uv=compute_uv) | |||
| @@ -1115,7 +1130,8 @@ def interpolate( | |||
| mode: str = "BILINEAR", | |||
| align_corners: bool = None, | |||
| ) -> Tensor: | |||
| r"""Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||
| r""" | |||
| Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` cannot coexist with ``scale_factor``. | |||
| :param inp: input tensor. | |||
| :param size: size of the output tensor. Default: None | |||
| @@ -1257,7 +1273,8 @@ def interpolate( | |||
| def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: | |||
| """Returns a new tensor where each of the elements are randomly set to zero | |||
| """ | |||
| Returns a new tensor where each of the elements is randomly set to zero | |||
| with probability P = ``drop_prob``. Optionally rescale the output tensor if ``training`` is True. | |||
| :param inp: input tensor. | |||
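The rescaling mentioned above is commonly implemented as "inverted dropout"; a numpy sketch of that convention:

.. code-block:: python

    import numpy as np

    def dropout_sketch(x: np.ndarray, drop_prob: float, training: bool = True) -> np.ndarray:
        if not training or drop_prob == 0.0:
            return x
        keep = np.random.rand(*x.shape) >= drop_prob
        # rescale by 1 / (1 - drop_prob) so the expected value of each element is preserved
        return x * keep / (1.0 - drop_prob)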
| @@ -1302,7 +1319,8 @@ def embedding( | |||
| max_norm: Optional[float] = None, | |||
| norm_type: Optional[float] = None, | |||
| ): | |||
| """Applies lookup table for embedding. | |||
| """ | |||
| Applies lookup table for embedding. | |||
| :param inp: tensor with indices. | |||
| :param weight: learnable weights which embeds from. | |||
| @@ -1329,7 +1347,8 @@ def roi_pooling( | |||
| mode: str = "max", | |||
| scale: float = 1.0, | |||
| ) -> Tensor: | |||
| """Applies roi pooling on input feature. | |||
| """ | |||
| Applies roi pooling on input feature. | |||
| :param inp: tensor that represents the input feature, `(N, C, H, W)` images. | |||
| :param rois: `(K, 5)` boxes. First column is the index into N. The other 4 columns are xyxy. | |||
| @@ -1350,7 +1369,7 @@ def roi_pooling( | |||
| inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
| rois = tensor(np.random.random((4, 5))) | |||
| y = F.nn.roi_pooling(inp, rois, (2, 2)) | |||
| print(y.numpy()[0]) | |||
| print(y.numpy()[0].round(decimals=4)) | |||
| Outputs: | |||
| @@ -1382,7 +1401,8 @@ def roi_align( | |||
| sample_points: Union[int, tuple, list] = 2, | |||
| aligned: bool = True, | |||
| ) -> Tensor: | |||
| """Applies roi align on input feature. | |||
| """ | |||
| Applies roi align on input feature. | |||
| :param inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | |||
| :param rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``. | |||
| @@ -1407,7 +1427,7 @@ def roi_align( | |||
| inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
| rois = tensor(np.random.random((4, 5))) | |||
| y = F.nn.roi_align(inp, rois, (2, 2)) | |||
| print(y.numpy()[0]) | |||
| print(y.numpy()[0].round(decimals=4)) | |||
| Outputs: | |||
| @@ -1444,7 +1464,8 @@ def roi_align( | |||
| def indexing_one_hot( | |||
| src: Tensor, index: Tensor, axis: int = 1, keepdims=False | |||
| ) -> Tensor: | |||
| r"""One-hot indexing for some axes. | |||
| r""" | |||
| One-hot indexing for some axes. | |||
| :param src: input tensor. | |||
| :param index: index tensor. | |||
| @@ -28,7 +28,8 @@ def conv_bias_activation( | |||
| conv_mode="CROSS_CORRELATION", | |||
| compute_mode="DEFAULT", | |||
| ) -> Tensor: | |||
| """Convolution bias with activation operation, only for inference. | |||
| """ | |||
| Convolution with bias and activation operation, only for inference. | |||
| :param inp: feature map of the convolution operation. | |||
| :param weight: convolution kernel. | |||
| @@ -58,7 +58,8 @@ __all__ = [ | |||
| def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor: | |||
| """Returns a 2D tensor with ones on the diagonal and zeros elsewhere. | |||
| """ | |||
| Returns a 2D tensor with ones on the diagonal and zeros elsewhere. | |||
| :param shape: expected shape of output tensor. | |||
| :param dtype: data type. Default: None | |||
| @@ -100,7 +101,8 @@ def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Ten | |||
| def full(shape, value, dtype="float32", device=None): | |||
| """Returns a tensor with given shape and value. | |||
| """ | |||
| Returns a tensor with given shape and value. | |||
| """ | |||
| if isinstance(shape, int): | |||
| shape = (shape,) | |||
| @@ -113,7 +115,8 @@ def full(shape, value, dtype="float32", device=None): | |||
| def ones(shape, dtype="float32", device=None): | |||
| """Returns a ones tensor with given shape. | |||
| """ | |||
| Returns a ones tensor with given shape. | |||
| :param shape: shape of the output tensor. | |||
| :return: output tensor of ones. | |||
| @@ -139,13 +142,15 @@ def ones(shape, dtype="float32", device=None): | |||
| def zeros(shape, dtype="float32", device=None): | |||
| """Returns a zero tensor with given shape. | |||
| """ | |||
| Returns a zero tensor with given shape. | |||
| """ | |||
| return full(shape, 0.0, dtype=dtype, device=device) | |||
| def zeros_like(inp: Tensor) -> Tensor: | |||
| """Returns a zero tensor with the same shape as input tensor. | |||
| """ | |||
| Returns a zero tensor with the same shape as input tensor. | |||
| :param inp: input tensor. | |||
| :return: output zero tensor. | |||
| @@ -174,13 +179,15 @@ def zeros_like(inp: Tensor) -> Tensor: | |||
| def ones_like(inp: Tensor) -> Tensor: | |||
| """Returns a ones tensor with the same shape as input tensor. | |||
| """ | |||
| Returns a ones tensor with the same shape as input tensor. | |||
| """ | |||
| return ones(inp.shape, dtype=inp.dtype, device=inp.device) | |||
| def full_like(inp: Tensor, value: Union[int, float]) -> Tensor: | |||
| """Returns a tensor filled with given value with the same shape as input tensor. | |||
| """ | |||
| Returns a tensor filled with given value with the same shape as input tensor. | |||
| """ | |||
| return full(inp.shape, value, dtype=inp.dtype, device=inp.device) | |||
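A short sketch exercising the tensor-creation helpers covered in these hunks (``eye``, ``full``, ``ones``, ``zeros_like``, ``full_like``), assuming they are exported through ``megengine.functional``:

.. code-block:: python

    import megengine.functional as F

    i = F.eye(3)                 # 3x3 identity matrix
    a = F.full((2, 3), 7.0)      # 2x3 tensor filled with 7.0
    b = F.ones((2, 3))           # 2x3 tensor of ones
    c = F.zeros_like(a)          # zeros with the same shape and dtype as a
    d = F.full_like(b, 0.5)      # 0.5 everywhere, shaped like b
    print(i.numpy().shape, a.numpy().shape, c.numpy().shape, d.numpy().shape)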
| @@ -274,7 +281,8 @@ def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor: | |||
| def stack(inps, axis=0, device=None): | |||
| """Concats a sequence of tensors along a new axis. | |||
| """ | |||
| Concats a sequence of tensors along a new axis. | |||
| The input tensors must have the same shape. | |||
| :param inps: input tensors. | |||
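For example, stacking two tensors of identical shape along a new leading axis (a sketch under the same functional API):

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x1 = tensor(np.arange(6, dtype="float32").reshape(2, 3))
    x2 = tensor(np.ones((2, 3), dtype="float32"))
    out = F.stack([x1, x2], axis=0)   # a new axis is inserted at position 0
    print(out.numpy().shape)          # (2, 2, 3)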
| @@ -316,7 +324,8 @@ def stack(inps, axis=0, device=None): | |||
| def split(inp, nsplits_or_sections, axis=0): | |||
| """Splits the input tensor into several smaller tensors. | |||
| """ | |||
| Splits the input tensor into several smaller tensors. | |||
| When nsplits_or_sections is int, the last tensor may be smaller than others. | |||
| :param inp: input tensor. | |||
| @@ -334,7 +343,7 @@ def split(inp, nsplits_or_sections, axis=0): | |||
| x = tensor(np.random.random((2,3,4,5)), dtype=np.float32) | |||
| out = F.split(x, 2, axis=3) | |||
| print(out[0].shape, out[1].shape) | |||
| print(out[0].numpy().shape, out[1].numpy().shape) | |||
| Outputs: | |||
| @@ -400,7 +409,8 @@ def _get_idx(index, axis): | |||
| def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | |||
| # TODO: rewrite doc | |||
| r"""Gathers data from input tensor on axis using index. | |||
| r""" | |||
| Gathers data from input tensor on axis using index. | |||
| For a 3-D tensor, the output is specified by:: | |||
| @@ -472,7 +482,8 @@ def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | |||
| def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | |||
| # TODO: rewrite doc | |||
| r"""Writes all values from the tensor source into input tensor | |||
| r""" | |||
| Writes all values from the tensor source into input tensor | |||
| at the indices specified in the index tensor. | |||
| For each value in source, its output index is specified by its index | |||
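A small sketch of the ``gather``/``scatter`` pair along ``axis=1``, assuming the signatures shown in this diff (``gather(inp, axis, index)`` and ``scatter(inp, axis, index, source)``) and int32 index tensors:

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    inp = tensor(np.array([[1, 2], [3, 4]], dtype="float32"))
    index = tensor(np.array([[0, 1], [1, 0]], dtype="int32"))
    # gather with axis=1: out[i][j] = inp[i][index[i][j]]
    print(F.gather(inp, 1, index).numpy())   # [[1. 2.] [4. 3.]]

    dst = tensor(np.zeros((2, 4), dtype="float32"))
    src = tensor(np.array([[1, 2], [3, 4]], dtype="float32"))
    pos = tensor(np.array([[0, 2], [1, 3]], dtype="int32"))
    # scatter with axis=1: out[i][pos[i][j]] = src[i][j]
    print(F.scatter(dst, 1, pos, src).numpy())
    # [[1. 0. 2. 0.]
    #  [0. 3. 0. 4.]]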
| @@ -577,7 +588,8 @@ def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | |||
| def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor: | |||
| r"""Selects elements either from Tensor x or Tensor y, according to mask. | |||
| r""" | |||
| Selects elements either from Tensor x or Tensor y, according to mask. | |||
| .. math:: | |||
| @@ -764,7 +776,8 @@ AxisDesc = AxisAddRemove.AxisDesc | |||
| def flatten(inp: Tensor, start_axis: int = 0, end_axis: int = -1) -> Tensor: | |||
| r"""Reshapes the tensor by flattening the sub-tensor from dimension ``start_axis`` to dimension ``end_axis``. | |||
| r""" | |||
| Reshapes the tensor by flattening the sub-tensor from dimension ``start_axis`` to dimension ``end_axis``. | |||
| :param inp: input tensor. | |||
| :param start_axis: start dimension of the sub-tensor to be flattened. Default: 0 | |||
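For instance, flattening everything after the batch dimension (a sketch under the same functional API):

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.zeros((2, 3, 4, 5), dtype="float32"))
    y = F.flatten(x, start_axis=1)    # keep axis 0, flatten the remaining axes
    print(y.numpy().shape)            # (2, 60)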
| @@ -819,7 +832,7 @@ def expand_dims(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||
| x = tensor([1, 2]) | |||
| out = F.expand_dims(x, 0) | |||
| print(out.shape) | |||
| print(out.numpy().shape) | |||
| Outputs: | |||
| @@ -865,7 +878,7 @@ def squeeze(inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None) -> Te | |||
| x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1)) | |||
| out = F.squeeze(x, 3) | |||
| print(out.shape) | |||
| print(out.numpy().shape) | |||
| Outputs: | |||
| @@ -884,7 +897,8 @@ def linspace( | |||
| dtype="float32", | |||
| device: Optional[CompNode] = None, | |||
| ) -> Tensor: | |||
| r"""Returns equally spaced numbers over a specified interval. | |||
| r""" | |||
| Returns equally spaced numbers over a specified interval. | |||
| :param start: starting value of the sequence, should be a scalar. | |||
| :param stop: last value of the sequence, should be a scalar. | |||
| @@ -928,7 +942,8 @@ def arange( | |||
| dtype="float32", | |||
| device: Optional[CompNode] = None, | |||
| ) -> Tensor: | |||
| r"""Returns a tensor with values from start to stop with adjacent interval step. | |||
| r""" | |||
| Returns a tensor with values from start to stop with adjacent interval step. | |||
| :param start: starting value of the sequence, should be a scalar. | |||
| :param stop: ending value of the sequence, should be a scalar. | |||
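A brief sketch of both range constructors, assuming the signatures shown above:

.. code-block:: python

    import megengine.functional as F

    print(F.linspace(1, 9, 5).numpy())   # 5 evenly spaced values: [1. 3. 5. 7. 9.]
    print(F.arange(0, 5, 1).numpy())     # [0. 1. 2. 3. 4.] (stop is exclusive)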
| @@ -11,7 +11,8 @@ import functools | |||
| def get_ndtuple(value, *, n, allow_zero: bool = True): | |||
| r"""Converts possibly 1D tuple to n-dim tuple. | |||
| r""" | |||
| Converts possibly 1D tuple to n-dim tuple. | |||
| :param value: value will be filled in generated tuple. | |||
| :param n: how many elements will the tuple have. | |||
| @@ -43,7 +43,8 @@ PROTOCOLS = { | |||
| def _get_megengine_home() -> str: | |||
| """MGE_HOME setting complies with the XDG Base Directory Specification | |||
| """ | |||
| MGE_HOME setting complies with the XDG Base Directory Specification | |||
| """ | |||
| megengine_home = os.path.expanduser( | |||
| os.getenv( | |||
| @@ -94,7 +95,8 @@ def _init_hub( | |||
| commit: str = None, | |||
| protocol: str = DEFAULT_PROTOCOL, | |||
| ): | |||
| """Imports hubmodule like python import. | |||
| """ | |||
| Imports hubmodule like python import. | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| @@ -137,7 +139,8 @@ def list( | |||
| commit: str = None, | |||
| protocol: str = DEFAULT_PROTOCOL, | |||
| ) -> List[str]: | |||
| """Lists all entrypoints available in repo hubconf. | |||
| """ | |||
| Lists all entrypoints available in repo hubconf. | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| @@ -175,7 +178,8 @@ def load( | |||
| protocol: str = DEFAULT_PROTOCOL, | |||
| **kwargs | |||
| ) -> Any: | |||
| """Loads model from github or gitlab repo, with pretrained weights. | |||
| """ | |||
| Loads model from github or gitlab repo, with pretrained weights. | |||
| :param repo_info: | |||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
| @@ -215,7 +219,8 @@ def help( | |||
| commit: str = None, | |||
| protocol: str = DEFAULT_PROTOCOL, | |||
| ) -> str: | |||
| """This function returns docstring of entrypoint ``entry`` by following steps: | |||
| """ | |||
| This function returns docstring of entrypoint ``entry`` by following steps: | |||
| 1. Pull the repo code specified by git and repo_info. | |||
| 2. Load the entry defined in repo's hubconf.py | |||
| @@ -250,7 +255,8 @@ def help( | |||
| def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | |||
| """Loads MegEngine serialized object from the given URL. | |||
| """ | |||
| Loads MegEngine serialized object from the given URL. | |||
| If the object is already present in ``model_dir``, it's deserialized and | |||
| returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. | |||
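A sketch of the hub workflow built from the entry points above; the repo name ``"megengine/models"`` and the entry ``"resnet18"`` are illustrative and depend on what the target ``hubconf.py`` actually exports:

.. code-block:: python

    import megengine.hub as hub

    print(hub.list("megengine/models"))                # entrypoints declared in hubconf.py
    print(hub.help("megengine/models", "resnet18"))    # docstring of a single entrypoint
    net = hub.load("megengine/models", "resnet18", pretrained=True)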
| @@ -27,7 +27,8 @@ def load_module(name: str, path: str) -> types.ModuleType: | |||
| def check_module_exists(module: str) -> bool: | |||
| """Checks whether python module exists or not. | |||
| """ | |||
| Checks whether python module exists or not. | |||
| :param module: name of module. | |||
| """ | |||
| @@ -36,7 +37,8 @@ def check_module_exists(module: str) -> bool: | |||
| @contextmanager | |||
| def cd(target: str) -> Iterator[None]: | |||
| """Changes current directory to target. | |||
| """ | |||
| Changes current directory to target. | |||
| :param target: target directory. | |||
| """ | |||
| @@ -519,7 +519,8 @@ class trace: | |||
| optimize_for_inference=True, | |||
| **kwargs | |||
| ): | |||
| r"""Serializes trace to file system. | |||
| r""" | |||
| Serializes trace to file system. | |||
| :param file: output file, could be file object or filename. | |||
| :param arg_names: names of the input tensors in the traced function. | |||
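A hedged sketch of serializing a traced function; it assumes the ``trace`` decorator from ``megengine.jit`` supports ``capture_as_const`` and that the traced function has been executed at least once before ``dump`` is called:

.. code-block:: python

    import numpy as np
    import megengine as mge
    import megengine.module as M
    from megengine.jit import trace

    net = M.Linear(4, 2)

    @trace(symbolic=True, capture_as_const=True)
    def fwd(data):
        return net(data)

    fwd(mge.tensor(np.ones((8, 4), dtype="float32")))   # run once to record the graph
    fwd.dump("linear.mge", arg_names=["data"], optimize_for_inference=True)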
| @@ -17,7 +17,8 @@ _default_level = logging.getLevelName(_default_level_name.upper()) | |||
| def set_log_file(fout, mode="a"): | |||
| r"""Sets log output file. | |||
| r""" | |||
| Sets log output file. | |||
| :type fout: str or file-like | |||
| :param fout: file-like object that supports write and flush, or string for | |||
| @@ -38,37 +39,44 @@ class MegEngineLogFormatter(logging.Formatter): | |||
| max_lines = 256 | |||
| def _color_exc(self, msg): | |||
| r"""Sets the color of message as the execution type. | |||
| r""" | |||
| Sets the color of message as the execution type. | |||
| """ | |||
| return "\x1b[34m{}\x1b[0m".format(msg) | |||
| def _color_dbg(self, msg): | |||
| r"""Sets the color of message as the debugging type. | |||
| r""" | |||
| Sets the color of message as the debugging type. | |||
| """ | |||
| return "\x1b[36m{}\x1b[0m".format(msg) | |||
| def _color_warn(self, msg): | |||
| r"""Sets the color of message as the warning type. | |||
| r""" | |||
| Sets the color of message as the warning type. | |||
| """ | |||
| return "\x1b[1;31m{}\x1b[0m".format(msg) | |||
| def _color_err(self, msg): | |||
| r"""Sets the color of message as the error type. | |||
| r""" | |||
| Sets the color of message as the error type. | |||
| """ | |||
| return "\x1b[1;4;31m{}\x1b[0m".format(msg) | |||
| def _color_omitted(self, msg): | |||
| r"""Sets the color of message as the omitted type. | |||
| r""" | |||
| Sets the color of message as the omitted type. | |||
| """ | |||
| return "\x1b[35m{}\x1b[0m".format(msg) | |||
| def _color_normal(self, msg): | |||
| r"""Sets the color of message as the normal type. | |||
| r""" | |||
| Sets the color of message as the normal type. | |||
| """ | |||
| return msg | |||
| def _color_date(self, msg): | |||
| r"""Sets the color of message the same as date. | |||
| r""" | |||
| Sets the color of message the same as date. | |||
| """ | |||
| return "\x1b[32m{}\x1b[0m".format(msg) | |||
| @@ -142,7 +150,8 @@ class MegEngineLogFormatter(logging.Formatter): | |||
| def get_logger(name=None, formatter=MegEngineLogFormatter): | |||
| r"""Gets megengine logger with given name. | |||
| r""" | |||
| Gets megengine logger with given name. | |||
| """ | |||
| logger = logging.getLogger(name) | |||
| @@ -161,7 +170,8 @@ def get_logger(name=None, formatter=MegEngineLogFormatter): | |||
| def set_log_level(level, update_existing=True): | |||
| """Sets default logging level. | |||
| """ | |||
| Sets default logging level. | |||
| :type level: int e.g. logging.INFO | |||
| :param level: logging level given by python :mod:`logging` module | |||
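A short sketch combining the logging helpers from these hunks (``get_logger``, ``set_log_file``, ``set_log_level``), assuming they live under ``megengine.logger``:

.. code-block:: python

    import logging
    from megengine.logger import get_logger, set_log_file, set_log_level

    set_log_file("train.log")       # mirror log records into a file
    set_log_level(logging.DEBUG)    # applies to existing megengine loggers as well
    logger = get_logger(__name__)
    logger.debug("debug message is now visible")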
| @@ -198,7 +208,8 @@ try: | |||
| _imperative_rt_logger.set_log_level(_imperative_rt_logger.LogLevel.Debug) | |||
| def set_mgb_log_level(level): | |||
| r"""Sets megbrain log level | |||
| r""" | |||
| Sets megbrain log level | |||
| :type level: int e.g. logging.INFO | |||
| :param level: new log level | |||
| @@ -218,7 +229,8 @@ except ImportError as exc: | |||
| @contextlib.contextmanager | |||
| def replace_mgb_log_level(level): | |||
| r"""Replaces megbrain log level in a block and restore after exiting. | |||
| r""" | |||
| Replaces megbrain log level in a block and restore after exiting. | |||
| :type level: int e.g. logging.INFO | |||
| :param level: new log level | |||
| @@ -231,7 +243,8 @@ def replace_mgb_log_level(level): | |||
| def enable_debug_log(): | |||
| r"""Sets logging level to debug for all components. | |||
| r""" | |||
| Sets logging level to debug for all components. | |||
| """ | |||
| set_log_level(logging.DEBUG) | |||
| set_mgb_log_level(logging.DEBUG) | |||
| @@ -27,7 +27,8 @@ class _AdaptivePoolNd(Module): | |||
| class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
| r"""Applies a 2D max adaptive pooling over an input. | |||
| r""" | |||
| Applies a 2D max adaptive pooling over an input. | |||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||
| an output shape :math:`(OH, OW)`, this layer generates the output of | |||
| @@ -62,7 +63,7 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
| .. testoutput:: | |||
| [[[[5. 7.] | |||
| [[[[ 5. 7.] | |||
| [13. 15.]]]] | |||
| """ | |||
| @@ -72,7 +73,8 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
| class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||
| r"""Applies a 2D average pooling over an input. | |||
| r""" | |||
| Applies a 2D average pooling over an input. | |||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||
| an output shape :math:`(OH, OW)`, this layer generates the output of | |||
| @@ -105,7 +107,7 @@ class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||
| .. testoutput:: | |||
| [[[[2.5 4.5] | |||
| [[[[ 2.5 4.5] | |||
| [10.5 12.5]]]] | |||
| """ | |||
| @@ -87,7 +87,8 @@ class _ConvNd(Module): | |||
| class Conv2d(_ConvNd): | |||
| r"""Applies a 2D convolution over an input tensor. | |||
| r""" | |||
| Applies a 2D convolution over an input tensor. | |||
| For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`, | |||
| this layer generates an output of the size | |||
| @@ -145,7 +146,7 @@ class Conv2d(_ConvNd): | |||
| m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3) | |||
| inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4)) | |||
| oup = m(inp) | |||
| print(oup.shape) | |||
| print(oup.numpy().shape) | |||
| Outputs: | |||
| @@ -232,7 +233,8 @@ class Conv2d(_ConvNd): | |||
| class ConvTranspose2d(_ConvNd): | |||
| r"""Applies a 2D transposed convolution over an input tensor. | |||
| r""" | |||
| Applies a 2D transposed convolution over an input tensor. | |||
| This module is also known as a deconvolution or a fractionally-strided convolution. | |||
| :class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation | |||
| @@ -340,7 +342,8 @@ class ConvTranspose2d(_ConvNd): | |||
| class LocalConv2d(Conv2d): | |||
| r"""Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor. | |||
| r""" | |||
| Applies a spatial convolution with untied kernels over a grouped, channeled 4D input tensor. | |||
| It is also known as the locally connected layer. | |||
| :param in_channels: number of input channels. | |||
| @@ -11,7 +11,8 @@ from .module import Module | |||
| class Dropout(Module): | |||
| r"""Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||
| r""" | |||
| Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||
| Commonly used in large networks to prevent overfitting. | |||
| Note that we perform dropout only during training; we also rescale (multiply) the output tensor | |||
| by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. | |||
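For example (a sketch assuming ``M.Dropout`` takes ``drop_prob`` as its probability argument, as the formula above suggests):

.. code-block:: python

    import numpy as np
    import megengine as mge
    import megengine.module as M

    drop = M.Dropout(drop_prob=0.2)
    x = mge.tensor(np.ones((4, 10), dtype="float32"))
    y_train = drop(x)    # roughly 20% of entries zeroed, the rest scaled by 1 / (1 - 0.2)
    drop.eval()
    y_eval = drop(x)     # identity during inference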
| @@ -93,7 +93,7 @@ class Embedding(Module): | |||
| ) | |||
| self.reset_parameters() | |||
| else: | |||
| if initial_weight.shape != (num_embeddings, embedding_dim): | |||
| if initial_weight.numpy().shape != (num_embeddings, embedding_dim): | |||
| raise ValueError( | |||
| "The weight shape should match num_embeddings and embedding_dim" | |||
| ) | |||
| @@ -18,7 +18,8 @@ from ..tensor import Tensor | |||
| def fill_(tensor: Tensor, val: Union[float, int]) -> None: | |||
| """Fills the given ``tensor`` with value ``val``. | |||
| """ | |||
| Fills the given ``tensor`` with value ``val``. | |||
| :param tensor: tensor to be initialized. | |||
| :param val: value to be filled throughout the tensor. | |||
| @@ -27,7 +28,8 @@ def fill_(tensor: Tensor, val: Union[float, int]) -> None: | |||
| def zeros_(tensor: Tensor) -> None: | |||
| """Fills the given ``tensor`` with scalar value `0`. | |||
| """ | |||
| Fills the given ``tensor`` with scalar value `0`. | |||
| :param tensor: tensor to be initialized. | |||
| """ | |||
| @@ -35,7 +37,8 @@ def zeros_(tensor: Tensor) -> None: | |||
| def ones_(tensor: Tensor) -> None: | |||
| """Fills the given ``tensor`` with the scalar value `1`. | |||
| """ | |||
| Fills the given ``tensor`` with the scalar value `1`. | |||
| :param tensor: tensor to be initialized. | |||
| """ | |||
| @@ -43,7 +46,8 @@ def ones_(tensor: Tensor) -> None: | |||
| def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | |||
| r"""Fills the given ``tensor`` with random value sampled from uniform distribution | |||
| r""" | |||
| Fills the given ``tensor`` with random values sampled from the uniform distribution | |||
| :math:`\mathcal{U}(\text{a}, \text{b})`. | |||
| :param tensor: tensor to be initialized. | |||
| @@ -54,7 +58,8 @@ def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | |||
| def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||
| r"""Fills the given ``tensor`` with random value sampled from normal distribution | |||
| r""" | |||
| Fills the given ``tensor`` with random values sampled from the normal distribution | |||
| :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. | |||
| :param tensor: tensor to be initialized. | |||
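A sketch of applying the in-place initializers to a module's parameters; it assumes they are exposed under ``megengine.module.init``, as the surrounding hunks suggest:

.. code-block:: python

    import megengine.module as M
    from megengine.module import init

    fc = M.Linear(8, 4)
    init.normal_(fc.weight, mean=0.0, std=0.02)   # sample weights from N(0, 0.02^2)
    if fc.bias is not None:
        init.zeros_(fc.bias)                      # fill the bias with 0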
| @@ -67,7 +72,8 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||
| def calculate_gain( | |||
| nonlinearity: str, param: Optional[Union[int, float]] = None | |||
| ) -> float: | |||
| r"""Returns a recommended gain value (see the table below) for the given nonlinearity | |||
| r""" | |||
| Returns a recommended gain value (see the table below) for the given nonlinearity | |||
| function. | |||
| ================= ==================================================== | |||
| @@ -168,7 +174,8 @@ def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||
| def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
| r"""Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
| r""" | |||
| Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
| where | |||
| .. math:: | |||
| @@ -188,7 +195,8 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
| def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
| r"""Fills tensor with random values sampled from | |||
| r""" | |||
| Fills tensor with random values sampled from | |||
| :math:`\mathcal{N}(0, \text{std}^2)` where | |||
| .. math:: | |||
| @@ -209,7 +217,8 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
| def msra_uniform_( | |||
| tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | |||
| ) -> None: | |||
| r"""Fills tensor wilth random values sampled from | |||
| r""" | |||
| Fills tensor with random values sampled from | |||
| :math:`\mathcal{U}(-\text{bound}, \text{bound})` where | |||
| .. math:: | |||
| @@ -238,7 +247,8 @@ def msra_uniform_( | |||
| def msra_normal_( | |||
| tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | |||
| ) -> None: | |||
| r"""Fills tensor wilth random values sampled from | |||
| r""" | |||
| Fills tensor with random values sampled from | |||
| :math:`\mathcal{N}(0, \text{std}^2)` where | |||
| .. math:: | |||
| @@ -14,7 +14,8 @@ from .module import Module | |||
| class Linear(Module): | |||
| r"""Applies a linear transformation to the input. For instance, if input | |||
| r""" | |||
| Applies a linear transformation to the input. For instance, if input | |||
| is x, then output y is: | |||
| .. math:: | |||
| @@ -39,7 +40,7 @@ class Linear(Module): | |||
| m = M.Linear(in_features=3, out_features=1) | |||
| inp = mge.tensor(np.arange(0, 6).astype("float32").reshape(2, 3)) | |||
| oup = m(inp) | |||
| print(oup.shape) | |||
| print(oup.numpy().shape) | |||
| Outputs: | |||
| @@ -57,7 +57,8 @@ def _is_module(obj): | |||
| class Module(metaclass=ABCMeta): | |||
| """Base Module class. | |||
| """ | |||
| Base Module class. | |||
| """ | |||
| def __init__(self): | |||
| @@ -76,7 +77,8 @@ class Module(metaclass=ABCMeta): | |||
| pass | |||
| def register_forward_pre_hook(self, hook: Callable) -> HookHandler: | |||
| """Registers a hook to handle forward inputs. `hook` should be a function. | |||
| """ | |||
| Registers a hook to handle forward inputs. `hook` should be a function. | |||
| :param hook: a function that receives `module` and `inputs`, then returns | |||
| a modified `inputs` or `None`. | |||
| @@ -85,7 +87,8 @@ class Module(metaclass=ABCMeta): | |||
| return HookHandler(self._forward_pre_hooks, hook) | |||
| def register_forward_hook(self, hook: Callable) -> HookHandler: | |||
| """Registers a hook to handle forward results. `hook` should be a function that | |||
| """ | |||
| Registers a hook to handle forward results. `hook` should be a function that | |||
| receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. | |||
| This method returns a handler with a :meth:`~.HookHandler.remove` interface to delete the hook. | |||
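A sketch of registering and removing a forward hook, assuming the hook signature described above:

.. code-block:: python

    import numpy as np
    import megengine as mge
    import megengine.module as M

    net = M.Linear(3, 2)

    def log_output(module, inputs, outputs):
        # return a modified ``outputs`` here, or None to keep it unchanged
        print(type(module).__name__, outputs.numpy().shape)

    handler = net.register_forward_hook(log_output)
    net(mge.tensor(np.ones((4, 3), dtype="float32")))
    handler.remove()    # detach the hook once it is no longer needed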
| @@ -118,7 +121,8 @@ class Module(metaclass=ABCMeta): | |||
| predicate: Callable[[Any], bool] = lambda _: True, | |||
| seen: Optional[Set[int]] = None | |||
| ) -> Union[Iterable[Any], Iterable[Tuple[str, Any]]]: | |||
| """Scans the module object and returns an iterable for the :class:`~.Tensor` | |||
| """ | |||
| Scans the module object and returns an iterable for the :class:`~.Tensor` | |||
| and :class:`~.Module` attributes that agree with the ``predicate``. For multiple | |||
| calls of this function with same arguments, the order of objects within the | |||
| returned iterable is guaranteed to be identical, as long as all the involved | |||
| @@ -165,7 +169,8 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]: | |||
| r"""Returns an iterable for the :class:`~.Parameter` of the module. | |||
| r""" | |||
| Returns an iterable for the :class:`~.Parameter` of the module. | |||
| :param recursive: If ``True``, returns all :class:`~.Parameter` within this | |||
| module, else only returns :class:`~.Parameter` that are direct attributes | |||
| @@ -190,7 +195,8 @@ class Module(metaclass=ABCMeta): | |||
| def named_parameters( | |||
| self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | |||
| ) -> Iterable[Tuple[str, Parameter]]: | |||
| """Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||
| """ | |||
| Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||
| ``key`` is the dotted path from this module to the :class:`~.Parameter`. | |||
| :param prefix: prefix prepended to the keys. | |||
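For example, iterating over the dotted parameter names (the exact keys depend on the attribute paths inside the module):

.. code-block:: python

    import megengine.module as M

    net = M.Sequential(M.Linear(8, 4), M.Linear(4, 2))
    for name, param in net.named_parameters():
        print(name, param.numpy().shape)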
| @@ -219,7 +225,8 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]: | |||
| """Returns an iterable for the buffers of the module. | |||
| """ | |||
| Returns an iterable for the buffers of the module. | |||
| Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | |||
| @@ -234,7 +241,8 @@ class Module(metaclass=ABCMeta): | |||
| def named_buffers( | |||
| self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | |||
| ) -> Iterable[Tuple[str, Tensor]]: | |||
| """Returns an iterable for key buffer pairs of the module, where | |||
| """ | |||
| Returns an iterable for key buffer pairs of the module, where | |||
| ``key`` is the dotted path from this module to the buffer. | |||
| Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | |||
| @@ -253,7 +261,8 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def children(self, **kwargs) -> "Iterable[Module]": | |||
| """Returns an iterable for all the submodules that are direct attributes of this | |||
| """ | |||
| Returns an iterable for all the submodules that are direct attributes of this | |||
| module. | |||
| """ | |||
| yield from self._flatten( | |||
| @@ -261,7 +270,8 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def named_children(self, **kwargs) -> "Iterable[Tuple[str, Module]]": | |||
| """Returns an iterable of key-submodule pairs for all the submodules that are | |||
| """ | |||
| Returns an iterable of key-submodule pairs for all the submodules that are | |||
| direct attributes of this module, where 'key' is the attribute name of | |||
| submodules. | |||
| """ | |||
| @@ -270,7 +280,8 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def modules(self, **kwargs) -> "Iterable[Module]": | |||
| """Returns an iterable for all the modules within this module, including itself. | |||
| """ | |||
| Returns an iterable for all the modules within this module, including itself. | |||
| """ | |||
| if "with_parent" in kwargs and kwargs["with_parent"]: | |||
| yield self, None | |||
| @@ -281,7 +292,8 @@ class Module(metaclass=ABCMeta): | |||
| def named_modules( | |||
| self, prefix: Optional[str] = None, **kwargs | |||
| ) -> "Iterable[Tuple[str, Module]]": | |||
| """Returns an iterable of key-module pairs for all the modules within this | |||
| """ | |||
| Returns an iterable of key-module pairs for all the modules within this | |||
| module, including itself, where 'key' is the dotted path from this module to the | |||
| submodules. | |||
| @@ -296,7 +308,8 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def apply(self, fn: "Callable[[Module], Any]") -> None: | |||
| """Applies function ``fn`` to all the modules within this module, including | |||
| """ | |||
| Applies function ``fn`` to all the modules within this module, including | |||
| itself. | |||
| :param fn: the function to be applied on modules. | |||
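A sketch that uses ``apply`` to re-initialize every ``Linear`` submodule, assuming the ``megengine.module.init`` helpers discussed earlier:

.. code-block:: python

    import megengine.module as M
    from megengine.module import init

    net = M.Sequential(M.Linear(8, 4), M.Linear(4, 2))

    def reinit(m):
        if isinstance(m, M.Linear):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                init.zeros_(m.bias)

    net.apply(reinit)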
| @@ -306,14 +319,16 @@ class Module(metaclass=ABCMeta): | |||
| @deprecated(version="1.0") | |||
| def zero_grad(self) -> None: | |||
| """Sets all parameters' grads to zero | |||
| """ | |||
| Sets all parameters' grads to zero | |||
| """ | |||
| for param in self.parameters(): | |||
| if param.grad is not None: | |||
| param.grad.reset_zero() | |||
| def train(self, mode: bool = True, recursive: bool = True) -> None: | |||
| """Sets training mode of all the modules within this module (including itself) to | |||
| """ | |||
| Sets training mode of all the modules within this module (including itself) to | |||
| ``mode``. This effectively sets the ``training`` attributes of those modules | |||
| to ``mode``, but only has effect on certain modules (e.g. | |||
| :class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) | |||
| @@ -331,7 +346,8 @@ class Module(metaclass=ABCMeta): | |||
| self.apply(fn) | |||
| def eval(self) -> None: | |||
| """Sets training mode of all the modules within this module (including itself) to | |||
| """ | |||
| Sets training mode of all the modules within this module (including itself) to | |||
| ``False``. See :meth:`~.Module.train` for details. | |||
| """ | |||
| self.train(False) | |||
| @@ -351,7 +367,8 @@ class Module(metaclass=ABCMeta): | |||
| def replace_param( | |||
| self, params: dict, start_pos: int, seen: Optional[Set[int]] = None | |||
| ): | |||
| """Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||
| """ | |||
| Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||
| speed up multi-machine training. | |||
| """ | |||
| offset = 0 | |||
| @@ -377,7 +394,8 @@ class Module(metaclass=ABCMeta): | |||
| return offset | |||
| def state_dict(self, rst=None, prefix="", keep_var=False): | |||
| r"""Returns a dictionary containing whole states of the module. | |||
| r""" | |||
| Returns a dictionary containing whole states of the module. | |||
| """ | |||
| def is_state(obj): | |||
| @@ -407,7 +425,8 @@ class Module(metaclass=ABCMeta): | |||
| state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], | |||
| strict=True, | |||
| ): | |||
| r"""Loads a given dictionary created by :func:`state_dict` into this module. | |||
| r""" | |||
| Loads a given dictionary created by :func:`state_dict` into this module. | |||
| If ``strict`` is ``True``, the keys of :func:`state_dict` must exactly match the keys | |||
| returned by :func:`state_dict`. | |||
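A round-trip sketch of saving and restoring module state with ``megengine.save``/``megengine.load``:

.. code-block:: python

    import megengine as mge
    import megengine.module as M

    net = M.Sequential(M.Linear(8, 4), M.Linear(4, 2))
    mge.save(net.state_dict(), "checkpoint.pkl")

    restored = M.Sequential(M.Linear(8, 4), M.Linear(4, 2))
    restored.load_state_dict(mge.load("checkpoint.pkl"), strict=True)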
| @@ -485,7 +504,8 @@ class Module(metaclass=ABCMeta): | |||
| ) | |||
| def _load_state_dict_with_closure(self, closure): | |||
| """Advance state_dict load through callable ``closure`` whose signature is | |||
| """ | |||
| Advances state_dict loading through a callable ``closure`` whose signature is | |||
| ``closure(key: str, var: Tensor) -> Union[np.ndarray, None]`` | |||
| """ | |||
| assert callable(closure), "closure must be a function" | |||
| @@ -536,7 +556,8 @@ class Module(metaclass=ABCMeta): | |||
| super().__delattr__(name) | |||
| def _module_info_string(self) -> str: | |||
| r"""Set the extra representation of the module. | |||
| r""" | |||
| Set the extra representation of the module. | |||
| """ | |||
| return "" | |||
| @@ -36,7 +36,8 @@ class _PoolNd(Module): | |||
| class MaxPool2d(_PoolNd): | |||
| r"""Applies a 2D max pooling over an input. | |||
| r""" | |||
| Applies a 2D max pooling over an input. | |||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||
| :attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | |||
| @@ -83,7 +84,8 @@ class MaxPool2d(_PoolNd): | |||
| class AvgPool2d(_PoolNd): | |||
| r"""Applies a 2D average pooling over an input. | |||
| r""" | |||
| Applies a 2D average pooling over an input. | |||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||
| :attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | |||
| @@ -19,7 +19,8 @@ from .module import QuantizedModule | |||
| class Conv2d(Float.Conv2d, QuantizedModule): | |||
| r"""Quantized version of :class:`~.qat.conv.Conv2d`.""" | |||
| r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
| r""" | |||
| Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
| The parameters are the same as :class:`~.Conv2d`. | |||
| """ | |||
| @@ -11,7 +11,8 @@ from .conv import Conv2d | |||
| class _ConvBnActivation2d(Conv2d): | |||
| r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
| r""" | |||
| Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
| The parameters are the same as :class:`~.Conv2d`. | |||
| """ | |||
| @@ -12,7 +12,8 @@ from .module import Module | |||
| class Sequential(Module): | |||
| r"""A sequential container. | |||
| r""" | |||
| A sequential container. | |||
| Modules will be added to it in the order they are passed in the constructor. | |||
| Alternatively, an ordered dict of modules can also be passed in. | |||
| @@ -29,10 +30,9 @@ class Sequential(Module): | |||
| from collections import OrderedDict | |||
| batch_size = 64 | |||
| data = mge.tensor(np.zeros((batch_size, 1, 28, 28)), dtype=np.float32) | |||
| data = mge.tensor(np.zeros((batch_size, 28 * 28)), dtype=np.float32) | |||
| label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | |||
| data = data.reshape(batch_size, -1) | |||
| net0 = M.Sequential( | |||
| M.Linear(28 * 28, 320), | |||
| M.Linear(320, 10) | |||
| @@ -40,10 +40,9 @@ class Sequential(Module): | |||
| pred0 = net0(data) | |||
| modules = OrderedDict() | |||
| modules["fc0"] = nn.Linear(28 * 28, 320) | |||
| modules["fc1"] = nn.Linear(320, 10) | |||
| net1 = nn.Sequential(modules) | |||
| modules["fc0"] = M.Linear(28 * 28, 320) | |||
| modules["fc1"] = M.Linear(320, 10) | |||
| net1 = M.Sequential(modules) | |||
| pred1 = net1(data) | |||
| """ | |||
| @@ -16,7 +16,8 @@ from .optimizer import Optimizer | |||
| class Adadelta(Optimizer): | |||
| r"""Implements Adadelta algorithm. | |||
| r""" | |||
| Implements Adadelta algorithm. | |||
| It has been proposed in `"ADADELTA: An Adaptive Learning Rate Method" <https://arxiv.org/abs/1212.5701>`_. | |||
| @@ -16,7 +16,8 @@ from .optimizer import Optimizer | |||
| class Adagrad(Optimizer): | |||
| r"""Implements Adagrad algorithm. | |||
| r""" | |||
| Implements Adagrad algorithm. | |||
| It has been proposed in `"Adaptive Subgradient Methods for Online Learning | |||
| and Stochastic Optimization" <http://jmlr.org/papers/v12/duchi11a.html>`_. | |||
| @@ -13,7 +13,8 @@ from .optimizer import Optimizer | |||
| class Adam(Optimizer): | |||
| r"""Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||
| r""" | |||
| Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||
| :param params: iterable of parameters to optimize or dicts defining | |||
| parameter groups. | |||
| @@ -12,7 +12,8 @@ from .optimizer import Optimizer | |||
| class LRScheduler(metaclass=ABCMeta): | |||
| r"""Base class for all learning rate based schedulers. | |||
| r""" | |||
| Base class for all learning rate based schedulers. | |||
| :param optimizer: wrapped optimizer. | |||
| :param current_epoch: the index of current epoch. Default: -1 | |||
| @@ -44,14 +45,16 @@ class LRScheduler(metaclass=ABCMeta): | |||
| self.step() | |||
| def state_dict(self): | |||
| r"""Returns the state of the scheduler as a :class:`dict`. | |||
| r""" | |||
| Returns the state of the scheduler as a :class:`dict`. | |||
| It contains an entry for every variable in self.__dict__ which | |||
| is not the optimizer. | |||
| """ | |||
| raise NotImplementedError | |||
| def load_state_dict(self, state_dict): | |||
| r"""Loads the schedulers state. | |||
| r""" | |||
| Loads the schedulers state. | |||
| :type state_dict: dict | |||
| :param state_dict: scheduler state. | |||
| @@ -14,7 +14,8 @@ from .optimizer import Optimizer | |||
| class MultiStepLR(LRScheduler): | |||
| r"""Decays the learning rate of each parameter group by gamma once the | |||
| r""" | |||
| Decays the learning rate of each parameter group by gamma once the | |||
| number of epochs reaches one of the milestones. | |||
| :param optimizer: wrapped optimizer. | |||
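A sketch of milestone-based learning-rate decay; it assumes ``MultiStepLR`` is exposed from ``megengine.optimizer`` with the constructor arguments shown in this hunk:

.. code-block:: python

    import megengine.module as M
    import megengine.optimizer as optim

    net = M.Linear(4, 2)
    opt = optim.SGD(net.parameters(), lr=0.1)
    scheduler = optim.MultiStepLR(opt, milestones=[30, 80], gamma=0.1)

    for epoch in range(100):
        # ... run one training epoch with ``opt`` ...
        scheduler.step()   # lr is multiplied by gamma after epochs 30 and 80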
| @@ -44,7 +45,8 @@ class MultiStepLR(LRScheduler): | |||
| super().__init__(optimizer, current_epoch) | |||
| def state_dict(self): | |||
| r"""Returns the state of the scheduler as a :class:`dict`. | |||
| r""" | |||
| Returns the state of the scheduler as a :class:`dict`. | |||
| It contains an entry for every variable in self.__dict__ which | |||
| is not the optimizer. | |||
| """ | |||
| @@ -55,7 +57,8 @@ class MultiStepLR(LRScheduler): | |||
| } | |||
| def load_state_dict(self, state_dict): | |||
| r"""Loads the schedulers state. | |||
| r""" | |||
| Loads the schedulers state. | |||
| :type state_dict: dict | |||
| :param state_dict: scheduler state. | |||
| @@ -28,7 +28,8 @@ required = _RequiredParameter() | |||
| class Optimizer(metaclass=ABCMeta): | |||
| r"""Base class for all optimizers. | |||
| r""" | |||
| Base class for all optimizers. | |||
| :param params: specifies what Tensors should be optimized. | |||
| :param defaults: a dict of default parameters of Optimizer, like learning rate or momentum. | |||
| @@ -72,7 +73,8 @@ class Optimizer(metaclass=ABCMeta): | |||
| self._create_state(group) | |||
| def add_param_group(self, param_group: dict): | |||
| r"""Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||
| r""" | |||
| Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||
| This can be useful when fine tuning a pre-trained network as frozen layers can be made | |||
| trainable and added to the :class:`~megengine.optim.optimizer.Optimizer` as training progresses. | |||
| @@ -137,7 +139,8 @@ class Optimizer(metaclass=ABCMeta): | |||
| return params | |||
| def step(self): | |||
| r"""Performs a single optimization step. | |||
| r""" | |||
| Performs a single optimization step. | |||
| """ | |||
| for group in self.param_groups: | |||
| @@ -158,14 +161,16 @@ class Optimizer(metaclass=ABCMeta): | |||
| param.grad.reset_zero() | |||
| def clear_grad(self): | |||
| r"""Set the grad attribute to None for all parameters. | |||
| r""" | |||
| Set the grad attribute to None for all parameters. | |||
| """ | |||
| for param_group in self.param_groups: | |||
| for param in param_group["params"]: | |||
| param.grad = None | |||
| def state_dict(self) -> Dict: | |||
| r"""Export the optimizer state. | |||
| r""" | |||
| Export the optimizer state. | |||
| :return: optimizer state. Can be loaded by :meth:`load_state_dict`. | |||
| """ | |||
| @@ -191,7 +196,8 @@ class Optimizer(metaclass=ABCMeta): | |||
| return {"param_groups": param_groups, "state": state} | |||
| def load_state_dict(self, state: dict): | |||
| r"""Loads the optimizer state. | |||
| r""" | |||
| Loads the optimizer state. | |||
| :param state: optimizer state. Should be an object returned | |||
| from a call to :meth:`state_dict`. | |||
| @@ -13,7 +13,8 @@ from .optimizer import Optimizer | |||
| class SGD(Optimizer): | |||
| r"""Implements stochastic gradient descent. | |||
| r""" | |||
| Implements stochastic gradient descent. | |||
| Nesterov momentum is based on the formula from | |||
| `"On the importance of initialization and momentum in deep learning" <http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf>`_ . | |||
| @@ -174,7 +174,8 @@ class HistogramObserver(MinMaxObserver): | |||
| self.histogram = Tensor([-1] + [0.0] * (bins - 1), dtype="float32") | |||
| def _non_linear_param_search(self): | |||
| r"""Non-linear parameter search. | |||
| r""" | |||
| Non-linear parameter search. | |||
| An approximation for L2 error minimization for selecting min/max. | |||
| By selecting new min/max, we filter out outliers in input distribution. | |||
| """ | |||
| @@ -43,7 +43,8 @@ def register_method_to_class(cls): | |||
| class QuantMode(Enum): | |||
| """Quantization mode enumerate class. | |||
| """ | |||
| Quantization mode enumeration class. | |||
| """ | |||
| SYMMERTIC = 1 | |||
| @@ -63,13 +64,15 @@ qparam_dict = { | |||
| def get_qparam_dict(mode: QuantMode): | |||
| """Return the quantization parameters dictionary according to the mode. | |||
| """ | |||
| Return the quantization parameters dictionary according to the mode. | |||
| """ | |||
| return qparam_dict.get(mode, None) | |||
| def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor: | |||
| """Apply fake quantization to the inp tensor. | |||
| """ | |||
| Apply fake quantization to the inp tensor. | |||
| :param inp: the input tensor which needs to be faked. | |||
| :param qmin: the minimum value of the quantized integer range. | |||
| @@ -91,7 +94,8 @@ def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor | |||
| def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor: | |||
| """Apply fake quantization to bias, with the special scale from input tensor | |||
| """ | |||
| Apply fake quantization to bias, using the scale derived from the input tensor | |||
| and weight tensor; the quantized dtype is also set to qint32. | |||
| :param bias: the bias tensor which needs to be faked. | |||
| @@ -21,7 +21,8 @@ __all__ = ["normal", "uniform"] | |||
| def normal( | |||
| mean: float = 0, std: float = 1, size: Optional[Iterable[int]] = None | |||
| ) -> Tensor: | |||
| r"""Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||
| r""" | |||
| Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||
| :param size: output tensor size. | |||
| :param mean: the mean or expectation of the distribution. | |||
| @@ -59,7 +60,8 @@ def normal( | |||
| def uniform( | |||
| low: float = 0, high: float = 1, size: Optional[Iterable[int]] = None | |||
| ) -> Tensor: | |||
| r"""Random variable with uniform distribution $U(0, 1)$. | |||
| r""" | |||
| Random variable with uniform distribution :math:`\mathcal{U}(\text{low}, \text{high})`. | |||
| :param size: output tensor size. | |||
| :param low: lower range. | |||
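For example, drawing samples from both distributions (assuming they are exported from ``megengine.random`` per the ``__all__`` shown above):

.. code-block:: python

    import megengine.random as rand

    x = rand.normal(mean=0.0, std=1.0, size=(2, 3))    # N(0, 1) samples
    y = rand.uniform(low=0.0, high=1.0, size=(2, 3))   # U(0, 1) samples
    print(x.numpy().shape, y.numpy().shape)            # (2, 3) (2, 3)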
| @@ -14,7 +14,8 @@ from .utils.max_recursion_limit import max_recursion_limit | |||
| def save(obj, f, pickle_module=pickle, pickle_protocol=pickle.HIGHEST_PROTOCOL): | |||
| r"""Save an object to disk file. | |||
| r""" | |||
| Save an object to disk file. | |||
| :type obj: object | |||
| :param obj: object to save. Only ``module`` or ``state_dict`` are allowed. | |||
| @@ -81,7 +82,8 @@ def _get_callable_map_location(map_location): | |||
| def load(f, map_location=None, pickle_module=pickle): | |||
| r"""Load an object saved with save() from a file. | |||
| r""" | |||
| Load an object saved with save() from a file. | |||
| :type f: text file object | |||
| :param f: a string of file name or a text file object from which to load. | |||
| @@ -97,5 +97,6 @@ tensor = Tensor | |||
| class Parameter(Tensor): | |||
| r"""A kind of Tensor that is to be considered a module parameter. | |||
| r""" | |||
| A kind of Tensor that is to be considered a module parameter. | |||
| """ | |||
| @@ -17,7 +17,8 @@ from ..core.tensor.raw_tensor import as_raw_tensor | |||
| def get_dep_vars(var: VarNode, var_type: str = None) -> List[VarNode]: | |||
| """Returns :class:`.tensor.core.megbrain_graph.VarNode` of type ``var_type`` that input ``var`` | |||
| """ | |||
| Returns :class:`.tensor.core.megbrain_graph.VarNode` of type ``var_type`` that input ``var`` | |||
| depends on. If ``var_type`` is None, returns all types. | |||
| """ | |||
| outputs = [] | |||
| @@ -46,14 +47,16 @@ def get_dep_vars(var: VarNode, var_type: str = None) -> List[VarNode]: | |||
| def get_owner_opr_inputs(var: VarNode) -> List[VarNode]: | |||
| """Gets the inputs of owner opr of a variable. | |||
| """ | |||
| Gets the inputs of owner opr of a variable. | |||
| """ | |||
| assert isinstance(var, VarNode) | |||
| return var.owner.inputs | |||
| def get_owner_opr_type(var: VarNode) -> str: | |||
| """Gets the type of owner opr of a variable. | |||
| """ | |||
| Gets the type of owner opr of a variable. | |||
| """ | |||
| assert isinstance(var, VarNode) | |||
| @@ -61,14 +64,16 @@ def get_owner_opr_type(var: VarNode) -> str: | |||
| def get_opr_type(opr: OperatorNode) -> str: | |||
| """Gets the type of an opr. | |||
| """ | |||
| Gets the type of an opr. | |||
| """ | |||
| assert isinstance(opr, OperatorNode) | |||
| return opr.type | |||
| def graph_traversal(outputs: VarNode): | |||
| """Helper function to traverse the computing graph and return enough useful information. | |||
| """ | |||
| Helper function to traverse the computing graph and return enough useful information. | |||
| :param outputs: model outputs. | |||
| :return: tuple (map_oprs, map_vars, var2oprs, opr2receivers, indegree2opr, opr2indegree) | |||
| @@ -124,7 +129,8 @@ def graph_traversal(outputs: VarNode): | |||
| def get_oprs_seq(outputs: List[VarNode], prune_reshape=False) -> List[OperatorNode]: | |||
| """Gets oprs in some topological order for a dumped model. | |||
| """ | |||
| Gets oprs in some topological order for a dumped model. | |||
| :param outputs: model outputs. | |||
| :param prune_reshape: whether to prune the useless operators during inference. | |||
| @@ -194,7 +200,8 @@ def get_oprs_seq(outputs: List[VarNode], prune_reshape=False) -> List[OperatorNo | |||
| def replace_vars(dst: VarNode, varmap: Dict[VarNode, VarNode]) -> List[VarNode]: | |||
| """Replaces vars in the graph. | |||
| """ | |||
| Replaces vars in the graph. | |||
| :param dst: target vars representing the graph. | |||
| :param varmap: the map that specifies how to replace the vars. | |||
| @@ -221,7 +228,8 @@ def replace_vars(dst: VarNode, varmap: Dict[VarNode, VarNode]) -> List[VarNode]: | |||
| def replace_oprs( | |||
| dst: List[VarNode], oprmap: Dict[OperatorNode, OperatorNode] | |||
| ) -> List[VarNode]: | |||
| """Replaces operators in the graph. | |||
| """ | |||
| Replaces operators in the graph. | |||
| :param dst: target vars representing the graph. | |||
| :param oprmap: the map that specifies how to replace the operators. | |||
| @@ -246,7 +254,8 @@ def replace_oprs( | |||
| def set_priority_to_id(dest_vars): | |||
| """For all oprs in the subgraph constructed by dest_vars, | |||
| """ | |||
| For all oprs in the subgraph constructed by dest_vars, | |||
| sets its priority to id if its original priority is zero. | |||
| :param dest_vars: target vars representing the graph. | |||
| """ | |||
| @@ -258,7 +267,8 @@ def set_priority_to_id(dest_vars): | |||
| def load_and_inference(file, inp_data_list: List[numpy.ndarray]) -> List[numpy.ndarray]: | |||
| """Loads a serialized computing graph and run inference with input data. | |||
| """ | |||
| Loads a serialized computing graph and run inference with input data. | |||
| :param file: path or handle of the input file. | |||
| :param inp_data_list: list of input data. | |||
| @@ -16,7 +16,8 @@ if platform.system() != "Windows": | |||
| class AlternativeRecursionLimit: | |||
| r"""A reentrant context manager for setting global recursion limits. | |||
| r""" | |||
| A reentrant context manager for setting global recursion limits. | |||
| """ | |||
| def __init__(self, new_py_limit): | |||
| @@ -73,6 +74,7 @@ _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | |||
| def max_recursion_limit(): | |||
| r"""Sets recursion limit to the max possible value. | |||
| r""" | |||
| Sets recursion limit to the max possible value. | |||
| """ | |||
| return _max_recursion_limit_context_manager | |||
| @@ -12,7 +12,8 @@ import numpy as np | |||
| def load_tensor_binary(fobj): | |||
| """Load a tensor dumped by the :class:`BinaryOprIODump` plugin; the actual | |||
| """ | |||
| Load a tensor dumped by the :class:`BinaryOprIODump` plugin; the actual | |||
| tensor value dump is implemented by ``mgb::debug::dump_tensor``. | |||
| Multiple values can be compared by ``tools/compare_binary_iodump.py``. | |||
| @@ -57,7 +57,8 @@ def _tabulate_confluence(tab, **kwargs): | |||
| def main(passed_args=None): # pylint: disable=too-many-statements | |||
| """Analyses profile info from :mod:`~.utils.profile_analyzer` . | |||
| """ | |||
| Analyses profile info from :mod:`~.utils.profile_analyzer` . | |||
| Run this file with ``--help`` to get more usage. | |||
| """ | |||
| @@ -15,7 +15,8 @@ import numpy as np | |||
| class NonExistNum: | |||
| """An object that behaves like a number but means a field does not exist; It is | |||
| """ | |||
| An object that behaves like a number but means a field does not exist; it is | |||
| always greater than any real number. | |||
| """ | |||
| @@ -64,15 +65,18 @@ class OprProfRst: | |||
| """A dict containing operator info: name, id and type.""" | |||
| time_dict = None | |||
| """A mapping from ``"host"`` or ``"device"`` to list of profiling | |||
| """ | |||
| A mapping from ``"host"`` or ``"device"`` to list of profiling | |||
| results.""" | |||
| footprint = None | |||
| """A mapping from ``"memory"`` or ``"computation"`` to the actual number | |||
| """ | |||
| A mapping from ``"memory"`` or ``"computation"`` to the actual number | |||
| of corresponding operations.""" | |||
| def __init__(self, entry: dict): | |||
| """Opr profiling initialization, which sets up name, type and id of opr_info. | |||
| """ | |||
| Opr profiling initialization, which sets up name, type and id of opr_info. | |||
| :param entry: profiling json exec_graph items. | |||
| """ | |||
| @@ -84,7 +88,8 @@ class OprProfRst: | |||
| self.footprint = collections.defaultdict(NonExistNum) | |||
| def update_device_prof_info(self, dev_time: dict): | |||
| """Updates device profiling info. | |||
| """ | |||
| Updates device profiling info. | |||
| :param dev_time: device time for single opr, | |||
| is an attribute of profiling result. | |||
| @@ -93,7 +98,8 @@ class OprProfRst: | |||
| self.time_dict["device"].append(copy.deepcopy(dev_time)) | |||
| def update_host_prof_info(self, host_time: dict): | |||
| """Updates host profiling info. | |||
| """ | |||
| Updates host profiling info. | |||
| :param host_time: host time for single opr, | |||
| is an attribute of profiling result. | |||
| @@ -102,7 +108,8 @@ class OprProfRst: | |||
| self.time_dict["host"].append(copy.deepcopy(host_time)) | |||
| def update_footprint(self, footprint: dict): | |||
| """Updates opr footprint. | |||
| """ | |||
| Updates opr footprint. | |||
| :param footprint: footprint for single opr, | |||
| is an attribute of profiling result. | |||
| @@ -128,7 +135,8 @@ class Record: | |||
| ] | |||
| def __init__(self, time: float, info: dict, footprint: dict): | |||
| """Initializes single record. | |||
| """ | |||
| Initializes single record. | |||
| :param time: opr running time, evaluated by applying users providing | |||
| function to OprProfRst. | |||
| @@ -153,7 +161,8 @@ class Record: | |||
| self.opr_id = int(self.opr_id) | |||
| def get_column_by_name(self, name: str = None): | |||
| """Extracts column value by its column name. | |||
| """ | |||
| Extracts column value by its column name. | |||
| :param name: column name, None for time. | |||
| """ | |||
| @@ -165,7 +174,8 @@ class Record: | |||
| class ProfileAnalyzer: | |||
| def __init__(self, obj: dict, opr_filter: Callable = lambda opr, inp, out: True): | |||
| """Initializes ProfileAnalyzer. | |||
| """ | |||
| Initializes ProfileAnalyzer. | |||
| :param obj: dict dumped from json str. | |||
| :param opr_filter: function that filter oprs. | |||
| @@ -202,7 +212,8 @@ class ProfileAnalyzer: | |||
| def _aggregate( | |||
| self, records: List[Record], aop: Union[str, Callable], atype: Optional[str] | |||
| ) -> List[Record]: | |||
| """Aggregate operation. | |||
| """ | |||
| Aggregate operation. | |||
| :param records: selected records. | |||
| :param aop: aggregate operation, if aop is str, we would replace it | |||
| @@ -247,7 +258,8 @@ class ProfileAnalyzer: | |||
| return rst | |||
| def _sort(self, records: List[Record], sort_by: str) -> List[Record]: | |||
| """Sort operation. | |||
| """ | |||
| Sort operation. | |||
| :param records: the records after aggregate operation. | |||
| :param sort_by: keyword for sorting the list. | |||
| @@ -271,7 +283,8 @@ class ProfileAnalyzer: | |||
| sort_by: str = None, | |||
| top_k: int = 0, | |||
| ) -> List[Record]: | |||
| """Select operation. | |||
| """ | |||
| Select operation. | |||
| :param time_func: time_func provided by user, would apply to every | |||
| OprProfRst. | |||
| @@ -304,7 +317,8 @@ class TimeFuncHelper: | |||
| @staticmethod | |||
| def _eval_time(prof_type, end_key, func, opr_prof): | |||
| """Eval time. | |||
| """ | |||
| Eval time. | |||
| :type prof_type: str | |||
| :param prof_type: 'host' or 'device'. | |||
| @@ -325,7 +339,8 @@ class TimeFuncHelper: | |||
| @staticmethod | |||
| def eval_time_func(prof_type: str, end_key: str, func: Callable) -> float: | |||
| """Eval oprerator profile time. | |||
| """ | |||
| Eval operator profile time. | |||
| :param prof_type: 'host' or 'device'. | |||
| :param end_key: 'kern' or 'end'. | |||
| @@ -338,7 +353,8 @@ class TimeFuncHelper: | |||
| def _min_start( | |||
| prof_type, end_key, func, opr_prof | |||
| ): # pylint: disable=unused-argument | |||
| """Eval minimum start time. | |||
| """ | |||
| Eval minimum start time. | |||
| :type prof_type: str | |||
| :param prof_type: 'host' or 'device'. | |||
| @@ -360,7 +376,8 @@ class TimeFuncHelper: | |||
| def min_start_func( | |||
| prof_type: str, end_key: str, func: Callable | |||
| ) -> float: # pylint: disable=unused-argument | |||
| """Eval oprerator profile min start time. | |||
| """ | |||
| Eval operator profile min start time. | |||
| :param prof_type: 'host' or 'device'. | |||
| :param end_key: 'kern' or 'end'. | |||
| @@ -371,7 +388,8 @@ class TimeFuncHelper: | |||
| @staticmethod | |||
| def _max_end(prof_type, end_key, func, opr_prof): # pylint: disable=unused-argument | |||
| """Eval maximum end time | |||
| """ | |||
| Eval maximum end time | |||
| :type prof_type: str | |||
| :param prof_type: 'host' or 'device'. | |||
| @@ -391,7 +409,8 @@ class TimeFuncHelper: | |||
| @staticmethod | |||
| def max_end_func(prof_type: str, end_key: str, func: Callable) -> float: | |||
| """Eval oprerator profile max end time. | |||
| """ | |||
| Eval operator profile max end time. | |||
| :param prof_type: 'host' or 'device'. | |||
| :param end_key: 'kern' or 'end'. | |||
| @@ -169,7 +169,7 @@ class Profiler: | |||
| Examples: | |||
| .. testcode:: | |||
| .. code-block:: | |||
| import megengine as mge | |||
| import megengine.module as M | |||
| @@ -3,7 +3,8 @@ from ..core._imperative_rt.imperative import sync | |||
| class TensorSanityCheck: | |||
| r"""An object that checks whether the input tensors of each operator have changed before and after the operation. | |||
| r""" | |||
| An object that checks whether the input tensors of each operator have changed before and after the operation. | |||
| Examples: | |||
| @@ -11,7 +11,8 @@ import functools | |||
| def get_ndtuple(value, *, n, allow_zero=True): | |||
| r"""Converts possibly 1D tuple to nd tuple. | |||
| r""" | |||
| Converts possibly 1D tuple to nd tuple. | |||
| :type allow_zero: bool | |||
| :param allow_zero: whether to allow zero tuple value.""" | |||
| @@ -1,12 +0,0 @@ | |||
| # -*- coding: utf-8 -*- | |||
| # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
| # | |||
| # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, | |||
| # software distributed under the License is distributed on an | |||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| [pytest] | |||
| markers = | |||
| isolated_distributed: marks distributed tests that should runs without cuda use | |||
| in main thread (deselect with '-m "not "isolated_distributed"') | |||
| @@ -1,6 +1,6 @@ | |||
| #!/bin/bash -e | |||
| test_dirs="test megengine" | |||
| test_dirs="megengine test" | |||
| TEST_PLAT=$1 | |||