From: @tiancixiao
Reviewed-by: @pandoublefeng, @liucunwei
Signed-off-by: @pandoublefeng, @liucunwei
tags/v1.2.0-rc1
@@ -1732,10 +1732,7 @@ class MappableDataset(SourceDataset):
             new_sampler (Sampler): The sampler to use for the current dataset.

         Examples:
-            >>> # Note: A SequentialSampler is created by default
-            >>> dataset = ds.ImageFolderDataset(image_folder_dataset_dir)
-            >>>
-            >>> # Use a DistributedSampler instead of the SequentialSampler
+            >>> # use a DistributedSampler instead
             >>> new_sampler = ds.DistributedSampler(10, 2)
             >>> dataset.use_sampler(new_sampler)
         """
@@ -2888,15 +2885,15 @@ class MnistDataset(MappableDataset):
     The generated dataset has two columns ['image', 'label'].
     The type of the image tensor is uint8. The label is a scalar uint32 tensor.
-    This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table
+    This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table
     below shows what input arguments are allowed and their expected behavior.

     .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
        :widths: 25 25 50
        :header-rows: 1

-       * - Parameter 'sampler'
-         - Parameter 'shuffle'
+       * - Parameter `sampler`
+         - Parameter `shuffle`
         - Expected Order Behavior
       * - None
         - None
@@ -2937,19 +2934,19 @@ class MnistDataset(MappableDataset):
         dataset_dir (str): Path to the root directory that contains the dataset.
         usage (str, optional): Usage of this dataset, can be "train", "test" or "all". "train" will read from 60,000
             train samples, "test" will read from 10,000 test samples, "all" will read from all 70,000 samples.
-            (default=None, all samples)
+            (default=None, will read all samples)
         num_samples (int, optional): The number of images to be included in the dataset
-            (default=None, all images).
+            (default=None, will read all images).
         num_parallel_workers (int, optional): Number of workers to read the data
-            (default=None, set in the config).
+            (default=None, will use value set in the config).
         shuffle (bool, optional): Whether or not to perform shuffle on the dataset
             (default=None, expected order behavior shown in the table).
         sampler (Sampler, optional): Object used to choose samples from the
             dataset (default=None, expected order behavior shown in the table).
         num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
-            When this argument is specified, 'num_samples' reflects the max sample number of per shard.
-        shard_id (int, optional): The shard ID within num_shards (default=None). This
-            argument can only be specified when num_shards is also specified.
+            When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
+        shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+            argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
             (default=None, which means no cache is used).
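To make the documented interplay of `num_shards`, `shard_id` and `num_samples` concrete, a minimal sketch, assuming `import mindspore.dataset as ds` and a placeholder `mnist_dataset_dir`:

    >>> import mindspore.dataset as ds
    >>> # plain shuffled read of the 60,000 training samples
    >>> mnist_train = ds.MnistDataset(mnist_dataset_dir, usage="train", shuffle=True)
    >>> # sharded read for 4-way data parallelism; num_samples now caps
    >>> # each shard at 1000 samples, not the whole dataset
    >>> mnist_shard = ds.MnistDataset(mnist_dataset_dir, usage="train",
    ...                               num_samples=1000, num_shards=4, shard_id=0)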
@@ -3587,15 +3584,15 @@ class ManifestDataset(MappableDataset):
     The shape of the image column is [image_size] if decode flag is False, or [H,W,C]
     otherwise.
     The type of the image tensor is uint8. The label is a scalar uint64 tensor.
-    This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table
+    This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table
     below shows what input arguments are allowed and their expected behavior.

-    .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
+    .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
        :widths: 25 25 50
        :header-rows: 1

-       * - Parameter 'sampler'
-         - Parameter 'shuffle'
+       * - Parameter `sampler`
+         - Parameter `shuffle`
         - Expected Order Behavior
       * - None
         - None
@@ -3618,11 +3615,11 @@ class ManifestDataset(MappableDataset):
     Args:
         dataset_file (str): File to be read.
-        usage (str, optional): acceptable usages include train, eval and inference (default="train").
+        usage (str, optional): Acceptable usages include "train", "eval" and "inference" (default="train").
         num_samples (int, optional): The number of images to be included in the dataset.
-            (default=None, all images).
+            (default=None, will include all images).
         num_parallel_workers (int, optional): Number of workers to read the data
-            (default=None, number set in the config).
+            (default=None, will use value set in the config).
         shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
             order behavior shown in the table).
         sampler (Sampler, optional): Object used to choose samples from the
@@ -3632,10 +3629,10 @@ class ManifestDataset(MappableDataset):
             class will be given a unique index starting from 0).
         decode (bool, optional): decode the images after reading (default=False).
         num_shards (int, optional): Number of shards that the dataset will be divided
-            into (default=None). When this argument is specified, 'num_samples' reflects
+            into (default=None). When this argument is specified, `num_samples` reflects
             the max sample number of per shard.
-        shard_id (int, optional): The shard ID within num_shards (default=None). This
-            argument can only be specified when num_shards is also specified.
+        shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+            argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
             (default=None, which means no cache is used).
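A minimal usage sketch for the documented parameters, assuming `import mindspore.dataset as ds`; `manifest_dataset_file` is a placeholder path to a manifest file:

    >>> import mindspore.dataset as ds
    >>> # read and decode only the "train" entries of the manifest
    >>> manifest_dataset = ds.ManifestDataset(manifest_dataset_file,
    ...                                       usage="train", decode=True)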
@@ -4195,7 +4192,8 @@ class CocoDataset(MappableDataset):
     """
     A source dataset for reading and parsing COCO dataset.

-    CocoDataset support four kinds of task: 2017 Train/Val/Test Detection, Keypoints, Stuff, Panoptic.
+    `CocoDataset` supports four kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation
+    and Panoptic Segmentation of the 2017 Train/Val/Test dataset.

     The generated dataset has multi-columns :
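For readers skimming the hunk, the `task` argument selects which of the four annotation types is parsed. A minimal sketch, assuming `import mindspore.dataset as ds`; both `coco_dataset_dir` and `coco_annotation_file` are placeholders:

    >>> import mindspore.dataset as ds
    >>> # task is one of "Detection", "Keypoint", "Stuff" or "Panoptic"
    >>> coco_dataset = ds.CocoDataset(coco_dataset_dir,
    ...                               annotation_file=coco_annotation_file,
    ...                               task="Detection")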
@@ -4339,11 +4337,12 @@ class CelebADataset(MappableDataset):

 class CelebADataset(MappableDataset):
     """
-    A source dataset for reading and parsing CelebA dataset. Currently supported: list_attr_celeba.txt only.
+    A source dataset for reading and parsing CelebA dataset. Currently only `list_attr_celeba.txt` is supported,
+    which contains the attribute annotations of the dataset.

     Note:
         The generated dataset has two columns ['image', 'attr'].
-        The type of the image tensor is uint8. The attribute tensor is uint32 and one hot type.
+        The image tensor is of the uint8 type. The attribute tensor is of the uint32 type and one hot encoded.

     Citation of CelebA dataset.
@@ -4376,20 +4375,20 @@ class CelebADataset(MappableDataset):
     Args:
         dataset_dir (str): Path to the root directory that contains the dataset.
-        num_parallel_workers (int, optional): Number of workers to read the data (default=value set in the config).
+        num_parallel_workers (int, optional): Number of workers to read the data (default=None, will use value set in
+            the config).
         shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None).
-        usage (str): one of 'all', 'train', 'valid' or 'test'.
+        usage (str): One of 'all', 'train', 'valid' or 'test' (default='all', will read all samples).
         sampler (Sampler, optional): Object used to choose samples from the dataset (default=None).
         decode (bool, optional): decode the images after reading (default=False).
-        extensions (list[str], optional): List of file extensions to be
-            included in the dataset (default=None).
-        num_samples (int, optional): The number of images to be included in the dataset.
-            (default=None, all images).
+        extensions (list[str], optional): List of file extensions to be included in the dataset (default=None).
+        num_samples (int, optional): The number of images to be included in the dataset
+            (default=None, will include all images).
         num_shards (int, optional): Number of shards that the dataset will be divided
-            into (default=None). When this argument is specified, 'num_samples' reflects
+            into (default=None). When this argument is specified, `num_samples` reflects
             the max sample number of per shard.
-        shard_id (int, optional): The shard ID within num_shards (default=None). This
-            argument can only be specified when num_shards is also specified.
+        shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+            argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
             (default=None, which means no cache is used).
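A minimal sketch of the documented defaults, assuming `import mindspore.dataset as ds`; `celeba_dataset_dir` is a placeholder directory that must contain `list_attr_celeba.txt` alongside the images:

    >>> import mindspore.dataset as ds
    >>> # usage defaults to 'all'; here we read only the training split
    >>> celeba_dataset = ds.CelebADataset(celeba_dataset_dir,
    ...                                   usage='train', decode=True)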
@@ -63,6 +63,7 @@ class TextTensorOperation(TensorOperation):
     """
     Base class of Text Tensor Ops
     """
+
     def __call__(self, input_tensor):
         if not isinstance(input_tensor, list):
             input_list = [input_tensor]
@@ -95,13 +96,11 @@ DE_C_INTER_JIEBA_MODE = {
     JiebaMode.HMM: cde.JiebaMode.DE_JIEBA_HMM
 }

-
 DE_C_INTER_SENTENCEPIECE_LOADTYPE = {
     SPieceTokenizerLoadType.FILE: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KFILE,
     SPieceTokenizerLoadType.MODEL: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KMODEL
 }

-
 DE_C_INTER_SENTENCEPIECE_OUTTYPE = {
     SPieceTokenizerOutType.STRING: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KString,
     SPieceTokenizerOutType.INT: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KINT
@@ -282,7 +281,7 @@ class Lookup(TextTensorOperation):
         vocab (Vocab): A vocabulary object.
         unknown_token (str, optional): Word used for lookup if the word being looked up is out-of-vocabulary (OOV).
             If unknown_token is OOV, a runtime error will be thrown (default=None).
-        data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mstype.int32)
+        data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mindspore.int32)

     Examples:
         >>> # Load vocabulary from list
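A minimal sketch of the list-based path the example refers to, assuming `import mindspore.dataset.text as text` and reusing the `text_file_dataset` placeholder that appears elsewhere in this diff:

    >>> import mindspore.dataset.text as text
    >>> # build a small in-memory vocabulary; OOV words map to "<unk>"
    >>> vocab = text.Vocab.from_list(["<pad>", "<unk>", "mindspore", "best"])
    >>> lookup_op = text.Lookup(vocab, unknown_token="<unk>")
    >>> text_file_dataset = text_file_dataset.map(operations=lookup_op)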
@@ -309,18 +308,19 @@ class Ngram(TextTensorOperation):
     Refer to https://en.wikipedia.org/wiki/N-gram#Examples for an overview of what n-gram is and how it works.

     Args:
-        n (list[int]): n in n-gram, n >= 1. n is a list of positive integers. For example, if n=[4,3], then the result
+        n (list[int]): n in n-gram, which is a list of positive integers. For example, if n=[4, 3], then the result
             would be a 4-gram followed by a 3-gram in the same tensor. If the number of words is not enough to make up
-            for a n-gram, an empty string will be returned. For example, 3 grams on ["mindspore","best"] will result in
+            for an n-gram, an empty string will be returned. For example, 3 grams on ["mindspore", "best"] will result
             in an empty string produced.
-        left_pad (tuple, optional): ("pad_token", pad_width). Padding performed on left side of the sequence. pad_width
-            will be capped at n-1. left_pad=("_",2) would pad left side of the sequence with "__" (default=None).
-        right_pad (tuple, optional): ("pad_token", pad_width). Padding performed on right side of the sequence.
-            pad_width will be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--"
-            (default=None).
-        separator (str, optional): symbol used to join strings together. For example. if 2-gram is
+        left_pad (tuple, optional): Padding performed on left side of the sequence shaped like ("pad_token",
+            pad_width). `pad_width` will be capped at n-1. For example, specifying left_pad=("_", 2) would pad left
+            side of the sequence with "__" (default=None).
+        right_pad (tuple, optional): Padding performed on right side of the sequence shaped like
+            ("pad_token", pad_width). `pad_width` will be capped at n-1. For example, specifying right_pad=("-", 2)
+            would pad right side of the sequence with "--" (default=None).
+        separator (str, optional): Symbol used to join strings together. For example, if 2-gram is
             ["mindspore", "amazing"] with separator="-", the result would be ["mindspore-amazing"]
-            (default=None, which means whitespace is used).
+            (default=None, which will use whitespace as separator).

     Examples:
         >>> text_file_dataset = text_file_dataset.map(operations=text.Ngram(3, separator=""))
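To illustrate the revised padding wording, a sketch under the same assumptions (`import mindspore.dataset.text as text`, placeholder `text_file_dataset`):

    >>> import mindspore.dataset.text as text
    >>> # bigrams over ["mindspore", "best"] with one "_" pad per side
    >>> # produce ["_-mindspore", "mindspore-best", "best-_"]
    >>> ngram_op = text.Ngram([2], left_pad=("_", 1), right_pad=("_", 1),
    ...                       separator="-")
    >>> text_file_dataset = text_file_dataset.map(operations=ngram_op)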
@@ -389,6 +389,7 @@ class SlidingWindow(TextTensorOperation):
         >>> # |     [3,4,5]] |
         >>> # +--------------+
     """
+
     @check_slidingwindow
     def __init__(self, width, axis=0):
         self.width = width
@@ -557,6 +558,7 @@ class PythonTokenizer:
         tokens = self.tokenizer(in_array)
         return tokens

+
 if platform.system().lower() != 'windows':
     DE_C_INTER_NORMALIZE_FORM = {
         NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE,
@@ -575,12 +577,12 @@ if platform.system().lower() != 'windows':
         BasicTokenizer is not supported on Windows platform yet.

         Args:
-            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation
+            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8 with `NFD` mode, RegexReplace operation
                 on input text to fold the text to lower case and strip accents characters. If False, only apply
-                NormalizeUTF8('normalization_form' mode) operation on input text (default=False).
-            keep_whitespace (bool, optional): If True, the whitespace will be kept in out tokens (default=False).
+                NormalizeUTF8 operation with the specified mode on input text (default=False).
+            keep_whitespace (bool, optional): If True, the whitespace will be kept in output tokens (default=False).
             normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode. This is
-                only effective when 'lower_case' is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
+                only effective when `lower_case` is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
             preserve_unused_token (bool, optional): If True, do not split special tokens like
                 '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True).
             with_offsets (bool, optional): If or not output offsets of tokens (default=False).
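A minimal sketch of the documented lowercasing path, assuming `import mindspore.dataset.text as text` and a placeholder `text_file_dataset`; note the op is unavailable on Windows:

    >>> import mindspore.dataset.text as text
    >>> # lower_case=True folds case and strips accents before tokenizing
    >>> tokenizer_op = text.BasicTokenizer(lower_case=True, with_offsets=False)
    >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)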
@@ -637,14 +639,14 @@ if platform.system().lower() != 'windows':
             vocab (Vocab): A vocabulary object.
             suffix_indicator (str, optional): Used to show that the subword is the last part of a word (default='##').
             max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split (default=100).
-            unknown_token (str, optional): When a token cannot be found: if 'unknown_token' is empty string,
-                return the token directly, else return 'unknown_token'(default='[UNK]').
-            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation
+            unknown_token (str, optional): When an unknown token is found, return the token directly if `unknown_token`
+                is an empty string, else return `unknown_token` instead (default='[UNK]').
+            lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8 with `NFD` mode, RegexReplace operation
                 on input text to fold the text to lower case and strip accented characters. If False, only apply
-                NormalizeUTF8('normalization_form' mode) operation on input text (default=False).
+                NormalizeUTF8 operation with the specified mode on input text (default=False).
             keep_whitespace (bool, optional): If True, the whitespace will be kept in out tokens (default=False).
             normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode,
-                only effective when 'lower_case' is False. See NormalizeUTF8 for details (default='NONE').
+                only effective when `lower_case` is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
             preserve_unused_token (bool, optional): If True, do not split special tokens like
                 '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True).
             with_offsets (bool, optional): If or not output offsets of tokens (default=False).
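The parameter list in this hunk (`vocab`, `suffix_indicator`, `max_bytes_per_token`, `unknown_token`, ...) matches `BertTokenizer`; assuming that is the class being edited, a minimal sketch with the usual placeholders:

    >>> import mindspore.dataset.text as text
    >>> # "##ite" marks a subword continuation per suffix_indicator='##'
    >>> vocab = text.Vocab.from_list(["my", "favor", "##ite", "book", "[UNK]"])
    >>> tokenizer_op = text.BertTokenizer(vocab=vocab, unknown_token='[UNK]',
    ...                                   lower_case=True)
    >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)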
@@ -703,7 +705,8 @@ if platform.system().lower() != 'windows':

     class CaseFold(TextTensorOperation):
         """
-        Apply case fold operation on UTF-8 string tensor.
+        Apply case fold operation on UTF-8 string tensor, which is aggressive in that it can convert more characters
+        into lower case.

         Note:
             CaseFold is not supported on Windows platform yet.
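A minimal sketch under the same assumptions as the other text ops (non-Windows, `import mindspore.dataset.text as text`, placeholder `text_file_dataset`):

    >>> import mindspore.dataset.text as text
    >>> # Unicode case folding handles characters that plain lowercasing
    >>> # may not, e.g. the German sharp s
    >>> case_fold_op = text.CaseFold()
    >>> text_file_dataset = text_file_dataset.map(operations=case_fold_op)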
@@ -59,23 +59,24 @@ class OneHot(cde.OneHotOp):

 class Fill(cde.FillOp):
     """
-    Tensor operation to create a tensor filled with input scalar value.
+    Tensor operation to fill all elements in the tensor with the specified value.
     The output tensor will have the same shape and type as the input tensor.

     Args:
         fill_value (Union[str, bytes, int, float, bool]): scalar value
-            to fill created tensor with.
+            to fill the tensor with.

     Examples:
         >>> import numpy as np
-        >>> # Generate 1d int numpy array from 0 - 63
+        >>> from mindspore.dataset import GeneratorDataset
+        >>> # generate a 1D integer numpy array from 0 to 4
         >>> def generator_1d():
-        >>>     for i in range(64):
+        ...     for i in range(5):
         ...         yield (np.array([i]),)
-        >>> generator_dataset = GeneratorDataset(generator_1d,column_names='col')
+        >>> generator_dataset = GeneratorDataset(generator_1d, column_names="col1")
+        >>> # [[0], [1], [2], [3], [4]]
         >>> fill_op = c_transforms.Fill(3)
         >>> generator_dataset = generator_dataset.map(operations=fill_op)
+        >>> # [[3], [3], [3], [3], [3]]
     """
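Complementing the revised example: since the output keeps the input's type as well as shape, Fill also accepts non-numeric values. A hedged sketch; `string_dataset` is a placeholder dataset of string tensors:

    >>> import mindspore.dataset.transforms.c_transforms as c_transforms
    >>> # a ["so", "many"] string tensor becomes ["end", "end"]
    >>> fill_string_op = c_transforms.Fill("end")
    >>> string_dataset = string_dataset.map(operations=fill_string_op)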
@@ -351,6 +352,8 @@ class Unique(cde.UniqueOp):
         >>> # +---------+-----------------+---------+
     """

+
+
 class Compose():
     """
     Compose a list of transforms into a single transform.
@@ -376,6 +379,7 @@ class Compose():
             operations.append(op)
         return cde.ComposeOperation(operations)

+
 class RandomApply():
     """
     Randomly perform a series of transforms with a given probability.
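Since both classes touched here compose other ops, a minimal sketch of using them together, assuming the standard `c_transforms`/`c_vision` imports and the `image_folder_dataset` placeholder used elsewhere in this diff:

    >>> import mindspore.dataset.transforms.c_transforms as c_transforms
    >>> import mindspore.dataset.vision.c_transforms as c_vision
    >>> # flip with probability 0.5, inside one composed pipeline
    >>> transform = c_transforms.Compose(
    ...     [c_vision.Decode(),
    ...      c_transforms.RandomApply([c_vision.RandomHorizontalFlip()], prob=0.5),
    ...      c_vision.Resize((256, 256))])
    >>> image_folder_dataset = image_folder_dataset.map(operations=transform,
    ...                                                 input_columns=["image"])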
@@ -62,6 +62,7 @@ class ImageTensorOperation(TensorOperation):
     """
     Base class of Image Tensor Ops
     """
+
     def __call__(self, input_tensor):
         if not isinstance(input_tensor, list):
             input_list = [input_tensor]
@@ -93,11 +94,9 @@ DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
                    Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
                    Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}

-
 DE_C_IMAGE_BATCH_FORMAT = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
                            ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}

-
 DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
                    Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
                    Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
@@ -307,6 +306,7 @@ class Equalize(ImageTensorOperation):
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """
+
    def parse(self):
        return cde.EqualizeOperation()
@@ -337,6 +337,7 @@ class Invert(ImageTensorOperation):
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """
+
    def parse(self):
        return cde.InvertOperation()
@@ -729,7 +730,7 @@ class RandomCrop(ImageTensorOperation):

 class RandomCropDecodeResize(ImageTensorOperation):
     """
-    Equivalent to RandomResizedCrop, but crops before decodes.
+    A combination of `Crop`, `Decode` and `Resize`. It provides better performance for JPEG images.

     Args:
         size (Union[int, sequence]): The size of the output image.
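Because this op crops before decoding, it takes raw image bytes rather than a decoded tensor. A minimal sketch under the same placeholder assumptions as the other vision examples:

    >>> import mindspore.dataset.vision.c_transforms as c_vision
    >>> # no Decode() first: the op decodes only the cropped region
    >>> rcdr_op = c_vision.RandomCropDecodeResize(size=(256, 256),
    ...                                           scale=(0.5, 1.0))
    >>> image_folder_dataset = image_folder_dataset.map(operations=rcdr_op,
    ...                                                 input_columns=["image"])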
@@ -813,7 +814,7 @@ class RandomCropWithBBox(ImageTensorOperation):

     Examples:
         >>> decode_op = c_vision.Decode()
-        >>> random_crop_with_bbox_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
+        >>> random_crop_with_bbox_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200])
         >>> transforms_list = [decode_op, random_crop_with_bbox_op]
         >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
         ...                                                 input_columns=["image"])