From 23e90216d946b76a4c732e52ca71ff6b9f9668a4 Mon Sep 17 00:00:00 2001 From: Lixia Chen Date: Tue, 15 Dec 2020 13:56:06 -0500 Subject: [PATCH] Minor modification for DatasetCache docstring --- mindspore/dataset/__init__.py | 3 +- mindspore/dataset/engine/cache_client.py | 4 +++ mindspore/dataset/engine/datasets.py | 45 ++++++++++++++++-------- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/mindspore/dataset/__init__.py b/mindspore/dataset/__init__.py index b21d1b94aa..826578264f 100644 --- a/mindspore/dataset/__init__.py +++ b/mindspore/dataset/__init__.py @@ -37,4 +37,5 @@ __all__ = ["config", "ImageFolderDataset", "MnistDataset", "PaddedDataset", "MindDataset", "GeneratorDataset", "TFRecordDataset", "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", "NumpySlicesDataset", "VOCDataset", "CocoDataset", "TextFileDataset", "CLUEDataset", "CSVDataset", "Schema", "DistributedSampler", "PKSampler", - "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "zip", "GraphData"] + "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "zip", "GraphData", + "DatasetCache"] diff --git a/mindspore/dataset/engine/cache_client.py b/mindspore/dataset/engine/cache_client.py index 6691f9da46..391eeb6bc7 100644 --- a/mindspore/dataset/engine/cache_client.py +++ b/mindspore/dataset/engine/cache_client.py @@ -33,6 +33,10 @@ class DatasetCache: port (int, optional): Port to connect to server (default=50052). num_connections (int, optional): Number of tcp/ip connections (default=12). prefetch_size (int, optional): Prefetch size (default=20). + + Tutorials: + https://www.mindspore.cn/doc/programming_guide/zh-CN/master/cache.html?highlight=datasetcache + https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/enable_cache.html """ def __init__(self, session_id, size=0, spilling=False, hostname=None, port=None, num_connections=None, diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index bc230b0984..72efe62bc0 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -488,7 +488,8 @@ class Dataset: parallel (default=None, the value from the configuration will be used). python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This option could be beneficial if the Python operation is computational heavy (default=False). - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). callbacks: (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called (Default=None). @@ -2241,7 +2242,8 @@ class MapDataset(Dataset): in parallel (default=None). python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This option could be beneficial if the Python operation is computational heavy (default=False). - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). callbacks: (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called (Default=None) Raises: @@ -2980,7 +2982,8 @@ class ImageFolderDataset(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Raises: RuntimeError: If sampler and shuffle are specified at the same time. @@ -3128,7 +3131,8 @@ class MnistDataset(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Raises: RuntimeError: If sampler and shuffle are specified at the same time. @@ -3829,7 +3833,8 @@ class TFRecordDataset(SourceDataset): argument can only be specified when num_shards is also specified. shard_equal_rows (bool, optional): Get equal rows for all shards(default=False). If shard_equal_rows is false, number of rows of each shard may be not equal. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Examples: >>> import mindspore.dataset as ds @@ -4019,7 +4024,8 @@ class ManifestDataset(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Raises: RuntimeError: If sampler and shuffle are specified at the same time. @@ -4182,7 +4188,8 @@ class Cifar10Dataset(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Raises: RuntimeError: If sampler and shuffle are specified at the same time. @@ -4322,7 +4329,8 @@ class Cifar100Dataset(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Raises: RuntimeError: If sampler and shuffle are specified at the same time. @@ -4403,7 +4411,8 @@ class RandomDataset(SourceDataset): num_samples (int): number of samples to draw from the total. (default=None, which means all rows) num_parallel_workers (int, optional): Number of workers to read the data (default=None, number set in the config). - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). shuffle (bool, optional): Whether or not to perform shuffle on the dataset (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided @@ -4666,7 +4675,8 @@ class VOCDataset(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Raises: RuntimeError: If xml of Annotations is an invalid format. @@ -4861,7 +4871,8 @@ class CocoDataset(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Raises: RuntimeError: If sampler and shuffle are specified at the same time. @@ -5013,7 +5024,8 @@ class CelebADataset(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Examples: >>> import mindspore.dataset as ds @@ -5125,7 +5137,8 @@ class CLUEDataset(SourceDataset): num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Examples: >>> import mindspore.dataset as ds @@ -5358,7 +5371,8 @@ class CSVDataset(SourceDataset): num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Examples: @@ -5469,7 +5483,8 @@ class TextFileDataset(SourceDataset): num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None which means no cache is used). Examples: >>> import mindspore.dataset as ds