|
|
@@ -435,7 +435,8 @@ class Dataset: |
|
|
parallel (default=None, the value from the config will be used). |
|
|
parallel (default=None, the value from the config will be used). |
|
|
python_multiprocessing (bool, optional): Parallelize python operations with multiple worker processes. This |
|
|
python_multiprocessing (bool, optional): Parallelize python operations with multiple worker processes. This |
|
|
option could be beneficial if the python operation is computationally heavy (default=False). |
|
|
option could be beneficial if the python operation is computationally heavy (default=False). |
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) |
|
|
|
|
|
|
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). |
|
|
|
|
|
The cache feature is under development and is not recommended. |
|
|
|
|
|
|
|
|
Returns: |
|
|
Returns: |
|
|
MapDataset, dataset after mapping operation. |
|
|
MapDataset, dataset after mapping operation. |
|
|
@@ -1951,7 +1952,9 @@ class MapDataset(DatasetOp): |
|
|
in parallel (default=None). |
|
|
in parallel (default=None). |
|
|
python_multiprocessing (bool, optional): Parallelize python operations with multiple worker processes. This |
|
|
python_multiprocessing (bool, optional): Parallelize python operations with multiple worker processes. This |
|
|
option could be beneficial if the python operation is computationally heavy (default=False). |
|
|
option could be beneficial if the python operation is computationally heavy (default=False). |
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) |
|
|
|
|
|
|
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). |
|
|
|
|
|
The cache feature is under development and is not recommended. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Raises: |
|
|
Raises: |
|
|
ValueError: If len(input_columns) != len(output_columns) and columns_order is not specified. |
|
|
ValueError: If len(input_columns) != len(output_columns) and columns_order is not specified. |
|
|
@@ -2141,6 +2144,7 @@ class RepeatDataset(DatasetOp): |
|
|
""" |
|
|
""" |
|
|
return self.count |
|
|
return self.count |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SkipDataset(DatasetOp): |
|
|
class SkipDataset(DatasetOp): |
|
|
""" |
|
|
""" |
|
|
The result of applying Skip operator to the input Dataset. |
|
|
The result of applying Skip operator to the input Dataset. |
|
|
@@ -2406,6 +2410,7 @@ class TransferDataset(DatasetOp): |
|
|
def stop_send(self): |
|
|
def stop_send(self): |
|
|
self.iterator.depipeline.StopSend() |
|
|
self.iterator.depipeline.StopSend() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RangeDataset(MappableDataset): |
|
|
class RangeDataset(MappableDataset): |
|
|
""" |
|
|
""" |
|
|
A source dataset that reads and parses datasets stored on disk in a range. |
|
|
A source dataset that reads and parses datasets stored on disk in a range. |
|
|
@@ -2552,7 +2557,8 @@ class ImageFolderDatasetV2(MappableDataset): |
|
|
into (default=None). |
|
|
into (default=None). |
|
|
shard_id (int, optional): The shard ID within num_shards (default=None). This |
|
|
shard_id (int, optional): The shard ID within num_shards (default=None). This |
|
|
argument should be specified only when num_shards is also specified. |
|
|
argument should be specified only when num_shards is also specified. |
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) |
|
|
|
|
|
|
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). |
|
|
|
|
|
The cache feature is under development and is not recommended. |
|
|
|
|
|
|
|
|
Raises: |
|
|
Raises: |
|
|
RuntimeError: If sampler and shuffle are specified at the same time. |
|
|
RuntimeError: If sampler and shuffle are specified at the same time. |
|
|
@@ -3348,7 +3354,8 @@ class TFRecordDataset(SourceDataset): |
|
|
argument should be specified only when num_shards is also specified. |
|
|
argument should be specified only when num_shards is also specified. |
|
|
shard_equal_rows (bool): Get equal rows for all shards (default=False). If shard_equal_rows is false, number |
|
|
shard_equal_rows (bool): Get equal rows for all shards (default=False). If shard_equal_rows is false, number |
|
|
of rows of each shard may not be equal. |
|
|
of rows of each shard may not be equal. |
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) |
|
|
|
|
|
|
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). |
|
|
|
|
|
The cache feature is under development and is not recommended. |
|
|
Examples: |
|
|
Examples: |
|
|
>>> import mindspore.dataset as ds |
|
|
>>> import mindspore.dataset as ds |
|
|
>>> import mindspore.common.dtype as mstype |
|
|
>>> import mindspore.common.dtype as mstype |
|
|
@@ -3919,7 +3926,8 @@ class RandomDataset(SourceDataset): |
|
|
num_samples (int): number of samples to draw from the total. (default=None, which means all rows) |
|
|
num_samples (int): number of samples to draw from the total. (default=None, which means all rows) |
|
|
num_parallel_workers (int, optional): number of workers to read the data |
|
|
num_parallel_workers (int, optional): number of workers to read the data |
|
|
(default=None, number set in the config). |
|
|
(default=None, number set in the config). |
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) |
|
|
|
|
|
|
|
|
cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used). |
|
|
|
|
|
The cache feature is under development and is not recommended. |
|
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset |
|
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset |
|
|
(default=None, expected order behavior shown in the table). |
|
|
(default=None, expected order behavior shown in the table). |
|
|
num_shards (int, optional): Number of shards that the dataset should be divided |
|
|
num_shards (int, optional): Number of shards that the dataset should be divided |
|
|
@@ -5313,6 +5321,7 @@ class BuildVocabDataset(DatasetOp): |
|
|
|
|
|
|
|
|
return new_op |
|
|
return new_op |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BuildSentencePieceVocabDataset(DatasetOp): |
|
|
class BuildSentencePieceVocabDataset(DatasetOp): |
|
|
""" |
|
|
""" |
|
|
Build a SentencePieceVocab from a dataset. |
|
|
Build a SentencePieceVocab from a dataset. |
|
|
|