diff --git a/mindspore/dataset/core/validator_helpers.py b/mindspore/dataset/core/validator_helpers.py index 46a16d100e..6497bb6021 100644 --- a/mindspore/dataset/core/validator_helpers.py +++ b/mindspore/dataset/core/validator_helpers.py @@ -288,14 +288,17 @@ def check_sampler_shuffle_shard_options(param_dict): """ shuffle, sampler = param_dict.get('shuffle'), param_dict.get('sampler') num_shards, shard_id = param_dict.get('num_shards'), param_dict.get('shard_id') + num_samples = param_dict.get('num_samples') type_check(sampler, (type(None), samplers.BuiltinSampler, samplers.Sampler), "sampler") if sampler is not None: if shuffle is not None: raise RuntimeError("sampler and shuffle cannot be specified at the same time.") - if num_shards is not None: + if num_shards is not None or shard_id is not None: raise RuntimeError("sampler and sharding cannot be specified at the same time.") + if num_samples is not None: + raise RuntimeError("sampler and num_samples cannot be specified at the same time.") if num_shards is not None: check_pos_int32(num_shards) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index c48fced552..f0b1e6006b 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -3045,7 +3045,8 @@ class ImageFolderDataset(MappableDataset): unique index starting from 0). decode (bool, optional): Decode the images after reading (default=False). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. 
@@ -3194,7 +3195,8 @@ class MnistDataset(MappableDataset): sampler (Sampler, optional): Object used to choose samples from the dataset (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. @@ -3277,6 +3279,7 @@ class MindDataset(MappableDataset): shuffle (bool, optional): Whether or not to perform shuffle on the dataset (default=None, performs shuffle). num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, 'num_samples' reflects the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. sampler (Sampler, optional): Object used to choose samples from the @@ -3742,7 +3745,8 @@ class GeneratorDataset(MappableDataset): sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible input is required (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, 'num_samples' will not used. Random accessible input is required. + Random accessible input is required. When this argument is specified, 'num_samples' reflects the maximum + number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only when num_shards is also specified. Random accessible input is required. 
python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This @@ -3922,7 +3926,8 @@ class TFRecordDataset(SourceDataset): - Shuffle.FILES: Shuffle files only. num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. shard_equal_rows (bool, optional): Get equal rows for all shards(default=False). If shard_equal_rows @@ -4115,7 +4120,8 @@ class ManifestDataset(MappableDataset): class will be given a unique index starting from 0). decode (bool, optional): decode the images after reading (default=False). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. @@ -4279,7 +4285,8 @@ class Cifar10Dataset(MappableDataset): sampler (Sampler, optional): Object used to choose samples from the dataset (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. 
@@ -4420,7 +4427,8 @@ class Cifar100Dataset(MappableDataset): sampler (Sampler, optional): Object used to choose samples from the dataset (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. @@ -4510,7 +4518,8 @@ class RandomDataset(SourceDataset): shuffle (bool, optional): Whether or not to perform shuffle on the dataset (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. """ @@ -4766,7 +4775,8 @@ class VOCDataset(MappableDataset): sampler (Sampler, optional): Object used to choose samples from the dataset (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. 
@@ -4962,7 +4972,8 @@ class CocoDataset(MappableDataset): sampler (Sampler, optional): Object used to choose samples from the dataset (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. @@ -5115,7 +5126,8 @@ class CelebADataset(MappableDataset): num_samples (int, optional): The number of images to be included in the dataset. (default=None, all images). num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. @@ -5229,6 +5241,7 @@ class CLUEDataset(SourceDataset): - Shuffle.FILES: Shuffle files only. num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, 'num_samples' reflects the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. @@ -5463,6 +5476,7 @@ class CSVDataset(SourceDataset): - Shuffle.FILES: Shuffle files only. num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). 
+ When this argument is specified, 'num_samples' reflects the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. @@ -5575,6 +5589,7 @@ class TextFileDataset(SourceDataset): - Shuffle.FILES: Shuffle files only. num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, 'num_samples' reflects the maximum number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified. cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. @@ -5779,7 +5794,8 @@ class NumpySlicesDataset(GeneratorDataset): sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible input is required (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, 'num_samples' will not used. Random accessible input is required. + Random accessible input is required. When this argument is specified, 'num_samples' reflects the maximum + number of samples per shard. shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only when num_shards is also specified. Random accessible input is required. diff --git a/mindspore/dataset/engine/samplers.py b/mindspore/dataset/engine/samplers.py index 1fa866ae29..69672e24a5 100644 --- a/mindspore/dataset/engine/samplers.py +++ b/mindspore/dataset/engine/samplers.py @@ -137,6 +137,20 @@ class BuiltinSampler: pass def add_child(self, sampler): + """ + Add a sub-sampler for the given sampler. 
The sub-sampler will receive all data from the + output of the parent sampler and apply its sample logic to return new samples. + + Args: + sampler (Sampler): Object used to choose samples from the dataset. Only builtin + samplers (DistributedSampler, PKSampler, RandomSampler, SequentialSampler, + SubsetRandomSampler, WeightedRandomSampler) are supported. + + Examples: + >>> sampler = ds.SequentialSampler(start_index=0, num_samples=3) + >>> sampler.add_child(ds.RandomSampler(num_samples=2)) + >>> dataset = ds.Cifar10Dataset(cifar10_dataset_dir, sampler=sampler) + """ self.child_sampler = sampler def get_child(self): @@ -448,7 +462,7 @@ class SequentialSampler(BuiltinSampler): Samples the dataset elements sequentially, same as not having a sampler. Args: - start_index (int, optional): Index to start sampling at. (dafault=None, start at first ID) + start_index (int, optional): Index to start sampling at. (default=None, start at first ID) num_samples (int, optional): Number of elements to sample (default=None, all elements). Examples: diff --git a/tests/ut/python/dataset/test_sampler.py b/tests/ut/python/dataset/test_sampler.py index 80688bed5e..258c2d907a 100644 --- a/tests/ut/python/dataset/test_sampler.py +++ b/tests/ut/python/dataset/test_sampler.py @@ -232,9 +232,9 @@ def test_add_sampler_invalid_input(): assert "not an instance of a sampler" in str(info.value) sampler = ds.SequentialSampler() - with pytest.raises(ValueError) as info: + with pytest.raises(RuntimeError) as info: data2 = ds.ManifestDataset(manifest_file, sampler=sampler, num_samples=20) - assert "Conflicting arguments during sampler assignments" in str(info.value) + assert "sampler and num_samples cannot be specified at the same time" in str(info.value) def test_distributed_sampler_invalid_offset():