Browse Source

add common import

tags/v1.6.0
ms_yan 4 years ago
parent
commit
bfd306a6f2
11 changed files with 124 additions and 32 deletions
  1. +8
    -5
      mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc
  2. +7
    -0
      mindspore/dataset/__init__.py
  3. +7
    -0
      mindspore/dataset/audio/__init__.py
  4. +10
    -4
      mindspore/dataset/callback/ds_callback.py
  5. +6
    -0
      mindspore/dataset/core/config.py
  6. +26
    -22
      mindspore/dataset/engine/datasets.py
  7. +1
    -1
      mindspore/dataset/engine/graphdata.py
  8. +7
    -0
      mindspore/dataset/text/__init__.py
  9. +35
    -0
      mindspore/dataset/text/utils.py
  10. +9
    -0
      mindspore/dataset/transforms/__init__.py
  11. +8
    -0
      mindspore/dataset/vision/__init__.py

+ 8
- 5
mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc View File

@@ -109,12 +109,15 @@ Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int
// Note rnd_aspect_ is already a random distribution of the input aspect ratio in logarithmic scale.
double const sample_aspect = exp(rnd_aspect_(rnd_));

CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / h_in) > w_in,
"RandomCropAndResizeOp: multiplication out of bounds");
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / h_in / w_in) > sample_scale,
"RandomCropAndResizeOp: multiplication out of bounds");
CHECK_FAIL_RETURN_UNEXPECTED(
(std::numeric_limits<int32_t>::max() / h_in) > w_in,
"RandomCropAndResizeOp: multiplication out of bounds, check image width and image height first.");
CHECK_FAIL_RETURN_UNEXPECTED(
(std::numeric_limits<int32_t>::max() / h_in / w_in) > sample_scale,
"RandomCropAndResizeOp: multiplication out of bounds, check image width, image height and sample scale first.");
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / h_in / w_in / sample_scale) > sample_aspect,
"RandomCropAndResizeOp: multiplication out of bounds");
"RandomCropAndResizeOp: multiplication out of bounds, check image width, image "
"height, sample scale and sample aspect first.");
*crop_width = static_cast<int32_t>(std::round(std::sqrt(h_in * w_in * sample_scale * sample_aspect)));
*crop_height = static_cast<int32_t>(std::round(*crop_width / sample_aspect));



+ 7
- 0
mindspore/dataset/__init__.py View File

@@ -22,6 +22,13 @@ Besides, this module provides APIs to sample data while loading.
We can enable cache in most of the dataset with its key arguments 'cache'. Please notice that cache is not supported
on Windows platform yet. Do not use it while loading and processing data on Windows. More introductions and limitations
can refer `Single-Node Tensor Cache <https://www.mindspore.cn/docs/programming_guide/en/master/cache.html>`_.

Common imported modules in corresponding API examples are as follows:

.. code-block::

import mindspore.dataset as ds
from mindspore.dataset.transforms import c_transforms
"""

from .core import config


+ 7
- 0
mindspore/dataset/audio/__init__.py View File

@@ -17,6 +17,13 @@ This module is to support audio augmentations.
It includes two parts: transforms and utils.
transforms is a high performance processing module with common audio operations.
utils provides some general methods for audio processing.

Common imported modules in corresponding API examples are as follows:

.. code-block::

import mindspore.dataset as ds
from mindspore.dataset import audio
"""
from . import transforms
from . import utils

+ 10
- 4
mindspore/dataset/callback/ds_callback.py View File

@@ -30,12 +30,15 @@ class DSCallback:
step_size (int, optional): The number of steps between the step_begin and step_end are called (Default=1).

Examples:
>>> from mindspore.dataset import DSCallback
>>>
>>> class PrintInfo(DSCallback):
>>> def ds_epoch_end(self, ds_run_context):
>>> print(ds_run_context.cur_epoch_num)
>>> print(ds_run_context.cur_step_num)
>>>
>>> data = data.map(operations=op, callbacks=PrintInfo())
>>> # dataset is an instance of Dataset object
>>> dataset = dataset.map(operations=op, callbacks=PrintInfo())
"""

@check_callback
@@ -127,9 +130,12 @@ class WaitedDSCallback(Callback, DSCallback):
will be equal to the batch size (Default=1).

Examples:
>>> my_cb = MyWaitedCallback(32)
>>> data = data.map(operations=AugOp(), callbacks=my_cb)
>>> data = data.batch(32)
>>> from mindspore.dataset import WaitedDSCallback
>>>
>>> my_cb = WaitedDSCallback(32)
>>> # dataset is an instance of Dataset object
>>> dataset = dataset.map(operations=AugOp(), callbacks=my_cb)
>>> dataset = dataset.batch(32)
>>> # define the model
>>> model.train(epochs, data, callbacks=[my_cb])
"""


+ 6
- 0
mindspore/dataset/core/config.py View File

@@ -15,6 +15,12 @@
"""
The configuration module provides various functions to set and get the supported
configuration parameters, and read a configuration file.

Common imported modules in corresponding API examples are as follows:

.. code-block::

import mindspore.dataset as ds
"""
import os
import platform


+ 26
- 22
mindspore/dataset/engine/datasets.py View File

@@ -264,7 +264,7 @@ class Dataset:
def close_pool(self):
"""
Close multiprocessing pool in dataset. If you are familiar with multiprocessing library, you can regard this
as a deconstructor for a processingPool object.
as a destructor for a processingPool object.
"""
if hasattr(self, 'process_pool') and self.process_pool is not None:
self.process_pool.close()
@@ -587,7 +587,7 @@ class Dataset:
RuntimeError: If exist sync operators before shuffle.

Examples:
>>> # dataset is an instance of Dataset object.
>>> # dataset is an instance object of Dataset
>>> # Optionally set the seed for the first epoch
>>> ds.config.set_seed(58)
>>> # Create a shuffled dataset using a shuffle buffer of size 4
@@ -823,7 +823,7 @@ class Dataset:
RepeatDataset, dataset repeated.

Examples:
>>> # dataset is an instance of Dataset object.
>>> # dataset is an instance object of Dataset
>>>
>>> # Create a dataset where the dataset is repeated for 50 epochs
>>> dataset = dataset.repeat(50)
@@ -852,7 +852,7 @@ class Dataset:
SkipDataset, dataset that containing rows like origin rows subtract skipped rows.

Examples:
>>> # dataset is an instance of Dataset object.
>>> # dataset is an instance object of Dataset
>>> # Create a dataset which skips first 3 elements from data
>>> dataset = dataset.skip(3)
"""
@@ -876,7 +876,7 @@ class Dataset:
TakeDataset, dataset taken.

Examples:
>>> # dataset is an instance of Dataset object.
>>> # dataset is an instance object of Dataset
>>> # Create a dataset where the dataset includes 50 elements.
>>> dataset = dataset.take(50)
"""
@@ -1085,7 +1085,7 @@ class Dataset:
RenameDataset, dataset renamed.

Examples:
>>> # dataset is an instance of Dataset object.
>>> # dataset is an instance object of Dataset
>>> input_columns = ["input_col1", "input_col2", "input_col3"]
>>> output_columns = ["output_col1", "output_col2", "output_col3"]
>>>
@@ -1112,7 +1112,7 @@ class Dataset:
ProjectDataset, dataset projected.

Examples:
>>> # dataset is an instance of Dataset object
>>> # dataset is an instance object of Dataset
>>> columns_to_project = ["column3", "column1", "column2"]
>>>
>>> # Create a dataset that consists of column3, column1, column2
@@ -1135,28 +1135,30 @@ class Dataset:
freq_range(tuple[int]): A tuple of integers (min_frequency, max_frequency). Words within the frequency
range will be stored.
Naturally 0 <= min_frequency <= max_frequency <= total_words. min_frequency/max_frequency
an be set to default, which corresponds to 0/total_words separately
can be set to default, which corresponds to 0/total_words respectively.
top_k(int): Number of words to be built into vocab. top_k most frequent words are
taken. The top_k is applied after freq_range. If fewer than top_k words remain, all words will be taken
special_tokens(list[str]): A list of strings, each one is a special token
special_tokens(list[str]): A list of strings, each one is a special token.
special_first(bool): Whether special_tokens will be prepended/appended to vocab. If special_tokens
is specified and special_first is set to default, special_tokens will be prepended
is specified and special_first is set to default, special_tokens will be prepended.

Returns:
Vocab, vocab built from the dataset.

Example:
>>> import numpy as np
>>>
>>> def gen_corpus():
... # key: word, value: number of occurrences, reason for using letters is so their order is apparent
... corpus = {"Z": 4, "Y": 4, "X": 4, "W": 3, "U": 3, "V": 2, "T": 1}
... for k, v in corpus.items():
... yield (np.array([k] * v, dtype='S'),)
>>> column_names = ["column1", "column2", "column3"]
>>> column_names = ["column1"]
>>> dataset = ds.GeneratorDataset(gen_corpus, column_names)
>>> dataset = dataset.build_vocab(columns=["column3", "column1", "column2"],
>>> dataset = dataset.build_vocab(columns=["column1"],
... freq_range=(1, 10), top_k=5,
... special_tokens=["<pad>", "<unk>"],
... special_first=True,vocab='vocab')
... special_first=True)

"""
vocab = cde.Vocab()
@@ -1213,6 +1215,7 @@ class Dataset:

Example:
>>> from mindspore.dataset.text import SentencePieceModel
>>>
>>> def gen_corpus():
... # key: word, value: number of occurrences, reason for using letters is so their order is apparent
... corpus = {"Z": 4, "Y": 4, "X": 4, "W": 3, "U": 3, "V": 2, "T": 1}
@@ -1223,8 +1226,8 @@ class Dataset:
>>> dataset = dataset.build_sentencepiece_vocab(columns=["column3", "column1", "column2"],
... vocab_size=5000,
... character_coverage=0.9995,
... model_type=SentencePieceModel.Unigram,
... params={},vocab='vocab')
... model_type=SentencePieceModel.UNIGRAM,
... params={})
"""
vocab = cde.SentencePieceVocab()

@@ -1253,13 +1256,13 @@ class Dataset:

Args:
apply_func (function): A function that must take one 'Dataset' as an argument and
return a preprogressing 'Dataset'.
return a preprocessed 'Dataset'.

Returns:
Dataset, dataset applied by the function.

Examples:
>>> # dataset is an instance of Dataset object
>>> # dataset is an instance object of Dataset
>>>
>>> # Declare an apply_func function which returns a Dataset object
>>> def apply_func(data):
@@ -1427,7 +1430,7 @@ class Dataset:
TupleIterator, tuple iterator over the dataset.

Examples:
>>> # dataset is an instance of Dataset object
>>> # dataset is an instance object of Dataset
>>> iterator = dataset.create_tuple_iterator()
>>> for item in iterator:
... # item is a list
@@ -1459,7 +1462,7 @@ class Dataset:
DictIterator, dictionary iterator over the dataset.

Examples:
>>> # dataset is an instance of Dataset object
>>> # dataset is an instance object of Dataset
>>> iterator = dataset.create_dict_iterator()
>>> for item in iterator:
... # item is a dict
@@ -1487,7 +1490,7 @@ class Dataset:
tuple, tuple of the input index information.

Examples:
>>> # dataset is an instance of Dataset object
>>> # dataset is an instance object of Dataset
>>> # set input_indexs
>>> dataset.input_indexs = 10
>>> print(dataset.input_indexs)
@@ -1939,6 +1942,7 @@ class MappableDataset(SourceDataset):
new_sampler (Sampler): The sampler to use for the current dataset.

Examples:
>>> # dataset is an instance object of Dataset
>>> # use a DistributedSampler instead
>>> new_sampler = ds.DistributedSampler(10, 2)
>>> dataset.use_sampler(new_sampler)
@@ -1987,8 +1991,8 @@ class MappableDataset(SourceDataset):
1. There is an optimized split function, which will be called automatically when the dataset
that calls this function is a MappableDataset.
2. Dataset should not be sharded if split is going to be called. Instead, create a
DistributedSampler and specify a split to shard after splitting. If dataset is
sharded after a split, it is strongly recommended to set the same seed in each instance
DistributedSampler and specify a split to shard after splitting. If the dataset is
sharded after a split, it is strongly recommended setting the same seed in each instance
of execution, otherwise each shard may not be part of the same split (see Examples).
3. It is strongly recommended to not shuffle the dataset, but use randomize=True instead.
Shuffling the dataset may not be deterministic, which means the data in each split


+ 1
- 1
mindspore/dataset/engine/graphdata.py View File

@@ -333,7 +333,7 @@ class GraphData:
next-hop sampling. A maximum of 6-hop are allowed.

The sampling result is tiled into a list in the format of [input node, 1-hop sampling result,
2-hop samling result ...]
2-hop sampling result ...]

Args:
node_list (Union[list, numpy.ndarray]): The given list of nodes.


+ 7
- 0
mindspore/dataset/text/__init__.py View File

@@ -16,6 +16,13 @@ This module is to support text processing for NLP. It includes two parts:
transforms and utils. transforms is a high performance
NLP text processing module which is developed with ICU4C and cppjieba.
utils provides some general methods for NLP text processing.

Common imported modules in corresponding API examples are as follows:

.. code-block::

import mindspore.dataset as ds
from mindspore.dataset import text
"""
import platform
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, \


+ 35
- 0
mindspore/dataset/text/utils.py View File

@@ -66,6 +66,13 @@ class Vocab(cde.Vocab):

Returns:
Vocab, vocab built from the dataset.

Examples:
>>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
>>> vocab = text.Vocab.from_dataset(dataset, "text", freq_range=None, top_k=None,
... special_tokens=["<pad>", "<unk>"],
... special_first=True)
>>> dataset = dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
"""
return dataset.build_vocab(columns, freq_range, top_k, special_tokens, special_first)

@@ -84,6 +91,9 @@ class Vocab(cde.Vocab):

Returns:
Vocab, vocab built from the `list`.

Examples:
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
"""
if special_tokens is None:
special_tokens = []
@@ -108,6 +118,9 @@ class Vocab(cde.Vocab):

Returns:
Vocab, vocab built from the file.

Examples:
>>> vocab = text.Vocab.from_file("/path/to/wordpiece/vocab/file", ",", None, ["<pad>", "<unk>"], True)
"""
if vocab_size is None:
vocab_size = -1
@@ -127,6 +140,9 @@ class Vocab(cde.Vocab):

Returns:
Vocab, vocab built from the `dict`.

Examples:
>>> vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "<unk>": 6})
"""

return super().from_dict(word_dict)
@@ -165,6 +181,11 @@ class SentencePieceVocab(cde.SentencePieceVocab):

Returns:
SentencePieceVocab, vocab built from the dataset.

Examples:
>>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
>>> vocab = text.SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
"""

return dataset.build_sentencepiece_vocab(col_names, vocab_size, character_coverage,
@@ -203,6 +224,10 @@ class SentencePieceVocab(cde.SentencePieceVocab):

Returns:
SentencePieceVocab, vocab built from the file.

Examples:
>>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
"""
return super().from_file(file_path, vocab_size, character_coverage,
DE_C_INTER_SENTENCEPIECE_MODE[model_type], params)
@@ -217,6 +242,11 @@ class SentencePieceVocab(cde.SentencePieceVocab):
vocab(SentencePieceVocab): A SentencePiece object.
path(str): Path to store model.
filename(str): The name of the file.

Examples:
>>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
>>> text.SentencePieceVocab.save_model(vocab, "./", "m.model")
"""
super().save_model(vocab, path, filename)

@@ -231,6 +261,11 @@ def to_str(array, encoding='utf8'):

Returns:
numpy.ndarray, NumPy array of `str`.

Examples:
>>> dataset = ds.TextFileDataset("/path/to/text_file_dataset_file", shuffle=False)
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
...     print(text.to_str(item["text"]))
"""

if not isinstance(array, np.ndarray):


+ 9
- 0
mindspore/dataset/transforms/__init__.py View File

@@ -15,6 +15,15 @@
This module is to support common augmentations. C_transforms is a high performance
image augmentation module which is developed with C++ OpenCV. Py_transforms
provide more kinds of image augmentations which is developed with Python PIL.

Common imported modules in corresponding API examples are as follows:

.. code-block::

import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision
from mindspore.dataset.transforms import c_transforms
from mindspore.dataset.transforms import py_transforms
"""
from .. import vision
from . import c_transforms


+ 8
- 0
mindspore/dataset/vision/__init__.py View File

@@ -16,6 +16,14 @@ This module is to support vision augmentations. It includes two parts:
c_transforms and py_transforms. C_transforms is a high performance
image augmentation module which is developed with c++ opencv. Py_transforms
provide more kinds of image augmentations which is developed with Python PIL.

Common imported modules in corresponding API examples are as follows:

.. code-block::

import mindspore.dataset.vision.c_transforms as c_vision
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore.dataset.transforms import c_transforms
"""
from . import c_transforms
from . import py_transforms


Loading…
Cancel
Save