Signed-off-by: alex-yuyue <yue.yu1@huawei.com>
@@ -492,6 +492,5 @@ PYBIND_REGISTER(
     return uniform_aug;
   }));
 }));
-
 } // namespace dataset
 } // namespace mindspore
@@ -25,7 +25,6 @@
 namespace mindspore {
 namespace dataset {
 #ifdef ENABLE_ICU4C
 PYBIND_REGISTER(
@@ -262,6 +261,5 @@ PYBIND_REGISTER(SPieceTokenizerOutType, 0, ([](const py::module *m) {
   .value("DE_SPIECE_TOKENIZER_OUTTYPE_KINT", SPieceTokenizerOutType::kInt)
   .export_values();
 }));
-
 } // namespace dataset
 } // namespace mindspore
@@ -605,7 +605,7 @@ class SubsetSampler(BuiltinSampler):
     Samples the elements from a sequence of indices.

     Args:
-        indices (list[int]): A sequence of indices.
+        indices (Any iterable python object but string): A sequence of indices.
         num_samples (int, optional): Number of elements to sample (default=None, all elements).

     Examples:
@@ -633,6 +633,13 @@ class SubsetSampler(BuiltinSampler):
             return [sample_id for sample_id, _ in zip(sampler, range(number_of_samples))]

+        if num_samples is not None:
+            if not isinstance(num_samples, int):
+                raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
+            if num_samples < 0 or num_samples > validator.INT64_MAX:
+                raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
+                                 .format(0, validator.INT64_MAX))
+
         if not isinstance(indices, str) and validator.is_iterable(indices):
             indices = _get_sample_ids_as_list(indices, num_samples)
         elif isinstance(indices, int):
@@ -645,13 +652,6 @@ class SubsetSampler(BuiltinSampler):
                 raise TypeError("SubsetSampler: Type of indices element must be int, "
                                 "but got list[{}]: {}, type: {}.".format(i, item, type(item)))

-        if num_samples is not None:
-            if not isinstance(num_samples, int):
-                raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
-            if num_samples < 0 or num_samples > validator.INT64_MAX:
-                raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
-                                 .format(0, validator.INT64_MAX))
-
         self.indices = indices
         super().__init__(num_samples)
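For context, a minimal sketch of how the relocated validation behaves from user code, assuming a MindSpore build that includes this change (the `range` input and the values are illustrative):

```python
import mindspore.dataset as ds

# After this change, any non-string iterable is accepted as indices.
sampler = ds.SubsetSampler(indices=range(0, 10, 2), num_samples=3)

# num_samples is now checked before the indices are materialized,
# so an invalid value fails fast even for a large iterable.
try:
    ds.SubsetSampler(indices=[0, 1, 2], num_samples="3")
except TypeError as err:
    print(err)  # num_samples must be integer but was: 3.
```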
@@ -31,7 +31,13 @@ class TensorOperation:
     Base class Tensor Ops
     """
     def __call__(self, *input_tensor_list):
-        tensor_row = [cde.Tensor(np.asarray(tensor)) for tensor in input_tensor_list]
+        tensor_row = []
+        for tensor in input_tensor_list:
+            try:
+                tensor_row.append(cde.Tensor(np.asarray(tensor)))
+            except RuntimeError:
+                raise TypeError("Invalid user input. Got {}: {}, cannot be converted into tensor." \
+                                .format(type(tensor), tensor))
         callable_op = cde.Execute(self.parse())
         output_tensor_list = callable_op(tensor_row)
         for i, element in enumerate(output_tensor_list):
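The rewritten `__call__` converts each input with `np.asarray` and re-raises conversion failures as a `TypeError`. A minimal sketch of this eager error path, assuming the `c_transforms` module layout of this release (the op and the dict payload are illustrative):

```python
import mindspore.dataset.vision.c_transforms as c_vision

op = c_vision.RandomSolarize(threshold=(10, 100))
try:
    # A dict cannot be converted into a dataset tensor: np.asarray yields
    # an object array, cde.Tensor raises RuntimeError, and the new except
    # branch re-raises it as a TypeError naming the offending input.
    op({"not": "an image"})
except TypeError as err:
    print(err)  # Invalid user input. Got <class 'dict'>: ..., cannot be converted into tensor.
```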
@@ -1197,12 +1197,13 @@ class RandomSharpness(ImageTensorOperation):

 class RandomSolarize(ImageTensorOperation):
     """
-    Invert all pixel values with given range.
+    Randomly invert the pixel values of input image within given range.

     Args:
-        threshold (tuple, optional): Range of random solarize threshold. Threshold values should always be
-            in the range (0, 255), include at least one integer value in the given range and be in
-            (min, max) format. If min=max, then invert all pixel values above min(max) (default=(0, 255)).
+        threshold (tuple, optional): Range of random solarize threshold (default=(0, 255)).
+            Threshold values should always be in (min, max) format,
+            where min <= max, min and max are integers in the range (0, 255).
+            If min=max, then invert all pixel values above min(max).

     Examples:
         >>> transforms_list = [c_vision.Decode(), c_vision.RandomSolarize(threshold=(10,100))]
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 import copy
+import numpy as np
 import mindspore.dataset.text as text
 import mindspore.dataset as ds
 from mindspore.dataset.text import SentencePieceModel, to_str, SPieceTokenizerOutType
@@ -21,6 +22,13 @@ VOCAB_FILE = "../data/dataset/test_sentencepiece/botchan.txt"
 DATA_FILE = "../data/dataset/testTokenizerData/sentencepiece_tokenizer.txt"


+def test_sentence_piece_tokenizer_callable():
+    vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, {})
+    tokenizer = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING)
+    data = '123'
+    assert np.array_equal(tokenizer(data), ['▁', '12', '3'])
+
+
 def test_from_vocab_to_str_UNIGRAM():
     vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, {})
     tokenizer = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING)
@@ -160,6 +168,7 @@ def test_with_zip_concat():


 if __name__ == "__main__":
+    test_sentence_piece_tokenizer_callable()
     test_from_vocab_to_str_UNIGRAM()
     test_from_vocab_to_str_BPE()
     test_from_vocab_to_str_CHAR()
@@ -16,6 +16,7 @@
 Testing BertTokenizer op in DE
 """
 import numpy as np
+import pytest
 import mindspore.dataset as ds
 from mindspore import log as logger
 import mindspore.dataset.text as text
@@ -127,7 +128,7 @@ test_paras = [
         preserve_unused_token=True,
         vocab_list=vocab_bert
     ),
-    # test non-default parms
+    # test non-default params
     dict(
         first=8,
         last=8,
@@ -242,6 +243,19 @@ def test_bert_tokenizer_with_offsets():
         check_bert_tokenizer_with_offsets(**paras)


+def test_bert_tokenizer_callable_invalid_input():
+    """
+    Test BertTokenizer in eager mode with invalid input
+    """
+    data = {'张三': 18, '王五': 20}
+    vocab = text.Vocab.from_list(vocab_bert)
+    tokenizer_op = text.BertTokenizer(vocab=vocab)
+    with pytest.raises(TypeError) as info:
+        _ = tokenizer_op(data)
+    assert "Invalid user input. Got <class 'dict'>: {'张三': 18, '王五': 20}, cannot be converted into tensor." in str(info)
+
+
 if __name__ == '__main__':
+    test_bert_tokenizer_callable_invalid_input()
     test_bert_tokenizer_default()
     test_bert_tokenizer_with_offsets()
@@ -52,9 +52,10 @@ def test_to_number_eager():

     # test input invalid tensor
     invalid_input = [["1", "2", "3"], ["4", "5"]]
-    with pytest.raises(RuntimeError) as info:
+    with pytest.raises(TypeError) as info:
         _ = op(invalid_input)
-    assert "Invalid data type." in str(info.value)
+    assert "Invalid user input. Got <class 'list'>: [['1', '2', '3'], ['4', '5']], cannot be converted into tensor" in \
+        str(info.value)


 def test_to_number_typical_case_integral():
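For comparison, a sketch of the valid eager path for `ToNumber` with a rectangular input; the dtype import path is an assumption for this release:

```python
import mindspore.common.dtype as mstype
import mindspore.dataset.text as text

op = text.ToNumber(mstype.int32)
# A rectangular list converts cleanly through np.asarray -> cde.Tensor,
# so no TypeError is raised and the parsed integers come back as an ndarray.
print(op(["1", "2", "3"]))  # [1 2 3]
```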