Browse Source

!12398 fix minddata issues

From: @luoyang42
Reviewed-by: @heleiwang,@pandoublefeng
Signed-off-by: @pandoublefeng
tags/v1.2.0-rc1
mindspore-ci-bot Gitee 4 years ago
parent
commit
f940fe51b1
10 changed files with 63 additions and 36 deletions
  1. +1
    -0
      cmake/package.cmake
  2. +5
    -0
      mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc
  3. +0
    -6
      mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc
  4. +3
    -2
      mindspore/dataset/core/validator_helpers.py
  5. +37
    -12
      mindspore/dataset/engine/samplers.py
  6. +13
    -12
      mindspore/dataset/vision/c_transforms.py
  7. +1
    -1
      tests/ut/python/dataset/test_datasets_textfileop.py
  8. +1
    -1
      tests/ut/python/dataset/test_exceptions.py
  9. +1
    -1
      tests/ut/python/dataset/test_minddataset_exception.py
  10. +1
    -1
      tests/ut/python/dataset/test_sampler.py

+ 1
- 0
cmake/package.cmake View File

@@ -326,6 +326,7 @@ install(
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/transforms.h ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/transforms.h
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/vision.h ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/vision.h
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/vision_lite.h ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/vision_lite.h
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/vision_ascend.h
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/execute.h ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/execute.h
DESTINATION ${INSTALL_BASE_DIR}/include/minddata/dataset/include DESTINATION ${INSTALL_BASE_DIR}/include/minddata/dataset/include
COMPONENT mindspore COMPONENT mindspore


+ 5
- 0
mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc View File

@@ -105,6 +105,11 @@ Status GeneratorNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_
// no validation is needed for generator op. // no validation is needed for generator op.
Status GeneratorNode::ValidateParams() { Status GeneratorNode::ValidateParams() {
RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); RETURN_IF_NOT_OK(DatasetNode::ValidateParams());
if (source_len_ == 0) {
std::string err_msg = "GeneratorNode: data row of input source must not be 0, got: " + std::to_string(source_len_);
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
return Status::OK(); return Status::OK();
} }




+ 0
- 6
mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc View File

@@ -38,12 +38,6 @@ Status SharpnessOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
RETURN_STATUS_UNEXPECTED("Sharpness: image shape is not <H,W,C> or <H,W>"); RETURN_STATUS_UNEXPECTED("Sharpness: image shape is not <H,W,C> or <H,W>");
} }


/// Get number of channels and image matrix
std::size_t num_of_channels = input_cv->shape()[2];
if (num_of_channels != 1 && num_of_channels != 3) {
RETURN_STATUS_UNEXPECTED("Sharpness: image channel is not 1 or 3.");
}

/// creating a smoothing filter. 1, 1, 1, /// creating a smoothing filter. 1, 1, 1,
/// 1, 5, 1, /// 1, 5, 1,
/// 1, 1, 1 /// 1, 1, 1


+ 3
- 2
mindspore/dataset/core/validator_helpers.py View File

@@ -344,8 +344,9 @@ def check_num_parallel_workers(value):


def check_num_samples(value): def check_num_samples(value):
type_check(value, (int,), "num_samples") type_check(value, (int,), "num_samples")
check_value(value, [0, INT32_MAX], "num_samples")

if value < 0 or value > INT64_MAX:
raise ValueError(
"num_samples exceeds the boundary between {} and {}(INT64_MAX)!".format(0, INT64_MAX))


def validate_dataset_param_value(param_list, param_dict, param_type): def validate_dataset_param_value(param_list, param_dict, param_type):
for param_name in param_list: for param_name in param_list:


+ 37
- 12
mindspore/dataset/engine/samplers.py View File

@@ -23,6 +23,7 @@ import numbers
import numpy as np import numpy as np
import mindspore._c_dataengine as cde import mindspore._c_dataengine as cde
import mindspore.dataset as ds import mindspore.dataset as ds
from ..core import validator_helpers as validator




def select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): def select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id):
@@ -349,8 +350,12 @@ class DistributedSampler(BuiltinSampler):
if not isinstance(shuffle, bool): if not isinstance(shuffle, bool):
raise TypeError("shuffle must be a boolean value but was: {}.".format(shuffle)) raise TypeError("shuffle must be a boolean value but was: {}.".format(shuffle))


if num_samples is not None and not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples is not None:
if not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples < 0 or num_samples > validator.INT64_MAX:
raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
.format(0, validator.INT64_MAX))


if not isinstance(offset, int): if not isinstance(offset, int):
raise TypeError("offset must be integer but was: {}.".format(offset)) raise TypeError("offset must be integer but was: {}.".format(offset))
@@ -441,8 +446,12 @@ class PKSampler(BuiltinSampler):
if not isinstance(class_column, str): if not isinstance(class_column, str):
raise TypeError("class_column must be a str value but was: {}.".format(class_column)) raise TypeError("class_column must be a str value but was: {}.".format(class_column))


if num_samples is not None and not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples is not None:
if not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples < 0 or num_samples > validator.INT64_MAX:
raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
.format(0, validator.INT64_MAX))


self.num_val = num_val self.num_val = num_val
self.shuffle = shuffle self.shuffle = shuffle
@@ -505,8 +514,12 @@ class RandomSampler(BuiltinSampler):
if not isinstance(replacement, bool): if not isinstance(replacement, bool):
raise TypeError("replacement must be a boolean value but was: {}.".format(replacement)) raise TypeError("replacement must be a boolean value but was: {}.".format(replacement))


if num_samples is not None and not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples is not None:
if not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples < 0 or num_samples > validator.INT64_MAX:
raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
.format(0, validator.INT64_MAX))


self.deterministic = False self.deterministic = False
self.replacement = replacement self.replacement = replacement
@@ -564,8 +577,12 @@ class SequentialSampler(BuiltinSampler):
if start_index is not None and not isinstance(start_index, int): if start_index is not None and not isinstance(start_index, int):
raise TypeError("start_index must be integer but was: {}.".format(start_index)) raise TypeError("start_index must be integer but was: {}.".format(start_index))


if num_samples is not None and not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples is not None:
if not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples < 0 or num_samples > validator.INT64_MAX:
raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
.format(0, validator.INT64_MAX))


self.start_index = start_index self.start_index = start_index
super().__init__(num_samples) super().__init__(num_samples)
@@ -631,8 +648,12 @@ class SubsetSampler(BuiltinSampler):
raise TypeError("type of indices element must be number, " raise TypeError("type of indices element must be number, "
"but got w[{}]: {}, type: {}.".format(i, item, type(item))) "but got w[{}]: {}, type: {}.".format(i, item, type(item)))


if num_samples is not None and not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples is not None:
if not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples < 0 or num_samples > validator.INT64_MAX:
raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
.format(0, validator.INT64_MAX))


self.indices = indices self.indices = indices
super().__init__(num_samples) super().__init__(num_samples)
@@ -744,8 +765,12 @@ class WeightedRandomSampler(BuiltinSampler):
raise TypeError("type of weights element must be number, " raise TypeError("type of weights element must be number, "
"but got w[{}]: {}, type: {}.".format(ind, w, type(w))) "but got w[{}]: {}, type: {}.".format(ind, w, type(w)))


if num_samples is not None and not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples is not None:
if not isinstance(num_samples, int):
raise TypeError("num_samples must be integer but was: {}.".format(num_samples))
if num_samples < 0 or num_samples > validator.INT64_MAX:
raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
.format(0, validator.INT64_MAX))


if not isinstance(replacement, bool): if not isinstance(replacement, bool):
raise TypeError("replacement must be a boolean value but was: {}.".format(replacement)) raise TypeError("replacement must be a boolean value but was: {}.".format(replacement))


+ 13
- 12
mindspore/dataset/vision/c_transforms.py View File

@@ -444,10 +444,9 @@ class Pad(ImageTensorOperation):
If 4 values are provided as a list or tuple, If 4 values are provided as a list or tuple,
it pads the left, top, right and bottom respectively. it pads the left, top, right and bottom respectively.
fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for
padding_mode Border.CONSTANT (default=0).
padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
If it is an integer, it is used for all RGB channels. If it is an integer, it is used for all RGB channels.
If it is a 3-tuple, it is used to fill R, G, B channels respectively.
The fill_value values must be in range [0, 255].
The fill_value values must be in range [0, 255] (default=0).
padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of
[Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].


@@ -684,9 +683,10 @@ class RandomCrop(ImageTensorOperation):
pad the left, top, right and bottom respectively. pad the left, top, right and bottom respectively.
pad_if_needed (bool, optional): Pad the image if either side is smaller than pad_if_needed (bool, optional): Pad the image if either side is smaller than
the given output size (default=False). the given output size (default=False).
fill_value (Union[int, tuple], optional): The pixel intensity of the borders if
the padding_mode is Border.CONSTANT (default=0). If it is a 3-tuple, it is used to
fill R, G, B channels respectively.
fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for
padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
If it is an integer, it is used for all RGB channels.
The fill_value values must be in range [0, 255] (default=0).
padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). It can be any of padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). It can be any of
[Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].


@@ -799,9 +799,10 @@ class RandomCropWithBBox(ImageTensorOperation):
If 4 values are provided as a list or tuple, pad the left, top, right and bottom respectively. If 4 values are provided as a list or tuple, pad the left, top, right and bottom respectively.
pad_if_needed (bool, optional): Pad the image if either side is smaller than pad_if_needed (bool, optional): Pad the image if either side is smaller than
the given output size (default=False). the given output size (default=False).
fill_value (Union[int, tuple], optional): The pixel intensity of the borders if
the padding_mode is Border.CONSTANT (default=0). If it is a 3-tuple, it is used to
fill R, G, B channels respectively.
fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for
padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
If it is an integer, it is used for all RGB channels.
The fill_value values must be in range [0, 255] (default=0).
padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). It can be any of padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). It can be any of
[Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].


@@ -1105,10 +1106,10 @@ class RandomRotation(ImageTensorOperation):
Note that the expand flag assumes rotation around the center and no translation. Note that the expand flag assumes rotation around the center and no translation.
center (tuple, optional): Optional center of rotation (a 2-tuple) (default=None). center (tuple, optional): Optional center of rotation (a 2-tuple) (default=None).
Origin is the top left corner. None sets to the center of the image. Origin is the top left corner. None sets to the center of the image.
fill_value (Union[int, tuple], optional): Optional fill color for the area outside the rotated image
(default=0).
If it is a 3-tuple, it is used for R, G, B channels respectively.
fill_value (Union[int, tuple], optional): Optional fill color for the area outside the rotated image.
If it is a 3-tuple, it is used to fill R, G, B channels respectively.
If it is an integer, it is used for all RGB channels. If it is an integer, it is used for all RGB channels.
The fill_value values must be in range [0, 255] (default=0).


Examples: Examples:
>>> from mindspore.dataset.vision import Inter >>> from mindspore.dataset.vision import Inter


+ 1
- 1
tests/ut/python/dataset/test_datasets_textfileop.py View File

@@ -195,7 +195,7 @@ def test_textline_dataset_to_device():
def test_textline_dataset_exceptions(): def test_textline_dataset_exceptions():
with pytest.raises(ValueError) as error_info: with pytest.raises(ValueError) as error_info:
_ = ds.TextFileDataset(DATA_FILE, num_samples=-1) _ = ds.TextFileDataset(DATA_FILE, num_samples=-1)
assert "Input num_samples is not within the required interval" in str(error_info.value)
assert "num_samples exceeds the boundary" in str(error_info.value)


with pytest.raises(ValueError) as error_info: with pytest.raises(ValueError) as error_info:
_ = ds.TextFileDataset("does/not/exist/no.txt") _ = ds.TextFileDataset("does/not/exist/no.txt")


+ 1
- 1
tests/ut/python/dataset/test_exceptions.py View File

@@ -41,7 +41,7 @@ def test_exception_02():
num_samples = -1 num_samples = -1
with pytest.raises(ValueError) as info: with pytest.raises(ValueError) as info:
ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
assert 'Input num_samples is not within the required interval of (0 to 2147483647).' in str(info.value)
assert 'num_samples exceeds the boundary between 0 and 9223372036854775807(INT64_MAX)' in str(info.value)


num_samples = 1 num_samples = 1
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)


+ 1
- 1
tests/ut/python/dataset/test_minddataset_exception.py View File

@@ -280,7 +280,7 @@ def test_cv_minddataset_partition_num_samples_equals_0():
with pytest.raises(ValueError) as error_info: with pytest.raises(ValueError) as error_info:
partitions(5) partitions(5)
try: try:
assert 'Input num_samples is not within the required interval of (0 to 2147483647).' in str(error_info.value)
assert 'num_samples exceeds the boundary between 0 and 9223372036854775807(INT64_MAX)' in str(error_info.value)
except Exception as error: except Exception as error:
os.remove(CV_FILE_NAME) os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME)) os.remove("{}.db".format(CV_FILE_NAME))


+ 1
- 1
tests/ut/python/dataset/test_sampler.py View File

@@ -211,7 +211,7 @@ def test_subset_sampler():
test_config([0, 9, -6, 2], exception_msg="Sample ID (-6) is out of bound, expected range [0, 9]") test_config([0, 9, -6, 2], exception_msg="Sample ID (-6) is out of bound, expected range [0, 9]")
# test_config([], exception_msg="Indices list is empty") # temporary until we check with MindDataset # test_config([], exception_msg="Indices list is empty") # temporary until we check with MindDataset
test_config([0, 9, 3, 2], num_samples=-1, test_config([0, 9, 3, 2], num_samples=-1,
exception_msg="SubsetRandomSampler: num_samples must be greater than or equal to 0")
exception_msg="num_samples exceeds the boundary between 0 and 9223372036854775807(INT64_MAX)")




def test_sampler_chain(): def test_sampler_chain():


Loading…
Cancel
Save