Browse Source

md fix voc and manifest error scenario

pull/15727/head
xiefangqi 4 years ago
parent
commit
458ff5fed9
10 changed files with 113 additions and 13 deletions
  1. +14
    -1
      mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc
  2. +12
    -0
      mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc
  3. +9
    -0
      mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h
  4. +6
    -0
      tests/ut/data/dataset/testManifestData/invalidNoSource.manifest
  5. +6
    -0
      tests/ut/data/dataset/testManifestData/invalidNoUsage.manifest
  6. +0
    -12
      tests/ut/data/dataset/testVOC2012/Annotations/33.xml
  7. +39
    -0
      tests/ut/data/dataset/testVOC2012/Annotations/invalidbbox.xml
  8. +1
    -0
      tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlinvalidbbox.txt
  9. +18
    -0
      tests/ut/python/dataset/test_datasets_manifestop.py
  10. +8
    -0
      tests/ut/python/dataset/test_datasets_voc.py

+ 14
- 1
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc View File

@@ -174,10 +174,16 @@ Status ManifestOp::ParseManifestFile() {
}
std::string line;
std::set<std::string> classes;
uint64_t line_count = 1;
while (getline(file_handle, line)) {
try {
nlohmann::json js = nlohmann::json::parse(line);
std::string image_file_path = js.value("source", "");
if (image_file_path == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, source is not found in Manifest file: " + file_ + " at line " +
std::to_string(line_count));
}
// If image is not JPEG/PNG/GIF/BMP, drop it
bool valid = false;
RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid));
@@ -185,6 +191,11 @@ Status ManifestOp::ParseManifestFile() {
continue;
}
std::string usage = js.value("usage", "");
if (usage == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, usage is not found in Manifest file: " + file_ + " at line " +
std::to_string(line_count));
}
(void)std::transform(usage.begin(), usage.end(), usage.begin(), ::tolower);
if (usage != usage_) {
continue;
@@ -197,7 +208,8 @@ Status ManifestOp::ParseManifestFile() {
classes.insert(label_name);
if (label_name == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + image_file_path);
RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + file_ + " at line " +
std::to_string(line_count));
}
if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) {
if (label_index_.find(label_name) == label_index_.end()) {
@@ -209,6 +221,7 @@ Status ManifestOp::ParseManifestFile() {
if (!labels.empty()) {
image_labelname_.emplace_back(std::make_pair(image_file_path, labels));
}
line_count++;
} catch (const std::exception &err) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + file_);


+ 12
- 0
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc View File

@@ -213,6 +213,17 @@ void VOCOp::ParseNodeValue(XMLElement *bbox_node, const char *name, float *value
}
}

// Verify that the four bndbox coordinates read from an annotation xml describe a usable box.
// A box is accepted only when xmin and ymin are both greater than 0, xmax is greater than xmin and
// ymax is greater than ymin. Otherwise an error status is returned whose message lists the coordinates
// (truncated to int) and the xml path.
// @param xmin - the left coordinate of bndbox
// @param ymin - the top coordinate of bndbox
// @param xmax - the right coordinate of bndbox
// @param ymax - the bottom coordinate of bndbox
// @param path - the file path of bndbox xml, reported in the error message
// @return Status OK for a valid bndbox, otherwise the status raised through RETURN_STATUS_UNEXPECTED
Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
                               const std::string &path) {
  const bool top_left_positive = xmin > 0 && ymin > 0;
  const bool bottom_right_beyond = xmax > xmin && ymax > ymin;
  if (!(top_left_positive && bottom_right_beyond)) {
    // Assemble "{xmin, ymin, xmax, ymax}" piece by piece, each coordinate truncated to an integer.
    std::string bbox_text = "{";
    bbox_text += std::to_string(static_cast<int>(xmin));
    bbox_text += ", ";
    bbox_text += std::to_string(static_cast<int>(ymin));
    bbox_text += ", ";
    bbox_text += std::to_string(static_cast<int>(xmax));
    bbox_text += ", ";
    bbox_text += std::to_string(static_cast<int>(ymax));
    bbox_text += "}";
    RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + bbox_text + " found in " + path);
  }
  return Status::OK();
}

Status VOCOp::ParseAnnotationBbox(const std::string &path) {
if (!Path(path).Exists()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path);
@@ -245,6 +256,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) {
ParseNodeValue(bbox_node, "xmax", &xmax);
ParseNodeValue(bbox_node, "ymin", &ymin);
ParseNodeValue(bbox_node, "ymax", &ymax);
RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path);
}


+ 9
- 0
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h View File

@@ -230,6 +230,15 @@ class VOCOp : public MappableLeafOp {
// @return Status The status code returned
Status ParseAnnotationBbox(const std::string &path);

// Check that a bndbox parsed from an annotation xml is well-formed: xmin and ymin must both be greater
// than 0, xmax must be greater than xmin, and ymax must be greater than ymin.
// @param xmin - the left coordinate of bndbox
// @param ymin - the top coordinate of bndbox
// @param xmax - the right coordinate of bndbox
// @param ymax - the bottom coordinate of bndbox
// @param path - the file path of bndbox xml, included in the error message when the check fails
// @return Status OK when the bndbox is valid; otherwise the status raised through
//     RETURN_STATUS_UNEXPECTED, whose message lists the offending coordinates and the xml path
Status CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
const std::string &path);

// @param XMLElement *bbox_node - bbox node info found in json object
// @param const char *name - sub node name in object
// @param float *value - value of certain sub node


+ 6
- 0
tests/ut/data/dataset/testManifestData/invalidNoSource.manifest View File

@@ -0,0 +1,6 @@
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"},{"type": "modelarts/image_classification","name": "flower","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/eval/1.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/eval/2.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}

+ 6
- 0
tests/ut/data/dataset/testManifestData/invalidNoUsage.manifest View File

@@ -0,0 +1,6 @@
{"source":"../data/dataset/testManifestData/train/1.JPEG","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"},{"type": "modelarts/image_classification","name": "flower","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/eval/1.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/eval/2.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}

+ 0
- 12
tests/ut/data/dataset/testVOC2012/Annotations/33.xml View File

@@ -24,18 +24,6 @@
<ymax>242</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>425</xmin>
<ymin>239</ymin>
<xmax>445</xmax>
<ymax>235</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>


+ 39
- 0
tests/ut/data/dataset/testVOC2012/Annotations/invalidbbox.xml View File

@@ -0,0 +1,39 @@
<annotation>
<folder>VOC2012</folder>
<filename>33.jpg</filename>
<source>
<database>simulate VOC2007 Database</database>
<annotation>simulate VOC2007</annotation>
<image>flickr</image>
</source>
<size>
<width>500</width>
<height>366</height>
<depth>3</depth>
</size>
<segmented>1</segmented>
<object>
<name>person</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>8</xmin>
<ymin>121</ymin>
<xmax>471</xmax>
<ymax>242</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>321</xmin>
<ymin>121</ymin>
<xmax>421</xmax>
<ymax>120</ymax>
</bndbox>
</object>
</annotation>

+ 1
- 0
tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlinvalidbbox.txt View File

@@ -0,0 +1 @@
invalidbbox

+ 18
- 0
tests/ut/python/dataset/test_datasets_manifestop.py View File

@@ -160,6 +160,24 @@ def test_manifest_dataset_exception():
except RuntimeError as e:
assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

NO_SOURCE_DATA_FILE = "../data/dataset/testManifestData/invalidNoSource.manifest"
try:
data = ds.ManifestDataset(NO_SOURCE_DATA_FILE)
for _ in data.__iter__():
pass
assert False
except RuntimeError as e:
assert "Invalid data, source is not found in Manifest file" in str(e)

NO_USAGE_DATA_FILE = "../data/dataset/testManifestData/invalidNoUsage.manifest"
try:
data = ds.ManifestDataset(NO_USAGE_DATA_FILE)
for _ in data.__iter__():
pass
assert False
except RuntimeError as e:
assert "Invalid data, usage is not found in Manifest file" in str(e)


if __name__ == '__main__':
test_manifest_dataset_train()


+ 8
- 0
tests/ut/python/dataset/test_datasets_voc.py View File

@@ -181,6 +181,14 @@ def test_voc_exception():
except RuntimeError:
pass

try:
data7 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlinvalidbbox")
for _ in data7.create_dict_iterator(num_epochs=1):
pass
assert False
except RuntimeError as e:
assert "Invalid bndbox: {321, 121, 421, 120}" in str(e)

def exception_func(item):
raise Exception("Error occur!")



Loading…
Cancel
Save