Merge pull request !3183 from tony_liu2/master (tags/v0.7.0-beta)
@@ -99,8 +99,13 @@ def test_invalid_mindrecord():
     num_iter = 0
     for _ in data_set.create_dict_iterator():
         num_iter += 1
-    assert num_iter == 0
-    os.remove('dummy.mindrecord')
+    try:
+        assert num_iter == 0
+    except Exception as error:
+        os.remove('dummy.mindrecord')
+        raise error
+    else:
+        os.remove('dummy.mindrecord')


 def test_minddataset_lack_db():
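Every hunk in this file makes the same change: a bare assert followed by an unconditional os.remove becomes a try/except/else, so the temporary MindRecord file is removed whether the assertion passes or fails, and a failure is still re-raised for pytest to report. A minimal, self-contained sketch of the pattern; check_and_cleanup and tmp_path are illustrative names, not from the diff:

import os

def check_and_cleanup(tmp_path, num_iter):
    try:
        assert num_iter == 0
    except Exception as error:
        os.remove(tmp_path)  # clean up the stale file before propagating the failure
        raise error
    else:
        os.remove(tmp_path)  # identical cleanup on the success path

Since both branches run the same cleanup here, a try/finally would behave the same; the except/else split only earns its keep in the two-stage hunk further down, where the success path must defer cleanup.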
@@ -113,8 +118,13 @@ def test_minddataset_lack_db():
     num_iter = 0
     for _ in data_set.create_dict_iterator():
         num_iter += 1
-    assert num_iter == 0
-    os.remove(CV_FILE_NAME)
+    try:
+        assert num_iter == 0
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)


 def test_cv_minddataset_pk_sample_error_class_column():
@@ -189,10 +199,16 @@ def test_minddataset_invalidate_num_shards():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))


 def test_minddataset_invalidate_shard_id():
     create_cv_mindrecord(1)
@@ -203,9 +219,15 @@ def test_minddataset_invalidate_shard_id():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))


 def test_minddataset_shard_id_bigger_than_num_shard():
@@ -217,17 +239,28 @@ def test_minddataset_shard_id_bigger_than_num_shard():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error

     with pytest.raises(Exception) as error_info:
         data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5)
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))


 def test_cv_minddataset_partition_num_samples_equals_0():
     """tutorial for cv minddataset."""
@@ -245,7 +278,26 @@ def test_cv_minddataset_partition_num_samples_equals_0():
                num_iter += 1
     with pytest.raises(Exception) as error_info:
         partitions(5)
-    assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+
+
+if __name__ == '__main__':
+    test_cv_lack_json()
+    test_cv_lack_mindrecord()
+    test_invalid_mindrecord()
+    test_minddataset_lack_db()
+    test_cv_minddataset_pk_sample_error_class_column()
+    test_cv_minddataset_pk_sample_exclusive_shuffle()
+    test_cv_minddataset_reader_different_schema()
+    test_cv_minddataset_reader_different_page_size()
+    test_minddataset_invalidate_num_shards()
+    test_minddataset_invalidate_shard_id()
+    test_minddataset_shard_id_bigger_than_num_shard()
+    test_cv_minddataset_partition_num_samples_equals_0()
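Note that the assertions these hunks wrap run after the pytest.raises block exits, so a failed message check previously leaked the files. A minimal, self-contained illustration of that pattern; the ValueError and message are illustrative, since the real tests raise from MindDataset construction:

import pytest

def test_message_check():
    with pytest.raises(ValueError) as error_info:
        raise ValueError("Input shard_id is not within the required interval of (0 to 0).")
    # error_info is only populated once the with-block has exited
    assert 'Input shard_id is not within' in str(error_info.value)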
@@ -27,54 +27,64 @@ CV_FILE_NAME = "./complex.mindrecord"

 def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial():
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "image_0": {"type": "bytes"},
-                      "image_2": {"type": "bytes"},
-                      "image_3": {"type": "bytes"},
-                      "image_4": {"type": "bytes"},
-                      "input_mask": {"type": "int32", "shape": [-1]},
-                      "segments": {"type": "float32", "shape": [2, 3]}}
-    writer.add_schema(cv_schema_json, "two_images_schema")
-    with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
-        img_data = file_reader.read()
-    ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
-    ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
-    data = []
-    for i in range(5):
-        item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
-                "input_mask": ndarray_1, "segments": ndarray_2}
-        data.append(item)
-    writer.write_raw_data(data)
-    writer.commit()
-    assert os.path.exists(CV_FILE_NAME)
-    assert os.path.exists(CV_FILE_NAME + ".db")
-
-    # tutorial for minderdataset.
-    columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
-    num_readers = 1
-    data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
-    assert data_set.get_dataset_size() == 5
-    num_iter = 0
-    for item in data_set.create_dict_iterator():
-        assert len(item) == 7
-        logger.info("item: {}".format(item))
-        assert item["image_0"].dtype == np.uint8
-        assert (item["image_0"] == item["image_2"]).all()
-        assert (item["image_3"] == item["image_4"]).all()
-        assert (item["image_0"] == item["image_4"]).all()
-        assert item["image_2"].dtype == np.uint8
-        assert item["image_3"].dtype == np.uint8
-        assert item["image_4"].dtype == np.uint8
-        assert item["id"].dtype == np.int32
-        assert item["input_mask"].shape == (5,)
-        assert item["input_mask"].dtype == np.int32
-        assert item["segments"].shape == (2, 3)
-        assert item["segments"].dtype == np.float32
-        num_iter += 1
-    assert num_iter == 5
-
-    if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
-        os.remove(CV_FILE_NAME + ".db")
-    if os.path.exists("{}".format(CV_FILE_NAME)):
-        os.remove(CV_FILE_NAME)
+    try:
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "image_0": {"type": "bytes"},
+                          "image_2": {"type": "bytes"},
+                          "image_3": {"type": "bytes"},
+                          "image_4": {"type": "bytes"},
+                          "input_mask": {"type": "int32", "shape": [-1]},
+                          "segments": {"type": "float32", "shape": [2, 3]}}
+        writer.add_schema(cv_schema_json, "two_images_schema")
+        with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
+            img_data = file_reader.read()
+        ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
+        ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
+        data = []
+        for i in range(5):
+            item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
+                    "input_mask": ndarray_1, "segments": ndarray_2}
+            data.append(item)
+        writer.write_raw_data(data)
+        writer.commit()
+        assert os.path.exists(CV_FILE_NAME)
+        assert os.path.exists(CV_FILE_NAME + ".db")
+
+        # tutorial for minderdataset.
+        columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
+        num_readers = 1
+        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
+        assert data_set.get_dataset_size() == 5
+        num_iter = 0
+        for item in data_set.create_dict_iterator():
+            assert len(item) == 7
+            logger.info("item: {}".format(item))
+            assert item["image_0"].dtype == np.uint8
+            assert (item["image_0"] == item["image_2"]).all()
+            assert (item["image_3"] == item["image_4"]).all()
+            assert (item["image_0"] == item["image_4"]).all()
+            assert item["image_2"].dtype == np.uint8
+            assert item["image_3"].dtype == np.uint8
+            assert item["image_4"].dtype == np.uint8
+            assert item["id"].dtype == np.int32
+            assert item["input_mask"].shape == (5,)
+            assert item["input_mask"].dtype == np.int32
+            assert item["segments"].shape == (2, 3)
+            assert item["segments"].dtype == np.float32
+            num_iter += 1
+        assert num_iter == 5
+    except Exception as error:
+        if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
+            os.remove(CV_FILE_NAME + ".db")
+        if os.path.exists("{}".format(CV_FILE_NAME)):
+            os.remove(CV_FILE_NAME)
+        raise error
+    else:
+        if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
+            os.remove(CV_FILE_NAME + ".db")
+        if os.path.exists("{}".format(CV_FILE_NAME)):
+            os.remove(CV_FILE_NAME)


 if __name__ == '__main__':
     test_cv_minddataset_reader_multi_image_and_ndarray_tutorial()
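Unlike the first file, this test wraps file creation itself inside the try, so the cleanup branches cannot assume the .mindrecord and .db files were ever written; each removal is therefore guarded with os.path.exists. A sketch of that guard as a hypothetical helper (remove_if_exists is not a name from the diff):

import os

def remove_if_exists(path):
    # os.remove raises FileNotFoundError for a missing file, so probe first
    if os.path.exists(path):
        os.remove(path)

contextlib.suppress(FileNotFoundError) around os.remove would be an equivalent idiom.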
@@ -44,24 +44,31 @@ def add_and_remove_cv_file():
     """add/remove cv file"""
     paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
-        os.remove("{}.db".format(x)) if os.path.exists(
-            "{}.db".format(x)) else None
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    data = get_data(CV_DIR_NAME)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "file_name": {"type": "string"},
-                      "label": {"type": "int32"},
-                      "data": {"type": "bytes"}}
-    writer.add_schema(cv_schema_json, "img_schema")
-    writer.add_index(["file_name", "label"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_cv_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
+            os.remove("{}.db".format(x)) if os.path.exists(
+                "{}.db".format(x)) else None
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        data = get_data(CV_DIR_NAME)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "file_name": {"type": "string"},
+                          "label": {"type": "int32"},
+                          "data": {"type": "bytes"}}
+        writer.add_schema(cv_schema_json, "img_schema")
+        writer.add_index(["file_name", "label"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_cv_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))


 @pytest.fixture
@@ -69,32 +76,39 @@ def add_and_remove_nlp_file():
     """add/remove nlp file"""
     paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        if os.path.exists("{}".format(x)):
-            os.remove("{}".format(x))
-        if os.path.exists("{}.db".format(x)):
-            os.remove("{}.db".format(x))
-    writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
-    data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
-    nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
-                       "rating": {"type": "float32"},
-                       "input_ids": {"type": "int64",
-                                     "shape": [-1]},
-                       "input_mask": {"type": "int64",
-                                      "shape": [1, -1]},
-                       "segment_ids": {"type": "int64",
-                                       "shape": [2, -1]}
-                       }
-    writer.set_header_size(1 << 14)
-    writer.set_page_size(1 << 15)
-    writer.add_schema(nlp_schema_json, "nlp_schema")
-    writer.add_index(["id", "rating"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_nlp_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            if os.path.exists("{}".format(x)):
+                os.remove("{}".format(x))
+            if os.path.exists("{}.db".format(x)):
+                os.remove("{}.db".format(x))
+        writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
+        data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
+        nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
+                           "rating": {"type": "float32"},
+                           "input_ids": {"type": "int64",
+                                         "shape": [-1]},
+                           "input_mask": {"type": "int64",
+                                          "shape": [1, -1]},
+                           "segment_ids": {"type": "int64",
+                                           "shape": [2, -1]}
+                           }
+        writer.set_header_size(1 << 14)
+        writer.set_page_size(1 << 15)
+        writer.add_schema(nlp_schema_json, "nlp_schema")
+        writer.add_index(["id", "rating"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_nlp_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
 def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
@@ -119,7 +133,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
                                               encoding='utf8')
             assert item['label'] == padded_sample['label']
             assert (item['data'] == np.array(list(padded_sample['data']))).all()
-            num_iter += 1
+        num_iter += 1
     assert num_padded_iter == 5
     assert num_iter == 15
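The one-level dedent here is a logic fix, not formatting: num_iter must count every row the iterator yields, while num_padded_iter counts only the padded ones; nested under the if, the old line could never reach 15. Schematically (rows and padded_ids are illustrative stand-ins for the dict iterator and the padded-sample check):

rows = range(15)               # stands in for the dataset iterator
padded_ids = {0, 3, 6, 9, 12}  # stands in for the padded-sample test
num_iter = 0
num_padded_iter = 0
for r in rows:
    if r in padded_ids:
        num_padded_iter += 1   # only padded rows counted here
    num_iter += 1              # dedented: counts all rows
assert num_padded_iter == 5
assert num_iter == 15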
@@ -636,3 +650,17 @@ def inputs(vectors, maxlen=50):
     mask = [1] * length + [0] * (maxlen - length)
     segment = [0] * maxlen
     return input_, mask, segment
+
+
+if __name__ == '__main__':
+    test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file)
+    test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file)
+    test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file)
+    test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file)
@@ -34,26 +34,32 @@ def add_and_remove_cv_file():
     """add/remove cv file"""
     paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        if os.path.exists("{}".format(x)):
-            os.remove("{}".format(x))
-        if os.path.exists("{}.db".format(x)):
-            os.remove("{}.db".format(x))
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    data = get_data(CV_DIR_NAME, True)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "file_name": {"type": "string"},
-                      "label": {"type": "int32"},
-                      "data": {"type": "bytes"}}
-    writer.add_schema(cv_schema_json, "img_schema")
-    writer.add_index(["file_name", "label"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_cv_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            if os.path.exists("{}".format(x)):
+                os.remove("{}".format(x))
+            if os.path.exists("{}.db".format(x)):
+                os.remove("{}.db".format(x))
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        data = get_data(CV_DIR_NAME, True)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "file_name": {"type": "string"},
+                          "label": {"type": "int32"},
+                          "data": {"type": "bytes"}}
+        writer.add_schema(cv_schema_json, "img_schema")
+        writer.add_index(["file_name", "label"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_cv_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))


 def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
@@ -626,3 +632,24 @@ def get_data(dir_name, sampler=False):
         except FileNotFoundError:
             continue
     return data_list
+
+
+if __name__ == '__main__':
+    test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_out_of_range(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file)
+    test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file)
+    test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file)
+    test_cv_minddataset_split_basic(add_and_remove_cv_file)
+    test_cv_minddataset_split_exact_percent(add_and_remove_cv_file)
+    test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file)
+    test_cv_minddataset_split_deterministic(add_and_remove_cv_file)
+    test_cv_minddataset_split_sharding(add_and_remove_cv_file)