Merge pull request !25006 from liyong126/fix_md_uttags/v1.6.0
| @@ -31,20 +31,15 @@ from mindspore.dataset.vision import Inter | |||
| from mindspore.mindrecord import FileWriter | |||
| FILES_NUM = 4 | |||
| CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord" | |||
| CV1_FILE_NAME = "../data/mindrecord/imagenet1.mindrecord" | |||
| CV2_FILE_NAME = "../data/mindrecord/imagenet2.mindrecord" | |||
| CV_DIR_NAME = "../data/mindrecord/testImageNetData" | |||
| NLP_FILE_NAME = "../data/mindrecord/aclImdb.mindrecord" | |||
| OLD_NLP_FILE_NAME = "../data/mindrecord/testOldVersion/aclImdb.mindrecord" | |||
| NLP_FILE_POS = "../data/mindrecord/testAclImdbData/pos" | |||
| NLP_FILE_VOCAB = "../data/mindrecord/testAclImdbData/vocab.txt" | |||
| @pytest.fixture | |||
| def add_and_remove_cv_file(): | |||
| """add/remove cv file""" | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| for x in paths: | |||
| @@ -52,7 +47,7 @@ def add_and_remove_cv_file(): | |||
| os.remove("{}".format(x)) | |||
| if os.path.exists("{}.db".format(x)): | |||
| os.remove("{}.db".format(x)) | |||
| writer = FileWriter(CV_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| data = get_data(CV_DIR_NAME) | |||
| cv_schema_json = {"id": {"type": "int32"}, | |||
| "file_name": {"type": "string"}, | |||
| @@ -77,7 +72,8 @@ def add_and_remove_cv_file(): | |||
| @pytest.fixture | |||
| def add_and_remove_nlp_file(): | |||
| """add/remove nlp file""" | |||
| paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| for x in paths: | |||
| @@ -85,7 +81,7 @@ def add_and_remove_nlp_file(): | |||
| os.remove("{}".format(x)) | |||
| if os.path.exists("{}.db".format(x)): | |||
| os.remove("{}.db".format(x)) | |||
| writer = FileWriter(NLP_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)] | |||
| nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"}, | |||
| "rating": {"type": "float32"}, | |||
| @@ -117,7 +113,8 @@ def add_and_remove_nlp_file(): | |||
| @pytest.fixture | |||
| def add_and_remove_nlp_compress_file(): | |||
| """add/remove nlp file""" | |||
| paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| for x in paths: | |||
| @@ -125,7 +122,7 @@ def add_and_remove_nlp_compress_file(): | |||
| os.remove("{}".format(x)) | |||
| if os.path.exists("{}.db".format(x)): | |||
| os.remove("{}.db".format(x)) | |||
| writer = FileWriter(NLP_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| data = [] | |||
| for row_id in range(16): | |||
| data.append({ | |||
| @@ -183,8 +180,9 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file): | |||
| "array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1]) | |||
| }) | |||
| num_readers = 1 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset( | |||
| NLP_FILE_NAME + "0", None, num_readers, shuffle=False) | |||
| file_name + "0", None, num_readers, shuffle=False) | |||
| assert data_set.get_dataset_size() == 16 | |||
| num_iter = 0 | |||
| for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| @@ -197,29 +195,10 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file): | |||
| assert num_iter == 16 | |||
| def test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file): | |||
| """tutorial for nlp minderdataset.""" | |||
| num_readers = 1 | |||
| data_set = ds.MindDataset( | |||
| NLP_FILE_NAME + "0", None, num_readers, shuffle=False) | |||
| old_data_set = ds.MindDataset( | |||
| OLD_NLP_FILE_NAME + "0", None, num_readers, shuffle=False) | |||
| assert old_data_set.get_dataset_size() == 16 | |||
| num_iter = 0 | |||
| for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data_set.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| assert (item["array_a"] == x["array_a"]).all() | |||
| assert (item["array_b"] == x["array_b"]).all() | |||
| assert (item["array_c"] == x["array_c"]).all() | |||
| assert (item["array_d"] == x["array_d"]).all() | |||
| assert item["label"] == x["label"] | |||
| num_iter += 1 | |||
| assert num_iter == 16 | |||
| def test_cv_minddataset_writer_tutorial(): | |||
| """tutorial for cv dataset writer.""" | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| for x in paths: | |||
| @@ -227,7 +206,7 @@ def test_cv_minddataset_writer_tutorial(): | |||
| os.remove("{}".format(x)) | |||
| if os.path.exists("{}.db".format(x)): | |||
| os.remove("{}.db".format(x)) | |||
| writer = FileWriter(CV_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| data = get_data(CV_DIR_NAME) | |||
| cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"}, | |||
| "data": {"type": "bytes"}} | |||
| @@ -250,10 +229,11 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, shard_id=partition_id) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| @@ -272,10 +252,11 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, num_samples=1) | |||
| @@ -297,10 +278,11 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, num_samples=2) | |||
| @@ -322,10 +304,11 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, expect): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, num_samples=3) | |||
| @@ -346,8 +329,9 @@ def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, num_shards=1, shard_id=0, num_samples=5) | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=1, shard_id=0, num_samples=5) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| @@ -366,9 +350,10 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c | |||
| epoch1 = [] | |||
| epoch2 = [] | |||
| epoch3 = [] | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, shard_id=partition_id) | |||
| data_set = data_set.repeat(3) | |||
| @@ -401,13 +386,14 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| num_shards = 3 | |||
| epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result | |||
| [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result | |||
| [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # save partition 2 result | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, shard_id=partition_id) | |||
| data_set = data_set.repeat(3) | |||
| @@ -436,13 +422,14 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| ds.config.set_seed(54321) | |||
| epoch1 = [] | |||
| epoch2 = [] | |||
| epoch3 = [] | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| data_set = data_set.repeat(3) | |||
| num_iter = 0 | |||
| @@ -468,7 +455,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| epoch2_new_dataset = [] | |||
| epoch3_new_dataset = [] | |||
| data_set2 = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| data_set2 = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| data_set2 = data_set2.repeat(3) | |||
| num_iter = 0 | |||
| @@ -499,7 +486,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| epoch2_new_dataset2 = [] | |||
| epoch3_new_dataset2 = [] | |||
| data_set3 = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| data_set3 = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| data_set3 = data_set3.repeat(3) | |||
| num_iter = 0 | |||
| @@ -530,7 +517,8 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 10 | |||
| repeat_num = 2 | |||
| data_set = data_set.repeat(repeat_num) | |||
| @@ -544,7 +532,7 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file): | |||
| "-------------- item[data]: {} ----------------------".format(item["data"])) | |||
| num_iter += 1 | |||
| assert num_iter == 20 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=4, shard_id=3) | |||
| assert data_set.get_dataset_size() == 3 | |||
| @@ -553,7 +541,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| decode_op = vision.Decode() | |||
| data_set = data_set.map( | |||
| input_columns=["data"], operations=decode_op, num_parallel_workers=2) | |||
| @@ -584,7 +573,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| decode_op = vision.Decode() | |||
| data_set = data_set.map( | |||
| input_columns=["data"], operations=decode_op, num_parallel_workers=2) | |||
| @@ -608,7 +598,8 @@ def test_cv_minddataset_issue_888(add_and_remove_cv_file): | |||
| """issue 888 test.""" | |||
| columns_list = ["data", "label"] | |||
| num_readers = 2 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1) | |||
| data_set = data_set.shuffle(2) | |||
| data_set = data_set.repeat(9) | |||
| num_iter = 0 | |||
| @@ -621,7 +612,8 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset([CV_FILE_NAME + str(x) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset([file_name + str(x) | |||
| for x in range(FILES_NUM)], columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| @@ -644,7 +636,8 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset([file_name + "0"], columns_list, num_readers) | |||
| assert data_set.get_dataset_size() < 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| @@ -664,6 +657,8 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): | |||
| def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_1.mindrecord" | |||
| CV2_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_2.mindrecord" | |||
| try: | |||
| if os.path.exists(CV1_FILE_NAME): | |||
| os.remove(CV1_FILE_NAME) | |||
| @@ -696,7 +691,8 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): | |||
| writer.commit() | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset([CV_FILE_NAME + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME], | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME], | |||
| columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 30 | |||
| num_iter = 0 | |||
| @@ -735,6 +731,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): | |||
| def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): | |||
| CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_partition_1" | |||
| paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| @@ -756,7 +753,8 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset([CV_FILE_NAME + str(x) for x in range(2)] + | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset([file_name + str(x) for x in range(2)] + | |||
| [CV1_FILE_NAME + str(x) for x in range(2, 4)], | |||
| columns_list, num_readers) | |||
| assert data_set.get_dataset_size() < 20 | |||
| @@ -789,7 +787,8 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| @@ -810,7 +809,8 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): | |||
| def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): | |||
| """tutorial for nlp minddataset.""" | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", None, num_readers) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| @@ -839,7 +839,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 10 | |||
| for _ in range(5): | |||
| num_iter = 0 | |||
| @@ -855,7 +856,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_ | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| resize_height = 32 | |||
| resize_width = 32 | |||
| @@ -881,7 +883,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_ | |||
| def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0") | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0") | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| @@ -903,7 +906,8 @@ def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) | |||
| repeat_num = 2 | |||
| data_set = data_set.repeat(repeat_num) | |||
| num_iter = 0 | |||
| @@ -1753,7 +1757,8 @@ def test_write_with_multi_array_and_MindDataset(): | |||
| def test_numpy_generic(): | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| for x in paths: | |||
| @@ -1761,7 +1766,7 @@ def test_numpy_generic(): | |||
| os.remove("{}".format(x)) | |||
| if os.path.exists("{}.db".format(x)): | |||
| os.remove("{}.db".format(x)) | |||
| writer = FileWriter(CV_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| cv_schema_json = {"label1": {"type": "int32"}, "label2": {"type": "int64"}, | |||
| "label3": {"type": "float32"}, "label4": {"type": "float64"}} | |||
| data = [] | |||
| @@ -1777,7 +1782,7 @@ def test_numpy_generic(): | |||
| writer.commit() | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False) | |||
| data_set = ds.MindDataset(file_name + "0", None, num_readers, shuffle=False) | |||
| assert data_set.get_dataset_size() == 10 | |||
| idx = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| @@ -1799,7 +1804,7 @@ def test_numpy_generic(): | |||
| def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(): | |||
| mindrecord_file_name = "test.mindrecord" | |||
| mindrecord_file_name = "test_write_with_float32_float64_float32_array_float64_array_and_MindDataset.mindrecord" | |||
| try: | |||
| data = [{"float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32), | |||
| "float64_array": np.array([48.1234556789, 49.3251241431, 50.13514312414, 51.8971298471, | |||
| @@ -2570,7 +2575,8 @@ def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files): | |||
| def test_field_is_null_numpy(): | |||
| """add/remove nlp file""" | |||
| paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| for x in paths: | |||
| if os.path.exists("{}".format(x)): | |||
| @@ -2578,7 +2584,7 @@ def test_field_is_null_numpy(): | |||
| if os.path.exists("{}.db".format(x)): | |||
| os.remove("{}.db".format(x)) | |||
| writer = FileWriter(NLP_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| data = [] | |||
| # field array_d is null | |||
| for row_id in range(16): | |||
| @@ -2607,7 +2613,7 @@ def test_field_is_null_numpy(): | |||
| writer.write_raw_data(data) | |||
| writer.commit() | |||
| data_set = ds.MindDataset(dataset_file=NLP_FILE_NAME + "0", | |||
| data_set = ds.MindDataset(dataset_file=file_name + "0", | |||
| columns_list=["label", "array_a", "array_b", "array_d"], | |||
| num_parallel_workers=2, | |||
| shuffle=False) | |||
| @@ -2639,8 +2645,9 @@ def test_for_loop_dataset_iterator(add_and_remove_nlp_compress_file): | |||
| "array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1]) | |||
| }) | |||
| num_readers = 1 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset( | |||
| NLP_FILE_NAME + "0", None, num_readers, shuffle=False) | |||
| file_name + "0", None, num_readers, shuffle=False) | |||
| assert data_set.get_dataset_size() == 16 | |||
| # create_dict_iterator in for loop | |||
| @@ -28,26 +28,22 @@ from mindspore import log as logger | |||
| from mindspore.mindrecord import FileWriter | |||
| FILES_NUM = 4 | |||
| CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord" | |||
| CV1_FILE_NAME = "../data/mindrecord/imagenet1.mindrecord" | |||
| CV2_FILE_NAME = "../data/mindrecord/imagenet2.mindrecord" | |||
| CV_DIR_NAME = "../data/mindrecord/testImageNetData" | |||
| NLP_FILE_NAME = "../data/mindrecord/aclImdb.mindrecord" | |||
| NLP_FILE_POS = "../data/mindrecord/testAclImdbData/pos" | |||
| NLP_FILE_VOCAB = "../data/mindrecord/testAclImdbData/vocab.txt" | |||
| @pytest.fixture | |||
| def add_and_remove_cv_file(): | |||
| """add/remove cv file""" | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| for x in paths: | |||
| os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None | |||
| os.remove("{}.db".format(x)) if os.path.exists( | |||
| "{}.db".format(x)) else None | |||
| writer = FileWriter(CV_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| data = get_data(CV_DIR_NAME) | |||
| cv_schema_json = {"id": {"type": "int32"}, | |||
| "file_name": {"type": "string"}, | |||
| @@ -72,7 +68,8 @@ def add_and_remove_cv_file(): | |||
| @pytest.fixture | |||
| def add_and_remove_nlp_file(): | |||
| """add/remove nlp file""" | |||
| paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| for x in paths: | |||
| @@ -80,7 +77,7 @@ def add_and_remove_nlp_file(): | |||
| os.remove("{}".format(x)) | |||
| if os.path.exists("{}.db".format(x)): | |||
| os.remove("{}.db".format(x)) | |||
| writer = FileWriter(NLP_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)] | |||
| nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"}, | |||
| "rating": {"type": "float32"}, | |||
| @@ -118,7 +115,8 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file): | |||
| padded_sample['label'] = -1 | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5) | |||
| assert data_set.get_dataset_size() == 15 | |||
| num_iter = 0 | |||
| num_padded_iter = 0 | |||
| @@ -145,7 +143,8 @@ def test_cv_minddataset_reader_basic_padded_samples_type_cast(add_and_remove_cv_ | |||
| padded_sample['label'] = -1 | |||
| padded_sample['file_name'] = 99999 | |||
| num_readers = 4 | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5) | |||
| assert data_set.get_dataset_size() == 15 | |||
| num_iter = 0 | |||
| num_padded_iter = 0 | |||
| @@ -173,12 +172,13 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file): | |||
| padded_sample['label'] = -2 | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded, dataset_size): | |||
| num_padded_iter = 0 | |||
| num_iter = 0 | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -213,6 +213,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f | |||
| padded_sample['label'] = -2 | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded, dataset_size): | |||
| repeat_size = 5 | |||
| @@ -224,7 +225,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f | |||
| epoch3_shuffle_result = [] | |||
| epoch4_shuffle_result = [] | |||
| epoch5_shuffle_result = [] | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -285,10 +286,11 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv | |||
| padded_sample['label'] = -2 | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -310,10 +312,11 @@ def test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_a | |||
| padded_sample['label'] = -2 | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -332,10 +335,11 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re | |||
| padded_sample.pop('label', None) | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -356,10 +360,11 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c | |||
| padded_sample['label'] = -2 | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", None, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -380,10 +385,11 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv | |||
| padded_sample = data[0] | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", None, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample) | |||
| @@ -403,10 +409,11 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov | |||
| padded_sample = data[0] | |||
| padded_sample['file_name'] = 'dummy.jpg' | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded): | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", None, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| num_padded=num_padded) | |||
| @@ -429,12 +436,13 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file): | |||
| padded_sample['input_ids'] = np.array([-1, -1, -1, -1], dtype=np.int64) | |||
| padded_sample['rating'] = 1.0 | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded, dataset_size): | |||
| num_padded_iter = 0 | |||
| num_iter = 0 | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -470,6 +478,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_ | |||
| padded_sample['rating'] = 1.0 | |||
| num_readers = 4 | |||
| repeat_size = 3 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded, dataset_size): | |||
| num_padded_iter = 0 | |||
| @@ -479,7 +488,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_ | |||
| epoch1_shuffle_result = [] | |||
| epoch2_shuffle_result = [] | |||
| epoch3_shuffle_result = [] | |||
| data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -534,6 +543,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul | |||
| padded_sample['rating'] = 1.0 | |||
| num_readers = 4 | |||
| repeat_size = 3 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| def partitions(num_shards, num_padded, dataset_size): | |||
| num_padded_iter = 0 | |||
| @@ -542,7 +552,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul | |||
| epoch_result = [[["" for i in range(dataset_size)] for i in range(repeat_size)] for i in range(num_shards)] | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| @@ -25,14 +25,13 @@ from mindspore.dataset.text import to_str | |||
| from mindspore.mindrecord import FileWriter | |||
| FILES_NUM = 4 | |||
| CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord" | |||
| CV_DIR_NAME = "../data/mindrecord/testImageNetData" | |||
| @pytest.fixture | |||
| def add_and_remove_cv_file(): | |||
| """add/remove cv file""" | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| try: | |||
| for x in paths: | |||
| @@ -40,7 +39,7 @@ def add_and_remove_cv_file(): | |||
| os.remove("{}".format(x)) | |||
| if os.path.exists("{}.db".format(x)): | |||
| os.remove("{}.db".format(x)) | |||
| writer = FileWriter(CV_FILE_NAME, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| data = get_data(CV_DIR_NAME, True) | |||
| cv_schema_json = {"id": {"type": "int32"}, | |||
| "file_name": {"type": "string"}, | |||
| @@ -66,7 +65,8 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file): | |||
| """tutorial for cv minderdataset.""" | |||
| num_readers = 4 | |||
| sampler = ds.PKSampler(2) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", None, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 6 | |||
| @@ -86,7 +86,8 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| sampler = ds.PKSampler(2) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 6 | |||
| @@ -108,7 +109,8 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| sampler = ds.PKSampler(3, None, True) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 9 | |||
| @@ -129,7 +131,8 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| sampler = ds.PKSampler(3, None, True, 'label', 5) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| @@ -150,7 +153,8 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| sampler = ds.PKSampler(3, None, True, 'label', 10) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 9 | |||
| @@ -171,7 +175,8 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| sampler = ds.PKSampler(5, None, True) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 15 | |||
| num_iter = 0 | |||
| @@ -191,7 +196,8 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| sampler = ds.PKSampler(5, None, True, 'label', 20) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 15 | |||
| num_iter = 0 | |||
| @@ -211,7 +217,8 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| sampler = ds.PKSampler(5, None, True, 'label', 10) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| @@ -230,10 +237,11 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file): | |||
| """tutorial for cv minderdataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| indices = [1, 2, 3, 5, 7] | |||
| samplers = (ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)) | |||
| for sampler in samplers: | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| @@ -255,9 +263,10 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| indices = [1, 2, 2, 5, 7, 9] | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices) | |||
| for sampler in samplers: | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| @@ -279,9 +288,10 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| indices = [] | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices) | |||
| for sampler in samplers: | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 0 | |||
| num_iter = 0 | |||
| @@ -304,8 +314,9 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file | |||
| num_readers = 4 | |||
| indices = [1, 2, 4, 11, 13] | |||
| samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| for sampler in samplers: | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| @@ -327,8 +338,9 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file): | |||
| num_readers = 4 | |||
| indices = [1, 2, 4, -1, -2] | |||
| samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices) | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| for sampler in samplers: | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| @@ -350,7 +362,8 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| sampler = ds.RandomSampler() | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| @@ -373,8 +386,9 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): | |||
| def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| sampler = ds.RandomSampler() | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 10 | |||
| ds1 = data_set.repeat(3) | |||
| @@ -407,8 +421,9 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): | |||
| def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| sampler = ds.RandomSampler(replacement=True, num_samples=5) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| @@ -428,8 +443,9 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): | |||
| def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| sampler = ds.RandomSampler(replacement=False, num_samples=2) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 2 | |||
| num_iter = 0 | |||
| @@ -449,8 +465,9 @@ def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_fil | |||
| def test_cv_minddataset_random_sampler_replacement_false_2(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| sampler = ds.RandomSampler(replacement=False, num_samples=20) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| @@ -471,8 +488,9 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file): | |||
| data = get_data(CV_DIR_NAME, True) | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| sampler = ds.SequentialSampler(1, 4) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 4 | |||
| num_iter = 0 | |||
| @@ -495,8 +513,9 @@ def test_cv_minddataset_sequential_sampler_offeset(add_and_remove_cv_file): | |||
| data = get_data(CV_DIR_NAME, True) | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| sampler = ds.SequentialSampler(2, 10) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| dataset_size = data_set.get_dataset_size() | |||
| assert dataset_size == 10 | |||
| @@ -520,8 +539,9 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file): | |||
| data = get_data(CV_DIR_NAME, True) | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| sampler = ds.SequentialSampler(2, 20) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| dataset_size = data_set.get_dataset_size() | |||
| assert dataset_size == 10 | |||
| @@ -545,7 +565,8 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): | |||
| data = get_data(CV_DIR_NAME, True) | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| d = ds.MindDataset(file_name + "0", columns_list, | |||
| num_readers, shuffle=False) | |||
| d1, d2 = d.split([8, 2], randomize=False) | |||
| assert d.get_dataset_size() == 10 | |||
| @@ -581,7 +602,8 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): | |||
| data = get_data(CV_DIR_NAME, True) | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| d = ds.MindDataset(file_name + "0", columns_list, | |||
| num_readers, shuffle=False) | |||
| d1, d2 = d.split([0.8, 0.2], randomize=False) | |||
| assert d.get_dataset_size() == 10 | |||
| @@ -617,7 +639,8 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): | |||
| data = get_data(CV_DIR_NAME, True) | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| d = ds.MindDataset(file_name + "0", columns_list, | |||
| num_readers, shuffle=False) | |||
| d1, d2 = d.split([0.41, 0.59], randomize=False) | |||
| assert d.get_dataset_size() == 10 | |||
| @@ -652,7 +675,8 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): | |||
| def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| d = ds.MindDataset(file_name + "0", columns_list, | |||
| num_readers, shuffle=False) | |||
| # should set seed to avoid data overlap | |||
| ds.config.set_seed(111) | |||
| @@ -693,7 +717,8 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file): | |||
| data = get_data(CV_DIR_NAME, True) | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| d = ds.MindDataset(file_name + "0", columns_list, | |||
| num_readers, shuffle=False) | |||
| # should set seed to avoid data overlap | |||
| ds.config.set_seed(111) | |||
| @@ -23,38 +23,25 @@ import mindspore.dataset as ds | |||
| from mindspore import log as logger | |||
| from mindspore.mindrecord import FileWriter | |||
| TEMP_FILE = "../data/mindrecord/testMindDataSet/temp.mindrecord" | |||
| AUTO_FILE = "../data/mindrecord/testMindDataSet/auto.mindrecord" | |||
| TFRECORD_FILES = "../data/mindrecord/testTFRecordData/dummy.tfrecord" | |||
| FILES_NUM = 1 | |||
| num_readers = 1 | |||
| @pytest.fixture(name="add_remove_file") | |||
| def fixture_remove(): | |||
| def remove_file(file_name): | |||
| """add/remove cv file""" | |||
| if os.path.exists("{}".format(TEMP_FILE)): | |||
| os.remove("{}".format(TEMP_FILE)) | |||
| if os.path.exists("{}.db".format(TEMP_FILE)): | |||
| os.remove("{}.db".format(TEMP_FILE)) | |||
| if os.path.exists("{}".format(AUTO_FILE)): | |||
| os.remove("{}".format(AUTO_FILE)) | |||
| if os.path.exists("{}.db".format(AUTO_FILE)): | |||
| os.remove("{}.db".format(AUTO_FILE)) | |||
| yield "yield_cv_data" | |||
| if os.path.exists("{}".format(TEMP_FILE)): | |||
| os.remove("{}".format(TEMP_FILE)) | |||
| if os.path.exists("{}.db".format(TEMP_FILE)): | |||
| os.remove("{}.db".format(TEMP_FILE)) | |||
| if os.path.exists("{}".format(AUTO_FILE)): | |||
| os.remove("{}".format(AUTO_FILE)) | |||
| if os.path.exists("{}.db".format(AUTO_FILE)): | |||
| os.remove("{}.db".format(AUTO_FILE)) | |||
| def test_case_00(add_remove_file): # only bin data | |||
| if os.path.exists("{}".format(file_name)): | |||
| os.remove("{}".format(file_name)) | |||
| if os.path.exists("{}.db".format(file_name)): | |||
| os.remove("{}.db".format(file_name)) | |||
| def test_case_00(): | |||
| """ | |||
| Feature: save op | |||
| Description: all bin data | |||
| Expectation: generated mindrecord file | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data = [{"image1": bytes("image1 bytes abc", encoding='UTF-8'), | |||
| "image2": bytes("image1 bytes def", encoding='UTF-8'), | |||
| "image3": bytes("image1 bytes ghi", encoding='UTF-8'), | |||
| @@ -86,13 +73,16 @@ def test_case_00(add_remove_file): # only bin data | |||
| "image3": {"type": "bytes"}, | |||
| "image4": {"type": "bytes"}, | |||
| "image5": {"type": "bytes"}} | |||
| writer = FileWriter(TEMP_FILE, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| writer.add_schema(schema, "schema") | |||
| writer.write_raw_data(data) | |||
| writer.commit() | |||
| d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False) | |||
| d1.save(AUTO_FILE, FILES_NUM) | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False) | |||
| d1.save(file_name_auto, FILES_NUM) | |||
| data_value_to_list = [] | |||
| for item in data: | |||
| @@ -104,7 +94,7 @@ def test_case_00(add_remove_file): # only bin data | |||
| new_data['image5'] = np.asarray(list(item["image5"]), dtype=np.uint8) | |||
| data_value_to_list.append(new_data) | |||
| d2 = ds.MindDataset(dataset_file=AUTO_FILE, | |||
| d2 = ds.MindDataset(dataset_file=file_name_auto, | |||
| num_parallel_workers=num_readers, | |||
| shuffle=False) | |||
| assert d2.get_dataset_size() == 5 | |||
| @@ -119,9 +109,12 @@ def test_case_00(add_remove_file): # only bin data | |||
| assert item[field] == data_value_to_list[num_iter][field] | |||
| num_iter += 1 | |||
| assert num_iter == 5 | |||
| remove_file(file_name) | |||
| remove_file(file_name_auto) | |||
| def test_case_01(add_remove_file): # only raw data | |||
| file_name_auto = './' | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data = [{"file_name": "001.jpg", "label": 43}, | |||
| {"file_name": "002.jpg", "label": 91}, | |||
| {"file_name": "003.jpg", "label": 61}, | |||
| @@ -132,13 +125,16 @@ def test_case_01(add_remove_file): # only raw data | |||
| "label": {"type": "int32"} | |||
| } | |||
| writer = FileWriter(TEMP_FILE, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| writer.add_schema(schema, "schema") | |||
| writer.write_raw_data(data) | |||
| writer.commit() | |||
| d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False) | |||
| d1.save(AUTO_FILE, FILES_NUM) | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False) | |||
| d1.save(file_name_auto, FILES_NUM) | |||
| data_value_to_list = [] | |||
| for item in data: | |||
| @@ -147,7 +143,7 @@ def test_case_01(add_remove_file): # only raw data | |||
| new_data['label'] = np.asarray(list([item["label"]]), dtype=np.int32) | |||
| data_value_to_list.append(new_data) | |||
| d2 = ds.MindDataset(dataset_file=AUTO_FILE, | |||
| d2 = ds.MindDataset(dataset_file=file_name_auto, | |||
| num_parallel_workers=num_readers, | |||
| shuffle=False) | |||
| assert d2.get_dataset_size() == 6 | |||
| @@ -163,9 +159,17 @@ def test_case_01(add_remove_file): # only raw data | |||
| assert item[field] == data_value_to_list[num_iter][field] | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| remove_file(file_name) | |||
| remove_file(file_name_auto) | |||
| def test_case_02(add_remove_file): # muti-bytes | |||
| def test_case_02(): # muti-bytes | |||
| """ | |||
| Feature: save op | |||
| Description: multiple byte fields | |||
| Expectation: generated mindrecord file | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| data = [{"file_name": "001.jpg", "label": 43, | |||
| "float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32), | |||
| "float64_array": np.array([48.1234556789, 49.3251241431, 50.13514312414, 51.8971298471, | |||
| @@ -258,13 +262,16 @@ def test_case_02(add_remove_file): # muti-bytes | |||
| "label": {"type": "int32"}, | |||
| "image4": {"type": "bytes"}, | |||
| "image5": {"type": "bytes"}} | |||
| writer = FileWriter(TEMP_FILE, FILES_NUM) | |||
| writer = FileWriter(file_name, FILES_NUM) | |||
| writer.add_schema(schema, "schema") | |||
| writer.write_raw_data(data) | |||
| writer.commit() | |||
| d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False) | |||
| d1.save(AUTO_FILE, FILES_NUM) | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False) | |||
| d1.save(file_name_auto, FILES_NUM) | |||
| data_value_to_list = [] | |||
| for item in data: | |||
| @@ -284,7 +291,7 @@ def test_case_02(add_remove_file): # muti-bytes | |||
| new_data['image5'] = np.asarray(list(item["image5"]), dtype=np.uint8) | |||
| data_value_to_list.append(new_data) | |||
| d2 = ds.MindDataset(dataset_file=AUTO_FILE, | |||
| d2 = ds.MindDataset(dataset_file=file_name_auto, | |||
| num_parallel_workers=num_readers, | |||
| shuffle=False) | |||
| assert d2.get_dataset_size() == 6 | |||
| @@ -303,6 +310,8 @@ def test_case_02(add_remove_file): # muti-bytes | |||
| assert item[field] == data_value_to_list[num_iter][field] | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| remove_file(file_name) | |||
| remove_file(file_name_auto) | |||
| def generator_1d(): | |||
| @@ -310,14 +319,21 @@ def generator_1d(): | |||
| yield (np.array([i]),) | |||
| def test_case_03(add_remove_file): | |||
| def test_case_03(): | |||
| """ | |||
| Feature: save op | |||
| Description: 1D numpy array | |||
| Expectation: generated mindrecord file | |||
| """ | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| # apply dataset operations | |||
| d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False) | |||
| d1.save(AUTO_FILE) | |||
| d1.save(file_name_auto) | |||
| d2 = ds.MindDataset(dataset_file=AUTO_FILE, | |||
| d2 = ds.MindDataset(dataset_file=file_name_auto, | |||
| num_parallel_workers=num_readers, | |||
| shuffle=False) | |||
| @@ -327,6 +343,7 @@ def test_case_03(add_remove_file): | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 1 | |||
| remove_file(file_name_auto) | |||
| def generator_with_type(t): | |||
| @@ -335,6 +352,9 @@ def generator_with_type(t): | |||
| def type_tester(t): | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| logger.info("Test with Type {}".format(t.__name__)) | |||
| # apply dataset operations | |||
| @@ -344,9 +364,9 @@ def type_tester(t): | |||
| data1 = data1.repeat(3) | |||
| data1.save(AUTO_FILE) | |||
| data1.save(file_name_auto) | |||
| d2 = ds.MindDataset(dataset_file=AUTO_FILE, | |||
| d2 = ds.MindDataset(dataset_file=file_name_auto, | |||
| num_parallel_workers=num_readers, | |||
| shuffle=False) | |||
| @@ -362,10 +382,7 @@ def type_tester(t): | |||
| i = 0 | |||
| num_repeat += 1 | |||
| assert num_repeat == 3 | |||
| if os.path.exists("{}".format(AUTO_FILE)): | |||
| os.remove("{}".format(AUTO_FILE)) | |||
| if os.path.exists("{}.db".format(AUTO_FILE)): | |||
| os.remove("{}.db".format(AUTO_FILE)) | |||
| remove_file(file_name_auto) | |||
| def test_case_04(): | |||
| @@ -377,20 +394,31 @@ def test_case_04(): | |||
| type_tester(t) | |||
| def test_case_05(add_remove_file): | |||
| def test_case_05(): | |||
| """ | |||
| Feature: save op | |||
| Description: Exception Test | |||
| Expectation: exception | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False) | |||
| with pytest.raises(Exception, match="num_files should between 0 and 1000."): | |||
| d1.save(AUTO_FILE, 0) | |||
| d1.save(file_name, 0) | |||
| def test_case_06(add_remove_file): | |||
| def test_case_06(): | |||
| """ | |||
| Feature: save op | |||
| Description: Exception Test | |||
| Expectation: exception | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False) | |||
| with pytest.raises(Exception, match="tfrecord dataset format is not supported."): | |||
| d1.save(AUTO_FILE, 1, "tfrecord") | |||
| d1.save(file_name, 1, "tfrecord") | |||
| def cast_name(key): | |||
| @@ -405,16 +433,20 @@ def cast_name(key): | |||
| def test_case_07(): | |||
| if os.path.exists("{}".format(AUTO_FILE)): | |||
| os.remove("{}".format(AUTO_FILE)) | |||
| if os.path.exists("{}.db".format(AUTO_FILE)): | |||
| os.remove("{}.db".format(AUTO_FILE)) | |||
| """ | |||
| Feature: save op | |||
| Description: save tfrecord files | |||
| Expectation: generated mindrecord file | |||
| """ | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| d1 = ds.TFRecordDataset(TFRECORD_FILES, shuffle=False) | |||
| tf_data = [] | |||
| for x in d1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| tf_data.append(x) | |||
| d1.save(AUTO_FILE, FILES_NUM) | |||
| d2 = ds.MindDataset(dataset_file=AUTO_FILE, | |||
| d1.save(file_name_auto, FILES_NUM) | |||
| d2 = ds.MindDataset(dataset_file=file_name_auto, | |||
| num_parallel_workers=num_readers, | |||
| shuffle=False) | |||
| mr_data = [] | |||
| @@ -429,11 +461,7 @@ def test_case_07(): | |||
| assert v == mr_data[count][cast_name(k)] | |||
| count += 1 | |||
| assert count == 10 | |||
| if os.path.exists("{}".format(AUTO_FILE)): | |||
| os.remove("{}".format(AUTO_FILE)) | |||
| if os.path.exists("{}.db".format(AUTO_FILE)): | |||
| os.remove("{}.db".format(AUTO_FILE)) | |||
| remove_file(file_name_auto) | |||
| def generator_dynamic_1d(): | |||
| @@ -461,14 +489,21 @@ def generator_dynamic_2d_1(): | |||
| yield (np.arange(10).reshape([5, 2]),) | |||
| def test_case_08(add_remove_file): | |||
| def test_case_08(): | |||
| """ | |||
| Feature: save op | |||
| Description: save dynamic 1D numpy array | |||
| Expectation: generated mindrecord file | |||
| """ | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| # apply dataset operations | |||
| d1 = ds.GeneratorDataset(generator_dynamic_1d, ["data"], shuffle=False) | |||
| d1.save(AUTO_FILE) | |||
| d1.save(file_name_auto) | |||
| d2 = ds.MindDataset(dataset_file=AUTO_FILE, | |||
| d2 = ds.MindDataset(dataset_file=file_name_auto, | |||
| num_parallel_workers=num_readers, | |||
| shuffle=False) | |||
| @@ -481,16 +516,23 @@ def test_case_08(add_remove_file): | |||
| golden = np.array(arr) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 1 | |||
| remove_file(file_name_auto) | |||
| def test_case_09(add_remove_file): | |||
| def test_case_09(): | |||
| """ | |||
| Feature: save op | |||
| Description: save dynamic 2D numpy array | |||
| Expectation: generated mindrecord file | |||
| """ | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| # apply dataset operations | |||
| d1 = ds.GeneratorDataset(generator_dynamic_2d_0, ["data"], shuffle=False) | |||
| d1.save(AUTO_FILE) | |||
| d1.save(file_name_auto) | |||
| d2 = ds.MindDataset(dataset_file=AUTO_FILE, | |||
| d2 = ds.MindDataset(dataset_file=file_name_auto, | |||
| num_parallel_workers=num_readers, | |||
| shuffle=False) | |||
| @@ -502,13 +544,23 @@ def test_case_09(add_remove_file): | |||
| golden = np.arange(10).reshape([2, 5]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 1 | |||
| remove_file(file_name_auto) | |||
| def test_case_10(add_remove_file): | |||
| def test_case_10(): | |||
| """ | |||
| Feature: save op | |||
| Description: save 2D Tensor of different shape | |||
| Expectation: Exception | |||
| """ | |||
| file_name_auto = './' | |||
| file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_name_auto += '_auto' | |||
| # apply dataset operations | |||
| d1 = ds.GeneratorDataset(generator_dynamic_2d_1, ["data"], shuffle=False) | |||
| with pytest.raises(Exception, match= | |||
| "Error: besides dimension 0, other dimension shape is different from the previous's"): | |||
| d1.save(AUTO_FILE) | |||
| d1.save(file_name_auto) | |||
| remove_file(file_name_auto) | |||
| @@ -22,7 +22,6 @@ import os | |||
| import pytest | |||
| import numpy as np | |||
| from test_minddataset_sampler import add_and_remove_cv_file, get_data, CV_DIR_NAME, CV_FILE_NAME | |||
| from util import config_get_set_num_parallel_workers, config_get_set_seed | |||
| import mindspore.common.dtype as mstype | |||
| @@ -509,38 +508,6 @@ def delete_json_files(): | |||
| except IOError: | |||
| logger.info("Error while deleting: {}".format(f)) | |||
| # Test save load minddataset | |||
| def skip_test_minddataset(add_and_remove_cv_file=True): | |||
| """tutorial for cv minderdataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| indices = [1, 2, 3, 5, 7] | |||
| sampler = ds.SubsetRandomSampler(indices) | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| sampler=sampler) | |||
| # Serializing into python dictionary | |||
| ds1_dict = ds.serialize(data_set) | |||
| # Serializing into json object | |||
| ds1_json = json.dumps(ds1_dict, sort_keys=True) | |||
| # Reconstruct dataset pipeline from its serialized form | |||
| data_set = ds.deserialize(input_dict=ds1_dict) | |||
| ds2_dict = ds.serialize(data_set) | |||
| # Serializing into json object | |||
| ds2_json = json.dumps(ds2_dict, sort_keys=True) | |||
| assert ds1_json == ds2_json | |||
| _ = get_data(CV_DIR_NAME) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| assert num_iter == 5 | |||
| if __name__ == '__main__': | |||
| test_serdes_imagefolder_dataset() | |||
| test_serdes_mnist_dataset() | |||
| @@ -555,4 +522,3 @@ if __name__ == '__main__': | |||
| test_serdes_uniform_augment() | |||
| skip_test_serdes_fill() | |||
| test_serdes_exception() | |||
| skip_test_minddataset() | |||
| @@ -23,8 +23,6 @@ from mindspore import log as logger | |||
| from mindspore.mindrecord import FileWriter, FileReader, MindPage, SUCCESS | |||
| from mindspore.mindrecord import ParamValueError, MRMGetMetaError | |||
| CV_FILE_NAME = "./imagenet.mindrecord" | |||
| NLP_FILE_NAME = "./aclImdb.mindrecord" | |||
| FILES_NUM = 4 | |||
| def remove_one_file(x): | |||
| @@ -42,20 +40,6 @@ def remove_file(file_name): | |||
| x = file_name + str(i) + ".db" | |||
| remove_one_file(x) | |||
| @pytest.fixture | |||
| def fixture_cv_file(): | |||
| """add/remove file""" | |||
| remove_file(CV_FILE_NAME) | |||
| yield "yield_fixture_data" | |||
| remove_file(CV_FILE_NAME) | |||
| @pytest.fixture | |||
| def fixture_nlp_file(): | |||
| """add/remove file""" | |||
| remove_file(NLP_FILE_NAME) | |||
| yield "yield_fixture_data" | |||
| remove_file(NLP_FILE_NAME) | |||
| def test_cv_file_writer_shard_num_none(): | |||
| """test cv file writer when shard num is None.""" | |||
| with pytest.raises(Exception, match="Shard num is illegal."): | |||
| @@ -71,29 +55,29 @@ def test_cv_file_writer_shard_num_str(): | |||
| def test_cv_page_reader_consumer_num_none(): | |||
| """test cv page reader when consumer number is None.""" | |||
| with pytest.raises(Exception, match="Consumer number is illegal."): | |||
| MindPage(CV_FILE_NAME + "0", None) | |||
| MindPage("dummy.mindrecord", None) | |||
| def test_cv_page_reader_consumer_num_str(): | |||
| """test cv page reader when consumer number is string.""" | |||
| with pytest.raises(Exception, match="Consumer number is illegal."): | |||
| MindPage(CV_FILE_NAME + "0", "2") | |||
| MindPage("dummy.mindrecord", "2") | |||
| def test_nlp_file_reader_consumer_num_none(): | |||
| """test nlp file reader when consumer number is None.""" | |||
| with pytest.raises(Exception, match="Consumer number is illegal."): | |||
| FileReader(NLP_FILE_NAME + "0", None) | |||
| FileReader("dummy.mindrecord", None) | |||
| def test_nlp_file_reader_consumer_num_str(): | |||
| """test nlp file reader when consumer number is string.""" | |||
| with pytest.raises(Exception, match="Consumer number is illegal."): | |||
| FileReader(NLP_FILE_NAME + "0", "4") | |||
| FileReader("dummy.mindrecord", "4") | |||
| def create_cv_mindrecord(files_num): | |||
| writer = FileWriter(CV_FILE_NAME, files_num) | |||
| def create_cv_mindrecord(files_num, file_name): | |||
| writer = FileWriter(file_name, files_num) | |||
| data = get_data("../data/mindrecord/testImageNetData/") | |||
| cv_schema_json = {"file_name": {"type": "string"}, | |||
| "label": {"type": "int64"}, "data": {"type": "bytes"}} | |||
| @@ -104,139 +88,218 @@ def create_cv_mindrecord(files_num): | |||
| def test_lack_partition_and_db(): | |||
| """test file reader when mindrecord file does not exist.""" | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when mindrecord file does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| with pytest.raises(RuntimeError) as err: | |||
| reader = FileReader('dummy.mindrecord') | |||
| reader.close() | |||
| assert 'Unexpected error. Invalid file, path:' in str(err.value) | |||
| def test_lack_db(fixture_cv_file): | |||
| """test file reader when db file does not exist.""" | |||
| create_cv_mindrecord(1) | |||
| os.remove("{}.db".format(CV_FILE_NAME)) | |||
| def test_lack_db(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when db file does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(1, file_name) | |||
| os.remove("{}.db".format(file_name)) | |||
| with pytest.raises(RuntimeError) as err: | |||
| reader = FileReader(CV_FILE_NAME) | |||
| reader = FileReader(file_name) | |||
| reader.close() | |||
| assert 'Unexpected error. Invalid database file, path:' in str(err.value) | |||
| def test_lack_some_partition_and_db(fixture_cv_file): | |||
| """test file reader when some partition and db do not exist.""" | |||
| create_cv_mindrecord(4) | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| remove_file(file_name) | |||
| def test_lack_some_partition_and_db(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when some partition and db do not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(4, file_name) | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| os.remove("{}".format(paths[3])) | |||
| os.remove("{}.db".format(paths[3])) | |||
| with pytest.raises(RuntimeError) as err: | |||
| reader = FileReader(CV_FILE_NAME + "0") | |||
| reader = FileReader(file_name + "0") | |||
| reader.close() | |||
| assert 'Unexpected error. Invalid file, path:' in str(err.value) | |||
| def test_lack_some_partition_first(fixture_cv_file): | |||
| """test file reader when first partition does not exist.""" | |||
| create_cv_mindrecord(4) | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| remove_file(file_name) | |||
| def test_lack_some_partition_first(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when first partition does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(4, file_name) | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| os.remove("{}".format(paths[0])) | |||
| with pytest.raises(RuntimeError) as err: | |||
| reader = FileReader(CV_FILE_NAME + "0") | |||
| reader = FileReader(file_name + "0") | |||
| reader.close() | |||
| assert 'Unexpected error. Invalid file, path:' in str(err.value) | |||
| def test_lack_some_partition_middle(fixture_cv_file): | |||
| """test file reader when some partition does not exist.""" | |||
| create_cv_mindrecord(4) | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| remove_file(file_name) | |||
| def test_lack_some_partition_middle(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when some partition does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(4, file_name) | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| os.remove("{}".format(paths[1])) | |||
| with pytest.raises(RuntimeError) as err: | |||
| reader = FileReader(CV_FILE_NAME + "0") | |||
| reader = FileReader(file_name + "0") | |||
| reader.close() | |||
| assert 'Unexpected error. Invalid file, path:' in str(err.value) | |||
| def test_lack_some_partition_last(fixture_cv_file): | |||
| """test file reader when last partition does not exist.""" | |||
| create_cv_mindrecord(4) | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| remove_file(file_name) | |||
| def test_lack_some_partition_last(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when last partition does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(4, file_name) | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| os.remove("{}".format(paths[3])) | |||
| with pytest.raises(RuntimeError) as err: | |||
| reader = FileReader(CV_FILE_NAME + "0") | |||
| reader = FileReader(file_name + "0") | |||
| reader.close() | |||
| assert 'Unexpected error. Invalid file, path:' in str(err.value) | |||
| def test_mindpage_lack_some_partition(fixture_cv_file): | |||
| """test page reader when some partition does not exist.""" | |||
| create_cv_mindrecord(4) | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| remove_file(file_name) | |||
| def test_mindpage_lack_some_partition(): | |||
| """ | |||
| Feature: MindPage | |||
| Description: test page reader when some partition does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(4, file_name) | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| os.remove("{}".format(paths[0])) | |||
| with pytest.raises(RuntimeError) as err: | |||
| MindPage(CV_FILE_NAME + "0") | |||
| MindPage(file_name + "0") | |||
| assert 'Unexpected error. Invalid file, path:' in str(err.value) | |||
| def test_lack_some_db(fixture_cv_file): | |||
| """test file reader when some db does not exist.""" | |||
| create_cv_mindrecord(4) | |||
| paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| remove_file(file_name) | |||
| def test_lack_some_db(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when some db does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(4, file_name) | |||
| paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| os.remove("{}.db".format(paths[3])) | |||
| with pytest.raises(RuntimeError) as err: | |||
| reader = FileReader(CV_FILE_NAME + "0") | |||
| reader = FileReader(file_name + "0") | |||
| reader.close() | |||
| assert 'Unexpected error. Invalid database file, path:' in str(err.value) | |||
| remove_file(file_name) | |||
| def test_invalid_mindrecord(): | |||
| """test file reader when the content of mindrecord is illegal.""" | |||
| with open(CV_FILE_NAME, 'w') as f: | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when the content of mindrecord is illegal | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| with open(file_name, 'w') as f: | |||
| dummy = 's' * 100 | |||
| f.write(dummy) | |||
| with pytest.raises(RuntimeError) as err: | |||
| FileReader(CV_FILE_NAME) | |||
| FileReader(file_name) | |||
| assert "Unexpected error. Invalid file content, incorrect file or file header" in str(err.value) | |||
| os.remove(CV_FILE_NAME) | |||
| def test_invalid_db(fixture_cv_file): | |||
| """test file reader when the content of db is illegal.""" | |||
| create_cv_mindrecord(1) | |||
| os.remove("imagenet.mindrecord.db") | |||
| with open('imagenet.mindrecord.db', 'w') as f: | |||
| remove_file(file_name) | |||
| def test_invalid_db(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when the content of db is illegal | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(1, file_name) | |||
| os.remove(file_name + ".db") | |||
| with open(file_name + ".db", 'w') as f: | |||
| f.write('just for test') | |||
| with pytest.raises(RuntimeError) as err: | |||
| FileReader('imagenet.mindrecord') | |||
| FileReader(file_name) | |||
| assert "Unexpected error. Failed to execute sql [ SELECT NAME from SHARD_NAME; ], " in str(err.value) | |||
| def test_overwrite_invalid_mindrecord(fixture_cv_file): | |||
| """test file writer when overwrite invalid mindreocrd file.""" | |||
| with open(CV_FILE_NAME, 'w') as f: | |||
| remove_file(file_name) | |||
| def test_overwrite_invalid_mindrecord(): | |||
| """ | |||
| Feature: FileWriter | |||
| Description: test file writer when overwrite invalid mindrecord file | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| with open(file_name, 'w') as f: | |||
| f.write('just for test') | |||
| with pytest.raises(RuntimeError) as err: | |||
| create_cv_mindrecord(1) | |||
| create_cv_mindrecord(1, file_name) | |||
| assert 'Unexpected error. Invalid file, Mindrecord files already existed in path:' in str(err.value) | |||
| def test_overwrite_invalid_db(fixture_cv_file): | |||
| """test file writer when overwrite invalid db file.""" | |||
| with open('imagenet.mindrecord.db', 'w') as f: | |||
| remove_file(file_name) | |||
| def test_overwrite_invalid_db(): | |||
| """ | |||
| Feature: FileWriter | |||
| Description: test file writer when overwrite invalid db file | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| with open(file_name + '.db', 'w') as f: | |||
| f.write('just for test') | |||
| with pytest.raises(RuntimeError) as err: | |||
| create_cv_mindrecord(1) | |||
| create_cv_mindrecord(1, file_name) | |||
| assert 'Unexpected error. Failed to write data to db.' in str(err.value) | |||
| def test_read_after_close(fixture_cv_file): | |||
| """test file reader when close read.""" | |||
| create_cv_mindrecord(1) | |||
| reader = FileReader(CV_FILE_NAME) | |||
| remove_file(file_name) | |||
| def test_read_after_close(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when close read | |||
| Expectation: no record is read after the reader is closed | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(1, file_name) | |||
| reader = FileReader(file_name) | |||
| reader.close() | |||
| count = 0 | |||
| for index, x in enumerate(reader.get_next()): | |||
| count = count + 1 | |||
| logger.info("#item{}: {}".format(index, x)) | |||
| assert count == 0 | |||
| def test_file_read_after_read(fixture_cv_file): | |||
| """test file reader when finish read.""" | |||
| create_cv_mindrecord(1) | |||
| reader = FileReader(CV_FILE_NAME) | |||
| remove_file(file_name) | |||
| def test_file_read_after_read(): | |||
| """ | |||
| Feature: FileReader | |||
| Description: test file reader when finish read | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(1, file_name) | |||
| reader = FileReader(file_name) | |||
| count = 0 | |||
| for index, x in enumerate(reader.get_next()): | |||
| assert len(x) == 3 | |||
| @@ -249,25 +312,40 @@ def test_file_read_after_read(fixture_cv_file): | |||
| cnt = cnt + 1 | |||
| logger.info("#item{}: {}".format(index, x)) | |||
| assert cnt == 0 | |||
| remove_file(file_name) | |||
| def test_cv_file_writer_shard_num_greater_than_1000(): | |||
| """test cv file writer shard number greater than 1000.""" | |||
| """ | |||
| Feature: FileWriter | |||
| Description: test cv file writer shard number greater than 1000 | |||
| Expectation: exception occur | |||
| """ | |||
| with pytest.raises(ParamValueError) as err: | |||
| FileWriter(CV_FILE_NAME, 1001) | |||
| FileWriter('dummy.mindrecord', 1001) | |||
| assert 'Shard number should between' in str(err.value) | |||
| def test_add_index_without_add_schema(): | |||
| """ | |||
| Feature: FileWriter | |||
| Description: test add index without adding schema | |||
| Expectation: exception occur | |||
| """ | |||
| with pytest.raises(MRMGetMetaError) as err: | |||
| fw = FileWriter(CV_FILE_NAME) | |||
| fw = FileWriter('dummy.mindrecord') | |||
| fw.add_index(["label"]) | |||
| assert 'Failed to get meta info' in str(err.value) | |||
| def test_mindpage_pageno_pagesize_not_int(fixture_cv_file): | |||
| """test page reader when some partition does not exist.""" | |||
| create_cv_mindrecord(4) | |||
| reader = MindPage(CV_FILE_NAME + "0") | |||
| def test_mindpage_pageno_pagesize_not_int(): | |||
| """ | |||
| Feature: MindPage | |||
| Description: test page reader when some partition does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(4, file_name) | |||
| reader = MindPage(file_name + "0") | |||
| fields = reader.get_category_fields() | |||
| assert fields == ['file_name', 'label'], \ | |||
| 'failed on getting candidate category fields.' | |||
| @@ -293,12 +371,18 @@ def test_mindpage_pageno_pagesize_not_int(fixture_cv_file): | |||
| with pytest.raises(RuntimeError, match=r"Unexpected error. Invalid data, " | |||
| r"category_id: 99999 must be in the range \[0, 10\]."): | |||
| reader.read_at_page_by_id(99999, 0, 1) | |||
| def test_mindpage_filename_not_exist(fixture_cv_file): | |||
| """test page reader when some partition does not exist.""" | |||
| create_cv_mindrecord(4) | |||
| reader = MindPage(CV_FILE_NAME + "0") | |||
| remove_file(file_name) | |||
| def test_mindpage_filename_not_exist(): | |||
| """ | |||
| Feature: MindPage | |||
| Description: test page reader when some partition does not exist | |||
| Expectation: exception occur | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| create_cv_mindrecord(4, file_name) | |||
| reader = MindPage(file_name + "0") | |||
| fields = reader.get_category_fields() | |||
| assert fields == ['file_name', 'label'], \ | |||
| 'failed on getting candidate category fields.' | |||
| @@ -319,11 +403,15 @@ def test_mindpage_filename_not_exist(fixture_cv_file): | |||
| with pytest.raises(ParamValueError): | |||
| reader.read_at_page_by_name(1, 0, 1) | |||
| _ = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| remove_file(file_name) | |||
| def test_invalid_schema(): | |||
| mindrecord_file_name = "test.mindrecord" | |||
| """ | |||
| Feature: FileWriter | |||
| Description: test invalid schema | |||
| Expectation: exception occur | |||
| """ | |||
| mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| writer = FileWriter(mindrecord_file_name) | |||
| # string => str | |||
| @@ -462,7 +550,7 @@ def test_invalid_schema(): | |||
| os.remove("{}.db".format(mindrecord_file_name)) | |||
| def test_write_with_invalid_data(): | |||
| mindrecord_file_name = "test.mindrecord" | |||
| mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| # field: file_name => filename | |||
| with pytest.raises(RuntimeError, match="Unexpected error. Invalid data, schema count should be positive."): | |||