From 27579fe8583eb9215a9893100e21d7157f61706b Mon Sep 17 00:00:00 2001
From: Lixia Chen
Date: Fri, 4 Dec 2020 20:58:11 -0500
Subject: [PATCH] Fix a seg fault caused by std::regex

---
 .../dataset/engine/datasetops/dataset_op.cc |  4 +--
 tests/ut/python/cachetests/cachetest_py.sh  |  3 ++
 tests/ut/python/dataset/test_cache_map.py   |  8 ++---
 tests/ut/python/dataset/test_cache_nomap.py | 29 +++++++++++++++++++
 4 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc
index 20b0d9df5d..0e9d9aa977 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc
@@ -453,8 +453,8 @@ uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
   ss_str = std::regex_replace(ss_str, std::regex("device_id.*\n"), "");
 
   // Filter out the operator id field
-  ss_str = std::regex_replace(ss_str, std::regex(".*Parent.*\n"), "");
-  ss_str = std::regex_replace(ss_str, std::regex(".*Child.*\n"), "");
+  ss_str = std::regex_replace(ss_str, std::regex(" *Parent.*\n"), "");
+  ss_str = std::regex_replace(ss_str, std::regex(" *Child.*\n"), "");
   ss_str = std::regex_replace(ss_str, std::regex(R"(\(\s*\d+?\))"), "");
 
   // Doesn't matter whether there is any parent node above CacheOp or not.
diff --git a/tests/ut/python/cachetests/cachetest_py.sh b/tests/ut/python/cachetests/cachetest_py.sh
index d02486f22e..2a7aaedd16 100755
--- a/tests/ut/python/cachetests/cachetest_py.sh
+++ b/tests/ut/python/cachetests/cachetest_py.sh
@@ -324,6 +324,9 @@ HandleRcExit $? 0 0
 PytestCmd "test_cache_nomap.py" "test_cache_nomap_get_repeat_count"
 HandleRcExit $? 0 0
 
+PytestCmd "test_cache_nomap.py" "test_cache_nomap_long_file_list"
+HandleRcExit $? 0 0
+
 for i in $(seq 1 3)
 do
   test_name="test_cache_nomap_multiple_cache${i}"
diff --git a/tests/ut/python/dataset/test_cache_map.py b/tests/ut/python/dataset/test_cache_map.py
index b6d69210f4..36717b951a 100644
--- a/tests/ut/python/dataset/test_cache_map.py
+++ b/tests/ut/python/dataset/test_cache_map.py
@@ -1828,9 +1828,7 @@ def test_cache_map_cifar3():
 
        cache
          |
-   Map(resize)
-        |
-     Cifar100
+     Cifar10
     """
 
     logger.info("Test cache map cifar3")
@@ -1841,9 +1839,7 @@ def test_cache_map_cifar3():
 
     some_cache = ds.DatasetCache(session_id=session_id, size=1, spilling=False)
 
-    ds1 = ds.Cifar10Dataset(CIFAR10_DATA_DIR)
-    resize_op = c_vision.Resize((224, 224))
-    ds1 = ds1.map(input_columns=["image"], operations=resize_op, cache=some_cache)
+    ds1 = ds.Cifar10Dataset(CIFAR10_DATA_DIR, cache=some_cache)
 
     num_epoch = 2
     iter1 = ds1.create_dict_iterator(num_epochs=num_epoch)
diff --git a/tests/ut/python/dataset/test_cache_nomap.py b/tests/ut/python/dataset/test_cache_nomap.py
index 59f56da9a5..3e6f847479 100644
--- a/tests/ut/python/dataset/test_cache_nomap.py
+++ b/tests/ut/python/dataset/test_cache_nomap.py
@@ -1775,6 +1775,7 @@ def test_cache_nomap_textfile2():
        |
     TextFile
     """
+
     def my_tokenizer(line):
         words = line.split()
         if not words:
@@ -1884,6 +1885,34 @@ def test_cache_nomap_get_repeat_count():
         num_iter += 1
     assert num_iter == 12
 
+
+@pytest.mark.skipif(os.environ.get('RUN_CACHE_TEST') != 'TRUE', reason="Require to bring up cache server")
+def test_cache_nomap_long_file_list():
+    """
+    Test cache after TFRecord with a long list of files as arguments
+
+       Cache
+         |
+     TFRecord
+    """
+
+    logger.info("Test cache nomap long file list")
+    if "SESSION_ID" in os.environ:
+        session_id = int(os.environ['SESSION_ID'])
+    else:
+        raise RuntimeError("Testcase requires SESSION_ID environment variable")
+
+    some_cache = ds.DatasetCache(session_id=session_id, size=1, spilling=False)
+
+    ds1 = ds.TFRecordDataset([DATA_DIR[0] for _ in range(0, 1000)], SCHEMA_DIR, columns_list=["image"],
+                             cache=some_cache)
+
+    with pytest.raises(RuntimeError) as e:
+        sum([1 for _ in ds1])
+    assert "Out of memory" in str(e.value)
+    logger.info("test_cache_nomap_long_file_list Ended.\n")
+
+
 if __name__ == '__main__':
     test_cache_nomap_basic1()
     test_cache_nomap_basic2()
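
Note for reviewers (not part of the patch): the segfault comes from the
leading greedy ".*" in the Parent/Child filter patterns. libstdc++'s
std::regex matches by recursive backtracking, so a greedy ".*" recurses
roughly once per character of the line it scans. GenerateCRC runs these
filters over the serialized operator tree, and once that tree embeds a very
long file list (the new test passes the same TFRecord file 1000 times), the
recursion overflows the stack. Replacing ".*" with " *" bounds the
backtracking to the leading run of spaces. The standalone sketch below
reproduces the failure mode outside MindSpore; the file name, string length,
and scaffolding are illustrative assumptions, not code from this repository.

// regex_stack_overflow_sketch.cc -- assumed repro; the real crash threshold
// depends on the stack limit and the C++ standard library in use.
#include <iostream>
#include <regex>
#include <string>

int main() {
  // One very long line, standing in for a serialized op tree that embeds a
  // 1000-entry file list, followed by a field the CRC filter removes.
  std::string ss_str(1000000, 'x');
  ss_str += "\n  Parent: none\n";

  // Patched pattern: " *" only backtracks across the leading spaces, so the
  // recursion depth stays small no matter how long the line is.
  std::string out = std::regex_replace(ss_str, std::regex(" *Parent.*\n"), "");
  std::cout << "patched pattern ok, " << out.size() << " chars left\n";

  // Original pattern: ".*Parent" walks the whole line and backtracks
  // character by character; with libstdc++ each step is a recursive call,
  // so a long enough line overflows the stack (SIGSEGV). Uncomment to
  // reproduce the crash this commit fixes:
  // std::regex_replace(ss_str, std::regex(".*Parent.*\n"), "");
  return 0;
}

Assuming the serialized Parent/Child lines are indented only with spaces,
" *Parent.*\n" and ".*Parent.*\n" delete the same lines, so the CRCs
generated for pipelines that previously worked are unchanged.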