Browse Source

!9544 Fix a possible segmentation fault in GenerateCRC caused by std::regex

From: @lixiachen
Reviewed-by: @jonyguo,@heleiwang
Signed-off-by: @heleiwang
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
aa5d4e08a9
4 changed files with 36 additions and 8 deletions
  1. +2
    -2
      mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc
  2. +3
    -0
      tests/ut/python/cachetests/cachetest_py.sh
  3. +2
    -6
      tests/ut/python/dataset/test_cache_map.py
  4. +29
    -0
      tests/ut/python/dataset/test_cache_nomap.py

+ 2
- 2
mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc View File

@@ -453,8 +453,8 @@ uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
ss_str = std::regex_replace(ss_str, std::regex("device_id.*\n"), "");

// Filter out the operator id field
ss_str = std::regex_replace(ss_str, std::regex(".*Parent.*\n"), "");
ss_str = std::regex_replace(ss_str, std::regex(".*Child.*\n"), "");
ss_str = std::regex_replace(ss_str, std::regex(" *Parent.*\n"), "");
ss_str = std::regex_replace(ss_str, std::regex(" *Child.*\n"), "");
ss_str = std::regex_replace(ss_str, std::regex(R"(\(\s*\d+?\))"), "");

// Doesn't matter whether there is any parent node above CacheOp or not.


+ 3
- 0
tests/ut/python/cachetests/cachetest_py.sh View File

@@ -324,6 +324,9 @@ HandleRcExit $? 0 0
PytestCmd "test_cache_nomap.py" "test_cache_nomap_get_repeat_count"
HandleRcExit $? 0 0

PytestCmd "test_cache_nomap.py" "test_cache_nomap_long_file_list"
HandleRcExit $? 0 0

for i in $(seq 1 3)
do
test_name="test_cache_nomap_multiple_cache${i}"


+ 2
- 6
tests/ut/python/dataset/test_cache_map.py View File

@@ -1828,9 +1828,7 @@ def test_cache_map_cifar3():

cache
|
Map(resize)
|
Cifar100
Cifar10
"""

logger.info("Test cache map cifar3")
@@ -1841,9 +1839,7 @@ def test_cache_map_cifar3():

some_cache = ds.DatasetCache(session_id=session_id, size=1, spilling=False)

ds1 = ds.Cifar10Dataset(CIFAR10_DATA_DIR)
resize_op = c_vision.Resize((224, 224))
ds1 = ds1.map(input_columns=["image"], operations=resize_op, cache=some_cache)
ds1 = ds.Cifar10Dataset(CIFAR10_DATA_DIR, cache=some_cache)

num_epoch = 2
iter1 = ds1.create_dict_iterator(num_epochs=num_epoch)


+ 29
- 0
tests/ut/python/dataset/test_cache_nomap.py View File

@@ -1775,6 +1775,7 @@ def test_cache_nomap_textfile2():
|
TextFile
"""

def my_tokenizer(line):
words = line.split()
if not words:
@@ -1884,6 +1885,34 @@ def test_cache_nomap_get_repeat_count():
num_iter += 1
assert num_iter == 12


@pytest.mark.skipif(os.environ.get('RUN_CACHE_TEST') != 'TRUE', reason="Require to bring up cache server")
def test_cache_nomap_long_file_list():
    """
    Cache placed directly over a TFRecord leaf that receives a very long file list.

    The first entry of DATA_DIR is repeated 1000 times as the dataset's file
    argument, and the cache is created with size=1 and spilling disabled.
    Iterating the pipeline is expected to raise a RuntimeError whose message
    contains "Out of memory".

        Cache
          |
       TFRecord
    """

    logger.info("Test cache nomap long file list")

    # The cache session is handed in by the driver script through the environment.
    if "SESSION_ID" not in os.environ:
        raise RuntimeError("Testcase requires SESSION_ID environment variable")
    session_id = int(os.environ['SESSION_ID'])

    some_cache = ds.DatasetCache(session_id=session_id, size=1, spilling=False)

    # Same file listed 1000 times to build a long argument list.
    long_file_list = [DATA_DIR[0]] * 1000
    ds1 = ds.TFRecordDataset(long_file_list, SCHEMA_DIR, columns_list=["image"], cache=some_cache)

    with pytest.raises(RuntimeError) as e:
        # Drain the pipeline; the rows themselves are not inspected.
        for _ in ds1:
            pass
    assert "Out of memory" in str(e.value)

    logger.info("test_cache_nomap_long_file_list Ended.\n")


if __name__ == '__main__':
test_cache_nomap_basic1()
test_cache_nomap_basic2()


Loading…
Cancel
Save