!9544 Fix a possible seg fault in GenerateCRC caused by std::regex

From: @lixiachen Reviewed-by: @jonyguo,@heleiwang Signed-off-by: @heleiwang
5 years ago · aa5d4e08a9
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc
@@ -453,8 +453,8 @@ uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
  ss_str = std::regex_replace(ss_str, std::regex("device_id.*\n"), "");

  // Filter out the operator id field
  ss_str = std::regex_replace(ss_str, std::regex(".*Parent.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex(".*Child.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex(" *Parent.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex(" *Child.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex(R"(\(\s*\d+?\))"), "");

  // Doesn't matter whether there is any parent node above CacheOp or not.
--- a/tests/ut/python/cachetests/cachetest_py.sh
+++ b/tests/ut/python/cachetests/cachetest_py.sh
@@ -324,6 +324,9 @@ HandleRcExit $? 0 0
 PytestCmd "test_cache_nomap.py" "test_cache_nomap_get_repeat_count"
 HandleRcExit $? 0 0

 PytestCmd "test_cache_nomap.py" "test_cache_nomap_long_file_list"
 HandleRcExit $? 0 0

 for i in $(seq 1 3)
 do
   test_name="test_cache_nomap_multiple_cache${i}"
--- a/tests/ut/python/dataset/test_cache_map.py
+++ b/tests/ut/python/dataset/test_cache_map.py
@@ -1828,9 +1828,7 @@ def test_cache_map_cifar3():

       cache
         |
     Map(resize)
         |
      Cifar100
      Cifar10
    """

    logger.info("Test cache map cifar3")
@@ -1841,9 +1839,7 @@ def test_cache_map_cifar3():

    some_cache = ds.DatasetCache(session_id=session_id, size=1, spilling=False)

    ds1 = ds.Cifar10Dataset(CIFAR10_DATA_DIR)
    resize_op = c_vision.Resize((224, 224))
    ds1 = ds1.map(input_columns=["image"], operations=resize_op, cache=some_cache)
    ds1 = ds.Cifar10Dataset(CIFAR10_DATA_DIR, cache=some_cache)

    num_epoch = 2
    iter1 = ds1.create_dict_iterator(num_epochs=num_epoch)
--- a/tests/ut/python/dataset/test_cache_nomap.py
+++ b/tests/ut/python/dataset/test_cache_nomap.py
@@ -1775,6 +1775,7 @@ def test_cache_nomap_textfile2():
         |
     TextFile
    """

    def my_tokenizer(line):
        words = line.split()
        if not words:
@@ -1884,6 +1885,34 @@ def test_cache_nomap_get_repeat_count():
        num_iter += 1
    assert num_iter == 12


@pytest.mark.skipif(os.environ.get('RUN_CACHE_TEST') != 'TRUE', reason="Require to bring up cache server")
def test_cache_nomap_long_file_list():
    """
    Test cache after TFRecord with a long list of files as arguments

        Cache
          |
      TFRecord

    The same data file is listed 1000 times so that the TFRecord dataset is
    created with a very long file list. Iterating the cached pipeline is
    expected to raise a RuntimeError whose message contains "Out of memory".

    Raises:
        RuntimeError: if the SESSION_ID environment variable is not set.
    """

    logger.info("Test cache nomap long file list")
    # The cache session id is supplied through the SESSION_ID environment variable.
    if "SESSION_ID" in os.environ:
        session_id = int(os.environ['SESSION_ID'])
    else:
        raise RuntimeError("Testcase requires SESSION_ID environment variable")

    some_cache = ds.DatasetCache(session_id=session_id, size=1, spilling=False)

    # Repeat the first data file 1000 times to build the long file list.
    ds1 = ds.TFRecordDataset([DATA_DIR[0] for _ in range(0, 1000)], SCHEMA_DIR, columns_list=["image"],
                             cache=some_cache)

    with pytest.raises(RuntimeError) as e:
        # Drain the pipeline; a generator avoids building a throwaway list.
        sum(1 for _ in ds1)
    assert "Out of memory" in str(e.value)
    logger.info("test_cache_nomap_long_file_list Ended.\n")


 if __name__ == '__main__':
    test_cache_nomap_basic1()
    test_cache_nomap_basic2()