!10063 Modify profiler directory structure to fit the new Run Package

From: @gzhcv Reviewed-by: Signed-off-by:
5 years ago · 261bb736de
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -214,11 +214,8 @@ class _Context:
        self.set_param(ms_ctx_param.max_call_depth, max_call_depth)

    def set_profiling_options(self, option):
        """Validate the profiling option and store it in the context.

        Args:
            option (str): The profiling option. Must be one of 'training_trace',
                'task_trace', 'task_trace:training_trace', 'training_trace:task_trace'
                or 'op_trace'.

        Raises:
            TypeError: If `option` is not a str.
            ValueError: If `option` is a str but not one of the supported values.
        """
        # Check the type before the value: a non-str argument ordinarily never matches
        # any entry of the list below, so testing membership first would report a
        # ValueError and leave the TypeError branch unreachable.
        if not isinstance(option, str):
            raise TypeError("The parameter option must be str.")
        options = ["training_trace", "task_trace",
                   "task_trace:training_trace", "training_trace:task_trace", "op_trace"]
        if option not in options:
            raise ValueError("Profiling options must be in 'training_trace' 'task_trace' "
                             "'task_trace:training_trace' 'training_trace:task_trace' or 'op_trace'.")
        self.set_param(ms_ctx_param.profiling_options, option)

    def set_variable_memory_max_size(self, variable_memory_max_size):
--- a/mindspore/profiler/parser/framework_parser.py
+++ b/mindspore/profiler/parser/framework_parser.py
@@ -174,7 +174,6 @@ class FrameworkParser:
        device_id (str): The device ID.
        output_path (str): The directory of the parsed file. Default: `./`.
    """
    _raw_data_dir = '/var/log/npu/profiling'
    _regex_framework = r'Framework\.(?P<data_type>.+)\.(?P<device_id>\d).+'
    _regex_framework_in_data = r'Framework\.(?P<data_type>.+)\.' \
                               r'(?P<device_id>\d)\.(?P<profiling_id>[a-zA-Z0-9]+).+'
@@ -193,6 +192,7 @@ class FrameworkParser:
    _task_id_threshold = 25000

    def __init__(self, profiling_id, device_id, output_path='./'):
        self._raw_data_dir = output_path
        self._profiling_path = self._get_raw_profiling_path(profiling_id)
        self._backend_type = None
        self._framework_path = {'graph': [], 'task': [], 'point': []}
--- a/mindspore/profiler/profiling.py
+++ b/mindspore/profiler/profiling.py
@@ -16,6 +16,7 @@
 import os
 import stat
 import time
 import json
 from enum import Enum

 from mindspore import log as logger, context
@@ -37,7 +38,6 @@ from mindspore.profiler.parser.optime_parser import OPComputeTimeParser
 from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser, AscendStepTraceParser
 from mindspore.nn.cell import Cell

 PROFILING_LOG_BASE_PATH = "/var/log/npu/profiling"
 INIT_OP_NAME = 'Default/InitDataSetQueue'

 class ProfileOption(Enum):
@@ -72,7 +72,6 @@ class Profiler:
        >>> profiler.analyse()
    """

    _base_profiling_container_path = "/var/log/npu/profiling/container"
    _hwts_output_filename_target = "output_format_data_hwts_"
    _opcompute_output_filename_target = "output_op_compute_time_"
    _aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
@@ -80,9 +79,11 @@ class Profiler:
    def __init__(self, **kwargs):
        # get device_id and device_target
        self._get_devid_and_devtarget()
        output_path = kwargs.pop("output_path", "./data")
        format_time = int(time.time())
        output_path = kwargs.pop("output_path", f"data-{format_time}")
        self._output_path = validate_and_normalize_path(output_path)
        self._output_path = os.path.join(self._output_path, "profiler")
        self._output_path = os.path.join(self._output_path, f"profiler-{format_time}")
        self._base_profiling_container_path = os.path.join(self._output_path, "container")
        if not os.path.exists(self._output_path):
            os.makedirs(self._output_path, exist_ok=True)
            os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
@@ -113,10 +114,25 @@ class Profiler:
                logger.warning("There are invalid params which don't work.")

            os.environ['DEVICE_ID'] = self._dev_id
            os.environ['AICPU_PROFILING_MODE'] = 'true'

            fp_point = os.environ.get("PROFILING_FP_START", "")
            bp_point = os.environ.get("PROFILING_BP_END", "")

            profiling_options = {
                "result_path": self._output_path,
                "fp_point": fp_point,
                "bp_point": bp_point,
                "training_trace": "on",
                "task_trace": "on",
                "ai_core_metrics": "PipeUtilization",
                "aicpu_trace": "on"
            }

            profiling_options = json.dumps(profiling_options)
            # Characters beyond the first 2048 are ignored, which causes profiling option parsing errors
            if len(profiling_options) > 2048:
                raise ValueError("The parameter length exceeds the limit (2048)")
            # use context interface to open profiling, for the new mindspore version(after 2020.5.21)
            context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace")
            context.set_context(enable_profiling=True, profiling_options=profiling_options)

            self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id)
            data_path = os.path.join(self._container_path, "data")
@@ -174,7 +190,7 @@ class Profiler:
            job_id = self._get_profiling_job_id()
            logger.info("Profiling: job id is %s ", job_id)

            source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
            source_path = os.path.join(self._output_path, job_id)
            # parse hwts.log.data.45.dev file, and get task profiling data
            hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
            hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
@@ -353,12 +369,12 @@ class Profiler:
            return self._profiling_job_id

        job_id = ""
        cmd = "ls -t " + PROFILING_LOG_BASE_PATH + "|grep JOB|awk '{print $1}'"
        cmd = "ls -t " + self._output_path + "|grep JOB|awk '{print $1}'"
        r = os.popen(cmd)
        profiling_job_dirs = r.readlines()
        r.close()
        for item in profiling_job_dirs:
            path = os.path.join(PROFILING_LOG_BASE_PATH, item.strip())
            path = os.path.join(self._output_path, item.strip())
            log_file = get_file_names(path, "host_start.log")
            if not log_file:
                logger.error("Profiling: job path %s, host_start.log not exist.", path)
--- a/tests/st/profiler/test_profiler.py
+++ b/tests/st/profiler/test_profiler.py
@@ -128,7 +128,6 @@ def cleanup():
 class TestProfiler:
    device_id = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
    mnist_path = '/home/workspace/mindspore_dataset/mnist'
    profiler_path = os.path.join(os.getcwd(), 'data/profiler/')

    @classmethod
    def teardown_class(cls):
@@ -140,7 +139,9 @@ class TestProfiler:
    @pytest.mark.env_onecard
    def test_gpu_profiler(self):
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        profiler = Profiler()
        profiler = Profiler(output_path='data')
        profiler_name = os.listdir(os.path.join(os.getcwd(), 'data'))[0]
        self.profiler_path = os.path.join(os.getcwd(), f'data/{profiler_name}/')
        ds_train = create_dataset(os.path.join(self.mnist_path, "train"))
        if ds_train.get_dataset_size() == 0:
            raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")
--- a/tests/ut/python/profiler/parser/test_framework_parser.py
+++ b/tests/ut/python/profiler/parser/test_framework_parser.py
@@ -49,13 +49,15 @@ class TestFrameworkParser:
    """Test the class of `FrameworkParser`."""
    def setup_method(self):
        """Build the JOB1, JOB2 and JOB4 parsers before each test case.

        Every `self._output_path_N` / `self._parser_N` pair is assigned twice in
        this method: once inside the `mock.patch.object` block and once more by
        the `copytree`-based statements after it. The second assignment is the
        one that remains when the method returns.
        """
        # First pass: while `FrameworkParser._raw_data_dir` is patched to RAW_DATA_BASE,
        # create one fresh temporary directory per job and build a parser on it.
        with mock.patch.object(FrameworkParser, '_raw_data_dir', RAW_DATA_BASE):
            self._output_path_1 = tempfile.mkdtemp(prefix='test_framework_parser_')
            self._parser_1 = FrameworkParser('JOB1', '0', self._output_path_1)
            self._output_path_2 = tempfile.mkdtemp(prefix='test_framework_parser_')
            self._parser_2 = FrameworkParser('JOB2', '0', self._output_path_2)
            self._output_path_4 = tempfile.mkdtemp(prefix='test_framework_parser_')
            self._parser_4 = FrameworkParser('JOB4', '0', self._output_path_4)
        # Second pass: take only the *name* of a NamedTemporaryFile as the output path,
        # copy the whole RAW_DATA_BASE tree to that path, and rebuild the parser on it.
        # NOTE(review): presumably the unreferenced temporary file is deleted as soon as
        # the object is collected, leaving the path free for copytree -- confirm.
        # NOTE(review): the directories created by mkdtemp above are no longer referenced
        # after these re-assignments -- verify that teardown still cleans them up.
        self._output_path_1 = tempfile.NamedTemporaryFile(prefix='test_framework_parser_').name
        shutil.copytree(RAW_DATA_BASE, self._output_path_1)
        self._parser_1 = FrameworkParser('JOB1', '0', self._output_path_1)
        self._output_path_2 = tempfile.NamedTemporaryFile(prefix='test_framework_parser_').name
        shutil.copytree(RAW_DATA_BASE, self._output_path_2)
        self._parser_2 = FrameworkParser('JOB2', '0', self._output_path_2)
        self._output_path_4 = tempfile.NamedTemporaryFile(prefix='test_framework_parser_').name
        shutil.copytree(RAW_DATA_BASE, self._output_path_4)
        self._parser_4 = FrameworkParser('JOB4', '0', self._output_path_4)

    def teardown_method(self) -> None:
        """Clear up after test case execution."""
--- a/tests/ut/python/pynative_mode/test_context.py
+++ b/tests/ut/python/pynative_mode/test_context.py
@@ -15,6 +15,7 @@
 """ test_context """
 import os
 import shutil
 import json
 import pytest

 from mindspore import context
@@ -94,14 +95,18 @@ def test_profiling_options():
        context.set_context(profiling_options=True)
    with pytest.raises(TypeError):
        context.set_context(profiling_options=1)
    with pytest.raises(ValueError):
        context.set_context(profiling_options="training_")
    with pytest.raises(ValueError):
        context.set_context(profiling_options="training_trace:op_trace")
    context.set_context(profiling_options="training_trace")
    assert context.get_context("profiling_options") == "training_trace"
    context.set_context(profiling_options="training_trace:task_trace")
    assert context.get_context("profiling_options") == "training_trace:task_trace"
    profiling_options = {
        "result_path": "",
        "fp_point": "",
        "bp_point": "",
        "training_trace": "on",
        "task_trace": "on",
        "ai_core_metrics": "PipeUtilization",
        "aicpu_trace": "on"
    }
    profiling_options = json.dumps(profiling_options)
    context.set_context(profiling_options=profiling_options)
    assert context.get_context("profiling_options") == profiling_options


 def test_variable_memory_max_size():