!287 Change the method to find the step trace files.

Merge pull request !287 from yelihua/dev_profiler
5 years ago · 3956a90a39
--- a/mindinsight/backend/profiler/profile_api.py
+++ b/mindinsight/backend/profiler/profile_api.py
@@ -31,6 +31,7 @@ from mindinsight.datavisual.utils.tools import get_train_id, get_profiler_dir, \
    unquote_args, to_int, get_device_id
 from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory
 from mindinsight.profiler.analyser.minddata_analyser import MinddataAnalyser
 from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException
 from mindinsight.profiler.proposer.compose_proposer import ComposeProposal
 from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir
 from mindinsight.profiler.common.validator.validate import validate_condition, \
@@ -131,9 +132,13 @@ def get_training_trace_graph():
    graph_type = to_int(graph_type, 'graph_type')
    device_id = request.args.get("device_id", default='0')
    _ = to_int(device_id, 'device_id')
    graph_info = {}
    try:
        analyser = AnalyserFactory.instance().get_analyser(
            'step_trace', profiler_dir, device_id)
    except ProfilerFileNotFoundException:
        return jsonify(graph_info)

    analyser = AnalyserFactory.instance().get_analyser(
        'step_trace', profiler_dir, device_id)
    graph_info = analyser.query({
        'filter_condition': {
            'mode': 'step',
--- a/mindinsight/profiler/common/util.py
+++ b/mindinsight/profiler/common/util.py
@@ -106,9 +106,12 @@ def get_summary_for_step_trace(average_info, header):
    tail = get_field_value(average_info, 'tail', header)
    summary = {
        'total_time': total_time,
        'iteration_interval': calculate_percent(iteration_interval, total_time),
        'fp_and_bp': calculate_percent(fp_and_bp, total_time),
        'tail': calculate_percent(tail, total_time)
        'iteration_interval': iteration_interval,
        'iteration_interval_percent': calculate_percent(iteration_interval, total_time),
        'fp_and_bp': fp_and_bp,
        'fp_and_bp_percent': calculate_percent(fp_and_bp, total_time),
        'tail': tail,
        'tail_percent': calculate_percent(tail, total_time)
    }
    return summary

--- a/mindinsight/profiler/parser/step_trace_parser.py
+++ b/mindinsight/profiler/parser/step_trace_parser.py
@@ -21,10 +21,9 @@ from collections import namedtuple
 from decimal import Decimal

 from mindinsight.profiler.common.exceptions.exceptions import ProfilerPathErrorException, \
    JobIdMismatchException
    JobIdMismatchException, ProfilerIOException
 from mindinsight.profiler.common.log import logger as log
 from mindinsight.profiler.common.util import get_summary_for_step_trace
 from mindinsight.utils.exceptions import MindInsightException

 StepTraceStruct = namedtuple(
    'TrainingTraceStruct', ['tag_id', 'task_id', 'stream_id', 'sys_count']
@@ -72,25 +71,39 @@ class StepTraceParser:
    def parse_and_save(self):
        """Parse step trace files and save the result."""
        try:
            source_file = self._get_step_trace_file()
            self._parse(source_file)
            source_files = self._get_step_trace_files()
            self._parse(source_files)
            self._save()
        except MindInsightException as err:
            log.error("Failed to parse and save step trace files.")
        except IOError as err:
            log.exception(err)
            raise ProfilerIOException()
        else:
            log.info("Finish to save intermediate result for step trace file.")

    def _get_step_trace_file(self):
        """Get step trace file."""
        profiling_path = self._input_dir
    def _get_step_trace_files(self):
        """
        Get step trace files.

        The files are searched under `self._input_dir` first; if none are
        found there, the `data` sub-directory of it is searched instead.

        Returns:
            list, the result of `self._search_file` for the directory in
            which step trace files were found.

        Raises:
            ProfilerPathErrorException: If no step trace file is found in
                either directory.
        """
        # step trace files may be under $profiler_dir or $profiler_dir/data
        profiler_dir = self._input_dir
        step_trace_files = self._search_file(profiler_dir)
        if not step_trace_files:
            # try to find step trace files under $profiler_dir/data
            profiler_dir = os.path.join(profiler_dir, 'data')
            step_trace_files = self._search_file(profiler_dir)
        # still nothing after the fallback search: report the files as missing
        if not step_trace_files:
            raise ProfilerPathErrorException('Training trace file does not exist.')

        return step_trace_files

    @staticmethod
    def _search_file(input_dir):
        """Search step trace file under specific input directory."""
        # validate input_dir
        if not os.path.isdir(profiling_path):
        if not os.path.isdir(input_dir):
            raise ProfilerPathErrorException(
                '{} does not exist or is not a dir'.format(profiling_path)
                '{} does not exist or is not a dir'.format(input_dir)
            )
        # get step trace files
        files = os.listdir(profiling_path)
        files = os.listdir(input_dir)
        step_trace_files = list(
            filter(
                lambda file: file.startswith('training_trace') and not file.endswith('.done'),
@@ -98,36 +111,46 @@ class StepTraceParser:
            )
        )
        # validate result
        if not step_trace_files:
            raise ProfilerPathErrorException('training trace file does not exist')
        if len(step_trace_files) > 1:
            log.warning("Not enable to parse multiple step trace files yet.")
        step_trace_file = os.path.join(profiling_path, step_trace_files[0])
        return step_trace_file
            # the format of file name is like
            # `training_trace.46.dev.profiler_default_tag.$id.slice_$number`
            # use the $number as the sorted key
            try:
                step_trace_files.sort(key=lambda path: int(path.rsplit('_', 1)[-1]))
            except ValueError as err:
                log.warning("Unable to parse file names: %s. %s", step_trace_files, err)
                step_trace_files = []

        file_paths = [os.path.join(input_dir, file) for file in step_trace_files]
        log.info("Find %d step trace files.", len(file_paths))
        return file_paths

    def _parse(self, source_file):
        """Parse source step trace file."""
        log.info("Start to parse step  trace file.")
        with open(source_file, 'rb') as handler:
            content = handler.read()
            for step_trace in self._get_next_step_trace(content):
                if self._skip_first_step:
                    self._skip_first_step = False
                else:
    def _parse(self, source_files):
        """
        Parse source step trace files.

        Each file is read in binary mode and its step traces are recorded in
        the order the files are given; the average info is recorded once
        after all files have been processed.

        Args:
            source_files (list): The paths of the step trace files.
        """
        log.info("Start to parse step trace file.")
        # a single dict is created here and passed for every file, so it is
        # not reset between files
        # NOTE(review): presumably this lets a step that spans two files be
        # completed - confirm against _get_next_step_trace
        event_info = {}
        for source_file in source_files:
            with open(source_file, 'rb') as handler:
                content = handler.read()
                for step_trace in self._get_next_step_trace(content, event_info):
                    # when the flag is set, drop only the first yielded step and
                    # clear the flag so every later step is recorded
                    if self._skip_first_step:
                        self._skip_first_step = False
                        continue
                    self._record_trace_event(step_trace)
        self._record_average_info()
        log.info("Finish to parse step trace file.")

    def _get_next_step_trace(self, content):
    def _get_next_step_trace(self, content, event_info):
        """
        Get next step trace info.

        Args:
            content (bytes): The input step trace info
            content (bytes): The input step trace info.
            event_info (dict): The event info.

        Returns:
            Generator, return the step trace one by one.
        """
        event_info = {}
        for pos in range(0, len(content), 20):
            next_event = self._get_trace_struct(content[pos:pos + self._event_size])
            self._construct_event_info(next_event, event_info)
@@ -251,7 +274,7 @@ class StepTraceParser:
        log.info("Finish add average info for step trace.")

    def _save(self):
        log.info("Start to save step  trace file.")
        log.info("Start to save step trace file.")
        if not self._header:
            return
        with open(self._output_path, 'w') as file_handle:
--- a/mindinsight/profiler/profiling.py
+++ b/mindinsight/profiler/profiling.py
@@ -221,7 +221,10 @@ class Profiler:
            logger.warning(err.message)

        # analyse step trace info
        self._analyse_step_trace(source_path, framework_parser)
        try:
            self._analyse_step_trace(source_path, framework_parser)
        except MindInsightException as err:
            logger.warning(err.message)

        # analyse timeline info
        self._analyse_timeline()
--- a/tests/st/func/profiler/test_analyse.py
+++ b/tests/st/func/profiler/test_analyse.py
@@ -149,9 +149,12 @@ class TestProfilerAnalyse(TestCase):
        summary = analyser.summary
        assert summary == {
            'total_time': 205.3809,
            'iteration_interval': '0.1%',
            'fp_and_bp': '57.48%',
            'tail': '42.42%',
            'iteration_interval': 0.2038,
            'iteration_interval_percent': '0.1%',
            'fp_and_bp': 118.054,
            'fp_and_bp_percent': '57.48%',
            'tail': 87.1231,
            'tail_percent': '42.42%',
            'total_steps': 322}

    @pytest.mark.level0