Browse Source

!6631 Fix a bug where GPU timeline data is incorrect in multi-card training

Merge pull request !6631 from gzhcv/GpuTimeline
tags/v1.0.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
3b3324fabc
1 changed files with 16 additions and 13 deletions
  1. +16
    -13
      mindspore/profiler/parser/integrator.py

+ 16
- 13
mindspore/profiler/parser/integrator.py View File

@@ -507,10 +507,6 @@ class BaseTimelineGenerator:
'op_exe_times': 0
}

def __init__(self, profiling_dir, device_id):
self._profiling_dir = profiling_dir
self._device_id = device_id

def _load_timeline_data(self):
"""Load timeline data from file."""

@@ -651,6 +647,17 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
_output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv"
_activity_keys_list = []

def __init__(self, profiling_dir, device_id):
self._profiling_dir = profiling_dir
self._device_id = device_id
self._timeline_meta = []
self._timeline_summary = {
'total_time': 0,
'num_of_streams': 0,
'num_of_ops': 0,
'op_exe_times': 0
}

def _get_and_validate_path(self, file_name):
"""Generate op or activity file path from file name, and validate this path."""
file_path = os.path.join(
@@ -755,15 +762,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):


def init_timeline(self):
"""
Init timeline metadata, adding all collected info.

Args:
all_reduce_info (list[list]): The metadata of AllReduce operator.
framework_info (dict): The framework metadata.
aicpu_info (dict): The metadata of AICPU operator.
min_cycle_counter (float): The minimum cycle counter of the timeline.
"""
"""Init timeline metadata, adding all collected info."""
timeline_list = self._load_timeline_data()

# Init a dict for counting the num of streams.
@@ -782,6 +781,10 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
_display_filename = 'ascend_timeline_display_{}.json'
_timeline_summary_filename = 'ascend_timeline_summary_{}.json'

def __init__(self, profiling_dir, device_id):
self._profiling_dir = profiling_dir
self._device_id = device_id

def _load_timeline_data(self):
"""Load timeline data from file."""
file_path = os.path.join(


Loading…
Cancel
Save