|
- # Copyright 2020 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """The integrator for integrating parsed profiling files."""
- import csv
- import json
- import os
- import stat
- from decimal import Decimal
-
- from mindspore import log as logger
- from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
- ProfilerFileNotFoundException, ProfilerRawFileException
- from mindspore.profiler.common.util import query_latest_trace_time_file, to_int, to_millisecond
- from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
- from mindspore.profiler.parser.container import TimelineContainer
-
- SIZE_LIMIT_DEFAULT = 20 * 1024 * 1024 # 20MB
-
-
- class Integrator:
- """
- The integrator for integrating parsed profiling files.
-
- Args:
- profiling_dir (str): The directory where the parsed profiling files are
- located.
- device_id (str): The device ID.
- """
- _file_name_aicore_detail_time = 'output_op_compute_time_{}.txt'
- _file_name_aicpu_time = 'output_data_preprocess_aicpu_{}.txt'
- _file_name_framework = 'framework_raw_{}.csv'
- _header_aicore_type = ['op_type', 'execution_time', 'execution_frequency',
- 'percent']
- _header_aicore_detail = ['full_op_name', 'execution_time']
- _header_aicpu = ['serial_number', 'op_type', 'total_time', 'dispatch_time',
- 'run_start', 'run_end']
-
- _file_name_aicore_type_time = 'aicore_intermediate_{}_type.csv'
- _file_name_aicore_detail_info = 'aicore_intermediate_{}_detail.csv'
- _col_names_detail = ['op_name', 'op_type', 'avg_execution_time', 'subgraph', 'full_op_name', 'op_info']
- _none_filter_condition_key = ['is_display_detail', 'is_display_full_op_name']
- _none_sort_col_names = ['op_info']
- _aicore_data = []
- _aicore_detail_data = []
- _aicore_trace_data = []
-
- def __init__(self, profiling_dir, device_id):
- self._profiling_dir = profiling_dir
- self._device_id = device_id
- self._op_time_cache = {}
- self._total_time = Decimal('0.0')
-
- def integrate(self):
- """Integrate the parsed profiling files."""
- self._parse_aicore_detail_time()
- self._parse_aicore_type_time()
- self._parse_aicpu_time()
-
- def get_aicore_data(self):
- self._aicore_data_load()
- return self._aicore_data
-
- def get_aicore_detail_data(self):
- self._aicore_detail_data_load()
- return self._aicore_detail_data
-
- def get_aicore_trace_data(self):
- self._aicore_trace_data_load()
- return self._aicore_trace_data
-
- def query_for_all_reduce(self):
- return self._query_for_all_reduce()
-
- def query_and_sort_by_op_type(self, filter_condition, op_type_order):
- return self._query_and_sort_by_op_type(filter_condition, op_type_order)
-
- def _parse_aicore_type_time(self):
- """Parse the parsed AICORE operator type file."""
- framework_file = os.path.join(
- self._profiling_dir,
- self._file_name_framework.format(self._device_id)
- )
- framework_file = validate_and_normalize_path(framework_file)
- if not os.path.isfile(framework_file):
- return
-
- op_name_type_cache = {}
- with open(framework_file, 'r') as src_file:
- csv_reader = csv.reader(src_file)
- _ = next(csv_reader)
-
- for row in csv_reader:
- op_name_type_cache[row[3]] = row[5]
-
- op_type_time_cache = {}
- for full_op_name, op_time in self._op_time_cache.items():
- op_type = op_name_type_cache.get(full_op_name)
- if op_type_time_cache.get(op_type) is None:
- op_type_time_cache[op_type] = [op_time, 1]
- else:
- op_type_time_cache[op_type][0] += op_time
- op_type_time_cache[op_type][1] += 1
-
- op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv'
- op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name)
- with open(op_type_file_path, 'w') as type_file:
- csv_writer = csv.writer(type_file)
- csv_writer.writerow(self._header_aicore_type)
-
- for op_type, op_type_time_info in op_type_time_cache.items():
- type_info = [
- op_type, op_type_time_info[0], op_type_time_info[1],
- round((op_type_time_info[0] / self._total_time) * 100, 2)
- ]
- csv_writer.writerow(type_info)
-
- def _parse_aicore_detail_time(self):
- """Parse the parsed AICORE operator time file."""
- aicore_detail_file = os.path.join(
- self._profiling_dir,
- self._file_name_aicore_detail_time.format(self._device_id)
- )
- aicore_detail_file = validate_and_normalize_path(aicore_detail_file)
- if not os.path.isfile(aicore_detail_file):
- return
-
- op_detail_file_name = 'aicore_intermediate_' + self._device_id + '_detail.csv'
- op_detail_file_path = os.path.join(
- self._profiling_dir, op_detail_file_name
- )
- with open(aicore_detail_file, 'r') as src_file:
- row = src_file.readline()
- if row.startswith('op_name'):
- _ = src_file.readline()
- elif row.startswith('====='):
- _ = src_file.readline()
- _ = src_file.readline()
- else:
- return
-
- with open(op_detail_file_path, 'w') as detail_file:
- csv_writer = csv.writer(detail_file)
- csv_writer.writerow(self._header_aicore_detail)
-
- while True:
- row = src_file.readline()
- if not row:
- break
-
- op_infos = row.split()
- if op_infos[0] == 'total':
- self._total_time = Decimal(op_infos[2])
- continue
- self._op_time_cache[op_infos[0]] = Decimal(op_infos[1])
- csv_writer.writerow([op_infos[0], op_infos[1]])
-
- def _parse_aicpu_time(self):
- """Parse the parsed AICPU operator time file."""
- aicpu_file = os.path.join(
- self._profiling_dir,
- self._file_name_aicpu_time.format(self._device_id)
- )
- aicpu_file = validate_and_normalize_path(aicpu_file)
- if not os.path.isfile(aicpu_file):
- return
-
- save_file_name = 'aicpu_intermediate_' + self._device_id + '.csv'
- save_file_path = os.path.join(self._profiling_dir, save_file_name)
- with open(aicpu_file, 'r') as src_file:
- row = src_file.readline()
- if not row.startswith('serial_number'):
- return
- with open(save_file_path, 'w') as save_file:
- csv_writer = csv.writer(save_file)
- csv_writer.writerow(self._header_aicpu)
-
- while True:
- row = src_file.readline()
- if not row:
- break
- infos = row.split()
- if infos[0] == 'AI':
- continue
- csv_writer.writerow(infos)
-
- def _aicore_data_load(self):
- """Load data according to the parsed AICORE operator types file."""
- op_type_file_path = os.path.join(
- self._profiling_dir,
- self._file_name_aicore_type_time.format(self._device_id)
- )
- op_type_file_path = validate_and_normalize_path(op_type_file_path)
- if not os.path.isfile(op_type_file_path):
- logger.warning('The file <%s> does not exist.', op_type_file_path)
- return
-
- with open(op_type_file_path, 'r') as file:
- csv_reader = csv.reader(file)
- _ = next(csv_reader)
- for info in csv_reader:
- self._aicore_data.append([info[0], float(info[1]), int(info[2]), float(info[3])])
-
- def _aicore_detail_data_load(self):
- """Load data according to the parsed AICORE operator file."""
- op_detail_file_path = os.path.join(
- self._profiling_dir,
- self._file_name_aicore_detail_info.format(self._device_id)
- )
- framework_file_path = os.path.join(
- self._profiling_dir,
- self._file_name_framework.format(self._device_id)
- )
- op_detail_file_path = validate_and_normalize_path(op_detail_file_path)
- framework_file_path = validate_and_normalize_path(framework_file_path)
- if not os.path.isfile(op_detail_file_path):
- logger.warning('The file <%s> does not exist.', op_detail_file_path)
- return
- if not os.path.isfile(framework_file_path):
- logger.warning('The file <%s> does not exist.', framework_file_path)
- return
-
- framework_infos = dict()
- with open(framework_file_path, 'r') as file:
- csv_reader = csv.reader(file)
- _ = next(csv_reader)
- for info in csv_reader:
- framework_infos[info[3]] = [
- info[3], info[4], info[5], info[6], json.loads(info[7]) if info[7] else None]
-
- with open(op_detail_file_path, 'r') as file:
- csv_reader = csv.reader(file)
- _ = next(csv_reader)
- for info in csv_reader:
- framework_info = framework_infos.get(info[0])
- self._aicore_detail_data.append(
- [
- framework_info[1], framework_info[2], float(info[1]),
- framework_info[3], framework_info[0], framework_info[4]
- ]
- )
- del framework_infos
-
- def _aicore_trace_data_load(self):
- """Load data according to the parsed AICORE operator types file."""
- file_path = query_latest_trace_time_file(self._profiling_dir, int(self._device_id))
- if not file_path:
- logger.error("Failed to find parsed trace time file.")
- raise ProfilerFileNotFoundException('parsed step trace time file')
- file_path = validate_and_normalize_path(file_path)
- with open(file_path, 'r') as handle:
- csv_reader = csv.reader(handle)
- self.__column__ = next(csv_reader)
- self._aicore_trace_data = list(csv_reader)
- self._size = len(self._aicore_trace_data) - 1
- self._load_point_info()
-
- def _load_point_info(self):
- """Load point info."""
- file_path = os.path.join(self._profiling_dir, 'step_trace_point_info.json')
- file_path = validate_and_normalize_path(file_path)
- if os.path.isfile(file_path):
- with open(file_path, 'r', encoding='utf-8') as file:
- try:
- self._point_info = json.load(file)
- except (json.JSONDecodeError, TypeError) as err:
- logger.warning(err)
- raise ProfilerRawFileException('Fail to parse point info file.')
-
- def _query_for_all_reduce(self):
- """
- Query for all reduce info.
-
- Returns:
- list[dict], reduce information. Each item is the reduce info for one step.
- The reduce info is format like:
- {stream_id: List[Tuple(start_point, end_point, duration, field_name)]}.
- """
- self._aicore_trace_data_load()
- reduce_infos = []
- for row_info in self._aicore_trace_data[:-1]:
- row_info_dict = self._get_info_dict_from_row_data(row_info, 'systime')
- reduce_info = self._sort_reduce_by_time(row_info_dict)
- if reduce_info:
- reduce_infos.extend(reduce_info)
-
- return reduce_infos
-
- def _get_info_dict_from_row_data(self, row_info, time_type):
- """
- Get step info in dict format.
-
- Args:
- row_info (list[str]): Step info, the value is corresponding to `__column__`.
- time_type (str): The value type. `systime` keeps the original value.
- `realtime` transforms the value in millisecond. Default: `realtime`.
-
- Returns:
- dict, step trace information. The key is in `__column__`.
- """
- row_info_dict = {}
- for key, value in zip(self.__column__, row_info):
- if key == 'step_num':
- continue
- value = to_int(value, key)
- row_info_dict[key] = to_millisecond(value) if time_type == 'realtime' else value
- return row_info_dict
-
- def _sort_reduce_by_time(self, row_info_dict):
- """
- Sort reduce info by time.
-
- Args:
- row_info_dict (dict): Step trace information.
-
- Returns:
- list, including the all reduce info sorted by start time only.
- [
- [reduce_field, stream_id, reduce_start, reduce_duration],
- [...],
- [...]
- ]
- """
- factor = 1e5 # convert time unit from 10ns to 1ms
- reduce_pid = 10000
- reduce_info = []
- reduce_fields = [field_name for field_name in self.__column__
- if field_name.startswith('stream_') and not field_name.endswith('point')]
- for reduce_field in reduce_fields:
- reduce_start = row_info_dict.get(reduce_field + '_start_point')
- reduce_start = reduce_start / factor \
- if reduce_start else 0
- reduce_duration = row_info_dict.get(reduce_field)
- reduce_duration = reduce_duration / factor if reduce_duration else 0
- if not (reduce_start and reduce_duration):
- logger.info("Reduce event missing value.")
- continue
- cur_stream_id = reduce_field.split('_', 2)[1]
- reduce_meta = [reduce_field, int(cur_stream_id), reduce_start,
- reduce_duration, reduce_pid]
- reduce_info.append(reduce_meta)
-
- return reduce_info
-
- def _query_and_sort_by_op_type(self, filter_condition, op_type_order: list):
- """
- Query the AICORE operator detail information by `filter_condition`,
- and sort by `op_type_order` and execution time.
-
- Args:
- filter_condition (dict): The filter condition.
- op_type_order (list[str]): The name of the operator type in order.
-
- Returns:
- dict, The results are filtered and sorted.
- """
- self._aicore_detail_data_load()
- if filter_condition is None:
- filter_condition = {}
- self._filter(filter_condition)
-
- type_detail_cache = {}
- for detail_info in self._result:
- op_type = detail_info[1]
- if op_type not in op_type_order:
- continue
- infos = type_detail_cache.get(op_type)
- if infos:
- infos.append(detail_info)
- else:
- type_detail_cache[op_type] = [detail_info]
-
- result = []
- for op_type in op_type_order:
- detail_infos = type_detail_cache.get(op_type)
- if detail_infos is None:
- continue
- detail_infos.sort(key=lambda item: item[2], reverse=True)
- result.extend(detail_infos)
-
- return {
- 'col_name_detail': self._display_col_names_detail,
- 'object': result
- }
-
- def _filter(self, filter_condition):
- """
- Filter the profiling data according to the filter condition.
-
- Args:
- filter_condition (dict): The filter condition.
- """
- def _inner_filter(item: list):
- return self._default_filter(item, filter_condition)
-
- def _inner_map(item: list):
- inner_item = item[0:4]
- if is_display_full_op_name:
- inner_item.append(item[4])
- if is_display_detail:
- inner_item.append(item[5])
- return inner_item
-
- is_display_detail = filter_condition.get('is_display_detail', True)
- is_display_full_op_name = filter_condition.get(
- 'is_display_full_op_name', True
- )
- self._set_display_col_name(is_display_detail, is_display_full_op_name)
- if is_display_detail and is_display_full_op_name:
- self._result = list(filter(_inner_filter, self._aicore_detail_data))
- else:
- self._result = list(
- map(_inner_map, filter(_inner_filter, self._aicore_detail_data))
- )
-
- def _default_filter(self, item, condition):
- """
- The default filter method.
-
- Args:
- item (list[Union[str, float, int]]): A piece of data to be filtered.
- condition (dict): The filter condition.
-
- Returns:
- bool, `True` if the item is satisfied.
- """
- for condition_key, condition_value in condition.items():
- if condition_key in self._none_filter_condition_key:
- continue
- if condition_key in self._col_names_detail:
- index = self._col_names_detail.index(condition_key)
- actual_value = item[index]
- for exp_key, exp_value in condition_value.items():
- if not self._is_match_condition(
- exp_key, exp_value, actual_value):
- return False
- return True
-
- def _is_match_condition(self, exp_key, exp_value, actual_value):
- """
- Check whether the actual value meets the expect condition.
-
- Args:
- exp_key (str): Expect key of the condition.
- exp_value (str): Expect value.
- actual_value (str): Actual value.
-
- Returns:
- bool, `True` if the actual meets the expect condition, else `False`.
- """
- if exp_key == 'in':
- if actual_value not in exp_value:
- return False
- elif exp_key == 'not_in':
- if actual_value in exp_value:
- return False
- elif exp_key == 'partial_match_str_in':
- for partial_match_str in exp_value:
- if partial_match_str in actual_value:
- return True
- return False
- else:
- return False
-
- return True
-
- def _set_display_col_name(self, is_display_detail, is_display_full_op_name):
- """
- Set the display column name according to the filter condition.
-
- Args:
- is_display_detail (bool): Whether to display the detailed operator
- information.
- is_display_full_op_name (bool): Whether to display the operator full
- name.
- """
- self._display_col_names_detail = self._col_names_detail[0:4]
- if is_display_full_op_name:
- self._display_col_names_detail.append(self._col_names_detail[4])
- if is_display_detail:
- self._display_col_names_detail.append(self._col_names_detail[5])
-
-
- class BaseTimelineGenerator:
- """
- Analyse timeline data from file.
- """
- __col_names__ = ['op_name', 'stream_id', 'start_time', 'duration']
- _output_timeline_data_file_path = 'output_timeline_data_{}.txt'
- _min_cycle_counter_file_path = 'min_cycle_counter_{}.txt'
- _timeline_meta = []
- _timeline_summary = {
- 'total_time': 0,
- 'num_of_streams': 0,
- 'num_of_ops': 0,
- 'op_exe_times': 0
- }
-
- def _load_timeline_data(self):
- """Load timeline data from file."""
-
- def _parse_timeline_data(self):
- """Parse timeline data."""
-
- def init_timeline(self):
- """Init timeline metadata, adding all collected info."""
-
- def write_timeline(self, size_limit=SIZE_LIMIT_DEFAULT):
- """Load data according to the parsed profiling files."""
- # Write timeline to file.
- logger.info('Writing timeline file...')
- self.write_timeline_to_json_by_limitation(size_limit)
- logger.info('Finished file writing!')
-
- def write_timeline_to_json_by_limitation(self, size_limit):
- """Write timeline to json by limitation."""
- display_filename = self._display_filename.format(self._device_id)
- display_file_path = os.path.join(
- self._profiling_dir,
- display_filename
- )
- display_file_path = validate_and_normalize_path(display_file_path)
-
- length = len(self._timeline_meta)
- try:
- with open(display_file_path, 'w') as json_file:
- json_file.write('[')
- for index, item in enumerate(self._timeline_meta):
- json.dump(item, json_file)
- file_size = os.path.getsize(display_file_path)
- if file_size > size_limit:
- break
- if index == length - 1:
- break
- json_file.write(',')
- json_file.write(']')
- os.chmod(display_file_path, stat.S_IREAD | stat.S_IWRITE)
- except (IOError, OSError) as err:
- logger.error('Error occurred when write timeline display file: %s', err)
- raise ProfilerIOException
-
- def write_timeline_summary(self):
- """Write timeline summary to json."""
- timeline_summary_file_path = os.path.join(
- self._profiling_dir,
- self._timeline_summary_filename.format(self._device_id)
- )
-
- timeline_summary_file_path = validate_and_normalize_path(timeline_summary_file_path)
-
- try:
- with open(timeline_summary_file_path, 'w') as json_file:
- json.dump(self._timeline_summary, json_file)
- os.chmod(timeline_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
- except (IOError, OSError) as err:
- logger.error('Error occurred when write timeline summary file: %s', err)
- raise ProfilerIOException
-
- @staticmethod
- def _update_num_of_streams(timeline, stream_count_dict):
- """Update number of streams."""
- stream_id = timeline[1]
- if stream_id not in stream_count_dict.keys():
- stream_count_dict[stream_id] = 1
- else:
- stream_count_dict[stream_id] += 1
-
- def get_min_cycle_counter(self):
- """
- Get minimum cycle counter.
-
- Returns:
- float, the minimum value of the cycle counter.
- """
- file_path = os.path.join(
- self._profiling_dir,
- self._min_cycle_counter_file_path.format(self._device_id)
- )
-
- file_path = validate_and_normalize_path(file_path)
-
- if os.path.exists(file_path):
- try:
- with open(file_path, 'r') as f_obj:
- min_cycle_counter = f_obj.read()
- min_cycle_counter = float(min_cycle_counter) \
- if not min_cycle_counter == 'inf' else 0
- except (IOError, OSError) as err:
- logger.error('Error occurred when read minimum cycle counter: %s', err)
- raise ProfilerIOException
- else:
- min_cycle_counter = 0
- logger.info("No min cycle counter recorded.")
-
- return min_cycle_counter
-
- def _add_framework_info(self, framework_obj_list):
- """
- Add framework info into timeline metadata.
-
- Args:
- framework_obj_list (list): The framework metadata.
- """
- logger.debug('Start adding framework info into timeline...')
- # Get the framework info that will be written into timeline.
- framework_info_dict = {}
- for framework_obj in framework_obj_list:
- op_name = framework_obj[0]
- op_type = framework_obj[1]
- op_full_name = framework_obj[4]
- op_info = framework_obj[5]
- framework_info_dict[op_full_name] = {
- 'name': op_name,
- 'args': {
- 'type': op_type,
- 'fullname': op_full_name
- }
- }
- framework_info_dict[op_full_name]['args'].update(op_info)
-
- # Insert framework info into timeline.
- for timeline_item in self._timeline_meta:
- op_full_name = timeline_item.get('name')
- framework_item = framework_info_dict.get(op_full_name)
- if framework_item:
- timeline_item['name'] = framework_item.get('name')
- timeline_item['args'] = framework_item.get('args')
- logger.debug('Finished adding framework info into timeline...')
-
- class GpuTimelineGenerator(BaseTimelineGenerator):
- """Generate gpu Timeline data from file."""
- _display_filename = 'gpu_timeline_display_{}.json'
- _timeline_summary_filename = 'gpu_timeline_summary_{}.json'
- _output_op_execute_time_file_path = "op_execute_timestamp_{}.txt"
- _output_activity_execute_time_file_path = "activity_execute_timestamp_{}.txt"
- _output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv"
- _activity_keys_list = []
-
- def __init__(self, profiling_dir, device_id):
- self._profiling_dir = profiling_dir
- self._device_id = device_id
- self._timeline_meta = []
- self._timeline_summary = {
- 'total_time': 0,
- 'num_of_streams': 0,
- 'num_of_ops': 0,
- 'op_exe_times': 0
- }
-
- def _get_and_validate_path(self, file_name):
- """Generate op or activity file path from file name, and validate this path."""
- file_path = os.path.join(
- self._profiling_dir,
- file_name.format(self._device_id)
- )
- file_path = validate_and_normalize_path(file_path)
- if not os.path.exists(file_path):
- logger.error("Failed to find parsed timeline file.")
- raise ProfilerFileNotFoundException('parsed timeline file')
-
- return file_path
-
- def _parse_timeline_data(self, timeline, min_cycle_counter):
- """Parse timeline data."""
- # factor to convert the time unit of start_time(ts) from 1ns to 1us for timeline display
- factor = 1000
- op_meta = TimelineContainer(timeline)
- timeline_dict = {}
- timeline_dict['name'] = op_meta.op_name
- timeline_dict['ph'] = 'X'
- timeline_dict['tid'] = op_meta.stream_id
- timeline_dict['ts'] = (op_meta.start_time - min_cycle_counter) / factor
- dur = op_meta.duration
- timeline_dict['dur'] = dur
- if op_meta.pid is None:
- timeline_dict['pid'] = int(self._device_id)
- else: # AllReduce and AI CPU pid
- timeline_dict['pid'] = op_meta.pid
- if len(timeline) > 4:
- # len(timeline) > 4 refers to activity data, else op data.
- # Add args for activity data
- args_dict = {}
- for ix, value in enumerate(timeline[4:]):
- args_dict[self._activity_keys_list[ix]] = value
- timeline_dict['args'] = args_dict
- else:
- # Update total time of operator execution.
- self._timeline_summary['total_time'] += dur / factor
- self._timeline_summary['op_exe_times'] += 1
-
- self._timeline_meta.append(timeline_dict)
-
- def _load_timeline_data(self):
- """Load timeline data from file."""
- op_file_path = self._get_and_validate_path(
- self._output_op_execute_time_file_path)
- activity_file_path = self._get_and_validate_path(
- self._output_activity_execute_time_file_path)
- activity_args_file_path = self._get_and_validate_path(
- self._output_gpu_activity_info_file_path)
-
- timeline_list = self._load_op_data(op_file_path) + \
- self._load_activity_data(activity_file_path, activity_args_file_path)
- timeline_list.sort(key=lambda x: float(x[2]))
-
- return timeline_list
-
- def _load_op_data(self, op_file_path):
- """Load operator data from file"""
- op_timeline_list = []
- try:
- with open(op_file_path, 'r') as f_obj:
- for line in f_obj:
- self._timeline_summary['num_of_ops'] += 1
- op_list = line.strip('\n').strip().split(';')
- time_arr = op_list[-1]
- time_arr = time_arr.split(" ")
- for time in time_arr:
- time = time.split(",")
- line_list = op_list[:2] + time
- op_timeline_list.append(line_list)
- except (IOError, OSError) as err:
- logger.error('Error occurred when load operator timeline data intermediate file: %s', err)
- raise ProfilerIOException
-
- return op_timeline_list
-
- def _load_activity_data(self, activity_file_path, activity_args_file_path):
- """Load activity data from file"""
- activity_timeline_list = []
- try:
- args_dict = {}
- with open(activity_args_file_path, 'r') as args_file:
- csv_reader = csv.reader(args_file)
- keys_list = next(csv_reader)
- # keys_list [name, type, op_full_name, stream_id, block_dim, grid_dim, ...]
- self._activity_keys_list = keys_list[1:3] + keys_list[4:6]
- for info in csv_reader:
- args_dict[info[0]] = info[1:3] + info[4:6]
- with open(activity_file_path, 'r') as f_obj:
- for line in f_obj:
- line_list = line.strip('\n').split(';')
- # concat activity args info.
- line_list += args_dict[line_list[0]]
- activity_timeline_list.append(line_list)
- except (IOError, OSError) as err:
- logger.error('Error occurred when load activity timeline data intermediate file: %s', err)
- raise ProfilerIOException
-
- return activity_timeline_list
-
-
- def init_timeline(self):
- """Init timeline metadata, adding all collected info."""
- timeline_list = self._load_timeline_data()
-
- # Init a dict for counting the num of streams.
- stream_count_dict = {}
- for timeline in timeline_list:
- self._parse_timeline_data(timeline, 0)
- # Updating the collection of streams.
- if len(timeline) == 4:
- self._update_num_of_streams(timeline, stream_count_dict)
-
- # Update timeline summary info
- self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys())
-
- class AscendTimelineGenerator(BaseTimelineGenerator):
- """Generate ascend Timeline data from file."""
- _display_filename = 'ascend_timeline_display_{}.json'
- _timeline_summary_filename = 'ascend_timeline_summary_{}.json'
-
- def __init__(self, profiling_dir, device_id):
- self._profiling_dir = profiling_dir
- self._device_id = device_id
-
- def _load_timeline_data(self):
- """Load timeline data from file."""
- file_path = os.path.join(
- self._profiling_dir,
- self._output_timeline_data_file_path.format(self._device_id)
- )
- file_path = validate_and_normalize_path(file_path)
- if not os.path.exists(file_path):
- logger.error("Failed to find parsed timeline file.")
- raise ProfilerFileNotFoundException('parsed timeline file')
-
- timeline_list = []
- try:
- with open(file_path, 'r') as f_obj:
- for line in f_obj:
- if not line.startswith('op_name'):
- line_list = line.strip('\n').split(',')
- timeline_list.append(line_list)
- except (IOError, OSError) as err:
- logger.error('Error occurred when read timeline intermediate file: %s', err)
- raise ProfilerIOException
-
- return timeline_list
-
- def _parse_timeline_data(self, timeline, min_cycle_counter):
- """Parse timeline data."""
- # factor to convert the time unit from 1ms to 1us for timeline display
- factor = 1000
- op_meta = TimelineContainer(timeline)
- timeline_dict = {}
- timeline_dict['name'] = op_meta.op_name
- timeline_dict['ph'] = 'X'
- timeline_dict['tid'] = op_meta.stream_id
- timeline_dict['ts'] = (op_meta.start_time - min_cycle_counter) * factor
- dur = op_meta.duration * factor
- timeline_dict['dur'] = dur
- if op_meta.pid is None:
- timeline_dict['pid'] = int(self._device_id)
- # Update total time of operator execution.
- self._timeline_summary['total_time'] += dur
- else: # AllReduce and AI CPU pid
- timeline_dict['pid'] = op_meta.pid
- self._timeline_meta.append(timeline_dict)
-
- def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter):
- """
- Init timeline metadata, adding all collected info.
-
- Args:
- all_reduce_info (list[list]): The metadata of AllReduce operator.
- framework_info (dict): The framework metadata.
- aicpu_info (dict): The metadata of AI CPU operator.
- min_cycle_counter (float): The minimum cycle counter of the timeline.
- """
- if min_cycle_counter == float('inf'):
- min_cycle_counter = 0
-
- logger.info('Initiating timeline...')
- timeline_list = self._load_timeline_data()
- self._timeline_summary['op_exe_times'] = len(timeline_list)
-
- # Add AllReduce info to timeline temp list and sort by start time.
- if all_reduce_info:
- logger.debug('AllReduce info found. Start adding info into timeline...')
- timeline_list.extend(all_reduce_info)
- timeline_list.sort(key=lambda x: float(x[2]))
-
- # Add AI CPU data into timeline temp list and sort by start time.
- aicpu_data = aicpu_info.get('info')
- if aicpu_data:
- timeline_list.extend(aicpu_data)
- timeline_list.sort(key=lambda x: float(x[2]))
- self._timeline_summary['op_exe_times'] += aicpu_info.get('op_exe_times', 0)
- self._timeline_summary['num_of_streams'] += aicpu_info.get('num_of_streams', 0)
- self._timeline_summary['num_of_ops'] += aicpu_info.get('num_of_ops', 0)
- self._timeline_summary['total_time'] += aicpu_info.get('total_time', 0)
-
- # Init a dict for counting the num of streams.
- stream_count_dict = {}
- for timeline in timeline_list:
- self._parse_timeline_data(timeline, min_cycle_counter)
- # Updating the collection of streams.
- if len(timeline) == 4:
- self._update_num_of_streams(timeline, stream_count_dict)
-
- # Get framework metadata.
- framework_obj_list = framework_info.get('object')
- # The length of list is the number of operators.
- self._timeline_summary['num_of_ops'] += len(framework_obj_list)
- self._add_framework_info(framework_obj_list)
- logger.info('Finished adding info into timeline...')
-
- # Update timeline summary info
- self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys())
|