Browse Source

!119: add profiler analyser and framework parser features

Merge pull request !119 from chenchao99/profiler_analyser
tags/v0.3.0-alpha
mindspore-ci-bot Gitee 5 years ago
parent
commit
29d4ccc0af
17 changed files with 1712 additions and 1 deletions
  1. +14
    -0
      mindinsight/profiler/__init__.py
  2. +14
    -0
      mindinsight/profiler/analyser/__init__.py
  3. +298
    -0
      mindinsight/profiler/analyser/analyser.py
  4. +76
    -0
      mindinsight/profiler/analyser/analyser_factory.py
  5. +240
    -0
      mindinsight/profiler/analyser/base_analyser.py
  6. +157
    -0
      mindinsight/profiler/analyser/integrator.py
  7. +14
    -0
      mindinsight/profiler/common/__init__.py
  8. +14
    -0
      mindinsight/profiler/common/exceptions/__init__.py
  9. +63
    -0
      mindinsight/profiler/common/exceptions/error_code.py
  10. +128
    -0
      mindinsight/profiler/common/exceptions/exceptions.py
  11. +20
    -0
      mindinsight/profiler/common/log.py
  12. +14
    -0
      mindinsight/profiler/common/validator/__init__.py
  13. +119
    -0
      mindinsight/profiler/common/validator/validate_path.py
  14. +14
    -0
      mindinsight/profiler/parser/__init__.py
  15. +520
    -0
      mindinsight/profiler/parser/framework_parser.py
  16. +5
    -0
      mindinsight/utils/constant.py
  17. +2
    -1
      requirements.txt

+ 14
- 0
mindinsight/profiler/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 14
- 0
mindinsight/profiler/analyser/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 298
- 0
mindinsight/profiler/analyser/analyser.py View File

@@ -0,0 +1,298 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The specific analyser class."""
import csv
import json
import os

from mindinsight.profiler.analyser.base_analyser import BaseAnalyser
from mindinsight.profiler.common.log import logger


class AicoreTypeAnalyser(BaseAnalyser):
    """
    The analyser for analyzing the AICORE operator types.

    Args:
        profiling_dir (str): The directory where the parsed profiling files are
            located.
        device_id (str): The device ID.
    """
    __col_names__ = ['op_type', 'execution_time', 'execution_frequency',
                     'percent']
    _file_name_aicore_type_time = 'aicore_intermediate_{}_type.csv'

    def _load(self):
        """Load data according to the parsed AICORE operator types file."""
        type_file_path = os.path.join(
            self._profiling_dir,
            self._file_name_aicore_type_time.format(self._device_id)
        )
        if not os.path.isfile(type_file_path):
            logger.warning('The file <%s> does not exist.', type_file_path)
            return

        with open(type_file_path, 'r') as type_file:
            reader = csv.reader(type_file)
            next(reader)  # skip the header row
            self._data.extend(self._convert_field_type(row) for row in reader)

    def _filter(self, filter_condition):
        """
        Filter the profiling data according to the filter condition.

        Args:
            filter_condition (dict): The filter condition.
        """
        self._result = [
            row for row in self._data
            if self._default_filter(row, filter_condition)
        ]

    def _convert_field_type(self, row):
        """
        Convert the field type to the specific type.

        Args:
            row (list[str]): One row data from parsed data.

        Returns:
            list[Union[str, float]], the converted data.
        """
        op_type, exec_time, frequency, percent = row[0], row[1], row[2], row[3]
        return [op_type, float(exec_time), int(frequency), float(percent)]


class AicoreDetailAnalyser(BaseAnalyser):
    """
    The analyser for analyzing all the AICORE operators.

    Args:
        profiling_dir (str): The directory where the parsed profiling files are
            located.
        device_id (str): The device ID.
    """
    __col_names__ = ['op_name', 'op_type', 'execution_time', 'subgraph',
                     'full_op_name', 'op_info']
    _file_name_aicore_detail_time = 'aicore_intermediate_{}_detail.csv'
    _file_name_framework_info = 'framework_raw_{}.csv'

    def __init__(self, profiling_dir, device_id):
        super().__init__(profiling_dir, device_id)
        # These keys control which columns are displayed; they must not be
        # treated as row filters by `_default_filter`.
        self._none_filter_condition_key = [
            'is_display_detail', 'is_display_full_op_name'
        ]

    def query_and_sort_by_op_type(self, filter_condition, op_type_order: list):
        """
        Query the AICORE operator detail information by `filter_condition`,
        and sort by `op_type_order` and execution time.

        Args:
            filter_condition (dict): The filter condition.
            op_type_order (list[str]): The name of the operator type in order.

        Returns:
            dict, the results after filtering and sorting.
        """
        if filter_condition is None:
            filter_condition = {}
        self._filter(filter_condition)

        type_detail_cache = {}
        for detail_info in self._result:
            op_type = detail_info[1]
            if op_type not in op_type_order:
                continue
            type_detail_cache.setdefault(op_type, []).append(detail_info)

        result = []
        for op_type in op_type_order:
            detail_infos = type_detail_cache.get(op_type)
            # BUGFIX: a requested operator type may have no rows after
            # filtering; the old code called `.sort()` on `None` and raised
            # AttributeError. Skip such types instead.
            if not detail_infos:
                continue
            detail_infos.sort(key=lambda item: item[2], reverse=True)
            result.extend(detail_infos)

        return {
            'col_name': self._display_col_names,
            'object': result
        }

    def _load(self):
        """Load data according to the parsed AICORE operator file."""
        op_detail_file_path = os.path.join(
            self._profiling_dir,
            self._file_name_aicore_detail_time.format(self._device_id)
        )
        framework_file_path = os.path.join(
            self._profiling_dir,
            self._file_name_framework_info.format(self._device_id)
        )
        if not os.path.isfile(op_detail_file_path):
            logger.warning('The file <%s> does not exist.', op_detail_file_path)
            return
        if not os.path.isfile(framework_file_path):
            logger.warning('The file <%s> does not exist.', framework_file_path)
            return

        # Map full operator name (column 3) to the framework information so
        # the detail rows below can be joined against it.
        framework_infos = dict()
        with open(framework_file_path, 'r') as file:
            csv_reader = csv.reader(file)
            _ = next(csv_reader)  # skip header
            for info in csv_reader:
                framework_infos[info[3]] = self._convert_framework_field_type(
                    info
                )

        with open(op_detail_file_path, 'r') as file:
            csv_reader = csv.reader(file)
            _ = next(csv_reader)  # skip header
            for info in csv_reader:
                detail_info = self._get_op_detail_info(info, framework_infos)
                self._data.append(detail_info)

    def _filter(self, filter_condition):
        """
        Filter the profiling data according to the filter condition.

        Args:
            filter_condition (dict): The filter condition.
        """
        def _inner_filter(item: list):
            return self._default_filter(item, filter_condition)

        def _inner_map(item: list):
            # Trim the row to the columns the caller asked to display.
            inner_item = item[0:4]
            if is_display_full_op_name:
                inner_item.append(item[4])
            if is_display_detail:
                inner_item.append(item[5])
            return inner_item

        is_display_detail = filter_condition.get('is_display_detail', True)
        is_display_full_op_name = filter_condition.get(
            'is_display_full_op_name', True
        )
        self._set_display_col_name(is_display_detail, is_display_full_op_name)
        if is_display_detail and is_display_full_op_name:
            self._result = list(filter(_inner_filter, self._data))
        else:
            self._result = list(
                map(_inner_map, filter(_inner_filter, self._data))
            )

    def _set_display_col_name(self, is_display_detail, is_display_full_op_name):
        """
        Set the display column name according to the filter condition.

        Args:
            is_display_detail (bool): Whether to display the detailed operator
                information.
            is_display_full_op_name (bool): Whether to display the operator full
                name.
        """
        self._display_col_names = self.__col_names__[0:4]
        if is_display_full_op_name:
            self._display_col_names.append(self.__col_names__[4])
        if is_display_detail:
            self._display_col_names.append(self.__col_names__[5])

    def _convert_framework_field_type(self, row):
        """
        Convert the field type of framework file to the specific type.

        Args:
            row (list[str]): One row data from parsed data.

        Returns:
            list[Union[str, float]], the converted data.
        """
        return [row[3], row[4], row[5], row[6],
                json.loads(row[7]) if row[7] else None]

    def _get_op_detail_info(self, row, framework_infos):
        """
        Get operator detail information.

        Args:
            row (list[str]): One row data from parsed operator file.
            framework_infos (dict): All framework information.

        Returns:
            list[Union[str, float]], the operator detail information in one row.
        """
        # Assumes every operator in the detail file has a framework entry;
        # a missing key would make framework_info None — TODO confirm the
        # parser guarantees this.
        framework_info = framework_infos.get(row[0])
        return [framework_info[1], framework_info[2], float(row[1]),
                framework_info[3], framework_info[0], framework_info[4]]


class AicpuAnalyser(BaseAnalyser):
    """
    The analyser for analyzing all the AICPU operators.

    Args:
        profiling_dir (str): The directory where the parsed profiling files are
            located.
        device_id (str): The device ID.
    """
    __col_names__ = ['serial_number', 'op_name', 'total_time', 'dispatch_time',
                     'RunV2_start', 'compute_start', 'memcpy_start',
                     'memcpy_end', 'RunV2_end']
    _file_name_aicpu_time = 'aicpu_intermediate_{}.csv'

    def _load(self):
        """Load data according to the parsed AICPU operator file."""
        aicpu_file_path = os.path.join(
            self._profiling_dir,
            self._file_name_aicpu_time.format(self._device_id)
        )
        if not os.path.isfile(aicpu_file_path):
            logger.warning('The file <%s> does not exist.', aicpu_file_path)
            return

        with open(aicpu_file_path, 'r') as aicpu_file:
            reader = csv.reader(aicpu_file)
            next(reader)  # skip the header row
            for row in reader:
                self._data.append(self._convert_field_type(row))

    def _filter(self, filter_condition):
        """
        Filter the profiling data according to the filter condition.

        Args:
            filter_condition (dict): The filter condition.
        """
        self._result = [
            item for item in self._data
            if self._default_filter(item, filter_condition)
        ]

    def _convert_field_type(self, row):
        """
        Convert the field type to the specific type.

        Args:
            row (list[str]): One row data from parsed data.

        Returns:
            list[Union[str, float]], the converted data.
        """
        # One converter per column, in column order.
        converters = (int, str, float, float, int, int, int, int, int)
        return [convert(row[idx]) for idx, convert in enumerate(converters)]

+ 76
- 0
mindinsight/profiler/analyser/analyser_factory.py View File

@@ -0,0 +1,76 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The analyser factory."""
import threading

import mindinsight.profiler.analyser.analyser as analyser_module
from mindinsight.profiler.common.exceptions.exceptions import \
ProfilerAnalyserNotExistException


class AnalyserFactory:
    """
    The analyser factory is used to create analyser special instance.

    Currently the factory supports creating `AicoreTypeAnalyser`,
    `AicoreDetailAnalyser` and `AicpuAnalyser`. The `AicoreTypeAnalyser` is used
    to analyze execution time according to AICORE operator type.
    The `AicoreDetailAnalyser` is used to analyze execution time according to
    all specific AICORE operator. The `AicpuAnalyser` is used to analyze
    execution time according to all specific AICPU operator.

    Examples:
        >>> analyser = AnalyserFactory.instance().get_analyser(
        >>>     'aicore_type', '/path/to/profiling/dir', '0'
        >>> )
    """
    _lock = threading.Lock()
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Double-checked locking: the common path stays lock-free while the
        # lock guarantees only one instance is ever created.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls, *args, **kwargs)
        return cls._instance

    @classmethod
    def instance(cls):
        """The factory instance."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def get_analyser(self, analyser_type, *args):
        """
        Get the specified analyser according to the analyser type.

        Args:
            analyser_type (str): The analyser type.
            args (list): The parameters required for the specific analyser class.

        Returns:
            BaseAnalyser, the specified analyser instance.

        Raises:
            ProfilerAnalyserNotExistException: If the analyser type does not exist.
        """
        # 'aicore_type' -> 'AicoreTypeAnalyser'
        words = (word.capitalize() for word in analyser_type.split('_'))
        class_name = '{}Analyser'.format(''.join(words))

        analyser_class = getattr(analyser_module, class_name, None)
        if analyser_class is None:
            raise ProfilerAnalyserNotExistException(analyser_type)
        return analyser_class(*args)

+ 240
- 0
mindinsight/profiler/analyser/base_analyser.py View File

@@ -0,0 +1,240 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The base analyser."""
import functools
from abc import ABC, abstractmethod

from marshmallow import ValidationError

from mindinsight.profiler.common.exceptions.exceptions import \
ProfilerColumnNotExistException, ProfilerPathErrorException, \
ProfilerIOException
from mindinsight.profiler.common.log import logger
from mindinsight.profiler.common.validator.validate_path import \
validate_and_normalize_path


class BaseAnalyser(ABC):
    """
    The base analyser.

    A concrete analyser class can be constructed by inheriting the class. The
    analyser provides the ability to filter, sort and group. The subclass only
    need to implement `_load`, `_filter`, `_sort` and `_group`. The condition
    defines the rules for filtering, sorting and grouping.

    Args:
        profiling_dir (str): The directory where the parsed profiling files
            are located.
        device_id (str): The device ID.

    Raises:
        ProfilerPathErrorException: If `profiling_dir` is invalid.
        ProfilerIOException: If the parsed profiling files cannot be read.
    """
    __col_names__ = []

    def __init__(self, profiling_dir, device_id):
        self._profiling_dir = self._normalize_profiling_dir(profiling_dir)
        self._device_id = device_id
        self._data = []
        self._result = None
        self._display_col_names = None
        self._size = 0
        # Filter-condition keys the default filter should ignore; subclasses
        # override this for display-only flags.
        self._none_filter_condition_key = []

        try:
            self._load()
        except IOError as err:
            logger.exception(err)
            # BUGFIX: chain the original error so the root cause of the IO
            # failure is preserved in the traceback.
            raise ProfilerIOException() from err

    @property
    def col_names(self):
        """The column names in the parsed profiling file."""
        return self.__col_names__

    @property
    def data(self):
        """The data in the parsed profiling file."""
        return self._data

    def query(self, condition=None):
        """
        Query data according to the condition.

        Args:
            condition (dict): The search condition, including filter condition,
                sort condition and group condition. If the condition is `None`,
                all data will be returned. Default: None.

        Returns:
            dict, the result after filtered, sorted and grouped, with keys
            `col_name`, `object` and `size`.
        """
        if condition is None:
            condition = {}
        filter_condition = condition.get('filter_condition', {})
        sort_condition = condition.get('sort_condition')
        group_condition = condition.get('group_condition')

        self._result = []
        self._display_col_names = self.__col_names__[:]
        self._filter(filter_condition)
        # Size is taken before grouping so it reflects the full filtered set.
        self._size = len(self._result)
        if sort_condition:
            self._sort(sort_condition)
        if group_condition:
            self._group(group_condition)
        return {
            'col_name': self._display_col_names,
            'object': self._result,
            'size': self._size
        }

    @abstractmethod
    def _load(self):
        """Load data according to the parsed profiling files."""

    @abstractmethod
    def _filter(self, filter_condition):
        """
        Filter the profiling data according to the filter condition.

        Args:
            filter_condition (dict): The filter condition.
        """

    def _sort(self, sort_condition: dict):
        """
        Sort the profiling data according to the sort condition.

        Args:
            sort_condition (dict): The sort condition, with a `name` key for
                the column and an optional `type` key (`'descending'` by
                default, or `'ascending'`).

        Raises:
            ProfilerColumnNotExistException: If the sort name does not exist.
        """
        def _cmp(item1, item2):
            value1 = item1[index]
            value2 = item2[index]

            # `None` sorts before every real value.
            if value1 is None and value2 is None:
                cmp_result = 0
            elif value1 is None:
                cmp_result = -1
            elif value2 is None:
                cmp_result = 1
            else:
                try:
                    cmp_result = (value1 > value2) - (value1 < value2)
                except TypeError:
                    # Incomparable types: fall back to comparing type names so
                    # the sort stays total and deterministic.
                    type1 = type(value1).__name__
                    type2 = type(value2).__name__
                    cmp_result = (type1 > type2) - (type1 < type2)
            return cmp_result

        sort_name = sort_condition.get('name')
        sort_type = sort_condition.get('type', 'descending')
        reverse = sort_type == 'descending'
        if not sort_name:
            return
        try:
            index = self.__col_names__.index(sort_name)
        except ValueError as err:
            # BUGFIX: chain the lookup error instead of discarding it.
            raise ProfilerColumnNotExistException(sort_name) from err
        self._result.sort(key=functools.cmp_to_key(_cmp), reverse=reverse)

    def _group(self, group_condition: dict):
        """
        Group the profiling data according to the group condition.

        Args:
            group_condition (dict): The group condition, with `limit` (page
                size, default 10) and `offset` (page index, default 0) keys.
        """
        limit = group_condition.get('limit')
        offset = group_condition.get('offset')
        if limit is None and offset is None:
            return
        if limit is None:
            limit = 10
        if offset is None:
            offset = 0
        # `offset` is a page index, so the slice starts at limit * offset.
        self._result = self._result[limit * offset: limit * (offset + 1)]

    def _default_filter(self, item, condition):
        """
        The default filter method.

        Args:
            item (list[Union[str, float, int]]): A piece of data to be filtered.
            condition (dict): The filter condition.

        Returns:
            bool, `True` if the item is satisfied.
        """
        for condition_key, condition_value in condition.items():
            if condition_key in self._none_filter_condition_key:
                continue
            if condition_key in self.__col_names__:
                index = self.__col_names__.index(condition_key)
                actual_value = item[index]
                for exp_key, exp_value in condition_value.items():
                    if not self._is_match_condition(
                            exp_key, exp_value, actual_value):
                        return False
        return True

    def _is_match_condition(self, exp_key, exp_value, actual_value):
        """
        Check whether the actual value meets the expect condition.

        Args:
            exp_key (str): Expect key of the condition, one of `'in'`,
                `'not_in'` or `'partial_match_str_in'`.
            exp_value (str): Expect value.
            actual_value (str): Actual value.

        Returns:
            bool, `True` if the actual meets the expect condition, else `False`.
        """
        if exp_key == 'in':
            if actual_value not in exp_value:
                return False
        elif exp_key == 'not_in':
            if actual_value in exp_value:
                return False
        elif exp_key == 'partial_match_str_in':
            for partial_match_str in exp_value:
                if partial_match_str in actual_value:
                    return True
            return False
        else:
            # Unknown expect keys never match.
            return False

        return True

    def _normalize_profiling_dir(self, profiling_dir):
        """
        Normalize the profiling dir.

        Args:
            profiling_dir (str): The directory where the parsed profiling files
                are located.

        Returns:
            str, the normalized profiling dir.

        Raises:
            ProfilerPathErrorException: If the profiling dir is invalid.
        """
        try:
            normalized_profiling_dir = validate_and_normalize_path(
                profiling_dir, 'profiler'
            )
        except ValidationError as err:
            # BUGFIX: chain the validation error so the original reason for
            # the rejection is not lost.
            raise ProfilerPathErrorException('The profiling dir is invalid.') from err
        return normalized_profiling_dir

+ 157
- 0
mindinsight/profiler/analyser/integrator.py View File

@@ -0,0 +1,157 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The integrator for integrating parsed profiling files."""
import csv
import os
from decimal import Decimal


class Integrator:
    """
    The integrator for integrating parsed profiling files.

    It converts the raw text outputs of the profiler into intermediate CSV
    files (`aicore_intermediate_*`, `aicpu_intermediate_*`) that the analysers
    consume.

    Args:
        profiling_dir (str): The directory where the parsed profiling files are
            located.
        device_id (str): The device ID.
    """
    _file_name_aicore_detail_time = 'output_op_compute_time_{}.txt'
    _file_name_aicpu_time = 'output_data_preprocess_aicpu_{}.txt'
    _file_name_framework = 'framework_raw_{}.csv'
    _header_aicore_type = ['op_type', 'execution_time', 'execution_frequency',
                           'percent']
    _header_aicore_detail = ['full_op_name', 'execution_time']
    _header_aicpu = ['serial_number', 'op_name', 'total_time', 'dispatch_time',
                     'RunV2_start', 'compute_start', 'memcpy_start',
                     'memcpy_end', 'RunV2_end']

    def __init__(self, profiling_dir, device_id):
        self._profiling_dir = profiling_dir
        self._device_id = device_id
        # full_op_name -> execution time, filled by _parse_aicore_detail_time.
        self._op_time_cache = {}
        self._total_time = Decimal('0.0')

    def integrate(self):
        """
        Integrate the parsed profiling files.

        `_parse_aicore_detail_time` must run first because it fills
        `_op_time_cache` and `_total_time`, which the type summary needs.
        """
        self._parse_aicore_detail_time()
        self._parse_aicore_type_time()
        self._parse_aicpu_time()

    def _parse_aicore_type_time(self):
        """Aggregate per-operator times into a per-type statistics CSV."""
        framework_file = os.path.join(
            self._profiling_dir,
            self._file_name_framework.format(self._device_id)
        )
        if not os.path.isfile(framework_file):
            return

        op_name_type_cache = {}
        # BUGFIX: CSV files must be opened with newline='' per the csv module
        # documentation; without it the writer emits blank rows on Windows.
        with open(framework_file, 'r', newline='') as src_file:
            csv_reader = csv.reader(src_file)
            _ = next(csv_reader)  # skip header

            for row in csv_reader:
                # row[3] is the full operator name, row[5] the operator type.
                op_name_type_cache[row[3]] = row[5]

        # op_type -> [accumulated execution time, occurrence count]
        op_type_time_cache = {}
        for full_op_name, op_time in self._op_time_cache.items():
            op_type = op_name_type_cache.get(full_op_name)
            if op_type_time_cache.get(op_type) is None:
                op_type_time_cache[op_type] = [op_time, 1]
            else:
                op_type_time_cache[op_type][0] += op_time
                op_type_time_cache[op_type][1] += 1

        op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv'
        op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name)
        with open(op_type_file_path, 'w', newline='') as type_file:
            csv_writer = csv.writer(type_file)
            csv_writer.writerow(self._header_aicore_type)

            for op_type, (exec_time, frequency) in op_type_time_cache.items():
                # BUGFIX: guard against a missing or zero total time, which
                # previously raised a Decimal division error.
                percent = (round((exec_time / self._total_time) * 100, 2)
                           if self._total_time else 0)
                csv_writer.writerow([op_type, exec_time, frequency, percent])

    def _parse_aicore_detail_time(self):
        """Convert the AICORE operator time text file into a detail CSV."""
        aicore_detail_file = os.path.join(
            self._profiling_dir,
            self._file_name_aicore_detail_time.format(self._device_id)
        )
        if not os.path.isfile(aicore_detail_file):
            return

        op_detail_file_name = 'aicore_intermediate_' + self._device_id + '_detail.csv'
        op_detail_file_path = os.path.join(
            self._profiling_dir, op_detail_file_name
        )
        with open(aicore_detail_file, 'r') as src_file:
            row = src_file.readline()
            # The raw file starts either with a column header line or with a
            # '=====' banner followed by the header; skip the preamble. Any
            # other leading line means the file format is unknown.
            if row.startswith('op_name'):
                _ = src_file.readline()
            elif row.startswith('====='):
                _ = src_file.readline()
                _ = src_file.readline()
            else:
                return

            with open(op_detail_file_path, 'w', newline='') as detail_file:
                csv_writer = csv.writer(detail_file)
                csv_writer.writerow(self._header_aicore_detail)

                while True:
                    row = src_file.readline()
                    if not row:
                        break

                    op_infos = row.split()
                    # BUGFIX: skip blank lines, which previously raised
                    # IndexError on op_infos[0].
                    if not op_infos:
                        continue
                    if op_infos[0] == 'total':
                        # The 'total' line carries the overall execution time
                        # used later for the percentage computation.
                        self._total_time = Decimal(op_infos[2])
                        continue
                    self._op_time_cache[op_infos[0]] = Decimal(op_infos[1])
                    csv_writer.writerow([op_infos[0], op_infos[1]])

    def _parse_aicpu_time(self):
        """Convert the AICPU operator time text file into an intermediate CSV."""
        aicpu_file = os.path.join(
            self._profiling_dir,
            self._file_name_aicpu_time.format(self._device_id)
        )
        if not os.path.isfile(aicpu_file):
            return

        save_file_name = 'aicpu_intermediate_' + self._device_id + '.csv'
        save_file_path = os.path.join(self._profiling_dir, save_file_name)
        with open(aicpu_file, 'r') as src_file:
            row = src_file.readline()
            if not row.startswith('serial_number'):
                return
            _ = src_file.readline()
            with open(save_file_path, 'w', newline='') as save_file:
                csv_writer = csv.writer(save_file)
                csv_writer.writerow(self._header_aicpu)

                while True:
                    row = src_file.readline()
                    if not row:
                        break
                    infos = row.split()
                    # BUGFIX: also skip blank lines (IndexError before);
                    # 'AI' rows are summary lines, not operator records.
                    if not infos or infos[0] == 'AI':
                        continue
                    csv_writer.writerow(infos)

+ 14
- 0
mindinsight/profiler/common/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 14
- 0
mindinsight/profiler/common/exceptions/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 63
- 0
mindinsight/profiler/common/exceptions/error_code.py View File

@@ -0,0 +1,63 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Profiler error code and messages."""
from enum import unique, Enum

from mindinsight.utils.constant import ProfilerMgrErrors


# Sub-module masks: the bits above bit 7 identify which part of the profiler
# produced an error (general / parser / analyser), so sequence numbers from
# different sub-modules can never collide.
_GENERAL_MASK = 0b00001 << 7
_PARSER_MASK = 0b00010 << 7
_ANALYSER_MASK = 0b00011 << 7


@unique
class ProfilerErrors(ProfilerMgrErrors):
    """Profiler error codes.

    Each value ORs a per-sub-module sequence number with the sub-module mask,
    keeping every code unique across the profiler package. The member names
    mirror those of `ProfilerErrorMsg`.
    """
    # general error code
    PARAM_VALUE_ERROR = 0 | _GENERAL_MASK
    PATH_ERROR = 1 | _GENERAL_MASK
    PARAM_TYPE_ERROR = 2 | _GENERAL_MASK
    DIR_NOT_FOUND_ERROR = 3 | _GENERAL_MASK
    FILE_NOT_FOUND_ERROR = 4 | _GENERAL_MASK
    IO_ERROR = 5 | _GENERAL_MASK

    # parser error code
    DEVICE_ID_MISMATCH_ERROR = 0 | _PARSER_MASK
    RAW_FILE_ERROR = 1 | _PARSER_MASK

    # analyser error code
    COLUMN_NOT_EXIST_ERROR = 0 | _ANALYSER_MASK
    ANALYSER_NOT_EXIST_ERROR = 1 | _ANALYSER_MASK


@unique
class ProfilerErrorMsg(Enum):
    """Profiler error messages.

    Member names mirror `ProfilerErrors`; `{}` placeholders are filled in by
    the corresponding exception classes via `str.format`.
    """
    # general error msg
    PARAM_VALUE_ERROR = 'Param value error. {}'
    PATH_ERROR = 'Path error. {}'
    PARAM_TYPE_ERROR = 'Param type error. {}'
    DIR_NOT_FOUND_ERROR = 'The dir <{}> not found.'
    FILE_NOT_FOUND_ERROR = 'The file <{}> not found.'
    IO_ERROR = 'Read or write file fail.'

    # parser error msg
    DEVICE_ID_MISMATCH_ERROR = 'The device ID mismatch.'
    RAW_FILE_ERROR = 'Raw file error. {}'

    # analyser error msg
    COLUMN_NOT_EXIST_ERROR = 'The column {} does not exist.'
    ANALYSER_NOT_EXIST_ERROR = 'The analyser {} does not exist.'

+ 128
- 0
mindinsight/profiler/common/exceptions/exceptions.py View File

@@ -0,0 +1,128 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Definition of error code and relative messages in profiler module."""
from mindinsight.profiler.common.exceptions.error_code import ProfilerErrors, \
ProfilerErrorMsg
from mindinsight.utils.exceptions import MindInsightException


class ProfilerParamValueErrorException(MindInsightException):
    """The parameter value error in profiler module."""

    def __init__(self, msg):
        detail = ProfilerErrorMsg.PARAM_VALUE_ERROR.value.format(msg)
        super().__init__(error=ProfilerErrors.PARAM_VALUE_ERROR,
                         message=detail,
                         http_code=400)


class ProfilerPathErrorException(MindInsightException):
    """The path error in profiler module."""

    def __init__(self, msg):
        detail = ProfilerErrorMsg.PATH_ERROR.value.format(msg)
        super().__init__(error=ProfilerErrors.PATH_ERROR,
                         message=detail,
                         http_code=400)


class ProfilerParamTypeErrorException(MindInsightException):
    """The parameter type error in profiler module."""

    def __init__(self, msg):
        detail = ProfilerErrorMsg.PARAM_TYPE_ERROR.value.format(msg)
        super().__init__(error=ProfilerErrors.PARAM_TYPE_ERROR,
                         message=detail,
                         http_code=400)


class ProfilerDirNotFoundException(MindInsightException):
    """The dir not found exception in profiler module."""

    def __init__(self, msg):
        detail = ProfilerErrorMsg.DIR_NOT_FOUND_ERROR.value.format(msg)
        super().__init__(error=ProfilerErrors.DIR_NOT_FOUND_ERROR,
                         message=detail,
                         http_code=400)


class ProfilerFileNotFoundException(MindInsightException):
    """The file not found exception in profiler module."""

    def __init__(self, msg):
        detail = ProfilerErrorMsg.FILE_NOT_FOUND_ERROR.value.format(msg)
        super().__init__(error=ProfilerErrors.FILE_NOT_FOUND_ERROR,
                         message=detail,
                         http_code=400)


class ProfilerIOException(MindInsightException):
    """The IO exception in profiler module."""

    def __init__(self):
        # This message takes no arguments, so no formatting is needed.
        super().__init__(error=ProfilerErrors.IO_ERROR,
                         message=ProfilerErrorMsg.IO_ERROR.value,
                         http_code=400)


class ProfilerDeviceIdMismatchException(MindInsightException):
    """The device id mismatch exception in profiler module."""

    def __init__(self):
        # This message takes no arguments, so no formatting is needed.
        super().__init__(error=ProfilerErrors.DEVICE_ID_MISMATCH_ERROR,
                         message=ProfilerErrorMsg.DEVICE_ID_MISMATCH_ERROR.value,
                         http_code=400)


class ProfilerRawFileException(MindInsightException):
    """The raw file exception in profiler module."""

    def __init__(self, msg):
        detail = ProfilerErrorMsg.RAW_FILE_ERROR.value.format(msg)
        super().__init__(error=ProfilerErrors.RAW_FILE_ERROR,
                         message=detail,
                         http_code=400)


class ProfilerColumnNotExistException(MindInsightException):
    """Raised when a requested column does not exist in the profiling data."""

    def __init__(self, msg):
        # Render the detailed reason into the canonical error message.
        detail = ProfilerErrorMsg.COLUMN_NOT_EXIST_ERROR.value.format(msg)
        super().__init__(
            error=ProfilerErrors.COLUMN_NOT_EXIST_ERROR,
            message=detail,
            http_code=400,
        )


class ProfilerAnalyserNotExistException(MindInsightException):
    """Raised when the requested analyser does not exist in the profiler module."""

    def __init__(self, msg):
        # Render the detailed reason into the canonical error message.
        detail = ProfilerErrorMsg.ANALYSER_NOT_EXIST_ERROR.value.format(msg)
        super().__init__(
            error=ProfilerErrors.ANALYSER_NOT_EXIST_ERROR,
            message=detail,
            http_code=400,
        )

+ 20
- 0
mindinsight/profiler/common/log.py View File

@@ -0,0 +1,20 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Profiler logger."""
from mindinsight.utils.log import setup_logger

# Sub-module and log-file name under which profiler messages are recorded.
LOG_NAME = "profiler"
LOG_MODULE = "profiler"
# Shared logger instance for the whole profiler package; import this rather
# than calling setup_logger() again elsewhere.
logger = setup_logger(sub_module=LOG_MODULE, log_name=LOG_NAME)

+ 14
- 0
mindinsight/profiler/common/validator/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 119
- 0
mindinsight/profiler/common/validator/validate_path.py View File

@@ -0,0 +1,119 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Validate the input path."""
import os
from typing import Union, List
from marshmallow import ValidationError


def safe_normalize_path(
        path,
        raise_key,
        safe_prefixes: Union[None, List[str]],
        check_absolute_path=False,
        allow_parent_dir=False,
):
    """
    Return a safe, normalized form of *path*.

    The path is first validated and normalized; if safe_prefixes is given,
    the normalized path must additionally start with one of those prefixes.

    Note:
        This func is not compatible with windows.

        Caller should check returned path to ensure safety according to
        business logic.

        File scheme (rfc8089) is currently not supported.

    Args:
        path (str): Path to be normalized.

        raise_key (str): The exception raise key.

        safe_prefixes (list[str]): If not none, path must startswith one of
            the safe_prefixes. Set this arg to [] will cause all paths
            considered unsafe. Normally, prefix in this arg should end
            with "/".

        check_absolute_path (bool): Whether check path is absolute.

        allow_parent_dir (bool): Whether allow parent dir in path.

    Returns:
        str, normalized path.
    """
    normalized = validate_and_normalize_path(
        path,
        raise_key=raise_key,
        check_absolute_path=check_absolute_path,
        allow_parent_dir=allow_parent_dir,
    )

    # None means "no prefix restriction"; [] rejects everything.
    if safe_prefixes is None:
        return normalized

    if any(str(normalized).startswith(prefix) for prefix in safe_prefixes):
        return normalized

    raise ValidationError({raise_key: {"The path is invalid!"}})


def validate_and_normalize_path(
        path,
        raise_key,
        check_absolute_path=False,
        allow_parent_dir=False,
):
    """
    Validate *path* and return its normalized form.

    The path is treated as a unix local path; it is normalized with
    os.path.realpath, which resolves symlinks and does not require the
    path to exist.

    Note:
        File scheme (rfc8089) is currently not supported.

    Args:
        path (str): Path to be normalized.
        raise_key (str): The exception raise key.
        check_absolute_path (bool): Whether to require that path is absolute
            (starts with "/").
        allow_parent_dir (bool): Whether allow parent dir ("..") components
            in path.

    Returns:
        str, normalized path.

    Raises:
        ValidationError: If path is empty, contains ".." while parent dirs
            are disallowed, is relative while an absolute path is required,
            or cannot be normalized.
    """
    if not path:
        raise ValidationError({raise_key: {"The path is invalid!"}})

    path_str = str(path)
    if not allow_parent_dir:
        # Reject any ".." component to prevent directory traversal.
        path_components = path_str.split("/")
        if ".." in path_components:
            raise ValidationError({raise_key: {"The path is invalid!"}})

    # Path has no URL scheme handling; treat it as a unix local path.
    if check_absolute_path:
        if not path_str.startswith("/"):
            raise ValidationError({raise_key: {"The path is invalid!"}})
    try:
        # realpath resolves symlinks; it may raise ValueError on malformed
        # input (e.g. embedded null bytes).
        normalized_path = os.path.realpath(path)
    except ValueError as err:
        # Chain the original cause so debugging keeps the real failure.
        raise ValidationError({raise_key: {"The path is invalid!"}}) from err

    return normalized_path

+ 14
- 0
mindinsight/profiler/parser/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 520
- 0
mindinsight/profiler/parser/framework_parser.py View File

@@ -0,0 +1,520 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Thr parser for parsing framework files."""
import csv
import enum
import json
import os
import re

from marshmallow import ValidationError

from mindinsight.profiler.common.exceptions.exceptions import \
ProfilerPathErrorException, ProfilerDirNotFoundException, \
ProfilerFileNotFoundException, ProfilerDeviceIdMismatchException, \
ProfilerRawFileException
from mindinsight.profiler.common.validator.validate_path import \
validate_and_normalize_path


class VmDataType(enum.IntEnum):
    """Definition of vm data type."""
    NUMBER_TYPE_BEGIN = 26
    NUMBER_TYPE_BOOL = 27
    NUMBER_TYPE_INT = 28
    NUMBER_TYPE_INT8 = 29
    NUMBER_TYPE_INT16 = 30
    NUMBER_TYPE_INT32 = 31
    NUMBER_TYPE_INT64 = 32
    NUMBER_TYPE_UINT = 33
    NUMBER_TYPE_UINT8 = 34
    NUMBER_TYPE_UINT16 = 35
    NUMBER_TYPE_UINT32 = 36
    NUMBER_TYPE_UINT64 = 37
    NUMBER_TYPE_FLOAT = 38
    NUMBER_TYPE_FLOAT16 = 39
    NUMBER_TYPE_FLOAT32 = 40
    NUMBER_TYPE_FLOAT64 = 41
    NUMBER_TYPE_END = 42

    @classmethod
    def get_data_type_name(cls, num):
        """
        Get the name of data type by enum number.

        Args:
            num (int): Enum number.

        Returns:
            str, the name of data type, or 'UNKNOWN' for an unmapped number.
        """
        try:
            return cls(num).name
        except ValueError:
            return 'UNKNOWN'


class GeDataType(enum.IntEnum):
    """Definition of ge data type."""
    DT_FLOAT = 0
    DT_FLOAT16 = 1
    DT_INT8 = 2
    DT_INT16 = 6
    DT_UINT16 = 7
    DT_UINT8 = 4
    DT_INT32 = 3
    DT_INT64 = 9
    DT_UINT32 = 8
    DT_UINT64 = 10
    DT_BOOL = 12
    DT_DOUBLE = 11
    DT_STRING = 13
    DT_DUAL_SUB_INT8 = 14
    DT_DUAL_SUB_UINT8 = 15
    DT_COMPLEX64 = 16
    DT_COMPLEX128 = 17
    DT_QINT8 = 18
    DT_QINT16 = 19
    DT_QINT32 = 20
    DT_QUINT8 = 21
    DT_QUINT16 = 22
    DT_RESOURCE = 23
    DT_STRING_REF = 24
    DT_DUAL = 25
    DT_UNDEFINED = 26

    @classmethod
    def get_data_type_name(cls, num):
        """
        Get the name of data type by enum number.

        Args:
            num (int): Enum number.

        Returns:
            str, the name of data type, or 'UNKNOWN' for an unmapped number.
        """
        try:
            return cls(num).name
        except ValueError:
            return 'UNKNOWN'


class GeFormat(enum.IntEnum):
    """Definition of ge format type."""
    FORMAT_NCHW = 0
    FORMAT_NHWC = 1
    FORMAT_ND = 2
    FORMAT_NC1HWC0 = 3
    FORMAT_FRACTAL_Z = 4
    FORMAT_NC1C0HWPAD = 5
    FORMAT_NHWC1C0 = 6
    FORMAT_FSR_NCHW = 7
    FORMAT_FRACTAL_DECONV = 8
    FORMAT_C1HWNC0 = 9
    FORMAT_FRACTAL_DECONV_TRANSPOSE = 10
    FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11
    FORMAT_NC1HWC0_C04 = 12
    FORMAT_FRACTAL_Z_C04 = 13
    FORMAT_CHWN = 14
    FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15
    FORMAT_HWCN = 16
    FORMAT_NC1KHKWHWC0 = 17
    FORMAT_BN_WEIGHT = 18
    FORMAT_FILTER_HWCK = 19
    FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20
    FORMAT_HASHTABLE_LOOKUP_KEYS = 21
    FORMAT_HASHTABLE_LOOKUP_VALUE = 22
    FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23
    FORMAT_HASHTABLE_LOOKUP_HITS = 24
    FORMAT_C1HWNCOC0 = 25
    FORMAT_MD = 26
    FORMAT_NDHWC = 27
    FORMAT_FRACTAL_ZZ = 28
    FORMAT_FRACTAL_NZ = 29
    FORMAT_NCDHW = 30
    FORMAT_DHWCN = 31
    FORMAT_NDC1HWC0 = 32
    FORMAT_FRACTAL_Z_3D = 33
    FORMAT_CN = 34
    FORMAT_NC = 35
    FORMAT_DHWNC = 36
    FORMAT_FRACTAL_Z_3D_TRANSPOSE = 37
    FORMAT_RESERVED = 38
    FORMAT_ALL = 39

    @classmethod
    def get_format_name(cls, num):
        """
        Get the name of format type by enum number.

        Args:
            num (int): Enum number.

        Returns:
            str, the name of format type, or 'UNKNOWN' for an unmapped number.
        """
        try:
            return cls(num).name
        except ValueError:
            return 'UNKNOWN'


class FrameworkParser:
    """
    The parser for parsing framework files.

    Collects the framework "task" and "graph" description files produced by a
    profiling run, correlates them, and writes the merged result to a CSV
    file (see `_col_names` for the columns).

    Args:
        profiling_id (str): The profiling ID.
        device_id (str): The device ID.
        output_path (str): The directory of the parsed file. Default: `./`.
    """
    # Fixed root of raw profiling output on the host.
    _raw_data_dir = '/var/log/npu/profiling'
    # File-name patterns for framework files in the job dir / data dir.
    _regex_framework = r'Framework\.host\.(?P<data_type>.+)\.(?P<device_id>\d).+'
    _regex_framework_in_data = r'Framework\.host\.(?P<data_type>.+)\.' \
                               r'(?P<device_id>\d)\.(?P<profiling_id>[a-zA-Z0-9]+).+'
    # Column order of the generated CSV file.
    _col_names = [
        'task_id', 'stream_id', 'block_dim', 'full_op_name', 'op_name',
        'op_type', 'subgraph', 'op_info'
    ]
    # Graph attributes that are copied into the per-op `op_info` JSON blob.
    _graph_attr_name = [
        'input_format', 'input_data_type', 'input_shape', 'output_format',
        'output_data_type', 'output_shape'
    ]

    def __init__(self, profiling_id, device_id, output_path='./'):
        self._profiling_path = self._get_raw_profiling_path(profiling_id)
        # 'vm' or 'ge'; determined from file names during _search_file.
        self._backend_type = None
        self._framework_path = {'graph': [], 'task': []}
        self._search_file(profiling_id, device_id)
        self._device_id = device_id
        self._save_path = self._get_save_path(device_id, output_path)
        # task_id -> full op name, filled by _parse_task_files.
        self._task_id_full_op_name_dict = {}
        # full op name -> [task_id, stream_id, block_dim], consumed by parse().
        self._task_cache = {}
        self._parse_task_files()

    @property
    def save_path(self):
        """
        The property of save path.

        Returns:
            str, the save path.
        """
        return self._save_path

    def to_task_id_full_op_name_dict(self):
        """
        Get the task id and full operator name dict.

        Returns:
            dict, the task id and full operator name dict.
        """
        return self._task_id_full_op_name_dict

    def parse(self):
        """Parse the framework files."""
        self._parse_graph_files_and_save(self._task_cache)
        # The cache is consumed by the call above; free it eagerly.
        del self._task_cache

    def _get_raw_profiling_path(self, profiling_id):
        """
        Get raw profiling path.

        Args:
            profiling_id (str): The profiling ID.

        Returns:
            str, the raw profiling path.

        Raises:
            ProfilerPathErrorException: If the profiling path is invalid.
            ProfilerDirNotFoundException: If the profiling dir is not found.
        """
        profiling_path = os.path.join(self._raw_data_dir, profiling_id)
        try:
            profiling_path = validate_and_normalize_path(
                profiling_path, 'profiler'
            )
        except ValidationError:
            raise ProfilerPathErrorException('Profiling path is invalid.')
        if not os.path.isdir(profiling_path):
            raise ProfilerDirNotFoundException(profiling_path)
        return profiling_path

    def _search_file(self, profiling_id, device_id):
        """
        Search all framework files in raw profiling path.

        Args:
            profiling_id (str): The profiling ID.
            device_id (str): The device ID.

        Raises:
            ProfilerFileNotFoundException: If the framework files are not found.
        """
        # Framework files may live in the job dir, the data dir, or both.
        self._search_file_from_job_path(device_id)
        self._search_file_from_data_path(profiling_id, device_id)

        # _backend_type is only set when at least one framework file matched.
        if self._backend_type is None:
            raise ProfilerFileNotFoundException('Framework')
        self._framework_path['graph'].sort()
        self._framework_path['task'].sort()

    def _search_file_from_job_path(self, device_id):
        """
        Search framework files from job path.

        Args:
            device_id (str): The device ID.

        Raises:
            ProfilerRawFileException: If the framework file type is inconsistent.
            ProfilerDeviceIdMismatchException: If the device id is mismatch
                with framework in the raw dir.
        """
        files = os.listdir(self._profiling_path)
        for file in files:
            pattern = re.search(self._regex_framework, file)
            # Skip non-framework files and completion-marker (.done) files.
            if not pattern or file.endswith('.done'):
                continue
            attrs = pattern.groupdict()

            device_id_in_path = attrs.get('device_id')
            if device_id_in_path != device_id:
                raise ProfilerDeviceIdMismatchException()

            # A 'vm.' prefix in the data type marks the vm backend; anything
            # else is treated as the ge backend. Mixing the two is an error.
            data_type = attrs.get('data_type')
            if data_type.startswith('vm.'):
                if self._backend_type and self._backend_type != 'vm':
                    raise ProfilerRawFileException('Backend type is inconsistent.')
                self._backend_type = 'vm'
                data_type = data_type.split('.')[1]
            else:
                if self._backend_type and self._backend_type != 'ge':
                    raise ProfilerRawFileException('Backend type is inconsistent.')
                self._backend_type = 'ge'
            if data_type.startswith('graph_desc_info'):
                self._framework_path['graph'].append(
                    os.path.join(self._profiling_path, file)
                )
            elif data_type.startswith('task_desc_info'):
                self._framework_path['task'].append(
                    os.path.join(self._profiling_path, file)
                )

    def _search_file_from_data_path(self, profiling_id, device_id):
        """
        Search framework files from data path.

        Args:
            profiling_id (str): The profiling ID.
            device_id (str): The device ID.

        Raises:
            ProfilerRawFileException: If the framework file type is inconsistent.
            ProfilerDeviceIdMismatchException: If the device id is mismatch
                with framework in the raw dir.
        """
        profiling_data_path = os.path.join(
            self._raw_data_dir, 'container', device_id, 'data'
        )
        # The data dir is optional; absence is not an error.
        if not os.path.isdir(profiling_data_path):
            return

        files = os.listdir(profiling_data_path)
        for file in files:
            pattern = re.search(self._regex_framework_in_data, file)
            if not pattern or file.endswith('.done') or file.endswith('.zip'):
                continue
            attrs = pattern.groupdict()

            # Files from other profiling runs are silently skipped.
            profiling_id_in_path = attrs.get('profiling_id')
            if profiling_id_in_path != profiling_id:
                continue

            device_id_in_path = attrs.get('device_id')
            if device_id_in_path != device_id:
                raise ProfilerDeviceIdMismatchException()

            # Same backend detection as in _search_file_from_job_path.
            data_type = attrs.get('data_type')
            if data_type.startswith('vm.'):
                if self._backend_type and self._backend_type != 'vm':
                    raise ProfilerRawFileException('Backend type is inconsistent.')
                self._backend_type = 'vm'
                data_type = data_type.split('.')[1]
            else:
                if self._backend_type and self._backend_type != 'ge':
                    raise ProfilerRawFileException('Backend type is inconsistent.')
                self._backend_type = 'ge'
            if data_type.startswith('graph_desc_info'):
                self._framework_path['graph'].append(
                    os.path.join(profiling_data_path, file)
                )
            elif data_type.startswith('task_desc_info'):
                self._framework_path['task'].append(
                    os.path.join(profiling_data_path, file)
                )

    def _get_save_path(self, device_id, output_path):
        """
        Get the save path.

        Args:
            device_id (str): The device ID.
            output_path (str): The output dir.

        Returns:
            str, the save path (framework_raw_<device_id>.csv in output dir).

        Raises:
            ProfilerPathErrorException: If the output path is invalid.
            ProfilerDirNotFoundException: If the output dir is not found.
        """
        try:
            output_dir = validate_and_normalize_path(output_path, 'profiler')
        except ValidationError:
            raise ProfilerPathErrorException('Output path is invalid.')
        if not os.path.isdir(output_dir):
            raise ProfilerDirNotFoundException(output_dir)
        return os.path.join(
            output_dir, '_'.join(['framework', 'raw', device_id]) + '.csv'
        )

    def _parse_task_files(self):
        """Parse the framework task files."""
        for path in self._framework_path['task']:
            with open(path, 'r') as file:
                for task_info in file:
                    # Each line is space-separated; observed layout here is
                    # [op_name, block_dim, task_id, stream_id, ...].
                    infos = task_info.strip('\n').split(' ')
                    # key is op name, values is task id, stream id, block_dim
                    self._task_cache[infos[0]] = [infos[2], infos[3], infos[1]]
                    self._task_id_full_op_name_dict[infos[2]] = infos[0]

    def _parse_graph_files_and_save(self, task_cache):
        """
        Parse the framework graph files and save the framework information.

        Args:
            task_cache (dict): The task information cache.
        """
        # NOTE(review): opened without newline='' — on Windows the csv module
        # would emit blank rows between records; confirm target platforms.
        with open(self._save_path, 'w') as save_file:
            csv_writer = csv.writer(save_file)
            csv_writer.writerow(self._col_names)
            for path in self._framework_path['graph']:
                with open(path, 'r') as graph_file:
                    for graph_info in graph_file:
                        result = self._parse_one_row_graph_info(graph_info)
                        # NOTE(review): get() returns None if the graph op has
                        # no matching task record, which would raise
                        # AttributeError on .extend below — assumes task files
                        # always cover every graph op; TODO confirm.
                        task_info = task_cache.get(result[0])
                        task_info.extend(result)
                        csv_writer.writerow(task_info)
                        del task_cache[result[0]]

            # Ops that never appeared in any graph file are written with
            # empty graph columns so no task record is lost.
            none_list = [None, None, None, None]
            for key, value in task_cache.items():
                value.append(key)
                value.extend(none_list)
                csv_writer.writerow(value)

    def _parse_one_row_graph_info(self, row_info):
        """
        Parse the graph information in one row.

        Args:
            row_info (str): One row graph information.

        Returns:
            list[str], the parsed graph information.
        """
        full_op_name = None
        op_name = None
        subgraph_name = None
        op_type = None
        op_info = dict()
        # Key ('input_N' / 'output_N') of the op_info entry currently being
        # filled by subsequent graph attributes.
        cur_op_info_key = None

        infos = row_info.strip('\n').split(' ')
        for info in infos:
            attr_name, attr_value = info.split(':', 1)
            if attr_name == 'op_name':
                full_op_name = attr_value
                subgraph_name = self._get_subgraph_name(full_op_name)
                op_name = self._get_op_name(full_op_name, subgraph_name)
            elif attr_name == 'op_type':
                op_type = attr_value
            elif attr_name in ['input_id', 'output_id']:
                cur_op_info_key = '{}_{}'.format(
                    attr_name.split('_')[0], attr_value
                )
                op_info[cur_op_info_key] = dict()
            elif attr_name in self._graph_attr_name:
                op_attr = attr_name.split('_', 1)[1]
                if op_attr == 'shape':
                    attr_value = attr_value.strip('"')
                # Numeric type/format codes are mapped to readable names;
                # the vm backend only encodes data types, ge also formats.
                if self._backend_type == 'vm':
                    if op_attr == 'data_type':
                        attr_value = VmDataType.get_data_type_name(
                            int(attr_value)
                        )
                else:
                    if op_attr == 'data_type':
                        attr_value = GeDataType.get_data_type_name(
                            int(attr_value)
                        )
                    elif op_attr == 'format':
                        attr_value = GeFormat.get_format_name(int(attr_value))

                # NOTE(review): assumes an input_id/output_id attribute always
                # precedes graph attrs on a line; otherwise cur_op_info_key is
                # None and this lookup raises KeyError — TODO confirm format.
                op_info[cur_op_info_key][op_attr] = attr_value

        # the list info are full_op_name, op_name, op_type, subgraph, op_info
        return [full_op_name, op_name, op_type, subgraph_name,
                json.dumps(op_info)]

    def _get_subgraph_name(self, full_op_name):
        """
        Get subgraph name.

        Args:
            full_op_name (str): The full operator name.

        Returns:
            str, the subgraph name ('Default' or 'Gradients'), or None when
            the op does not belong to a recognized subgraph.
        """
        subgraph_name = full_op_name.split('/', 1)[0]
        if subgraph_name in ['Default', 'Gradients']:
            return subgraph_name
        return None

    def _get_op_name(self, full_op_name, subgraph_name):
        """
        Get operator name.

        Args:
            full_op_name (str): The full operator name.
            subgraph_name (str): The subgraph name.

        Returns:
            str, the operator name.
        """
        # No recognized subgraph: keep the full name unchanged.
        if subgraph_name is None:
            return full_op_name

        # vm backend: the short name is simply the last path segment.
        if self._backend_type == 'vm':
            return full_op_name.split('/')[-1]

        # ge backend: the op may be nested under the subgraph several times;
        # join the last segment of each occurrence with '+'.
        strs = full_op_name.split(subgraph_name + '/')
        op_name = None
        for name_str in strs:
            if not name_str:
                continue
            if op_name is None:
                op_name = name_str.split('/')[-1]
            else:
                op_name = '+'.join([op_name, name_str.split('/')[-1]])
        return op_name

+ 5
- 0
mindinsight/utils/constant.py View File

@@ -29,6 +29,7 @@ class MindInsightModules(Enum):
GENERAL = 0
LINEAGEMGR = 2
DATAVISUAL = 5
PROFILERMGR = 6


class GeneralErrors(Enum):
@@ -43,6 +44,10 @@ class GeneralErrors(Enum):
URL_DECODE_ERROR = 10


class ProfilerMgrErrors(Enum):
"""Enum definition for profiler errors."""


class LineageMgrErrors(Enum):
"""Enum definition for lineage errors."""



+ 2
- 1
requirements.txt View File

@@ -12,4 +12,5 @@ numpy>=1.17.0
protobuf>=3.8.0
psutil>=5.6.1
six>=1.12.0
Werkzeug>=1.0.0
Werkzeug>=1.0.0
tabulate>=0.8.6

Loading…
Cancel
Save