profiler mem usage: added memory breakdowns for each execution id

commit aa42728e84 (tags/v1.2.0-rc1)
zhangyunshu authored 4 years ago
6 changed files with 213 additions and 585 deletions

  1. mindspore/ccsrc/CMakeLists.txt                               +2    -2
  2. mindspore/profiler/common/proto_files/__init__.py            +0   -15
  3. mindspore/profiler/common/proto_files/memory_usage.proto     +0   -50
  4. mindspore/profiler/common/proto_files/memory_usage_pb2.py    +0  -295
  5. mindspore/profiler/parser/container.py                      +18   -45
  6. mindspore/profiler/parser/memory_usage_parser.py           +193  -178
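
In outline: the parser now records, for every execution id (a node's position in the execution order), which tensors are alive at that step, and each graph dict gains a 'breakdowns' key holding one list of tensor dicts per execution id. A hypothetical entry, using the fields produced by MemoryTensor.to_dict() in container.py (all values illustrative):

    # One breakdown entry: a tensor alive at a given execution id.
    breakdown_entry = {
        'tensor_name': 'Conv2D-op1:0',  # hypothetical source node + output slot
        'tensor_id': 42,
        'size': 0.125,                  # GiB (tensor_proto.size / GIGABYTES)
        'type': 'output',
        'shape': '32,64,56,56',
        'format': 'NC1HWC0',
        'data_type': 'float16',
        'life_long': 'LifeLongNone',
        'life_start': 17,               # execution id where memory is allocated
        'life_end': 23,                 # execution id where memory is freed
    }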

mindspore/ccsrc/CMakeLists.txt  (+2, -2)

@@ -126,8 +126,8 @@ list(APPEND MINDSPORE_PROTO_LIST ${COMM_PROTO_SRCS})
include_directories("${CMAKE_BINARY_DIR}/profiler/device/common")
file(GLOB_RECURSE PROFILER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"profiler/device/common/memory_profiling.proto")
ms_protobuf_generate(PROFILER_MEM_PROTO_SRCS PROFILER_MEM_PROTO_HDRS ${PROFILER_PROTO_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_SRCS})
ms_protobuf_generate_py(PROFILER_MEM_PROTO_PY PROFILER_MEM_PROTO_HDRS_PY PROFILER_MEM_PROTO_PYS ${PROFILER_PROTO_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_PY})

if(ENABLE_DEBUGGER)
# debugger: compile proto files


mindspore/profiler/common/proto_files/__init__.py  (+0, -15)

@@ -1,15 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The proto files for profiler."""

mindspore/profiler/common/proto_files/memory_usage.proto  (+0, -50)

@@ -1,50 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package profiler;

message MemoryProto {
  repeated GraphMemProto graph_mem = 1;  // memory usage of multiple graphs
  int64 total_mem = 2;                   // total allocated memory on device
}

message GraphMemProto {
  int64 graph_id = 1;                       // graph id
  int64 static_mem = 2;                     // size of allocated static memory for current graph
  repeated NodeMemProto node_mems = 3;      // execution nodes
  repeated TensorMemProto tensor_mems = 4;  // all tensors
  string fp_start = 5;                      // node name of fp start
  string bp_end = 6;                        // node name of bp end
}

message NodeMemProto {
  string node_name = 1;                    // node name
  int64 node_id = 2;                       // node id with respect to the execution order
  repeated int64 input_tensor_id = 3;      // input tensor id
  repeated int64 output_tensor_id = 4;     // output tensor id
  repeated int64 workspace_tensor_id = 5;  // workspace tensor id
}

message TensorMemProto {
  int64 tensor_id = 1;   // tensor id
  int64 size = 2;        // aligned tensor size
  string type = 3;       // tensor type, e.g. Common, OutputOnly
  int64 life_start = 4;  // node id at which memory allocated
  int64 life_end = 5;    // node id at which memory deallocated
  string life_long = 6;  // the type of tensor lifetime, e.g. LifeLongGraphAll
}
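
This schema is small enough to exercise end to end. A minimal round-trip sketch, assuming the messages in the build-time memory_profiling.proto (which the parser now imports as mindspore.train.memory_profiling_pb2) match the definitions above:

    from mindspore.train.memory_profiling_pb2 import MemoryProto

    proto = MemoryProto()
    proto.total_mem = 8 * 1024 ** 3      # total device memory, in bytes
    graph = proto.graph_mem.add()        # one GraphMemProto entry
    graph.graph_id = 0
    graph.static_mem = 512 * 1024 ** 2

    data = proto.SerializeToString()     # what the profiler writes to disk

    parsed = MemoryProto()
    parsed.ParseFromString(data)         # what MemoryUsageParser reads back
    assert parsed.graph_mem[0].static_mem == 512 * 1024 ** 2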

mindspore/profiler/common/proto_files/memory_usage_pb2.py  (+0, -295)

@@ -1,295 +0,0 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: mindspore/profiler/common/proto_files/memory_usage.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()




DESCRIPTOR = _descriptor.FileDescriptor(
name='mindspore/profiler/common/proto_files/memory_usage.proto',
package='profiler',
syntax='proto3',
serialized_options=None,
serialized_pb=_b('\n8mindspore/profiler/common/proto_files/memory_usage.proto\x12\x08profiler\"L\n\x0bMemoryProto\x12*\n\tgraph_mem\x18\x01 \x03(\x0b\x32\x17.profiler.GraphMemProto\x12\x11\n\ttotal_mem\x18\x02 \x01(\x03\"\xb1\x01\n\rGraphMemProto\x12\x10\n\x08graph_id\x18\x01 \x01(\x03\x12\x12\n\nstatic_mem\x18\x02 \x01(\x03\x12)\n\tnode_mems\x18\x03 \x03(\x0b\x32\x16.profiler.NodeMemProto\x12-\n\x0btensor_mems\x18\x04 \x03(\x0b\x32\x18.profiler.TensorMemProto\x12\x10\n\x08\x66p_start\x18\x05 \x01(\t\x12\x0e\n\x06\x62p_end\x18\x06 \x01(\t\"\x82\x01\n\x0cNodeMemProto\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\x03\x12\x17\n\x0finput_tensor_id\x18\x03 \x03(\x03\x12\x18\n\x10output_tensor_id\x18\x04 \x03(\x03\x12\x1b\n\x13workspace_tensor_id\x18\x05 \x03(\x03\"x\n\x0eTensorMemProto\x12\x11\n\ttensor_id\x18\x01 \x01(\x03\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x12\n\nlife_start\x18\x04 \x01(\x03\x12\x10\n\x08life_end\x18\x05 \x01(\x03\x12\x11\n\tlife_long\x18\x06 \x01(\tb\x06proto3')
)




_MEMORYPROTO = _descriptor.Descriptor(
name='MemoryProto',
full_name='profiler.MemoryProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='graph_mem', full_name='profiler.MemoryProto.graph_mem', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='total_mem', full_name='profiler.MemoryProto.total_mem', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=70,
serialized_end=146,
)


_GRAPHMEMPROTO = _descriptor.Descriptor(
name='GraphMemProto',
full_name='profiler.GraphMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='graph_id', full_name='profiler.GraphMemProto.graph_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='static_mem', full_name='profiler.GraphMemProto.static_mem', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='node_mems', full_name='profiler.GraphMemProto.node_mems', index=2,
number=3, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='tensor_mems', full_name='profiler.GraphMemProto.tensor_mems', index=3,
number=4, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='fp_start', full_name='profiler.GraphMemProto.fp_start', index=4,
number=5, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='bp_end', full_name='profiler.GraphMemProto.bp_end', index=5,
number=6, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=149,
serialized_end=326,
)


_NODEMEMPROTO = _descriptor.Descriptor(
name='NodeMemProto',
full_name='profiler.NodeMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='node_name', full_name='profiler.NodeMemProto.node_name', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='node_id', full_name='profiler.NodeMemProto.node_id', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='input_tensor_id', full_name='profiler.NodeMemProto.input_tensor_id', index=2,
number=3, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='output_tensor_id', full_name='profiler.NodeMemProto.output_tensor_id', index=3,
number=4, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='workspace_tensor_id', full_name='profiler.NodeMemProto.workspace_tensor_id', index=4,
number=5, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=329,
serialized_end=459,
)


_TENSORMEMPROTO = _descriptor.Descriptor(
name='TensorMemProto',
full_name='profiler.TensorMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='tensor_id', full_name='profiler.TensorMemProto.tensor_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='size', full_name='profiler.TensorMemProto.size', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='type', full_name='profiler.TensorMemProto.type', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_start', full_name='profiler.TensorMemProto.life_start', index=3,
number=4, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_end', full_name='profiler.TensorMemProto.life_end', index=4,
number=5, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_long', full_name='profiler.TensorMemProto.life_long', index=5,
number=6, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=461,
serialized_end=581,
)

_MEMORYPROTO.fields_by_name['graph_mem'].message_type = _GRAPHMEMPROTO
_GRAPHMEMPROTO.fields_by_name['node_mems'].message_type = _NODEMEMPROTO
_GRAPHMEMPROTO.fields_by_name['tensor_mems'].message_type = _TENSORMEMPROTO
DESCRIPTOR.message_types_by_name['MemoryProto'] = _MEMORYPROTO
DESCRIPTOR.message_types_by_name['GraphMemProto'] = _GRAPHMEMPROTO
DESCRIPTOR.message_types_by_name['NodeMemProto'] = _NODEMEMPROTO
DESCRIPTOR.message_types_by_name['TensorMemProto'] = _TENSORMEMPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

MemoryProto = _reflection.GeneratedProtocolMessageType('MemoryProto', (_message.Message,), {
'DESCRIPTOR' : _MEMORYPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.MemoryProto)
})
_sym_db.RegisterMessage(MemoryProto)

GraphMemProto = _reflection.GeneratedProtocolMessageType('GraphMemProto', (_message.Message,), {
'DESCRIPTOR' : _GRAPHMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.GraphMemProto)
})
_sym_db.RegisterMessage(GraphMemProto)

NodeMemProto = _reflection.GeneratedProtocolMessageType('NodeMemProto', (_message.Message,), {
'DESCRIPTOR' : _NODEMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.NodeMemProto)
})
_sym_db.RegisterMessage(NodeMemProto)

TensorMemProto = _reflection.GeneratedProtocolMessageType('TensorMemProto', (_message.Message,), {
'DESCRIPTOR' : _TENSORMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.TensorMemProto)
})
_sym_db.RegisterMessage(TensorMemProto)


# @@protoc_insertion_point(module_scope)

mindspore/profiler/parser/container.py  (+18, -45)

@@ -13,8 +13,6 @@
# limitations under the License.
# ============================================================================
"""The container of metadata used in profiler parser."""
import heapq

GIGABYTES = 1024 * 1024 * 1024


@@ -131,6 +129,7 @@ class MemoryGraph:
self.bp_end = None
self.lines = []
self.nodes = {}
self.breakdowns = []

def to_dict(self):
"""Convert Graph to dict."""
@@ -140,7 +139,8 @@ class MemoryGraph:
'nodes': self.nodes,
'fp_start': self.fp_start,
'bp_end': self.bp_end,
'lines': self.lines
'lines': self.lines,
'breakdowns': self.breakdowns
}

return graph
@@ -152,17 +152,15 @@ class MemoryNode:

Args:
node_proto (proto): Node proto.
graph_id (int): Graph id.
"""
def __init__(self, node_proto, graph_id):
def __init__(self, node_proto):
self._node_proto = node_proto
self.graph_id = graph_id
self.node_id = node_proto.node_id
self.name = node_proto.node_name
self.fullname = ""
self.input_ids = [t_id for t_id in node_proto.input_tensor_id]
self.output_ids = [t_id for t_id in node_proto.output_tensor_id]
self.workspace_ids = [t_id for t_id in node_proto.workspace_tensor_id]
self.input_ids = list(node_proto.input_tensor_id)
self.output_ids = list(node_proto.output_tensor_id)
self.workspace_ids = list(node_proto.workspace_tensor_id)
self.inputs = []
self.outputs = []
self.workspaces = []
@@ -181,8 +179,7 @@ class MemoryNode:
'size': self.size,
'allocated': self.mem_change,
'inputs': self.inputs,
'outputs': self.outputs,
'workspaces': self.workspaces
'outputs': self.outputs
}

return node
@@ -194,9 +191,8 @@ class MemoryTensor:

Args:
tensor_proto (proto): Tensor proto.
graph_id (int): Graph id.
"""
def __init__(self, tensor_proto, graph_id):
def __init__(self, tensor_proto):
self._tensor_proto = tensor_proto
self.tensor_id = tensor_proto.tensor_id
self.life_long = tensor_proto.life_long
@@ -204,48 +200,25 @@ class MemoryTensor:
self.life_end = tensor_proto.life_end
self.size = tensor_proto.size / GIGABYTES
self.type = tensor_proto.type
self.graph_id = graph_id
self.shape = ""
self.format = ""
self.dtype = ""
self.source_node = ""
self.name = ""

def to_dict(self):
"""Convert Tensor to a dict."""
tensor = {
'tensor_name': self.name,
'tensor_id': self.tensor_id,
'size': self.size,
'type': self.type,
'shape': self.shape,
'format': self.format,
'data_type': self.dtype,
'life_long': self.life_long,
'life_start': self.life_start,
'life_end': self.life_end
}

return tensor


class MemoryQueue:
"""
A priority queue to keep specified number of active nodes in memory activities.

Args:
size (int): The upper limit of nodes to be saved.
"""
def __init__(self, size):
self._queue = []
self._index = 0
self._size = size

def push(self, item, priority):
"""
Push a node into MemoryQueue.

Args:
item (tuple): Node item including id, name, etc.
priority (int): The priority of the item.
"""
if self._index < self._size:
heapq.heappush(self._queue, (-priority, item))
self._index += 1
else:
heapq.heappushpop(self._queue, (-priority, item))

def get_items(self):
"""Get the elements in MemoryQueue."""
return self._queue
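
For context, MemoryQueue (removed above) implemented a bounded top-N heap: the parser pushed each node with priority=-node.size, so the queue retained the ten largest-memory nodes. A standalone sketch of the same pattern, with the heap key kept direct for clarity:

    import heapq

    class TopN:
        """Keep the N items with the largest priority via a min-heap."""
        def __init__(self, size):
            self._heap = []
            self._size = size

        def push(self, item, priority):
            if len(self._heap) < self._size:
                heapq.heappush(self._heap, (priority, item))
            else:
                # Heap root is the current smallest; evict it if beaten.
                heapq.heappushpop(self._heap, (priority, item))

        def items(self):
            return [item for _, item in sorted(self._heap, reverse=True)]

    top = TopN(size=3)
    for name, size in [('A', 4), ('B', 9), ('C', 1), ('D', 7)]:
        top.push(name, priority=size)
    print(top.items())  # ['B', 'D', 'A'] -- the three largest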

mindspore/profiler/parser/memory_usage_parser.py  (+193, -178)

@@ -13,6 +13,7 @@
# limitations under the License.
# ============================================================================
"""Memory Usage Parser."""
from collections import OrderedDict
import json
import os
import stat
@@ -22,12 +23,11 @@ from google.protobuf.text_format import ParseError
from mindspore import log as logger
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
ProfilerFileNotFoundException, ProfilerRawFileException
from mindspore.profiler.common.proto_files.memory_usage_pb2 import MemoryProto
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
from mindspore.profiler.parser.container import MemoryGraph as Graph
from mindspore.profiler.parser.container import MemoryNode as Node
from mindspore.profiler.parser.container import MemoryQueue
from mindspore.profiler.parser.container import MemoryTensor as Tensor
from mindspore.train.memory_profiling_pb2 import MemoryProto

GIGABYTES = 1024 * 1024 * 1024

@@ -47,11 +47,10 @@ class MemoryUsageParser:
'allocations': 0,
'deallocations': 0,
'peak_mem': 0,
'static_mem': 0,
'breakdowns': []
'static_mem': 0
}
self._active_nodes = MemoryQueue(size=10)
self._framework = {}
self._points = {}

def _get_file_path(self):
"""Get the proto file path."""
@@ -73,7 +72,9 @@ class MemoryUsageParser:
logger.info("Start to load memory usage data from pb file")
file_path = self._get_file_path()
self._framework = self._process_framework_info(aicore_detail_data)
self._points = points

# Open memory protobuf file.
try:
with open(file_path, 'rb') as f:
content = f.read()
@@ -81,106 +82,175 @@ class MemoryUsageParser:
logger.error('Failed to read memory file: %s', err)
raise ProfilerIOException

model_proto = MemoryProto()
# Parse raw memory data from the file.
memory_proto = MemoryProto()
try:
model_proto.ParseFromString(content)
memory_proto.ParseFromString(content)
except ParseError as err:
msg = "Fail to parse memory proto file."
logger.error("Cannot parse the memory file. Please check the file schema.\n%s", err)
raise ProfilerRawFileException(msg)

graphs = model_proto.graph_mem
self._graphs_dict = self._parse_graphs(graphs, points)
self._mem_summary['capacity'] = model_proto.total_mem / GIGABYTES
# Parse memory details based on graphs in the network.
graphs = memory_proto.graph_mem
self._parse_graph_memory(graphs)
# Update memory summary information.
self._mem_summary['capacity'] = memory_proto.total_mem / GIGABYTES
self._mem_summary['peak_mem'] = self._peak_mem
self._process_memory_breakdowns()

logger.info('Finished processing memory usage data.')

def _parse_graphs(self, graphs, points):
"""Parse subgraphs."""
graphs_dict = {}
def _parse_graph_memory(self, graphs):
"""Parse memory usage based on subgraphs."""
for graph_proto in graphs:
graph_id = graph_proto.graph_id
if graph_id is None:
logger.info('Graph id is missing, skipped the graph.')
continue

graph = Graph(graph_proto)
graph_parser = GraphMemoryParser(graph_proto, self._points, self._framework)
graph = graph_parser.parse_graph()
if graph:
self._graphs_dict[graph_id] = graph

# process tensors in the graph
tensors_proto = graph_proto.tensor_mems
if not tensors_proto:
logger.info('No tensor in graph %s, skipped.', graph_id)
continue
tensors_dict = self._parse_tensors(tensors_proto, graph_id)
# update global memory usage data
self._peak_mem = max(self._peak_mem, graph_parser.peak_mem)
self._mem_summary['static_mem'] += graph_parser.static_mem
self._mem_summary['allocations'] += graph_parser.allocations
self._mem_summary['deallocations'] += graph_parser.deallocations

# calculate memory usage of the graph by number of nodes and details of tensors
nodes_proto = graph_proto.node_mems
# init memory usage list with static memory
mem_change = [graph.static_mem for _ in range(len(nodes_proto))]
self._calc_mem_change(mem_change, tensors_dict)
graph.lines = mem_change
def _write_memory_files(self, filename, content):
"""Write the summary and top breakdowns of memory usage."""
file_path = os.path.join(self._profiling_dir, filename)
file_path = validate_and_normalize_path(file_path)

# process nodes in graph
graph.nodes = self._parse_nodes(
nodes_proto, mem_change, tensors_dict, graph
)
try:
with open(file_path, 'w') as json_file:
json.dump(content, json_file)
os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
except (IOError, OSError) as err:
logger.error('Fail to write memory file.\n%s', err)
raise ProfilerIOException

# update fp_start and bp_end
point_id = self._locate_fp_bp_id(points, graph.nodes)
graph.fp_start = point_id.get('fp_start')
graph.bp_end = point_id.get('bp_end')
def write_memory_files(self):
"""Write memory files."""
logger.info('Start recording memory data into files...')
# write memory summary to json file
summary_filename = self._summary_filename.format(self._device_id)
self._write_memory_files(summary_filename, self._mem_summary)

graphs_dict.update({graph_id: graph.to_dict()})
# write memory details to json file
details_filename = self._details_filename.format(self._device_id)
self._write_memory_files(details_filename, self._graphs_dict)
logger.info('Successfully write memory data into files.')

self._mem_summary['static_mem'] += graph.static_mem
self._mem_summary['allocations'] += len(tensors_dict) + 1
self._mem_summary['deallocations'] += len(tensors_dict) + 1
self._peak_mem = max(max(mem_change), self._peak_mem)
@staticmethod
def _process_framework_info(aicore_detail_data):
"""Process framework info."""
framework_info_dict = {}
for framework_obj in aicore_detail_data:
op_name = framework_obj[0]
op_full_name = framework_obj[4]
op_info = framework_obj[5]
framework_info_dict[op_name] = {
'fullname': op_full_name,
'name': op_name,
'args': op_info
}

return framework_info_dict

return graphs_dict

@staticmethod
def _parse_tensors(tensors_proto, graph_id):
class GraphMemoryParser:
"""Parse memory usage data for each graph."""
def __init__(self, graph_proto, points, framework):
self.graph = None
self.nodes = OrderedDict()
self.tensors = OrderedDict()
self._framework = framework
self._points = points
self._graph_proto = graph_proto
self.peak_mem = 0
self.static_mem = 0
self.allocations = 0
self.deallocations = 0
self._mem_change = []
self.breakdowns = []
self._lifetime = []

def parse_graph(self):
"""Parse memory usage data for subgraphs."""
graph_dict = {}
self.graph = Graph(self._graph_proto)
# process tensors in the graph
tensors_proto = self._graph_proto.tensor_mems
if not tensors_proto:
logger.info('No tensor in graph %s, skipped.', self.graph.graph_id)
return graph_dict
self._parse_tensors(tensors_proto)

# calculate memory usage of the graph by number of nodes and details of tensors
nodes_proto = self._graph_proto.node_mems
# init memory usage list with static memory
self._mem_change = [self.graph.static_mem for _ in range(len(nodes_proto))]
self._lifetime = [[] for _ in range(len(nodes_proto))]
self._calc_mem_change() # update self._mem_change and self._lifetime
self.graph.lines = self._mem_change

# process nodes in graph
self.graph.nodes = self._parse_nodes(nodes_proto)

self._process_memory_breakdowns()
self.graph.breakdowns = self.breakdowns

# update fp_start and bp_end
point_id = self._locate_fp_bp_id()
self.graph.fp_start = point_id.get('fp_start')
self.graph.bp_end = point_id.get('bp_end')

graph_dict = self.graph.to_dict()

self.static_mem = self.graph.static_mem
self.allocations = len(self.tensors)
self.deallocations = len(self.tensors)
self.peak_mem = max(max(self._mem_change), self.peak_mem)

return graph_dict

def _parse_tensors(self, tensors_proto):
"""Parse tensors."""
tensors_dict = {}
for tensor_proto in tensors_proto:
tensor = Tensor(tensor_proto, graph_id)
tensors_dict.update({tensor.tensor_id: tensor})
tensor = Tensor(tensor_proto)
self.tensors.update({tensor.tensor_id: tensor})

return tensors_dict

def _parse_nodes(self, nodes_proto, mem_change, tensors_dict, graph):
def _parse_nodes(self, nodes_proto):
"""Parse nodes."""
nodes_dict = {}
nodes_list = []
for index, node_proto in enumerate(nodes_proto):
node = Node(node_proto, graph.graph_id)
tensors = set(node.output_ids + node.workspace_ids)
node.size = self._calc_node_memory(tensors, tensors_dict)
node.allocations = len(tensors)
node.deallocations = len(tensors)
node = Node(node_proto)
# Calculate memory size allocated for this node
tensor_ids = set(node.output_ids + node.workspace_ids)
node.size = self._calc_node_memory(tensor_ids)
node.allocations = len(tensor_ids)
node.deallocations = len(tensor_ids)

# calculate the allocated/deallocated memory size on the node
if index == 0:
node.mem_change = mem_change[index] - graph.static_mem
node.mem_change = self._mem_change[index] - self.graph.static_mem
else:
node.mem_change = mem_change[index] - mem_change[index-1]

self._update_nodes(node, tensors_dict)
nodes_dict[node.name] = node.to_dict()
node.mem_change = self._mem_change[index] - self._mem_change[index-1]

# update active nodes
self._active_nodes.push(
item=(node.name, node.node_id, node.size, graph.graph_id),
priority=-node.size # priority is the negative value of node size
)
self._update_nodes(node)
self._update_tensor_source(node)
self.nodes[node.name] = node
nodes_list.append(node.to_dict())

return nodes_dict
return nodes_list

def _update_nodes(self, node, tensors_dict):
def _update_nodes(self, node):
"""Update nodes."""
skipped = self._find_conflict_tensors(node)
# Remove duplicate tensors
self._remove_duplicate_tensors(node)
name = node.name
if self._framework and name in self._framework:
node_frame = self._framework[name]
@@ -192,59 +262,38 @@ class MemoryUsageParser:
else:
node.outputs.append(value)

node.inputs = self._fill_tensor_dict(
node.inputs, node.input_ids, tensors_dict, 'input'
)
node.outputs = self._fill_tensor_dict(
node.outputs, node.output_ids, tensors_dict, 'output'
)
node.workspaces = self._fill_tensor_dict(
node.workspaces, node.workspace_ids, tensors_dict, 'workspace', skipped
)
def _update_tensor_source(self, node):
"""Update source node for tensors."""
for t_id in node.output_ids:
tensor = self.tensors.get(t_id)
tensor.source_node = node.name

@staticmethod
def _find_conflict_tensors(node):
def _remove_duplicate_tensors(node):
"""Find conflict tensors in node."""
output_list = []
if node.output_ids:
output_list = node.output_ids
skipped = []
if node.workspace_ids:
for t_id in node.workspace_ids:
if t_id in output_list:
skipped.append(t_id)

return skipped

@staticmethod
def _fill_tensor_dict(node_ios, tensor_ids, tensors_dict, tensor_type, skipped=None):
"""Fill tensor dict."""
full_list = []
for t_id, io_dict in zip(tensor_ids, node_ios):
if tensor_type == 'workspace' and t_id in skipped:
continue
tensor = tensors_dict.get(t_id)
tensor.type = tensor_type
io_dict.update(tensor.to_dict())
full_list.append(io_dict)

return full_list

@staticmethod
def _calc_node_memory(tensors, tensors_dict):
i = 0
while i < len(node.workspace_ids):
t_id = node.workspace_ids[i]
if t_id in node.output_ids:
del node.workspace_ids[i] # remove duplicate tensor
continue
i += 1

def _calc_node_memory(self, tensor_ids):
"""Calculate the allocated memory for the node."""
node_mem = 0
for t_id in tensors:
tensor = tensors_dict[t_id]
for t_id in tensor_ids:
tensor = self.tensors[t_id]
size = tensor.size
node_mem += size

return node_mem

def _calc_mem_change(self, mem_change, tensors_dict):
def _calc_mem_change(self):
"""Calculate the memory change for the subgraph."""
node_num = len(mem_change)
for tensor_id, tensor in tensors_dict.items():
node_num = len(self._mem_change)
for tensor_id, tensor in self.tensors.items():
life_long = tensor.life_long
life_start = tensor.life_start
life_end = tensor.life_end
@@ -255,101 +304,67 @@ class MemoryUsageParser:
if life_long == 'LifeLongGraphAll': # lifetime is from graph start to graph end
tensor.life_start = 0
tensor.life_end = node_num
self._update_mem_change(mem_change, size, 0, node_num)
self._update_mem_change(size, 0, node_num, tensor_id)
elif life_long == 'LifeLongGraphStart': # lifetime is from graph start to tensor end
if life_end is not None and life_end >= 0:
tensor.life_start = 0
self._update_mem_change(mem_change, size, 0, life_end+1)
self._update_mem_change(size, 0, life_end+1, tensor_id)
else:
logger.info('Cannot locate lifetime end for tensor: %s', tensor_id)
elif life_long == 'LifeLongGraphEnd': # lifetime is from tensor start to graph end
if life_start is not None and life_start <= node_num:
tensor.life_end = node_num
self._update_mem_change(mem_change, size, life_start, node_num)
self._update_mem_change(size, life_start, node_num, tensor_id)
else:
logger.info('Cannot locate lifetime start for tensor: %s', tensor_id)
elif life_long == 'LifeLongNone': # lifetime is from tensor start to tensor end
if life_start is not None and life_end is not None and life_start <= life_end:
self._update_mem_change(mem_change, size, life_start, life_end+1)
self._update_mem_change(size, life_start, life_end+1, tensor_id)
else:
logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id)

@staticmethod
def _update_mem_change(mem_change, size, start, end):
def _update_mem_change(self, size, start, end, tensor_id):
"""Update memory change for the subgraph."""
for i in range(start, end):
mem_change[i] += size
self._mem_change[i] += size
# Update tensor lifetime list.
self._lifetime[i].append(tensor_id)

@staticmethod
def _locate_fp_bp_id(points, nodes):
def _locate_fp_bp_id(self):
"""Locate the node id of fp_start and bp_end in graph."""
point_id = {
'fp_start': None,
'bp_end': None
}
fp_start = points.get('fp_start')
bp_end = points.get('bp_end')
fp_start = self._points.get('fp_start')
bp_end = self._points.get('bp_end')
fp_name = fp_start.split('/')[-1] if fp_start else ""
bp_name = bp_end.split('/')[-1] if bp_end else ""
if fp_name in nodes:
point_id['fp_start'] = nodes[fp_name].get('node_id')
if bp_name in nodes:
point_id['bp_end'] = nodes[bp_name].get('node_id')
if fp_name in self.nodes:
point_id['fp_start'] = self.nodes[fp_name].node_id
if bp_name in self.nodes:
point_id['bp_end'] = self.nodes[bp_name].node_id

return point_id

def _write_memory_files(self, filename, content):
"""Write the summary and top breakdowns of memory usage."""
file_path = os.path.join(self._profiling_dir, filename)
file_path = validate_and_normalize_path(file_path)

try:
with open(file_path, 'w') as json_file:
json.dump(content, json_file)
os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
except (IOError, OSError) as err:
logger.error('Fail to write memory file.\n%s', err)
raise ProfilerIOException

def write_memory_files(self):
"""Write memory files."""
logger.info('Start recording memory data into files...')
# write memory summary to json file
summary = self._summary_filename.format(self._device_id)
self._write_memory_files(summary, self._mem_summary)

# write memory details to json file
details = self._details_filename.format(self._device_id)
self._write_memory_files(details, self._graphs_dict)
logger.info('Successfully write memory data into files.')

def _process_memory_breakdowns(self):
"""Process memory breakdowns."""
breakdowns = []
active_nodes = self._active_nodes.get_items()
for _, node_meta in active_nodes:
node_name, _, _, graph_id = node_meta
graph = self._graphs_dict[graph_id]
nodes_dict = graph.get('nodes')
node = nodes_dict.get(node_name)
if 'inputs' in node:
node.pop('inputs')
breakdowns.append(node)

self._mem_summary['breakdowns'] = breakdowns

@staticmethod
def _process_framework_info(aicore_detail_data):
"""Process framework info."""
framework_info_dict = {}
for framework_obj in aicore_detail_data:
op_name = framework_obj[0]
op_full_name = framework_obj[4]
op_info = framework_obj[5]
framework_info_dict[op_name] = {
'fullname': op_full_name,
'name': op_name,
'args': op_info
}

return framework_info_dict
"""Process memory breakdowns for each node."""
self.breakdowns = [[] for _ in range(len(self.nodes))]
for index, breakdown in enumerate(self._lifetime):
for t_id in breakdown:
tensor = self.tensors.get(t_id)
source_node = tensor.source_node
if not source_node:
continue
node = self.nodes.get(source_node)
for i, output_id in enumerate(node.output_ids):
if t_id == output_id:
output = node.outputs[i] if i < len(node.outputs) else {}
tensor.name = node.name + ':' + str(i)
tensor.shape = output.get('shape')
tensor.dtype = output.get('data_type')
tensor.format = output.get('format')
tensor.type = 'output'

tensor_dict = tensor.to_dict()
self.breakdowns[index].append(tensor_dict)
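
Taken together, _calc_mem_change and _update_mem_change build the per-execution-id memory curve (graph.lines) plus the list of tensors alive at each step (self._lifetime), which _process_memory_breakdowns then expands into tensor dicts. A condensed sketch of that accounting, with hypothetical sizes and lifetimes:

    static_mem = 0.5                          # GiB of static graph memory
    tensors = {                               # tensor_id -> (size GiB, life_start, life_end)
        0: (0.25, 0, 2),
        1: (0.125, 1, 3),
    }
    node_num = 4
    mem_change = [static_mem] * node_num      # memory curve, one point per execution id
    lifetime = [[] for _ in range(node_num)]  # tensor ids alive at each execution id

    for t_id, (size, start, end) in tensors.items():
        for i in range(start, end + 1):       # the parser passes life_end+1 as the stop
            mem_change[i] += size
            lifetime[i].append(t_id)

    print(mem_change)  # [0.75, 0.875, 0.875, 0.625]
    print(lifetime)    # [[0], [0, 1], [0, 1], [1]]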
