Browse Source

profiler mem usage: added memory breakdowns for each execution ID

tags/v1.2.0-rc1
zhangyunshu 4 years ago
parent
commit
aa42728e84
6 changed files with 213 additions and 585 deletions
  1. +2
    -2
      mindspore/ccsrc/CMakeLists.txt
  2. +0
    -15
      mindspore/profiler/common/proto_files/__init__.py
  3. +0
    -50
      mindspore/profiler/common/proto_files/memory_usage.proto
  4. +0
    -295
      mindspore/profiler/common/proto_files/memory_usage_pb2.py
  5. +18
    -45
      mindspore/profiler/parser/container.py
  6. +193
    -178
      mindspore/profiler/parser/memory_usage_parser.py

+ 2
- 2
mindspore/ccsrc/CMakeLists.txt View File

@@ -126,8 +126,8 @@ list(APPEND MINDSPORE_PROTO_LIST ${COMM_PROTO_SRCS})
include_directories("${CMAKE_BINARY_DIR}/profiler/device/common") include_directories("${CMAKE_BINARY_DIR}/profiler/device/common")
file(GLOB_RECURSE PROFILER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} file(GLOB_RECURSE PROFILER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"profiler/device/common/memory_profiling.proto") "profiler/device/common/memory_profiling.proto")
# Generate C++ sources/headers from the profiler memory-profiling proto and
# feed the generated .cc files into the main proto source list.
ms_protobuf_generate(PROFILER_MEM_PROTO_SRCS PROFILER_MEM_PROTO_HDRS ${PROFILER_PROTO_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_SRCS})
# Generate the Python bindings for the same proto file.
# NOTE(review): PROFILER_MEM_PROTO_PY holds protoc *Python* outputs; appending it to
# MINDSPORE_PROTO_LIST (which otherwise collects C++ proto sources) looks suspicious -
# confirm this is intended and not a copy/paste of the C++ branch above.
ms_protobuf_generate_py(PROFILER_MEM_PROTO_PY PROFILER_MEM_PROTO_HDRS_PY PROFILER_MEM_PROTO_PYS ${PROFILER_PROTO_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_PY})


if(ENABLE_DEBUGGER) if(ENABLE_DEBUGGER)
# debugger: compile proto files # debugger: compile proto files


+ 0
- 15
mindspore/profiler/common/proto_files/__init__.py View File

@@ -1,15 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The proto files for profiler."""

+ 0
- 50
mindspore/profiler/common/proto_files/memory_usage.proto View File

@@ -1,50 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package profiler;

// Memory usage summary for a whole model on one device.
message MemoryProto {
  repeated GraphMemProto graph_mem = 1;  // memory usage of each subgraph
  int64 total_mem = 2;                   // total memory allocated on the device
}

// Memory usage details of a single computation graph.
message GraphMemProto {
  int64 graph_id = 1;                       // graph identifier
  int64 static_mem = 2;                     // static memory allocated for this graph
  repeated NodeMemProto node_mems = 3;      // nodes, in execution order
  repeated TensorMemProto tensor_mems = 4;  // all tensors used by the graph
  string fp_start = 5;                      // name of the node where forward pass starts
  string bp_end = 6;                        // name of the node where backward pass ends
}

// Tensor references of one execution node.
message NodeMemProto {
  string node_name = 1;                    // node name
  int64 node_id = 2;                       // node id with respect to the execution order
  repeated int64 input_tensor_id = 3;      // ids of input tensors
  repeated int64 output_tensor_id = 4;     // ids of output tensors
  repeated int64 workspace_tensor_id = 5;  // ids of workspace tensors
}

// Size and lifetime of a single tensor.
message TensorMemProto {
  int64 tensor_id = 1;   // tensor id
  int64 size = 2;        // aligned tensor size
  string type = 3;       // tensor type, e.g. Common, OutputOnly
  int64 life_start = 4;  // node id at which memory is allocated
  int64 life_end = 5;    // node id at which memory is deallocated
  string life_long = 6;  // lifetime category, e.g. LifeLongGraphAll
}

+ 0
- 295
mindspore/profiler/common/proto_files/memory_usage_pb2.py View File

@@ -1,295 +0,0 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: mindspore/profiler/common/proto_files/memory_usage.proto
# NOTE(review): machine-generated module - to change anything here, edit
# memory_usage.proto and regenerate with protoc; never hand-edit the
# descriptor contents below.

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()




# File-level descriptor carrying the serialized memory_usage.proto schema.
DESCRIPTOR = _descriptor.FileDescriptor(
name='mindspore/profiler/common/proto_files/memory_usage.proto',
package='profiler',
syntax='proto3',
serialized_options=None,
serialized_pb=_b('\n8mindspore/profiler/common/proto_files/memory_usage.proto\x12\x08profiler\"L\n\x0bMemoryProto\x12*\n\tgraph_mem\x18\x01 \x03(\x0b\x32\x17.profiler.GraphMemProto\x12\x11\n\ttotal_mem\x18\x02 \x01(\x03\"\xb1\x01\n\rGraphMemProto\x12\x10\n\x08graph_id\x18\x01 \x01(\x03\x12\x12\n\nstatic_mem\x18\x02 \x01(\x03\x12)\n\tnode_mems\x18\x03 \x03(\x0b\x32\x16.profiler.NodeMemProto\x12-\n\x0btensor_mems\x18\x04 \x03(\x0b\x32\x18.profiler.TensorMemProto\x12\x10\n\x08\x66p_start\x18\x05 \x01(\t\x12\x0e\n\x06\x62p_end\x18\x06 \x01(\t\"\x82\x01\n\x0cNodeMemProto\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\x03\x12\x17\n\x0finput_tensor_id\x18\x03 \x03(\x03\x12\x18\n\x10output_tensor_id\x18\x04 \x03(\x03\x12\x1b\n\x13workspace_tensor_id\x18\x05 \x03(\x03\"x\n\x0eTensorMemProto\x12\x11\n\ttensor_id\x18\x01 \x01(\x03\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x12\n\nlife_start\x18\x04 \x01(\x03\x12\x10\n\x08life_end\x18\x05 \x01(\x03\x12\x11\n\tlife_long\x18\x06 \x01(\tb\x06proto3')
)




# Descriptor for profiler.MemoryProto (top-level device memory summary).
_MEMORYPROTO = _descriptor.Descriptor(
name='MemoryProto',
full_name='profiler.MemoryProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='graph_mem', full_name='profiler.MemoryProto.graph_mem', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='total_mem', full_name='profiler.MemoryProto.total_mem', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=70,
serialized_end=146,
)


# Descriptor for profiler.GraphMemProto (per-graph memory usage).
_GRAPHMEMPROTO = _descriptor.Descriptor(
name='GraphMemProto',
full_name='profiler.GraphMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='graph_id', full_name='profiler.GraphMemProto.graph_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='static_mem', full_name='profiler.GraphMemProto.static_mem', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='node_mems', full_name='profiler.GraphMemProto.node_mems', index=2,
number=3, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='tensor_mems', full_name='profiler.GraphMemProto.tensor_mems', index=3,
number=4, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='fp_start', full_name='profiler.GraphMemProto.fp_start', index=4,
number=5, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='bp_end', full_name='profiler.GraphMemProto.bp_end', index=5,
number=6, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=149,
serialized_end=326,
)


# Descriptor for profiler.NodeMemProto (per-node tensor references).
_NODEMEMPROTO = _descriptor.Descriptor(
name='NodeMemProto',
full_name='profiler.NodeMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='node_name', full_name='profiler.NodeMemProto.node_name', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='node_id', full_name='profiler.NodeMemProto.node_id', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='input_tensor_id', full_name='profiler.NodeMemProto.input_tensor_id', index=2,
number=3, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='output_tensor_id', full_name='profiler.NodeMemProto.output_tensor_id', index=3,
number=4, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='workspace_tensor_id', full_name='profiler.NodeMemProto.workspace_tensor_id', index=4,
number=5, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=329,
serialized_end=459,
)


# Descriptor for profiler.TensorMemProto (per-tensor size and lifetime).
_TENSORMEMPROTO = _descriptor.Descriptor(
name='TensorMemProto',
full_name='profiler.TensorMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='tensor_id', full_name='profiler.TensorMemProto.tensor_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='size', full_name='profiler.TensorMemProto.size', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='type', full_name='profiler.TensorMemProto.type', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_start', full_name='profiler.TensorMemProto.life_start', index=3,
number=4, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_end', full_name='profiler.TensorMemProto.life_end', index=4,
number=5, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_long', full_name='profiler.TensorMemProto.life_long', index=5,
number=6, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=461,
serialized_end=581,
)

# Resolve cross-message field types and register all messages with the
# process-wide symbol database.
_MEMORYPROTO.fields_by_name['graph_mem'].message_type = _GRAPHMEMPROTO
_GRAPHMEMPROTO.fields_by_name['node_mems'].message_type = _NODEMEMPROTO
_GRAPHMEMPROTO.fields_by_name['tensor_mems'].message_type = _TENSORMEMPROTO
DESCRIPTOR.message_types_by_name['MemoryProto'] = _MEMORYPROTO
DESCRIPTOR.message_types_by_name['GraphMemProto'] = _GRAPHMEMPROTO
DESCRIPTOR.message_types_by_name['NodeMemProto'] = _NODEMEMPROTO
DESCRIPTOR.message_types_by_name['TensorMemProto'] = _TENSORMEMPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete message classes built from the descriptors above.
MemoryProto = _reflection.GeneratedProtocolMessageType('MemoryProto', (_message.Message,), {
'DESCRIPTOR' : _MEMORYPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.MemoryProto)
})
_sym_db.RegisterMessage(MemoryProto)

GraphMemProto = _reflection.GeneratedProtocolMessageType('GraphMemProto', (_message.Message,), {
'DESCRIPTOR' : _GRAPHMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.GraphMemProto)
})
_sym_db.RegisterMessage(GraphMemProto)

NodeMemProto = _reflection.GeneratedProtocolMessageType('NodeMemProto', (_message.Message,), {
'DESCRIPTOR' : _NODEMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.NodeMemProto)
})
_sym_db.RegisterMessage(NodeMemProto)

TensorMemProto = _reflection.GeneratedProtocolMessageType('TensorMemProto', (_message.Message,), {
'DESCRIPTOR' : _TENSORMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.TensorMemProto)
})
_sym_db.RegisterMessage(TensorMemProto)


# @@protoc_insertion_point(module_scope)

+ 18
- 45
mindspore/profiler/parser/container.py View File

@@ -13,8 +13,6 @@
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
"""The container of metadata used in profiler parser.""" """The container of metadata used in profiler parser."""
import heapq

GIGABYTES = 1024 * 1024 * 1024 GIGABYTES = 1024 * 1024 * 1024




@@ -131,6 +129,7 @@ class MemoryGraph:
self.bp_end = None self.bp_end = None
self.lines = [] self.lines = []
self.nodes = {} self.nodes = {}
self.breakdowns = []


def to_dict(self): def to_dict(self):
"""Convert Graph to dict.""" """Convert Graph to dict."""
@@ -140,7 +139,8 @@ class MemoryGraph:
'nodes': self.nodes, 'nodes': self.nodes,
'fp_start': self.fp_start, 'fp_start': self.fp_start,
'bp_end': self.bp_end, 'bp_end': self.bp_end,
'lines': self.lines
'lines': self.lines,
'breakdowns': self.breakdowns
} }


return graph return graph
@@ -152,17 +152,15 @@ class MemoryNode:


Args: Args:
node_proto (proto): Node proto. node_proto (proto): Node proto.
graph_id (int): Graph id.
""" """
def __init__(self, node_proto, graph_id):
def __init__(self, node_proto):
self._node_proto = node_proto self._node_proto = node_proto
self.graph_id = graph_id
self.node_id = node_proto.node_id self.node_id = node_proto.node_id
self.name = node_proto.node_name self.name = node_proto.node_name
self.fullname = "" self.fullname = ""
self.input_ids = [t_id for t_id in node_proto.input_tensor_id]
self.output_ids = [t_id for t_id in node_proto.output_tensor_id]
self.workspace_ids = [t_id for t_id in node_proto.workspace_tensor_id]
self.input_ids = list(node_proto.input_tensor_id)
self.output_ids = list(node_proto.output_tensor_id)
self.workspace_ids = list(node_proto.workspace_tensor_id)
self.inputs = [] self.inputs = []
self.outputs = [] self.outputs = []
self.workspaces = [] self.workspaces = []
@@ -181,8 +179,7 @@ class MemoryNode:
'size': self.size, 'size': self.size,
'allocated': self.mem_change, 'allocated': self.mem_change,
'inputs': self.inputs, 'inputs': self.inputs,
'outputs': self.outputs,
'workspaces': self.workspaces
'outputs': self.outputs
} }


return node return node
@@ -194,9 +191,8 @@ class MemoryTensor:


Args: Args:
tensor_proto (proto): Tensor proto. tensor_proto (proto): Tensor proto.
graph_id (int): Graph id.
""" """
def __init__(self, tensor_proto, graph_id):
def __init__(self, tensor_proto):
self._tensor_proto = tensor_proto self._tensor_proto = tensor_proto
self.tensor_id = tensor_proto.tensor_id self.tensor_id = tensor_proto.tensor_id
self.life_long = tensor_proto.life_long self.life_long = tensor_proto.life_long
@@ -204,48 +200,25 @@ class MemoryTensor:
self.life_end = tensor_proto.life_end self.life_end = tensor_proto.life_end
self.size = tensor_proto.size / GIGABYTES self.size = tensor_proto.size / GIGABYTES
self.type = tensor_proto.type self.type = tensor_proto.type
self.graph_id = graph_id
self.shape = ""
self.format = ""
self.dtype = ""
self.source_node = ""
self.name = ""


def to_dict(self): def to_dict(self):
"""Convert Tensor to a dict.""" """Convert Tensor to a dict."""
tensor = { tensor = {
'tensor_name': self.name,
'tensor_id': self.tensor_id, 'tensor_id': self.tensor_id,
'size': self.size, 'size': self.size,
'type': self.type, 'type': self.type,
'shape': self.shape,
'format': self.format,
'data_type': self.dtype,
'life_long': self.life_long, 'life_long': self.life_long,
'life_start': self.life_start, 'life_start': self.life_start,
'life_end': self.life_end 'life_end': self.life_end
} }


return tensor return tensor


class MemoryQueue:
    """
    A bounded heap of memory-activity nodes.

    Entries are stored as ``(-priority, item)`` pairs on a min-heap capped at
    ``size`` elements; once the queue is full, every push evicts the current
    minimum entry (the most-negative ``-priority`` key).

    Args:
        size (int): The upper limit of entries to retain.
    """

    def __init__(self, size):
        self._size = size
        self._queue = []
        self._index = 0

    def push(self, item, priority):
        """
        Insert a node into the queue, evicting one entry when at capacity.

        Args:
            item (tuple): Node item including id, name, etc.
            priority (int): The priority of the item (negated before storage).
        """
        entry = (-priority, item)
        if self._index >= self._size:
            heapq.heappushpop(self._queue, entry)
        else:
            heapq.heappush(self._queue, entry)
            self._index += 1

    def get_items(self):
        """Return the retained ``(-priority, item)`` heap entries."""
        return self._queue

+ 193
- 178
mindspore/profiler/parser/memory_usage_parser.py View File

@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
"""Memory Usage Parser.""" """Memory Usage Parser."""
from collections import OrderedDict
import json import json
import os import os
import stat import stat
@@ -22,12 +23,11 @@ from google.protobuf.text_format import ParseError
from mindspore import log as logger from mindspore import log as logger
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \ from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
ProfilerFileNotFoundException, ProfilerRawFileException ProfilerFileNotFoundException, ProfilerRawFileException
from mindspore.profiler.common.proto_files.memory_usage_pb2 import MemoryProto
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
from mindspore.profiler.parser.container import MemoryGraph as Graph from mindspore.profiler.parser.container import MemoryGraph as Graph
from mindspore.profiler.parser.container import MemoryNode as Node from mindspore.profiler.parser.container import MemoryNode as Node
from mindspore.profiler.parser.container import MemoryQueue
from mindspore.profiler.parser.container import MemoryTensor as Tensor from mindspore.profiler.parser.container import MemoryTensor as Tensor
from mindspore.train.memory_profiling_pb2 import MemoryProto


GIGABYTES = 1024 * 1024 * 1024 GIGABYTES = 1024 * 1024 * 1024


@@ -47,11 +47,10 @@ class MemoryUsageParser:
'allocations': 0, 'allocations': 0,
'deallocations': 0, 'deallocations': 0,
'peak_mem': 0, 'peak_mem': 0,
'static_mem': 0,
'breakdowns': []
'static_mem': 0
} }
self._active_nodes = MemoryQueue(size=10)
self._framework = {} self._framework = {}
self._points = {}


def _get_file_path(self): def _get_file_path(self):
"""Get the proto file path.""" """Get the proto file path."""
@@ -73,7 +72,9 @@ class MemoryUsageParser:
logger.info("Start to load memory usage data from pb file") logger.info("Start to load memory usage data from pb file")
file_path = self._get_file_path() file_path = self._get_file_path()
self._framework = self._process_framework_info(aicore_detail_data) self._framework = self._process_framework_info(aicore_detail_data)
self._points = points


# Open memory protobuf file.
try: try:
with open(file_path, 'rb') as f: with open(file_path, 'rb') as f:
content = f.read() content = f.read()
@@ -81,106 +82,175 @@ class MemoryUsageParser:
logger.error('Failed to read memory file: %s', err) logger.error('Failed to read memory file: %s', err)
raise ProfilerIOException raise ProfilerIOException


model_proto = MemoryProto()
# Parse memory raw data from file.
memory_proto = MemoryProto()
try: try:
model_proto.ParseFromString(content)
memory_proto.ParseFromString(content)
except ParseError as err: except ParseError as err:
msg = "Fail to parse memory proto file." msg = "Fail to parse memory proto file."
logger.error("Cannot parse the memory file. Please check the file schema.\n%s", err) logger.error("Cannot parse the memory file. Please check the file schema.\n%s", err)
raise ProfilerRawFileException(msg) raise ProfilerRawFileException(msg)


graphs = model_proto.graph_mem
self._graphs_dict = self._parse_graphs(graphs, points)
self._mem_summary['capacity'] = model_proto.total_mem / GIGABYTES
# Parse memory details based on graphs in the network.
graphs = memory_proto.graph_mem
self._parse_graph_memory(graphs)
# Update memory summary information.
self._mem_summary['capacity'] = memory_proto.total_mem / GIGABYTES
self._mem_summary['peak_mem'] = self._peak_mem self._mem_summary['peak_mem'] = self._peak_mem
self._process_memory_breakdowns()


logger.info('Finished processing memory usage data.') logger.info('Finished processing memory usage data.')


def _parse_graphs(self, graphs, points):
"""Parse subgraphs."""
graphs_dict = {}
def _parse_graph_memory(self, graphs):
"""Parse memory usage based on subgraphs."""
for graph_proto in graphs: for graph_proto in graphs:
graph_id = graph_proto.graph_id graph_id = graph_proto.graph_id
if graph_id is None: if graph_id is None:
logger.info('Graph id is missing, skipped the graph.') logger.info('Graph id is missing, skipped the graph.')
continue continue


graph = Graph(graph_proto)
graph_parser = GraphMemoryParser(graph_proto, self._points, self._framework)
graph = graph_parser.parse_graph()
if graph:
self._graphs_dict[graph_id] = graph


# process tensors in the graph
tensors_proto = graph_proto.tensor_mems
if not tensors_proto:
logger.info('No tensor in graph %s, skipped.', graph_id)
continue
tensors_dict = self._parse_tensors(tensors_proto, graph_id)
# update global memory usage data
self._peak_mem = max(self._peak_mem, graph_parser.peak_mem)
self._mem_summary['static_mem'] += graph_parser.static_mem
self._mem_summary['allocations'] += graph_parser.allocations
self._mem_summary['deallocations'] += graph_parser.deallocations


# calculate memory usage of the graph by number of nodes and details of tensors
nodes_proto = graph_proto.node_mems
# init memory usage list with static memory
mem_change = [graph.static_mem for _ in range(len(nodes_proto))]
self._calc_mem_change(mem_change, tensors_dict)
graph.lines = mem_change
def _write_memory_files(self, filename, content):
"""Write the summary and top breakdowns of memory usage."""
file_path = os.path.join(self._profiling_dir, filename)
file_path = validate_and_normalize_path(file_path)


# process nodes in graph
graph.nodes = self._parse_nodes(
nodes_proto, mem_change, tensors_dict, graph
)
try:
with open(file_path, 'w') as json_file:
json.dump(content, json_file)
os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
except (IOError, OSError) as err:
logger.error('Fail to write memory file.\n%s', err)
raise ProfilerIOException


# update fp_start and bp_end
point_id = self._locate_fp_bp_id(points, graph.nodes)
graph.fp_start = point_id.get('fp_start')
graph.bp_end = point_id.get('bp_end')
def write_memory_files(self):
"""Write memory files."""
logger.info('Start recording memory data into files...')
# write memory summary to json file
summary_filename = self._summary_filename.format(self._device_id)
self._write_memory_files(summary_filename, self._mem_summary)


graphs_dict.update({graph_id: graph.to_dict()})
# write memory details to json file
details_filename = self._details_filename.format(self._device_id)
self._write_memory_files(details_filename, self._graphs_dict)
logger.info('Successfully write memory data into files.')


self._mem_summary['static_mem'] += graph.static_mem
self._mem_summary['allocations'] += len(tensors_dict) + 1
self._mem_summary['deallocations'] += len(tensors_dict) + 1
self._peak_mem = max(max(mem_change), self._peak_mem)
@staticmethod
def _process_framework_info(aicore_detail_data):
"""Process framework info."""
framework_info_dict = {}
for framework_obj in aicore_detail_data:
op_name = framework_obj[0]
op_full_name = framework_obj[4]
op_info = framework_obj[5]
framework_info_dict[op_name] = {
'fullname': op_full_name,
'name': op_name,
'args': op_info
}

return framework_info_dict


return graphs_dict


@staticmethod
def _parse_tensors(tensors_proto, graph_id):
class GraphMemoryParser:
"""Parse memory usage data for each graph."""
def __init__(self, graph_proto, points, framework):
    # Parsed Graph container; populated by parse_graph().
    self.graph = None
    # node name -> Node, in insertion (execution) order.
    self.nodes = OrderedDict()
    # tensor id -> Tensor, in insertion order.
    self.tensors = OrderedDict()
    # Framework op metadata (name -> {'fullname', 'name', 'args'}).
    self._framework = framework
    # Step-trace points used to locate fp_start / bp_end.
    self._points = points
    self._graph_proto = graph_proto
    # Aggregates exposed to the caller after parse_graph() runs.
    self.peak_mem = 0
    self.static_mem = 0
    self.allocations = 0
    self.deallocations = 0
    # Per-node memory usage curve (GiB), one slot per node.
    self._mem_change = []
    self.breakdowns = []
    # Per-node list of tensors alive at that node; filled by _calc_mem_change().
    self._lifetime = []

def parse_graph(self):
    """Parse the memory usage data of one subgraph.

    Builds tensors, the per-node memory curve, node details, memory
    breakdowns and the fp_start/bp_end markers, then serializes the graph.

    Returns:
        dict: the graph via ``Graph.to_dict()``, or ``{}`` when the graph
        contains no tensors.

    Side effects: updates ``peak_mem``, ``static_mem``, ``allocations`` and
    ``deallocations`` for the caller to aggregate across graphs.
    """
    graph_dict = {}
    self.graph = Graph(self._graph_proto)
    # Process tensors in the graph; a graph without tensors is skipped.
    tensors_proto = self._graph_proto.tensor_mems
    if not tensors_proto:
        logger.info('No tensor in graph %s, skipped.', self.graph.graph_id)
        return graph_dict
    self._parse_tensors(tensors_proto)

    # Calculate the memory usage curve: one slot per node, each
    # initialized with the graph's static memory.
    nodes_proto = self._graph_proto.node_mems
    self._mem_change = [self.graph.static_mem for _ in range(len(nodes_proto))]
    self._lifetime = [[] for _ in range(len(nodes_proto))]
    self._calc_mem_change()  # updates self._mem_change and self._lifetime
    self.graph.lines = self._mem_change

    # Process the nodes in the graph (must run after _calc_mem_change,
    # since node.mem_change is derived from self._mem_change).
    self.graph.nodes = self._parse_nodes(nodes_proto)

    self._process_memory_breakdowns()
    self.graph.breakdowns = self.breakdowns

    # Locate fp_start and bp_end from the step-trace points.
    point_id = self._locate_fp_bp_id()
    self.graph.fp_start = point_id.get('fp_start')
    self.graph.bp_end = point_id.get('bp_end')

    graph_dict = self.graph.to_dict()

    # Expose per-graph aggregates; allocation/deallocation counts equal the
    # number of tensors (each tensor is allocated and freed once).
    self.static_mem = self.graph.static_mem
    self.allocations = len(self.tensors)
    self.deallocations = len(self.tensors)
    self.peak_mem = max(max(self._mem_change), self.peak_mem)

    return graph_dict

def _parse_tensors(self, tensors_proto):
"""Parse tensors.""" """Parse tensors."""
tensors_dict = {}
for tensor_proto in tensors_proto: for tensor_proto in tensors_proto:
tensor = Tensor(tensor_proto, graph_id)
tensors_dict.update({tensor.tensor_id: tensor})
tensor = Tensor(tensor_proto)
self.tensors.update({tensor.tensor_id: tensor})


return tensors_dict

def _parse_nodes(self, nodes_proto, mem_change, tensors_dict, graph):
def _parse_nodes(self, nodes_proto):
"""Parse nodes.""" """Parse nodes."""
nodes_dict = {}
nodes_list = []
for index, node_proto in enumerate(nodes_proto): for index, node_proto in enumerate(nodes_proto):
node = Node(node_proto, graph.graph_id)
tensors = set(node.output_ids + node.workspace_ids)
node.size = self._calc_node_memory(tensors, tensors_dict)
node.allocations = len(tensors)
node.deallocations = len(tensors)
node = Node(node_proto)
# Calculate memory size allocated for this node
tensor_ids = set(node.output_ids + node.workspace_ids)
node.size = self._calc_node_memory(tensor_ids)
node.allocations = len(tensor_ids)
node.deallocations = len(tensor_ids)


# calculate the allocated/deallocated memory size on the node # calculate the allocated/deallocated memory size on the node
if index == 0: if index == 0:
node.mem_change = mem_change[index] - graph.static_mem
node.mem_change = self._mem_change[index] - self.graph.static_mem
else: else:
node.mem_change = mem_change[index] - mem_change[index-1]

self._update_nodes(node, tensors_dict)
nodes_dict[node.name] = node.to_dict()
node.mem_change = self._mem_change[index] - self._mem_change[index-1]


# update active nodes
self._active_nodes.push(
item=(node.name, node.node_id, node.size, graph.graph_id),
priority=-node.size # priority is the negative value of node size
)
self._update_nodes(node)
self._update_tensor_source(node)
self.nodes[node.name] = node
nodes_list.append(node.to_dict())


return nodes_dict
return nodes_list


def _update_nodes(self, node, tensors_dict):
def _update_nodes(self, node):
"""Update nodes.""" """Update nodes."""
skipped = self._find_conflict_tensors(node)
# Remove duplicate tensors
self._remove_duplicate_tensors(node)
name = node.name name = node.name
if self._framework and name in self._framework: if self._framework and name in self._framework:
node_frame = self._framework[name] node_frame = self._framework[name]
@@ -192,59 +262,38 @@ class MemoryUsageParser:
else: else:
node.outputs.append(value) node.outputs.append(value)


node.inputs = self._fill_tensor_dict(
node.inputs, node.input_ids, tensors_dict, 'input'
)
node.outputs = self._fill_tensor_dict(
node.outputs, node.output_ids, tensors_dict, 'output'
)
node.workspaces = self._fill_tensor_dict(
node.workspaces, node.workspace_ids, tensors_dict, 'workspace', skipped
)
def _update_tensor_source(self, node):
    """Record ``node`` as the producer of each of its output tensors.

    NOTE(review): assumes every id in ``node.output_ids`` is present in
    ``self.tensors`` - ``dict.get`` returning None would raise
    AttributeError on the next line. Confirm ids are always registered by
    _parse_tensors before nodes are processed.
    """
    for t_id in node.output_ids:
        tensor = self.tensors.get(t_id)
        tensor.source_node = node.name


@staticmethod @staticmethod
def _find_conflict_tensors(node):
def _remove_duplicate_tensors(node):
"""Find conflict tensors in node.""" """Find conflict tensors in node."""
output_list = []
if node.output_ids:
output_list = node.output_ids
skipped = []
if node.workspace_ids: if node.workspace_ids:
for t_id in node.workspace_ids:
if t_id in output_list:
skipped.append(t_id)

return skipped

@staticmethod
def _fill_tensor_dict(node_ios, tensor_ids, tensors_dict, tensor_type, skipped=None):
"""Fill tensor dict."""
full_list = []
for t_id, io_dict in zip(tensor_ids, node_ios):
if tensor_type == 'workspace' and t_id in skipped:
continue
tensor = tensors_dict.get(t_id)
tensor.type = tensor_type
io_dict.update(tensor.to_dict())
full_list.append(io_dict)

return full_list

@staticmethod
def _calc_node_memory(tensors, tensors_dict):
i = 0
while i < len(node.workspace_ids):
t_id = node.workspace_ids[i]
if t_id in node.output_ids:
del node.workspace_ids[i] # remove duplicate tensor
continue
i += 1

def _calc_node_memory(self, tensor_ids):
"""Calculate the allocated memory for the node.""" """Calculate the allocated memory for the node."""
node_mem = 0 node_mem = 0
for t_id in tensors:
tensor = tensors_dict[t_id]
for t_id in tensor_ids:
tensor = self.tensors[t_id]
size = tensor.size size = tensor.size
node_mem += size node_mem += size


return node_mem return node_mem


def _calc_mem_change(self, mem_change, tensors_dict):
def _calc_mem_change(self):
"""Calculate the memory change for the subgraph.""" """Calculate the memory change for the subgraph."""
node_num = len(mem_change)
for tensor_id, tensor in tensors_dict.items():
node_num = len(self._mem_change)
for tensor_id, tensor in self.tensors.items():
life_long = tensor.life_long life_long = tensor.life_long
life_start = tensor.life_start life_start = tensor.life_start
life_end = tensor.life_end life_end = tensor.life_end
@@ -255,101 +304,67 @@ class MemoryUsageParser:
if life_long == 'LifeLongGraphAll': # lifetime is from graph start to graph end if life_long == 'LifeLongGraphAll': # lifetime is from graph start to graph end
tensor.life_start = 0 tensor.life_start = 0
tensor.life_end = node_num tensor.life_end = node_num
self._update_mem_change(mem_change, size, 0, node_num)
self._update_mem_change(size, 0, node_num, tensor_id)
elif life_long == 'LifeLongGraphStart': # lifetime is from graph start to tensor end elif life_long == 'LifeLongGraphStart': # lifetime is from graph start to tensor end
if life_end is not None and life_end >= 0: if life_end is not None and life_end >= 0:
tensor.life_start = 0 tensor.life_start = 0
self._update_mem_change(mem_change, size, 0, life_end+1)
self._update_mem_change(size, 0, life_end+1, tensor_id)
else: else:
logger.info('Cannot locate lifetime end for tensor: %s', tensor_id) logger.info('Cannot locate lifetime end for tensor: %s', tensor_id)
elif life_long == 'LifeLongGraphEnd': # lifetime is from tensor start to graph end elif life_long == 'LifeLongGraphEnd': # lifetime is from tensor start to graph end
if life_start is not None and life_start <= node_num: if life_start is not None and life_start <= node_num:
tensor.life_end = node_num tensor.life_end = node_num
self._update_mem_change(mem_change, size, life_start, node_num)
self._update_mem_change(size, life_start, node_num, tensor_id)
else: else:
logger.info('Cannot locate lifetime start for tensor: %s', tensor_id) logger.info('Cannot locate lifetime start for tensor: %s', tensor_id)
elif life_long == 'LifeLongNone': # lifetime is from tensor start to tensor end elif life_long == 'LifeLongNone': # lifetime is from tensor start to tensor end
if life_start is not None and life_end is not None and life_start <= life_end: if life_start is not None and life_end is not None and life_start <= life_end:
self._update_mem_change(mem_change, size, life_start, life_end+1)
self._update_mem_change(size, life_start, life_end+1, tensor_id)
else: else:
logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id) logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id)


@staticmethod
def _update_mem_change(mem_change, size, start, end):
def _update_mem_change(self, size, start, end, tensor_id):
"""Update memory change for the subgraph.""" """Update memory change for the subgraph."""
for i in range(start, end): for i in range(start, end):
mem_change[i] += size
self._mem_change[i] += size
# Update tensor lifetime list.
self._lifetime[i].append(tensor_id)


@staticmethod
def _locate_fp_bp_id(points, nodes):
def _locate_fp_bp_id(self):
"""Locate the node id of fp_start and bp_end in graph.""" """Locate the node id of fp_start and bp_end in graph."""
point_id = { point_id = {
'fp_start': None, 'fp_start': None,
'bp_end': None 'bp_end': None
} }
fp_start = points.get('fp_start')
bp_end = points.get('bp_end')
fp_start = self._points.get('fp_start')
bp_end = self._points.get('bp_end')
fp_name = fp_start.split('/')[-1] if fp_start else "" fp_name = fp_start.split('/')[-1] if fp_start else ""
bp_name = bp_end.split('/')[-1] if bp_end else "" bp_name = bp_end.split('/')[-1] if bp_end else ""
if fp_name in nodes:
point_id['fp_start'] = nodes[fp_name].get('node_id')
if bp_name in nodes:
point_id['bp_end'] = nodes[bp_name].get('node_id')
if fp_name in self.nodes:
point_id['fp_start'] = self.nodes[fp_name].node_id
if bp_name in self.nodes:
point_id['bp_end'] = self.nodes[bp_name].node_id


return point_id return point_id


def _write_memory_files(self, filename, content):
"""Write the summary and top breakdowns of memory usage."""
file_path = os.path.join(self._profiling_dir, filename)
file_path = validate_and_normalize_path(file_path)

try:
with open(file_path, 'w') as json_file:
json.dump(content, json_file)
os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
except (IOError, OSError) as err:
logger.error('Fail to write memory file.\n%s', err)
raise ProfilerIOException

def write_memory_files(self):
"""Write memory files."""
logger.info('Start recording memory data into files...')
# write memory summary to json file
summary = self._summary_filename.format(self._device_id)
self._write_memory_files(summary, self._mem_summary)

# write memory details to json file
details = self._details_filename.format(self._device_id)
self._write_memory_files(details, self._graphs_dict)
logger.info('Successfully write memory data into files.')

def _process_memory_breakdowns(self): def _process_memory_breakdowns(self):
"""Process memory breakdowns."""
breakdowns = []
active_nodes = self._active_nodes.get_items()
for _, node_meta in active_nodes:
node_name, _, _, graph_id = node_meta
graph = self._graphs_dict[graph_id]
nodes_dict = graph.get('nodes')
node = nodes_dict.get(node_name)
if 'inputs' in node:
node.pop('inputs')
breakdowns.append(node)

self._mem_summary['breakdowns'] = breakdowns

@staticmethod
def _process_framework_info(aicore_detail_data):
"""Process framework info."""
framework_info_dict = {}
for framework_obj in aicore_detail_data:
op_name = framework_obj[0]
op_full_name = framework_obj[4]
op_info = framework_obj[5]
framework_info_dict[op_name] = {
'fullname': op_full_name,
'name': op_name,
'args': op_info
}

return framework_info_dict
"""Process memory breakdowns for each node."""
self.breakdowns = [[] for _ in range(len(self.nodes))]
for index, breakdown in enumerate(self._lifetime):
for t_id in breakdown:
tensor = self.tensors.get(t_id)
source_node = tensor.source_node
if not source_node:
continue
node = self.nodes.get(source_node)
for i, output_id in enumerate(node.output_ids):
if t_id == output_id:
output = node.outputs[i] if i < len(node.outputs) else {}
tensor.name = node.name + ':' + str(i)
tensor.shape = output.get('shape')
tensor.dtype = output.get('data_type')
tensor.format = output.get('format')
tensor.type = 'output'

tensor_dict = tensor.to_dict()
self.breakdowns[index].append(tensor_dict)

Loading…
Cancel
Save