@@ -126,8 +126,8 @@ list(APPEND MINDSPORE_PROTO_LIST ${COMM_PROTO_SRCS})
include_directories("${CMAKE_BINARY_DIR}/profiler/device/common")
file(GLOB_RECURSE PROFILER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
    "profiler/device/common/memory_profiling.proto")
ms_protobuf_generate(PROFILER_MEM_PROTO_SRCS PROFILER_MEM_PROTO_HDRS ${PROFILER_PROTO_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_SRCS})
ms_protobuf_generate_py(PROFILER_MEM_PROTO_PY PROFILER_MEM_PROTO_HDRS_PY PROFILER_MEM_PROTO_PYS ${PROFILER_PROTO_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_PY})
if(ENABLE_DEBUGGER)
    # debugger: compile proto files
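The switch from `ms_protobuf_generate` to `ms_protobuf_generate_py` is the heart of this change: the Python bindings for `memory_profiling.proto` are now produced at build time instead of being checked in as `memory_usage_pb2.py` (deleted below). A minimal sketch of the consumer side, assuming a built package where the generated module lands under `mindspore.train` (matching the parser import further down); the dump file name here is hypothetical:

```python
# Sketch: consuming the build-generated bindings instead of the checked-in
# pb2 file. Assumes MindSpore was built so that memory_profiling_pb2 exists.
from mindspore.train.memory_profiling_pb2 import MemoryProto

proto = MemoryProto()
with open("memory_usage.pb", "rb") as f:  # hypothetical serialized dump
    proto.ParseFromString(f.read())
print(proto.total_mem)  # total allocated device memory, in bytes
```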

--- a/mindspore/profiler/common/proto_files/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The proto files for profiler."""

--- a/mindspore/profiler/common/proto_files/memory_usage.proto
+++ /dev/null
@@ -1,50 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
syntax = "proto3";

package profiler;

message MemoryProto {
  repeated GraphMemProto graph_mem = 1;  // memory usage of multiple graphs
  int64 total_mem = 2;  // total allocated memory on device
}

message GraphMemProto {
  int64 graph_id = 1;  // graph id
  int64 static_mem = 2;  // size of allocated static memory for current graph
  repeated NodeMemProto node_mems = 3;  // execution nodes
  repeated TensorMemProto tensor_mems = 4;  // all tensors
  string fp_start = 5;  // node name of fp start
  string bp_end = 6;  // node name of bp end
}

message NodeMemProto {
  string node_name = 1;  // node name
  int64 node_id = 2;  // node id with respect to the execution order
  repeated int64 input_tensor_id = 3;  // input tensor id
  repeated int64 output_tensor_id = 4;  // output tensor id
  repeated int64 workspace_tensor_id = 5;  // workspace tensor id
}

message TensorMemProto {
  int64 tensor_id = 1;  // tensor id
  int64 size = 2;  // aligned tensor size
  string type = 3;  // tensor type, e.g. Common, OutputOnly
  int64 life_start = 4;  // node id at which memory allocated
  int64 life_end = 5;  // node id at which memory deallocated
  string life_long = 6;  // the type of tensor lifetime, e.g. LifeLongGraphAll
}
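For orientation, here is how the schema above is populated through the generated Python API; a sketch with illustrative values (in MindSpore the producer is the device-side profiler, so this is only to show the field semantics):

```python
# Sketch: building a MemoryProto by hand (field names from the .proto above).
from mindspore.train.memory_profiling_pb2 import MemoryProto  # generated module

mem = MemoryProto()
mem.total_mem = 32 * 1024**3             # bytes allocated on device
graph = mem.graph_mem.add()              # repeated GraphMemProto
graph.graph_id = 0
graph.static_mem = 512 * 1024**2
node = graph.node_mems.add()             # repeated NodeMemProto
node.node_id = 0
node.node_name = "Conv2D-op1"            # hypothetical node name
node.output_tensor_id.append(0)
tensor = graph.tensor_mems.add()         # repeated TensorMemProto
tensor.tensor_id = 0
tensor.size = 4096                       # aligned size in bytes
tensor.life_long = "LifeLongNone"        # lifetime bounded by life_start/life_end
tensor.life_start = 0
tensor.life_end = 3
serialized = mem.SerializeToString()     # bytes the parser later reads back
```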

--- a/mindspore/profiler/common/proto_files/memory_usage_pb2.py
+++ /dev/null
@@ -1,295 +0,0 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: mindspore/profiler/common/proto_files/memory_usage.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor.FileDescriptor(
  name='mindspore/profiler/common/proto_files/memory_usage.proto',
  package='profiler',
  syntax='proto3',
  serialized_options=None,
  serialized_pb=_b('\n8mindspore/profiler/common/proto_files/memory_usage.proto\x12\x08profiler\"L\n\x0bMemoryProto\x12*\n\tgraph_mem\x18\x01 \x03(\x0b\x32\x17.profiler.GraphMemProto\x12\x11\n\ttotal_mem\x18\x02 \x01(\x03\"\xb1\x01\n\rGraphMemProto\x12\x10\n\x08graph_id\x18\x01 \x01(\x03\x12\x12\n\nstatic_mem\x18\x02 \x01(\x03\x12)\n\tnode_mems\x18\x03 \x03(\x0b\x32\x16.profiler.NodeMemProto\x12-\n\x0btensor_mems\x18\x04 \x03(\x0b\x32\x18.profiler.TensorMemProto\x12\x10\n\x08\x66p_start\x18\x05 \x01(\t\x12\x0e\n\x06\x62p_end\x18\x06 \x01(\t\"\x82\x01\n\x0cNodeMemProto\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\x03\x12\x17\n\x0finput_tensor_id\x18\x03 \x03(\x03\x12\x18\n\x10output_tensor_id\x18\x04 \x03(\x03\x12\x1b\n\x13workspace_tensor_id\x18\x05 \x03(\x03\"x\n\x0eTensorMemProto\x12\x11\n\ttensor_id\x18\x01 \x01(\x03\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x12\n\nlife_start\x18\x04 \x01(\x03\x12\x10\n\x08life_end\x18\x05 \x01(\x03\x12\x11\n\tlife_long\x18\x06 \x01(\tb\x06proto3')
)


_MEMORYPROTO = _descriptor.Descriptor(
  name='MemoryProto',
  full_name='profiler.MemoryProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='graph_mem', full_name='profiler.MemoryProto.graph_mem', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='total_mem', full_name='profiler.MemoryProto.total_mem', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=70,
  serialized_end=146,
)


_GRAPHMEMPROTO = _descriptor.Descriptor(
  name='GraphMemProto',
  full_name='profiler.GraphMemProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='graph_id', full_name='profiler.GraphMemProto.graph_id', index=0,
      number=1, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='static_mem', full_name='profiler.GraphMemProto.static_mem', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='node_mems', full_name='profiler.GraphMemProto.node_mems', index=2,
      number=3, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tensor_mems', full_name='profiler.GraphMemProto.tensor_mems', index=3,
      number=4, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='fp_start', full_name='profiler.GraphMemProto.fp_start', index=4,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='bp_end', full_name='profiler.GraphMemProto.bp_end', index=5,
      number=6, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=149,
  serialized_end=326,
)


_NODEMEMPROTO = _descriptor.Descriptor(
  name='NodeMemProto',
  full_name='profiler.NodeMemProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='node_name', full_name='profiler.NodeMemProto.node_name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='node_id', full_name='profiler.NodeMemProto.node_id', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='input_tensor_id', full_name='profiler.NodeMemProto.input_tensor_id', index=2,
      number=3, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='output_tensor_id', full_name='profiler.NodeMemProto.output_tensor_id', index=3,
      number=4, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='workspace_tensor_id', full_name='profiler.NodeMemProto.workspace_tensor_id', index=4,
      number=5, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=329,
  serialized_end=459,
)


_TENSORMEMPROTO = _descriptor.Descriptor(
  name='TensorMemProto',
  full_name='profiler.TensorMemProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='tensor_id', full_name='profiler.TensorMemProto.tensor_id', index=0,
      number=1, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='size', full_name='profiler.TensorMemProto.size', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='type', full_name='profiler.TensorMemProto.type', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='life_start', full_name='profiler.TensorMemProto.life_start', index=3,
      number=4, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='life_end', full_name='profiler.TensorMemProto.life_end', index=4,
      number=5, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='life_long', full_name='profiler.TensorMemProto.life_long', index=5,
      number=6, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=461,
  serialized_end=581,
)

_MEMORYPROTO.fields_by_name['graph_mem'].message_type = _GRAPHMEMPROTO
_GRAPHMEMPROTO.fields_by_name['node_mems'].message_type = _NODEMEMPROTO
_GRAPHMEMPROTO.fields_by_name['tensor_mems'].message_type = _TENSORMEMPROTO
DESCRIPTOR.message_types_by_name['MemoryProto'] = _MEMORYPROTO
DESCRIPTOR.message_types_by_name['GraphMemProto'] = _GRAPHMEMPROTO
DESCRIPTOR.message_types_by_name['NodeMemProto'] = _NODEMEMPROTO
DESCRIPTOR.message_types_by_name['TensorMemProto'] = _TENSORMEMPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

MemoryProto = _reflection.GeneratedProtocolMessageType('MemoryProto', (_message.Message,), {
  'DESCRIPTOR' : _MEMORYPROTO,
  '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
  # @@protoc_insertion_point(class_scope:profiler.MemoryProto)
  })
_sym_db.RegisterMessage(MemoryProto)

GraphMemProto = _reflection.GeneratedProtocolMessageType('GraphMemProto', (_message.Message,), {
  'DESCRIPTOR' : _GRAPHMEMPROTO,
  '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
  # @@protoc_insertion_point(class_scope:profiler.GraphMemProto)
  })
_sym_db.RegisterMessage(GraphMemProto)

NodeMemProto = _reflection.GeneratedProtocolMessageType('NodeMemProto', (_message.Message,), {
  'DESCRIPTOR' : _NODEMEMPROTO,
  '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
  # @@protoc_insertion_point(class_scope:profiler.NodeMemProto)
  })
_sym_db.RegisterMessage(NodeMemProto)

TensorMemProto = _reflection.GeneratedProtocolMessageType('TensorMemProto', (_message.Message,), {
  'DESCRIPTOR' : _TENSORMEMPROTO,
  '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
  # @@protoc_insertion_point(class_scope:profiler.TensorMemProto)
  })
_sym_db.RegisterMessage(TensorMemProto)

# @@protoc_insertion_point(module_scope)
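Nothing in the deleted module above is hand-written; it is exactly what the protobuf compiler emits for the schema, which is why the build can safely regenerate it. A sketch of regenerating it manually, assuming `protoc` is on PATH, the working directory matches the CMake snippet above, and the output directory (illustrative) already exists:

```python
# Sketch: regenerate the _pb2 module that used to be checked in.
import subprocess

subprocess.run(
    ["protoc",
     "--python_out=build/python",  # hypothetical output directory
     "profiler/device/common/memory_profiling.proto"],
    check=True)
```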

--- a/mindspore/profiler/parser/container.py
+++ b/mindspore/profiler/parser/container.py
@@ -13,8 +13,6 @@
# limitations under the License.
# ============================================================================
"""The container of metadata used in profiler parser."""
import heapq

GIGABYTES = 1024 * 1024 * 1024

@@ -131,6 +129,7 @@ class MemoryGraph:
        self.bp_end = None
        self.lines = []
        self.nodes = {}
        self.breakdowns = []

    def to_dict(self):
        """Convert Graph to dict."""

@@ -140,7 +139,8 @@ class MemoryGraph:
            'nodes': self.nodes,
            'fp_start': self.fp_start,
            'bp_end': self.bp_end,
            'lines': self.lines
            'lines': self.lines,
            'breakdowns': self.breakdowns
        }

        return graph

@@ -152,17 +152,15 @@ class MemoryNode:
    Args:
        node_proto (proto): Node proto.
        graph_id (int): Graph id.
    """
    def __init__(self, node_proto, graph_id):
    def __init__(self, node_proto):
        self._node_proto = node_proto
        self.graph_id = graph_id
        self.node_id = node_proto.node_id
        self.name = node_proto.node_name
        self.fullname = ""
        self.input_ids = [t_id for t_id in node_proto.input_tensor_id]
        self.output_ids = [t_id for t_id in node_proto.output_tensor_id]
        self.workspace_ids = [t_id for t_id in node_proto.workspace_tensor_id]
        self.input_ids = list(node_proto.input_tensor_id)
        self.output_ids = list(node_proto.output_tensor_id)
        self.workspace_ids = list(node_proto.workspace_tensor_id)
        self.inputs = []
        self.outputs = []
        self.workspaces = []

@@ -181,8 +179,7 @@ class MemoryNode:
            'size': self.size,
            'allocated': self.mem_change,
            'inputs': self.inputs,
            'outputs': self.outputs,
            'workspaces': self.workspaces
            'outputs': self.outputs
        }

        return node

@@ -194,9 +191,8 @@ class MemoryTensor:
    Args:
        tensor_proto (proto): Tensor proto.
        graph_id (int): Graph id.
    """
    def __init__(self, tensor_proto, graph_id):
    def __init__(self, tensor_proto):
        self._tensor_proto = tensor_proto
        self.tensor_id = tensor_proto.tensor_id
        self.life_long = tensor_proto.life_long

@@ -204,48 +200,25 @@ class MemoryTensor:
        self.life_end = tensor_proto.life_end
        self.size = tensor_proto.size / GIGABYTES
        self.type = tensor_proto.type
        self.graph_id = graph_id
        self.shape = ""
        self.format = ""
        self.dtype = ""
        self.source_node = ""
        self.name = ""

    def to_dict(self):
        """Convert Tensor to a dict."""
        tensor = {
            'tensor_name': self.name,
            'tensor_id': self.tensor_id,
            'size': self.size,
            'type': self.type,
            'shape': self.shape,
            'format': self.format,
            'data_type': self.dtype,
            'life_long': self.life_long,
            'life_start': self.life_start,
            'life_end': self.life_end
        }
        return tensor


class MemoryQueue:
    """
    A priority queue to keep specified number of active nodes in memory activities.

    Args:
        size (int): The upper limit of nodes to be saved.
    """
    def __init__(self, size):
        self._queue = []
        self._index = 0
        self._size = size

    def push(self, item, priority):
        """
        Push a node into MemoryQueue.

        Args:
            item (tuple): Node item including id, name, etc.
            priority (int): The priority of the item.
        """
        if self._index < self._size:
            heapq.heappush(self._queue, (-priority, item))
            self._index += 1
        else:
            heapq.heappushpop(self._queue, (-priority, item))

    def get_items(self):
        """Get the elements in MemoryQueue."""
        return self._queue
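MemoryQueue, deleted above (its call site in the parser goes away below), implemented a standard top-k pattern: `heapq` is a min-heap, so pushing `(key, item)` pairs and evicting with `heappushpop` once the queue is full retains the k largest keys. The double negation in the original (`priority=-node.size` at the call site, `-priority` here) reduces to key = node size. A self-contained sketch of the same pattern, with illustrative data:

```python
# Sketch of the top-k idiom the deleted MemoryQueue implemented:
# once the heap holds k items, heappushpop evicts the smallest key,
# so only the k largest keys survive.
import heapq

def push_top_k(queue, item, key, k):
    """Keep the k items with the largest `key` seen so far."""
    if len(queue) < k:
        heapq.heappush(queue, (key, item))
    else:
        heapq.heappushpop(queue, (key, item))

queue = []
for name, size in [("A", 3), ("B", 9), ("C", 1), ("D", 7)]:
    push_top_k(queue, name, size, k=2)
assert sorted(queue) == [(7, "D"), (9, "B")]
```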

--- a/mindspore/profiler/parser/memory_usage_parser.py
+++ b/mindspore/profiler/parser/memory_usage_parser.py
@@ -13,6 +13,7 @@
# limitations under the License.
# ============================================================================
"""Memory Usage Parser."""
from collections import OrderedDict
import json
import os
import stat

@@ -22,12 +23,11 @@ from google.protobuf.text_format import ParseError
from mindspore import log as logger
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
    ProfilerFileNotFoundException, ProfilerRawFileException
from mindspore.profiler.common.proto_files.memory_usage_pb2 import MemoryProto
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
from mindspore.profiler.parser.container import MemoryGraph as Graph
from mindspore.profiler.parser.container import MemoryNode as Node
from mindspore.profiler.parser.container import MemoryQueue
from mindspore.profiler.parser.container import MemoryTensor as Tensor
from mindspore.train.memory_profiling_pb2 import MemoryProto

GIGABYTES = 1024 * 1024 * 1024

@@ -47,11 +47,10 @@ class MemoryUsageParser:
            'allocations': 0,
            'deallocations': 0,
            'peak_mem': 0,
            'static_mem': 0,
            'breakdowns': []
            'static_mem': 0
        }
        self._active_nodes = MemoryQueue(size=10)
        self._framework = {}
        self._points = {}

    def _get_file_path(self):
        """Get the proto file path."""

@@ -73,7 +72,9 @@ class MemoryUsageParser:
        logger.info("Start to load memory usage data from pb file")
        file_path = self._get_file_path()
        self._framework = self._process_framework_info(aicore_detail_data)
        self._points = points

        # Open memory protobuf file.
        try:
            with open(file_path, 'rb') as f:
                content = f.read()

@@ -81,106 +82,175 @@ class MemoryUsageParser:
            logger.error('Failed to read memory file: %s', err)
            raise ProfilerIOException

        model_proto = MemoryProto()
        # Parse memory raw data from file.
        memory_proto = MemoryProto()
        try:
            model_proto.ParseFromString(content)
            memory_proto.ParseFromString(content)
        except ParseError as err:
            msg = "Fail to parse memory proto file."
            logger.error("Cannot parse the memory file. Please check the file schema.\n%s", err)
            raise ProfilerRawFileException(msg)

        graphs = model_proto.graph_mem
        self._graphs_dict = self._parse_graphs(graphs, points)
        self._mem_summary['capacity'] = model_proto.total_mem / GIGABYTES
        # Parse memory details based on graphs in the network.
        graphs = memory_proto.graph_mem
        self._parse_graph_memory(graphs)

        # Update memory summary information.
        self._mem_summary['capacity'] = memory_proto.total_mem / GIGABYTES
        self._mem_summary['peak_mem'] = self._peak_mem
        self._process_memory_breakdowns()

        logger.info('Finished processing memory usage data.')

    def _parse_graphs(self, graphs, points):
        """Parse subgraphs."""
        graphs_dict = {}
    def _parse_graph_memory(self, graphs):
        """Parse memory usage based on subgraphs."""
        for graph_proto in graphs:
            graph_id = graph_proto.graph_id
            if graph_id is None:
                logger.info('Graph id is missing, skipped the graph.')
                continue

            graph = Graph(graph_proto)
            graph_parser = GraphMemoryParser(graph_proto, self._points, self._framework)
            graph = graph_parser.parse_graph()
            if graph:
                self._graphs_dict[graph_id] = graph

            # process tensors in the graph
            tensors_proto = graph_proto.tensor_mems
            if not tensors_proto:
                logger.info('No tensor in graph %s, skipped.', graph_id)
                continue
            tensors_dict = self._parse_tensors(tensors_proto, graph_id)

            # update global memory usage data
            self._peak_mem = max(self._peak_mem, graph_parser.peak_mem)
            self._mem_summary['static_mem'] += graph_parser.static_mem
            self._mem_summary['allocations'] += graph_parser.allocations
            self._mem_summary['deallocations'] += graph_parser.deallocations

            # calculate memory usage of the graph by number of nodes and details of tensors
            nodes_proto = graph_proto.node_mems
            # init memory usage list with static memory
            mem_change = [graph.static_mem for _ in range(len(nodes_proto))]
            self._calc_mem_change(mem_change, tensors_dict)
            graph.lines = mem_change

    def _write_memory_files(self, filename, content):
        """Write the summary and top breakdowns of memory usage."""
        file_path = os.path.join(self._profiling_dir, filename)
        file_path = validate_and_normalize_path(file_path)

            # process nodes in graph
            graph.nodes = self._parse_nodes(
                nodes_proto, mem_change, tensors_dict, graph
            )

        try:
            with open(file_path, 'w') as json_file:
                json.dump(content, json_file)
            os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
        except (IOError, OSError) as err:
            logger.error('Fail to write memory file.\n%s', err)
            raise ProfilerIOException

            # update fp_start and bp_end
            point_id = self._locate_fp_bp_id(points, graph.nodes)
            graph.fp_start = point_id.get('fp_start')
            graph.bp_end = point_id.get('bp_end')

    def write_memory_files(self):
        """Write memory files."""
        logger.info('Start recording memory data into files...')

        # write memory summary to json file
        summary_filename = self._summary_filename.format(self._device_id)
        self._write_memory_files(summary_filename, self._mem_summary)

            graphs_dict.update({graph_id: graph.to_dict()})

        # write memory details to json file
        details_filename = self._details_filename.format(self._device_id)
        self._write_memory_files(details_filename, self._graphs_dict)

        logger.info('Successfully write memory data into files.')

            self._mem_summary['static_mem'] += graph.static_mem
            self._mem_summary['allocations'] += len(tensors_dict) + 1
            self._mem_summary['deallocations'] += len(tensors_dict) + 1
            self._peak_mem = max(max(mem_change), self._peak_mem)

    @staticmethod
    def _process_framework_info(aicore_detail_data):
        """Process framework info."""
        framework_info_dict = {}
        for framework_obj in aicore_detail_data:
            op_name = framework_obj[0]
            op_full_name = framework_obj[4]
            op_info = framework_obj[5]
            framework_info_dict[op_name] = {
                'fullname': op_full_name,
                'name': op_name,
                'args': op_info
            }
        return framework_info_dict

        return graphs_dict

    @staticmethod
    def _parse_tensors(tensors_proto, graph_id):


class GraphMemoryParser:
    """Parse memory usage data for each graph."""

    def __init__(self, graph_proto, points, framework):
        self.graph = None
        self.nodes = OrderedDict()
        self.tensors = OrderedDict()
        self._framework = framework
        self._points = points
        self._graph_proto = graph_proto
        self.peak_mem = 0
        self.static_mem = 0
        self.allocations = 0
        self.deallocations = 0
        self._mem_change = []
        self.breakdowns = []
        self._lifetime = []

    def parse_graph(self):
        """Parse memory usage data for subgraphs."""
        graph_dict = {}
        self.graph = Graph(self._graph_proto)

        # process tensors in the graph
        tensors_proto = self._graph_proto.tensor_mems
        if not tensors_proto:
            logger.info('No tensor in graph %s, skipped.', self.graph.graph_id)
            return graph_dict
        self._parse_tensors(tensors_proto)

        # calculate memory usage of the graph by number of nodes and details of tensors
        nodes_proto = self._graph_proto.node_mems
        # init memory usage list with static memory
        self._mem_change = [self.graph.static_mem for _ in range(len(nodes_proto))]
        self._lifetime = [[] for _ in range(len(nodes_proto))]
        self._calc_mem_change()  # update self._mem_change and self._lifetime
        self.graph.lines = self._mem_change

        # process nodes in graph
        self.graph.nodes = self._parse_nodes(nodes_proto)
        self._process_memory_breakdowns()
        self.graph.breakdowns = self.breakdowns

        # update fp_start and bp_end
        point_id = self._locate_fp_bp_id()
        self.graph.fp_start = point_id.get('fp_start')
        self.graph.bp_end = point_id.get('bp_end')

        graph_dict = self.graph.to_dict()
        self.static_mem = self.graph.static_mem
        self.allocations = len(self.tensors)
        self.deallocations = len(self.tensors)
        self.peak_mem = max(max(self._mem_change), self.peak_mem)

        return graph_dict

    def _parse_tensors(self, tensors_proto):
        """Parse tensors."""
        tensors_dict = {}
        for tensor_proto in tensors_proto:
            tensor = Tensor(tensor_proto, graph_id)
            tensors_dict.update({tensor.tensor_id: tensor})
            tensor = Tensor(tensor_proto)
            self.tensors.update({tensor.tensor_id: tensor})
        return tensors_dict

    def _parse_nodes(self, nodes_proto, mem_change, tensors_dict, graph):
    def _parse_nodes(self, nodes_proto):
        """Parse nodes."""
        nodes_dict = {}
        nodes_list = []
        for index, node_proto in enumerate(nodes_proto):
            node = Node(node_proto, graph.graph_id)
            tensors = set(node.output_ids + node.workspace_ids)
            node.size = self._calc_node_memory(tensors, tensors_dict)
            node.allocations = len(tensors)
            node.deallocations = len(tensors)
            node = Node(node_proto)
            # Calculate memory size allocated for this node
            tensor_ids = set(node.output_ids + node.workspace_ids)
            node.size = self._calc_node_memory(tensor_ids)
            node.allocations = len(tensor_ids)
            node.deallocations = len(tensor_ids)

            # calculate the allocated/deallocated memory size on the node
            if index == 0:
                node.mem_change = mem_change[index] - graph.static_mem
                node.mem_change = self._mem_change[index] - self.graph.static_mem
            else:
                node.mem_change = mem_change[index] - mem_change[index-1]

            self._update_nodes(node, tensors_dict)
            nodes_dict[node.name] = node.to_dict()
                node.mem_change = self._mem_change[index] - self._mem_change[index-1]

            # update active nodes
            self._active_nodes.push(
                item=(node.name, node.node_id, node.size, graph.graph_id),
                priority=-node.size  # priority is the negative value of node size
            )

            self._update_nodes(node)
            self._update_tensor_source(node)
            self.nodes[node.name] = node
            nodes_list.append(node.to_dict())

        return nodes_dict
        return nodes_list

    def _update_nodes(self, node, tensors_dict):
    def _update_nodes(self, node):
        """Update nodes."""
        skipped = self._find_conflict_tensors(node)
        # Remove duplicate tensors
        self._remove_duplicate_tensors(node)
        name = node.name
        if self._framework and name in self._framework:
            node_frame = self._framework[name]

@@ -192,59 +262,38 @@
                else:
                    node.outputs.append(value)

        node.inputs = self._fill_tensor_dict(
            node.inputs, node.input_ids, tensors_dict, 'input'
        )
        node.outputs = self._fill_tensor_dict(
            node.outputs, node.output_ids, tensors_dict, 'output'
        )
        node.workspaces = self._fill_tensor_dict(
            node.workspaces, node.workspace_ids, tensors_dict, 'workspace', skipped
        )

    def _update_tensor_source(self, node):
        """Update source node for tensors."""
        for t_id in node.output_ids:
            tensor = self.tensors.get(t_id)
            tensor.source_node = node.name

    @staticmethod
    def _find_conflict_tensors(node):
    def _remove_duplicate_tensors(node):
        """Find conflict tensors in node."""
        output_list = []
        if node.output_ids:
            output_list = node.output_ids
        skipped = []
        if node.workspace_ids:
            for t_id in node.workspace_ids:
                if t_id in output_list:
                    skipped.append(t_id)
        return skipped

    @staticmethod
    def _fill_tensor_dict(node_ios, tensor_ids, tensors_dict, tensor_type, skipped=None):
        """Fill tensor dict."""
        full_list = []
        for t_id, io_dict in zip(tensor_ids, node_ios):
            if tensor_type == 'workspace' and t_id in skipped:
                continue
            tensor = tensors_dict.get(t_id)
            tensor.type = tensor_type
            io_dict.update(tensor.to_dict())
            full_list.append(io_dict)
        return full_list

    @staticmethod
    def _calc_node_memory(tensors, tensors_dict):
        i = 0
        while i < len(node.workspace_ids):
            t_id = node.workspace_ids[i]
            if t_id in node.output_ids:
                del node.workspace_ids[i]  # remove duplicate tensor
                continue
            i += 1
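An aside on the new removal loop: index-based deletion inside a `while` is correct here, but the same effect can be had with a filter. A sketch, assuming duplicates only need to be dropped, not counted:

```python
# Equivalent to _remove_duplicate_tensors: keep only workspace ids
# that are not also output ids of the same node.
node.workspace_ids = [t_id for t_id in node.workspace_ids
                      if t_id not in node.output_ids]
```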
    def _calc_node_memory(self, tensor_ids):
        """Calculate the allocated memory for the node."""
        node_mem = 0
        for t_id in tensors:
            tensor = tensors_dict[t_id]
        for t_id in tensor_ids:
            tensor = self.tensors[t_id]
            size = tensor.size
            node_mem += size
        return node_mem

    def _calc_mem_change(self, mem_change, tensors_dict):
    def _calc_mem_change(self):
        """Calculate the memory change for the subgraph."""
        node_num = len(mem_change)
        for tensor_id, tensor in tensors_dict.items():
        node_num = len(self._mem_change)
        for tensor_id, tensor in self.tensors.items():
            life_long = tensor.life_long
            life_start = tensor.life_start
            life_end = tensor.life_end

@@ -255,101 +304,67 @@
            if life_long == 'LifeLongGraphAll':  # lifetime is from graph start to graph end
                tensor.life_start = 0
                tensor.life_end = node_num
                self._update_mem_change(mem_change, size, 0, node_num)
                self._update_mem_change(size, 0, node_num, tensor_id)
            elif life_long == 'LifeLongGraphStart':  # lifetime is from graph start to tensor end
                if life_end is not None and life_end >= 0:
                    tensor.life_start = 0
                    self._update_mem_change(mem_change, size, 0, life_end+1)
                    self._update_mem_change(size, 0, life_end+1, tensor_id)
                else:
                    logger.info('Cannot locate lifetime end for tensor: %s', tensor_id)
            elif life_long == 'LifeLongGraphEnd':  # lifetime is from tensor start to graph end
                if life_start is not None and life_start <= node_num:
                    tensor.life_end = node_num
                    self._update_mem_change(mem_change, size, life_start, node_num)
                    self._update_mem_change(size, life_start, node_num, tensor_id)
                else:
                    logger.info('Cannot locate lifetime start for tensor: %s', tensor_id)
            elif life_long == 'LifeLongNone':  # lifetime is from tensor start to tensor end
                if life_start is not None and life_end is not None and life_start <= life_end:
                    self._update_mem_change(mem_change, size, life_start, life_end+1)
                    self._update_mem_change(size, life_start, life_end+1, tensor_id)
                else:
                    logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id)

    @staticmethod
    def _update_mem_change(mem_change, size, start, end):
    def _update_mem_change(self, size, start, end, tensor_id):
        """Update memory change for the subgraph."""
        for i in range(start, end):
            mem_change[i] += size
            self._mem_change[i] += size
            # Update tensor lifetime list.
            self._lifetime[i].append(tensor_id)
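In effect, `_calc_mem_change` adds each tensor's size to every execution step inside its lifetime, so `self._mem_change` becomes a per-node memory curve and `self._lifetime[i]` lists the tensors alive at step i. A toy illustration of the accumulation, with hypothetical sizes in GiB:

```python
# Toy version of the accumulation done by _calc_mem_change/_update_mem_change.
node_num = 4
static_mem = 1.0                       # GiB, hypothetical
mem_change = [static_mem] * node_num   # one entry per executed node
lifetime = [[] for _ in range(node_num)]

tensors = {0: (0.5, 0, 2),             # tensor_id: (size, life_start, life_end)
           1: (0.25, 1, 3)}
for t_id, (size, start, end) in tensors.items():
    for i in range(start, end + 1):    # the parser passes life_end + 1 as `end`
        mem_change[i] += size
        lifetime[i].append(t_id)

assert mem_change == [1.5, 1.75, 1.75, 1.25]
assert lifetime == [[0], [0, 1], [0, 1], [1]]
```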
    @staticmethod
    def _locate_fp_bp_id(points, nodes):
    def _locate_fp_bp_id(self):
        """Locate the node id of fp_start and bp_end in graph."""
        point_id = {
            'fp_start': None,
            'bp_end': None
        }
        fp_start = points.get('fp_start')
        bp_end = points.get('bp_end')
        fp_start = self._points.get('fp_start')
        bp_end = self._points.get('bp_end')
        fp_name = fp_start.split('/')[-1] if fp_start else ""
        bp_name = bp_end.split('/')[-1] if bp_end else ""
        if fp_name in nodes:
            point_id['fp_start'] = nodes[fp_name].get('node_id')
        if bp_name in nodes:
            point_id['bp_end'] = nodes[bp_name].get('node_id')
        if fp_name in self.nodes:
            point_id['fp_start'] = self.nodes[fp_name].node_id
        if bp_name in self.nodes:
            point_id['bp_end'] = self.nodes[bp_name].node_id

        return point_id

    def _write_memory_files(self, filename, content):
        """Write the summary and top breakdowns of memory usage."""
        file_path = os.path.join(self._profiling_dir, filename)
        file_path = validate_and_normalize_path(file_path)
        try:
            with open(file_path, 'w') as json_file:
                json.dump(content, json_file)
            os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
        except (IOError, OSError) as err:
            logger.error('Fail to write memory file.\n%s', err)
            raise ProfilerIOException

    def write_memory_files(self):
        """Write memory files."""
        logger.info('Start recording memory data into files...')

        # write memory summary to json file
        summary = self._summary_filename.format(self._device_id)
        self._write_memory_files(summary, self._mem_summary)

        # write memory details to json file
        details = self._details_filename.format(self._device_id)
        self._write_memory_files(details, self._graphs_dict)

        logger.info('Successfully write memory data into files.')

    def _process_memory_breakdowns(self):
        """Process memory breakdowns."""
        breakdowns = []
        active_nodes = self._active_nodes.get_items()
        for _, node_meta in active_nodes:
            node_name, _, _, graph_id = node_meta
            graph = self._graphs_dict[graph_id]
            nodes_dict = graph.get('nodes')
            node = nodes_dict.get(node_name)
            if 'inputs' in node:
                node.pop('inputs')
            breakdowns.append(node)
        self._mem_summary['breakdowns'] = breakdowns

    @staticmethod
    def _process_framework_info(aicore_detail_data):
        """Process framework info."""
        framework_info_dict = {}
        for framework_obj in aicore_detail_data:
            op_name = framework_obj[0]
            op_full_name = framework_obj[4]
            op_info = framework_obj[5]
            framework_info_dict[op_name] = {
                'fullname': op_full_name,
                'name': op_name,
                'args': op_info
            }
        return framework_info_dict

        """Process memory breakdowns for each node."""
        self.breakdowns = [[] for _ in range(len(self.nodes))]
        for index, breakdown in enumerate(self._lifetime):
            for t_id in breakdown:
                tensor = self.tensors.get(t_id)
                source_node = tensor.source_node
                if not source_node:
                    continue

                node = self.nodes.get(source_node)
                for i, output_id in enumerate(node.output_ids):
                    if t_id == output_id:
                        output = node.outputs[i] if i < len(node.outputs) else {}
                        tensor.name = node.name + ':' + str(i)
                        tensor.shape = output.get('shape')
                        tensor.dtype = output.get('data_type')
                        tensor.format = output.get('format')
                        tensor.type = 'output'

                        tensor_dict = tensor.to_dict()
                        self.breakdowns[index].append(tensor_dict)
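Putting the refactor together: MemoryUsageParser now only reads and aggregates, while all per-graph work (tensor lifetimes, node memory, breakdowns, fp/bp points) lives in GraphMemoryParser. A condensed sketch of the resulting flow, using the names from the diff and assuming `content`, `points`, and `framework` were prepared as above:

```python
# Sketch: per-graph parsing flow after the refactor.
memory_proto = MemoryProto()
memory_proto.ParseFromString(content)          # raw bytes read from the pb file

graphs_dict, peak_mem = {}, 0
for graph_proto in memory_proto.graph_mem:
    parser = GraphMemoryParser(graph_proto, points, framework)
    graph = parser.parse_graph()               # {} when the graph has no tensors
    if graph:
        graphs_dict[graph_proto.graph_id] = graph
        peak_mem = max(peak_mem, parser.peak_mem)
```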