|
|
|
@@ -18,6 +18,8 @@ import time |
|
|
|
from collections import deque |
|
|
|
from multiprocessing import Pool, Process, Queue, cpu_count |
|
|
|
|
|
|
|
import mindspore.log as logger |
|
|
|
|
|
|
|
from ._lineage_adapter import serialize_to_lineage_event |
|
|
|
from ._summary_adapter import package_graph_event, package_summary_event |
|
|
|
from ._summary_writer import LineageWriter, SummaryWriter |
|
|
|
@@ -25,20 +27,18 @@ from ._summary_writer import LineageWriter, SummaryWriter |
|
|
|
|
|
|
|
def _pack_data(datadict, wall_time): |
|
|
|
"""Pack data according to which plugin.""" |
|
|
|
result = [] |
|
|
|
summaries, step, mode = [], None, None |
|
|
|
result, summaries, step = [], [], None |
|
|
|
for plugin, datalist in datadict.items(): |
|
|
|
for data in datalist: |
|
|
|
if plugin == 'graph': |
|
|
|
result.append([plugin, data.get('mode'), package_graph_event(data.get('value')).SerializeToString()]) |
|
|
|
result.append([plugin, package_graph_event(data.get('value')).SerializeToString()]) |
|
|
|
elif plugin in ('train_lineage', 'eval_lineage', 'custom_lineage_data', 'dataset_graph'): |
|
|
|
result.append([plugin, data.get('mode'), serialize_to_lineage_event(plugin, data.get('value'))]) |
|
|
|
result.append([plugin, serialize_to_lineage_event(plugin, data.get('value'))]) |
|
|
|
elif plugin in ('scalar', 'tensor', 'histogram', 'image'): |
|
|
|
summaries.append({'_type': plugin.title(), 'name': data.get('tag'), 'data': data.get('value')}) |
|
|
|
step = data.get('step') |
|
|
|
mode = data.get('mode') |
|
|
|
if summaries: |
|
|
|
result.append(['summary', mode, package_summary_event(summaries, step, wall_time).SerializeToString()]) |
|
|
|
result.append(['summary', package_summary_event(summaries, step, wall_time).SerializeToString()]) |
|
|
|
return result |
|
|
|
|
|
|
|
|
|
|
|
@@ -54,46 +54,65 @@ class WriterPool(Process): |
|
|
|
def __init__(self, base_dir, **filedict) -> None: |
|
|
|
super().__init__() |
|
|
|
self._base_dir, self._filedict = base_dir, filedict |
|
|
|
self._queue = Queue(cpu_count() * 2) |
|
|
|
self._queue, self._writers_ = Queue(cpu_count() * 2), None |
|
|
|
self.start() |
|
|
|
|
|
|
|
def run(self): |
|
|
|
writers = self._get_writers() |
|
|
|
|
|
|
|
with Pool(min(cpu_count(), 32)) as pool: |
|
|
|
deq = deque() |
|
|
|
while True: |
|
|
|
while deq and deq[0].ready(): |
|
|
|
for plugin, mode, data in deq.popleft().get(): |
|
|
|
for writer in writers: |
|
|
|
writer.write(plugin, mode, data) |
|
|
|
for plugin, data in deq.popleft().get(): |
|
|
|
self._write(plugin, data) |
|
|
|
|
|
|
|
if not self._queue.empty(): |
|
|
|
if not self._queue.empty() and self._writers: |
|
|
|
action, data = self._queue.get() |
|
|
|
if action == 'WRITE': |
|
|
|
deq.append(pool.apply_async(_pack_data, (data, time.time()))) |
|
|
|
elif action == 'FLUSH': |
|
|
|
for writer in writers: |
|
|
|
writer.flush() |
|
|
|
self._flush() |
|
|
|
elif action == 'END': |
|
|
|
break |
|
|
|
for result in deq: |
|
|
|
for plugin, mode, data in result.get(): |
|
|
|
for writer in writers: |
|
|
|
writer.write(plugin, mode, data) |
|
|
|
for plugin, data in result.get(): |
|
|
|
self._write(plugin, data) |
|
|
|
|
|
|
|
for writer in writers: |
|
|
|
writer.close() |
|
|
|
self._close() |
|
|
|
|
|
|
|
def _get_writers(self): |
|
|
|
writers = [] |
|
|
|
@property |
|
|
|
def _writers(self): |
|
|
|
"""Get the writers in the subprocess.""" |
|
|
|
if self._writers_ is not None: |
|
|
|
return self._writers_ |
|
|
|
self._writers_ = [] |
|
|
|
for plugin, filename in self._filedict.items(): |
|
|
|
filepath = os.path.join(self._base_dir, filename) |
|
|
|
if plugin == 'summary': |
|
|
|
writers.append(SummaryWriter(filepath)) |
|
|
|
self._writers_.append(SummaryWriter(filepath)) |
|
|
|
elif plugin == 'lineage': |
|
|
|
writers.append(LineageWriter(filepath)) |
|
|
|
return writers |
|
|
|
self._writers_.append(LineageWriter(filepath)) |
|
|
|
return self._writers_ |
|
|
|
|
|
|
|
def _write(self, plugin, data): |
|
|
|
"""Write the data in the subprocess.""" |
|
|
|
for writer in self._writers[:]: |
|
|
|
try: |
|
|
|
writer.write(plugin, data) |
|
|
|
except RuntimeError: |
|
|
|
logger.warning(f'The disk space may be soon exhausted by this {type(writer).__name__}, ' |
|
|
|
'so the writer will be closed and not for further writing.') |
|
|
|
self._writers.remove(writer) |
|
|
|
writer.close() |
|
|
|
|
|
|
|
def _flush(self): |
|
|
|
"""Flush the writers in the subprocess.""" |
|
|
|
for writer in self._writers: |
|
|
|
writer.flush() |
|
|
|
|
|
|
|
def _close(self): |
|
|
|
"""Close the writers in the subprocess.""" |
|
|
|
for writer in self._writers: |
|
|
|
writer.close() |
|
|
|
|
|
|
|
def write(self, data) -> None: |
|
|
|
""" |
|
|
|
|