Browse Source

!5408 Use forkserver context to fix not-exit issue

Merge pull request !5408 from LiHongzhang/forkserver
tags/v1.0.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
db6be3b35f
2 changed files with 10 additions and 5 deletions
  1. +1
    -1
      mindspore/train/callback/_summary_collector.py
  2. +9
    -4
      mindspore/train/summary/_writer_pool.py

+ 1
- 1
mindspore/train/callback/_summary_collector.py View File

@@ -111,7 +111,7 @@ class SummaryCollector(Callback):
and float. Default: None, it means there is no custom data.
collect_tensor_freq (Optional[int]): The same semantics as the `collect_freq`, but controls TensorSummary only.
Because TensorSummary data is too large to be compared with other summary data, this parameter is used to
reduce its collection. By default, The maximum number of steps for collecting TensorSummary data is 21,
reduce its collection. By default, The maximum number of steps for collecting TensorSummary data is 20,
but it will not exceed the number of steps for collecting other summary data.
Default: None, which means to follow the behavior as described above. For example, given `collect_freq=10`,
when the total steps is 600, TensorSummary will be collected 20 steps, while other summary data 61 steps,


+ 9
- 4
mindspore/train/summary/_writer_pool.py View File

@@ -16,7 +16,6 @@
import os
import time
from collections import deque
from multiprocessing import Pool, Process, Queue, cpu_count


import mindspore.log as logger


@@ -24,6 +23,12 @@ from ._lineage_adapter import serialize_to_lineage_event
from ._summary_adapter import package_graph_event, package_summary_event
from ._summary_writer import LineageWriter, SummaryWriter


try:
from multiprocessing import get_context
ctx = get_context('forkserver')
except ValueError:
import multiprocessing as ctx



def _pack_data(datadict, wall_time):
"""Pack data according to which plugin."""
@@ -42,7 +47,7 @@ def _pack_data(datadict, wall_time):
return result




class WriterPool(Process):
class WriterPool(ctx.Process):
"""
Use a set of pooled resident processes for writing a list of file.


@@ -54,12 +59,12 @@ class WriterPool(Process):
def __init__(self, base_dir, max_file_size, **filedict) -> None:
super().__init__()
self._base_dir, self._filedict = base_dir, filedict
self._queue, self._writers_ = Queue(cpu_count() * 2), None
self._queue, self._writers_ = ctx.Queue(ctx.cpu_count() * 2), None
self._max_file_size = max_file_size
self.start()


def run(self):
with Pool(min(cpu_count(), 32)) as pool:
with ctx.Pool(min(ctx.cpu_count(), 32)) as pool:
deq = deque()
while True:
while deq and deq[0].ready():


Loading…
Cancel
Save