You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

task.py 12 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Python run preprocess and postprocess in python"""
  16. import threading
  17. import time
  18. import logging
  19. from mindspore_serving._mindspore_serving import Worker_
  20. from mindspore_serving._mindspore_serving import ExitSignalHandle_
  21. from mindspore_serving.worker.register.preprocess import preprocess_storage
  22. from mindspore_serving.worker.register.postprocess import postprocess_storage
  23. from mindspore_serving import log as logger
  24. class ServingSystemException(Exception):
  25. """Exception notify system error of worker, and need to exit py task"""
  26. def __init__(self, msg):
  27. super(ServingSystemException, self).__init__()
  28. self.msg = msg
  29. def __str__(self):
  30. return "Serving system error: " + self.msg
  31. class ServingExitException(Exception):
  32. """Exception notify exit of worker, and need to exit py task"""
  33. def __str__(self):
  34. return "Serving has exited"
# Task-type tags exchanged with the C++ worker (values of task.task_type).
task_type_stop = "stop"  # worker is shutting down; the task loop must exit
task_type_empty = "empty"  # no task currently available (non-blocking fetch)
task_type_preprocess = "preprocess"  # task carries preprocess instances
task_type_postprocess = "postprocess"  # task carries postprocess instances
def has_worker_stopped():
    """Whether worker has stopped.

    Returns:
        The stop flag reported by the C++ exit-signal handler; truthy once
        the serving worker has been asked to shut down.
    """
    return ExitSignalHandle_.has_stopped()
class PyTask:
    """Base class for preprocess and postprocess.

    Subclasses feed task instances through a user-registered function (looked
    up via ``get_task_info``) and push the per-instance results — or failure
    markers — back to the C++ worker in batches of at most ``switch_batch``
    items.  ``_run_inner`` is a generator so the caller (the py task thread)
    can interleave preprocess and postprocess work by driving it one batch at
    a time.
    """

    def __init__(self, switch_batch, task_name):
        super(PyTask, self).__init__()
        # Name of the task kind, e.g. "preprocess" or "postprocess".
        self.task_name = task_name
        # Max instances handled before yielding control back to the caller.
        self.switch_batch = switch_batch
        # Index of the next instance whose result has not yet been pushed.
        self.index = 0
        # Total number of instances in the task currently being handled.
        self.instances_size = 0
        # Successful results accumulated since the last push to C++.
        self.result_batch = []
        # Info dict for the current task; keys used here are "fun",
        # "inputs_count" and "outputs_count".
        self.task_info = None
        # In-flight generator returned by _run_inner, or None when idle.
        self.temp_result = None

    def push_failed_impl(self, count):
        """Base method to push failed result"""
        raise NotImplementedError

    def push_result_batch_impl(self, result_batch):
        """Base method to push success result"""
        raise NotImplementedError

    def get_task_info(self, task_name):
        """Base method to get task info"""
        raise NotImplementedError

    def push_failed(self, count):
        """Push failed result

        Flushes pending successful results first so that result ordering seen
        by the C++ side matches instance order, then reports `count` failures.
        """
        self.push_result_batch() # push success first
        self.push_failed_impl(count)
        self.index += count

    def push_result_batch(self):
        """Push success result"""
        if not self.result_batch:
            return
        # Detach the pending batch before pushing so a failure while pushing
        # cannot leave already-consumed results queued for a second push.
        result_batch = self.result_batch
        self.result_batch = []
        try:
            self.push_result_batch_impl(tuple(result_batch))
        except Exception as e:
            # Any push failure is a system-level error: the py task must exit.
            raise ServingSystemException(f"Push {self.task_name} result cause exception: {e}")
        self.index += len(result_batch)

    def has_next(self):
        """Is there result not handled"""
        return self.index < self.instances_size

    def run(self, task=None):
        """Run preprocess or postprocess, if last task has not been handled, continue to handle,
        or handle new task, every task has some instances"""
        if not self.temp_result:
            # Nothing in flight: a new task must be supplied by the caller.
            assert task is not None
            self.temp_result = self._run_inner(task)
        try:
            # Advance the generator by one switch_batch worth of work.
            next(self.temp_result)
            if not self.has_next():
                # All instances of the current task handled; drop the generator.
                self.temp_result = None
        except StopIteration:
            # _run_inner yields after every batch while work remains, so a
            # StopIteration here means it terminated unexpectedly early.
            raise RuntimeError(f"Get next '{self.task_name}' result failed")

    def _run_inner(self, task):
        """Iterator get next result, and push it to c++

        Generator: yields after each batch of up to ``switch_batch`` results
        has been pushed, allowing the caller to switch to other work.
        """
        instances_size = len(task.instance_list)
        self.index = 0
        self.instances_size = len(task.instance_list)
        self.task_info = self.get_task_info(task.name)
        instance_list = task.instance_list
        # check input
        for item in instance_list:
            if not isinstance(item, tuple) or len(item) != self.task_info["inputs_count"]:
                raise RuntimeError(f"length of given inputs {len(item)}"
                                   f" not match {self.task_name} required " + str(self.task_info["inputs_count"]))
        # The user function returns an iterator of per-instance outputs.
        result = self._handle_task(instance_list)
        while self.index < instances_size:
            try:
                # Timestamp marking the start of this batch (used for logging).
                get_result_time_end = time.time()
                last_index = self.index
                for _ in range(self.index, min(self.index + self.switch_batch, instances_size)):
                    if has_worker_stopped():
                        logger.info("Worker has exited, exit py task")
                        raise ServingExitException()
                    output = next(result)
                    output = self._handle_result(output)
                    self.result_batch.append(output)
                get_result_time = time.time()
                logger.info(f"{self.task_name} get result "
                            f"{last_index} ~ {last_index + len(self.result_batch) - 1} cost time "
                            f"{(get_result_time - get_result_time_end) * 1000} ms")
                self.push_result_batch()
                yield self.index # end current coroutine, switch to next coroutine
            except StopIteration:
                # The user function yielded fewer results than instances:
                # mark the remainder failed and abort the task.
                result_count = self.index + len(self.result_batch)
                self.push_failed(instances_size - result_count)
                raise RuntimeError(
                    f"expecting '{self.task_name}' yield count {result_count} equal to "
                    f"instance size {instances_size}")
            except ServingSystemException as e:
                # Unrecoverable: fail all remaining instances and re-raise.
                result_count = self.index + len(self.result_batch)
                self.push_failed(instances_size - result_count)
                raise e
            except ServingExitException as e:
                # Worker shutdown: propagate without pushing anything further.
                raise e
            except Exception as e: # catch exception and try next
                logger.warning(f"{self.task_name} get result catch exception: {e}")
                logging.exception(e)
                self.push_failed(1) # push success results and a failed result
                yield self.index # end current coroutine, switch to next coroutine
                # Restart the user function on the instances not yet handled.
                result = self._handle_task(instance_list[self.index:])

    def _handle_task(self, instance_list):
        """Continue to handle task on new task or task exception happened

        Returns the iterator produced by the user function, or None when the
        call itself raised (in which case all given instances are failed).
        """
        try:
            outputs = self.task_info["fun"](instance_list)
            return outputs
        except Exception as e:
            logger.warning(f"{self.task_name} invoke catch exception: ")
            logging.exception(e)
            self.push_failed(len(instance_list))
            return None

    def _handle_result(self, output):
        """Further processing results of preprocess or postprocess"""
        # Normalize a single return value into a 1-tuple.
        if not isinstance(output, (tuple, list)):
            output = (output,)
        if len(output) != self.task_info["outputs_count"]:
            raise ServingSystemException(f"length of return output {len(output)} "
                                         f"not match {self.task_name} signatures " +
                                         str(self.task_info["outputs_count"]))
        # Convert MindSpore tensors (anything exposing .asnumpy) lazily.
        # NOTE(review): this builds a generator expression, not a tuple, so
        # each batch entry is a lazy generator — presumably the C++ push side
        # iterates it; confirm that a tuple(...) was not intended here.
        output = (item.asnumpy() if callable(getattr(item, "asnumpy", None)) else item for item in output)
        return output
  161. class PyPreprocess(PyTask):
  162. """Preprocess implement"""
  163. def __init__(self, switch_batch):
  164. super(PyPreprocess, self).__init__(switch_batch, "preprocess")
  165. def push_failed_impl(self, count):
  166. """Push failed preprocess result to c++ env"""
  167. Worker_.push_preprocess_failed(count)
  168. def push_result_batch_impl(self, result_batch):
  169. """Push success preprocess result to c++ env"""
  170. Worker_.push_preprocess_result(result_batch)
  171. def get_task_info(self, task_name):
  172. """Get preprocess task info, including inputs, outputs count, function of preprocess"""
  173. return preprocess_storage.get(task_name)
  174. class PyPostprocess(PyTask):
  175. """Postprocess implement"""
  176. def __init__(self, switch_batch):
  177. super(PyPostprocess, self).__init__(switch_batch, "postprocess")
  178. def push_failed_impl(self, count):
  179. """Push failed postprocess result to c++ env"""
  180. Worker_.push_postprocess_failed(count)
  181. def push_result_batch_impl(self, result_batch):
  182. """Push success postprocess result to c++ env"""
  183. Worker_.push_postprocess_result(result_batch)
  184. def get_task_info(self, task_name):
  185. """Get postprocess task info, including inputs, outputs count, function of postprocess"""
  186. return postprocess_storage.get(task_name)
class PyTaskThread(threading.Thread):
    """Thread for handling preprocess and postprocess

    Alternates between preprocess and postprocess work, handling at most
    ``switch_batch`` instances of one kind before giving the other a turn,
    and fetching new tasks from the C++ worker when one side is idle.
    """

    def __init__(self, switch_batch):
        super(PyTaskThread, self).__init__()
        # Instances handled before switching sides; clamped to at least 1.
        self.switch_batch = switch_batch
        if self.switch_batch <= 0:
            self.switch_batch = 1
        self.preprocess = PyPreprocess(self.switch_batch)
        self.postprocess = PyPostprocess(self.switch_batch)

    def run(self):
        """Run tasks of preprocess and postprocess, switch to other type of process when some instances are handled"""
        logger.info(f"start py task for preprocess and postprocess, switch_batch {self.switch_batch}")
        preprocess_turn = True
        while True:
            try:
                if has_worker_stopped():
                    logger.info("Worker has exited, exit py task")
                    break
                # Both sides idle: block on the C++ queue for the next task.
                if not self.preprocess.has_next() and not self.postprocess.has_next():
                    task = Worker_.get_py_task()
                    if task.task_type == task_type_stop:
                        break
                    if task.task_type == task_type_preprocess:
                        self.preprocess.run(task)
                        # Hand the next turn to the other side.
                        preprocess_turn = False
                    elif task.task_type == task_type_postprocess:
                        self.postprocess.run(task)
                        preprocess_turn = True
                # in preprocess turn, when preprocess is still running, switch to running preprocess
                # otherwise try get next preprocess task when postprocess is running
                # when next preprocess is not available, switch to running postprocess
                if preprocess_turn:
                    if self.preprocess.has_next():
                        self.preprocess.run()
                    elif self.postprocess.has_next():
                        # Non-blocking fetch; may return a task_type_empty task.
                        task = Worker_.try_get_preprocess_py_task()
                        if task.task_type == task_type_stop:
                            break
                        if task.task_type != task_type_empty:
                            self.preprocess.run(task)
                            preprocess_turn = False
                else:
                    if self.postprocess.has_next():
                        self.postprocess.run()
                    elif self.preprocess.has_next():
                        # Non-blocking fetch; may return a task_type_empty task.
                        task = Worker_.try_get_postprocess_py_task()
                        if task.task_type == task_type_stop:
                            break
                        if task.task_type != task_type_empty:
                            self.postprocess.run(task)
                            preprocess_turn = True
            except ServingExitException:
                # Raised from within a task when the worker shut down mid-batch.
                logger.info("Catch ServingExitException and exit py task")
                break
            except Exception as e:
                # Unexpected failure: log with traceback and terminate the loop.
                logger.error(f"py task catch exception and exit: {e}")
                logging.exception(e)
                break
        logger.info("end py task for preprocess and postprocess")
        # Tell the C++ worker the python side is done.
        Worker_.stop_and_clear()
  247. py_task_thread = None
  248. def _start_py_task(switch_batch):
  249. """Start python thread for proprocessing and postprocessing"""
  250. global py_task_thread
  251. if py_task_thread is None:
  252. py_task_thread = PyTaskThread(switch_batch)
  253. py_task_thread.start()
  254. def _join_py_task():
  255. """Join python thread for proprocessing and postprocessing"""
  256. global py_task_thread
  257. if py_task_thread is not None:
  258. py_task_thread.join()
  259. py_task_thread = None

A lightweight and high-performance service module that helps MindSpore developers efficiently deploy online inference services in the production environment.