You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

task.py 12 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Python run preprocess and postprocess in python"""
  16. import threading
  17. import time
  18. import logging
  19. from mindspore_serving._mindspore_serving import Worker_
  20. from mindspore_serving.worker.register.preprocess import preprocess_storage
  21. from mindspore_serving.worker.register.postprocess import postprocess_storage
  22. from mindspore_serving import log as logger
  23. class ServingSystemException(Exception):
  24. def __init__(self, msg):
  25. super(ServingSystemException, self).__init__()
  26. self.msg = msg
  27. def __str__(self):
  28. return "Serving system error: " + self.msg
# Task type tags carried by task.task_type from the c++ worker; PyTaskThread.run
# dispatches on these values. "stop" ends the worker thread, "empty" means no
# task is currently available.
task_type_stop = "stop"
task_type_empty = "empty"
task_type_preprocess = "preprocess"
task_type_postprocess = "postprocess"
class PyTask:
    """Base class for preprocess and postprocess.

    Drives a user-registered python function over a batch of instances as a
    cooperative coroutine: `_run_inner` is a generator that yields after every
    `switch_batch` instances so the owning thread can interleave preprocess and
    postprocess work. Subclasses implement the push_*_impl/get_task_info hooks
    that talk to the c++ env.
    """

    def __init__(self, switch_batch, task_name):
        super(PyTask, self).__init__()
        self.task_name = task_name          # "preprocess" or "postprocess"; used in log and error messages
        self.switch_batch = switch_batch    # max instances handled before yielding control to the other task type
        self.index = 0                      # number of instances already reported (success or failure)
        self.instances_size = 0             # total instances of the task currently being processed
        self.result_batch = []              # successful results accumulated but not yet pushed to the c++ env
        self.task_info = None               # registry entry with keys "fun", "inputs_count", "outputs_count"
        self.temp_result = None             # suspended _run_inner generator; None when no task is in flight

    def push_failed_impl(self, count):
        """Base method to push failed result; subclasses forward `count` failures to the c++ env."""
        raise NotImplementedError

    def push_result_batch_impl(self, result_batch):
        """Base method to push success result; subclasses forward the tuple of results to the c++ env."""
        raise NotImplementedError

    def get_task_info(self, task_name):
        """Base method to get task info for `task_name` from the python-side registry."""
        raise NotImplementedError

    def push_failed(self, count):
        """Report `count` failed instances, flushing pending successes first to keep result order."""
        self.push_result_batch()  # push success first
        self.push_failed_impl(count)
        self.index += count

    def push_result_batch(self):
        """Flush accumulated successful results to the c++ env, if any."""
        if not self.result_batch:
            return
        # Take ownership of the pending batch before pushing so the buffer is
        # never left half-consumed if the push raises.
        result_batch = self.result_batch
        self.result_batch = []
        try:
            self.push_result_batch_impl(tuple(result_batch))
        except Exception as e:
            # A push failure is a system error, not a user-function error.
            raise ServingSystemException(f"Push {self.task_name} result cause exception: {e}")
        self.index += len(result_batch)

    def has_next(self):
        """Return True while some instances of the current task are still unreported."""
        return self.index < self.instances_size

    def run(self, task=None):
        """Run preprocess or postprocess, if last task has not been handled, continue to handle,
        or handle new task, every task has some instances.

        `task` must be given when starting fresh (temp_result is None); it is
        ignored while resuming a suspended task.
        """
        if not self.temp_result:
            assert task is not None
            self.temp_result = self._run_inner(task)
        try:
            # Advance the coroutine by one chunk of up to switch_batch instances.
            next(self.temp_result)
            if not self.has_next():
                self.temp_result = None
        except StopIteration:
            # Defensive: _run_inner is expected to keep yielding until every
            # instance has been reported, so exhaustion here is a system bug.
            raise RuntimeError(f"Get next '{self.task_name}' result failed")

    def _run_inner(self, task):
        """Generator that pulls results from the user function and pushes them to c++.

        Yields after each chunk of switch_batch instances so the caller can
        switch to the other task type. Per-instance exceptions from the user
        function fail only that instance; system errors abort the whole task.
        """
        instances_size = len(task.instance_list)
        self.index = 0
        self.instances_size = len(task.instance_list)
        self.task_info = self.get_task_info(task.name)
        instance_list = task.instance_list
        # check input
        for item in instance_list:
            if not isinstance(item, tuple) or len(item) != self.task_info["inputs_count"]:
                raise RuntimeError(f"length of given inputs {len(item)}"
                                   f" not match {self.task_name} required " + str(self.task_info["inputs_count"]))
        result = self._handle_task(instance_list)
        while self.index < instances_size:
            try:
                # NOTE: despite the name, this is the chunk *start* timestamp.
                get_result_time_end = time.time()
                last_index = self.index
                for _ in range(self.index, min(self.index + self.switch_batch, instances_size)):
                    output = next(result)
                    output = self._handle_result(output)
                    self.result_batch.append(output)
                get_result_time = time.time()
                logger.info(f"{self.task_name} get result "
                            f"{last_index} ~ {last_index + len(self.result_batch) - 1} cost time "
                            f"{(get_result_time - get_result_time_end) * 1000} ms")
                self.push_result_batch()
                yield self.index  # end current coroutine, switch to next coroutine
            except StopIteration:
                # User generator ended early: fail all remaining instances.
                result_count = self.index + len(self.result_batch)
                self.push_failed(instances_size - result_count)
                raise RuntimeError(
                    f"expecting '{self.task_name}' yield count {result_count} equal to "
                    f"instance size {instances_size}")
            except ServingSystemException as e:
                # System error (e.g. push to c++ failed): fail the rest and abort.
                result_count = self.index + len(self.result_batch)
                self.push_failed(instances_size - result_count)
                raise e
            except Exception as e:  # catch exception and try next
                logger.warning(f"{self.task_name} get result catch exception: {e}")
                logging.exception(e)
                self.push_failed(1)  # push success results and a failed result
                yield self.index  # end current coroutine, switch to next coroutine
                # Restart the user function on the not-yet-reported instances.
                result = self._handle_task(instance_list[self.index:])

    def _handle_task(self, instance_list):
        """Invoke the user function on `instance_list`, returning its result iterator.

        If the invocation itself raises, every instance in the list is reported
        as failed and None is returned (the caller's loop then terminates
        because index has caught up with instances_size).
        """
        try:
            outputs = self.task_info["fun"](instance_list)
            return outputs
        except Exception as e:
            logger.warning(f"{self.task_name} invoke catch exception: ")
            logging.exception(e)
            self.push_failed(len(instance_list))
            return None

    def _handle_result(self, output):
        """Normalize one instance's output: wrap scalars, check arity, convert tensors.

        NOTE(review): the return value is a lazy generator expression, not a
        tuple — presumably the c++ push iterates it exactly once; confirm that
        the consumer accepts a one-shot iterable.
        """
        if not isinstance(output, (tuple, list)):
            output = (output,)
        if len(output) != self.task_info["outputs_count"]:
            raise ServingSystemException(f"length of return output {len(output)} "
                                         f"not match {self.task_name} signatures " +
                                         str(self.task_info["outputs_count"]))
        # Convert anything exposing asnumpy() (e.g. tensors) to numpy arrays.
        output = (item.asnumpy() if callable(getattr(item, "asnumpy", None)) else item for item in output)
        return output
  147. class PyPreprocess(PyTask):
  148. """Preprocess implement"""
  149. def __init__(self, switch_batch):
  150. super(PyPreprocess, self).__init__(switch_batch, "preprocess")
  151. def push_failed_impl(self, count):
  152. """Push failed preprocess result to c++ env"""
  153. Worker_.push_preprocess_failed(count)
  154. def push_result_batch_impl(self, result_batch):
  155. """Push success preprocess result to c++ env"""
  156. Worker_.push_preprocess_result(result_batch)
  157. def get_task_info(self, task_name):
  158. """Get preprocess task info, including inputs, outputs count, function of preprocess"""
  159. return preprocess_storage.get(task_name)
  160. class PyPostprocess(PyTask):
  161. """Postprocess implement"""
  162. def __init__(self, switch_batch):
  163. super(PyPostprocess, self).__init__(switch_batch, "postprocess")
  164. def push_failed_impl(self, count):
  165. """Push failed postprocess result to c++ env"""
  166. Worker_.push_postprocess_failed(count)
  167. def push_result_batch_impl(self, result_batch):
  168. """Push success postprocess result to c++ env"""
  169. Worker_.push_postprocess_result(result_batch)
  170. def get_task_info(self, task_name):
  171. """Get postprocess task info, including inputs, outputs count, function of postprocess"""
  172. return postprocess_storage.get(task_name)
  173. class PyTaskThread(threading.Thread):
  174. """Thread for handling preprocess and postprocess"""
  175. def __init__(self, switch_batch):
  176. super(PyTaskThread, self).__init__()
  177. self.switch_batch = switch_batch
  178. if self.switch_batch <= 0:
  179. self.switch_batch = 1
  180. self.preprocess = PyPreprocess(self.switch_batch)
  181. self.postprocess = PyPostprocess(self.switch_batch)
  182. def run(self):
  183. """Run tasks of preprocess and postprocess, switch to other type of process when some instances are handled"""
  184. logger.info(f"start py task for preprocess and postprocess, switch_batch {self.switch_batch}")
  185. preprocess_turn = True
  186. while True:
  187. try:
  188. if not self.preprocess.has_next() and not self.postprocess.has_next():
  189. task = Worker_.get_py_task()
  190. if task.task_type == task_type_stop:
  191. break
  192. if task.task_type == task_type_preprocess:
  193. self.preprocess.run(task)
  194. preprocess_turn = False
  195. elif task.task_type == task_type_postprocess:
  196. self.postprocess.run(task)
  197. preprocess_turn = True
  198. # in preprocess turn, when preprocess is still running, switch to running preprocess
  199. # otherwise try get next preprocess task when postprocess is running
  200. # when next preprocess is not available, switch to running postprocess
  201. if preprocess_turn:
  202. if self.preprocess.has_next():
  203. self.preprocess.run()
  204. elif self.postprocess.has_next():
  205. task = Worker_.try_get_preprocess_py_task()
  206. if task.task_type == task_type_stop:
  207. break
  208. if task.task_type != task_type_empty:
  209. self.preprocess.run(task)
  210. preprocess_turn = False
  211. else:
  212. if self.postprocess.has_next():
  213. self.postprocess.run()
  214. elif self.preprocess.has_next():
  215. task = Worker_.try_get_postprocess_py_task()
  216. if task.task_type == task_type_stop:
  217. break
  218. if task.task_type != task_type_empty:
  219. self.postprocess.run(task)
  220. preprocess_turn = True
  221. except Exception as e:
  222. logger.error(f"py task catch exception and exit: {e}")
  223. logging.exception(e)
  224. break
  225. logger.info("end py task for preprocess and postprocess")
  226. Worker_.stop_and_clear()
# Module-level singleton PyTaskThread; created lazily by _start_py_task and
# released by _join_py_task.
py_task_thread = None
  228. def _start_py_task(switch_batch):
  229. """Start python thread for proprocessing and postprocessing"""
  230. global py_task_thread
  231. if py_task_thread is None:
  232. py_task_thread = PyTaskThread(switch_batch)
  233. py_task_thread.start()
  234. def _join_py_task():
  235. """Join python thread for proprocessing and postprocessing"""
  236. global py_task_thread
  237. if py_task_thread is not None:
  238. py_task_thread.join()
  239. py_task_thread = None

A lightweight and high-performance service module that helps MindSpore developers efficiently deploy online inference services in the production environment.