You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

logfile_loader.py 6.8 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. """
  2. /**
  3. * Copyright 2020 Zhejiang Lab. All Rights Reserved.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. * =============================================================
  17. */
  18. """
  19. # -*- coding: UTF-8 -*-
  20. import threading
  21. import time
  22. from io import BytesIO
  23. from pathlib import Path
  24. from tbparser import SummaryReader
  25. from tbparser import Projector_Reader
  26. from utils.cache_io import CacheIO
  27. from utils.path_utils import path_parser
  28. from utils.redis_utils import RedisInstance
  29. import pickle
  30. class Trace_Thread(threading.Thread):
  31. def __init__(self, runname, filename, current_size, uid, cache_path):
  32. threading.Thread.__init__(self, name=filename.name)
  33. self.uid = uid
  34. self.runname = runname
  35. self.cache_path = cache_path
  36. self.filename = filename
  37. self.current_size = current_size
  38. self.r = RedisInstance
  39. # 该日志中是否有超参数
  40. self.has_hparams = False
  41. self.first_write = False
  42. self.metrics = []
  43. # 是否完成初始化
  44. self._finish_init = 0
  45. self.redis_tag = []
  46. def run(self):
  47. print('监听文件 %s' % self.filename)
  48. self.trace(self.current_size)
  49. def trace(self, current_size):
  50. filename = Path(self.filename)
  51. if filename.suffix == ".json":
  52. self.load_model_file(filename)
  53. self.finish_init = 1
  54. return
  55. f = open(filename, "rb")
  56. # for event file
  57. if "event" in filename.name:
  58. _io = BytesIO(
  59. f.read(current_size)
  60. )
  61. self.load_event_file(_io)
  62. # 设置初始化完成标志
  63. self.finish_init = 1
  64. while True:
  65. rest = f.read()
  66. if not rest:
  67. time.sleep(2)
  68. continue
  69. _io = BytesIO(rest)
  70. self.load_event_file(_io)
  71. # for projector file
  72. elif "projector" in filename.name:
  73. self.load_projector_file(f)
  74. # 设置初始化完成标志
  75. self.finish_init = 1
  76. @property
  77. def finish_init(self):
  78. return self._finish_init
  79. # 设置标志
  80. @finish_init.setter
  81. def finish_init(self, is_finish):
  82. self.r.set("{}_{}_{}_is_finish".format(self.uid, self.runname,
  83. self.filename.name), 1)
  84. print(self.name + " is finish")
  85. self._finish_init = is_finish
  86. def set_redis_key(self, type, tag, file_path):
  87. _key = self.uid + '_' + self.runname + '_' + type + '_' + tag
  88. if _key in self.redis_tag:
  89. pass
  90. else:
  91. self.r.set(_key, str(file_path))
  92. self.redis_tag.append(_key)
  93. def set_cache(self, file_name, data):
  94. if not file_name.parent.exists():
  95. file_name.parent.mkdir(parents=True, exist_ok=True)
  96. with open(file_name, 'ab') as f:
  97. pickle.dump(data, f)
  98. f.close()
  99. def load_event_file(self, fileIO):
  100. reader = SummaryReader(fileIO, types=[
  101. 'scalar',
  102. 'graph',
  103. 'hist',
  104. 'text',
  105. 'image',
  106. 'audio',
  107. 'hparams'
  108. ])
  109. for items in reader:
  110. if items.type == "graph":
  111. file_path = path_parser(self.cache_path, self.runname,
  112. items.type, tag='c_graph')
  113. CacheIO(file_path).set_cache(data=items.value)
  114. self.set_redis_key(items.type, tag='c_graph',
  115. file_path=file_path)
  116. continue
  117. elif items.type == "hparams":
  118. file_path = path_parser(self.cache_path, self.runname,
  119. type='hyperparm',
  120. tag='hparams')
  121. self.set_cache(file_name=file_path, data=items.value)
  122. self.set_redis_key(type='hyperparm',
  123. tag='hparams',
  124. file_path=file_path)
  125. continue
  126. item_data = {
  127. 'step': items.step,
  128. 'wall_time': items.wall_time,
  129. 'value': items.value,
  130. 'type': items.type
  131. }
  132. file_path = path_parser(self.cache_path, self.runname,
  133. type=items.type,
  134. tag=items.tag)
  135. CacheIO(file_path).set_cache(data=item_data)
  136. self.set_redis_key(type=items.type, tag=items.tag,
  137. file_path=file_path)
  138. def load_projector_file(self, fileIO):
  139. p_reader = Projector_Reader(fileIO).read()
  140. for items in p_reader.projectors:
  141. item_data = {
  142. 'step': items.step,
  143. 'wall_time': items.wall_time,
  144. 'value': items.value.reshape(items.value.shape[0], -1)
  145. if items.value.ndim > 2 else items.value,
  146. 'label': items.label,
  147. }
  148. file_path = path_parser(self.cache_path, self.runname,
  149. type=p_reader.metadata.type,
  150. tag=items.tag)
  151. CacheIO(file_path).set_cache(data=item_data)
  152. self.set_redis_key(type=p_reader.metadata.type, tag=items.tag,
  153. file_path=file_path)
  154. if p_reader.sample:
  155. file_path = path_parser(self.cache_path, self.runname,
  156. type="embedding",
  157. tag="sample_" + items.tag)
  158. CacheIO(file_path).set_cache(data=p_reader.sample)
  159. self.set_redis_key(type="embedding", tag="sample_" + items.tag,
  160. file_path=file_path)
  161. def load_model_file(self, file):
  162. with open(file, "r") as f:
  163. _content = f.read()
  164. file_path = path_parser(self.cache_path, self.runname,
  165. type="graph",
  166. tag="s_graph")
  167. CacheIO(file_path).set_cache(data=_content)
  168. self.set_redis_key(type="graph", tag="s_graph",
  169. file_path=file_path)

一站式算法开发平台、高性能分布式深度学习框架、先进算法模型库、视觉模型炼知平台、数据可视化分析平台等一系列平台及工具,在模型高效分布式训练、数据处理和可视分析、模型炼知和轻量化等技术上形成独特优势,目前已在产学研等各领域近千家单位及个人提供AI应用赋能

Contributors (1)