You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """ File Description
  4. Details
  5. """
  6. import os
  7. import shutil
  8. import subprocess
  9. import time
  10. import re
  11. from mindspore import log as logger
  12. rank_table_path = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"
  13. data_root = "/home/workspace/mindspore_dataset/"
  14. ckpt_root = "/home/workspace/mindspore_dataset/checkpoint"
  15. cur_path = os.path.split(os.path.realpath(__file__))[0]
  16. geir_root = os.path.join(cur_path, "mindspore_geir")
  17. arm_main_path = os.path.join(cur_path, "mindir_310infer_exe")
  18. model_zoo_path = os.path.join(cur_path, "../../../model_zoo")
  19. def copy_files(from_, to_, model_name):
  20. if not os.path.exists(os.path.join(from_, model_name)):
  21. raise ValueError("There is no file or path", os.path.join(from_, model_name))
  22. if os.path.exists(os.path.join(to_, model_name)):
  23. shutil.rmtree(os.path.join(to_, model_name))
  24. return os.system("cp -r {0} {1}".format(os.path.join(from_, model_name), to_))
  25. def exec_sed_command(old_list, new_list, file):
  26. if isinstance(old_list, str):
  27. old_list = [old_list]
  28. if isinstance(new_list, str):
  29. old_list = [new_list]
  30. if len(old_list) != len(new_list):
  31. raise ValueError("len(old_list) should be equal to len(new_list)")
  32. for old, new in zip(old_list, new_list):
  33. ret = os.system('sed -i "s#{0}#{1}#g" {2}'.format(old, new, file))
  34. if ret != 0:
  35. raise ValueError('exec `sed -i "s#{0}#{1}#g" {2}` failed.'.format(old, new, file))
  36. return ret
  37. def process_check(cycle_time, cmd, wait_time=5):
  38. for i in range(cycle_time):
  39. time.sleep(wait_time)
  40. sub = subprocess.Popen(args="{}".format(cmd), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
  41. stderr=subprocess.PIPE, universal_newlines=True)
  42. stdout_data, _ = sub.communicate()
  43. if not stdout_data:
  44. logger.info("process execute success.")
  45. return True
  46. logger.warning("process is running, please wait {}".format(i))
  47. logger.error("process execute execute timeout.")
  48. return False
  49. def get_perf_data(log_path, search_str="per step time", cmd=None):
  50. if cmd is None:
  51. get_step_times_cmd = r"""grep -a "{0}" {1}|egrep -v "loss|\]|\["|awk '{{print $(NF-1)}}'""" \
  52. .format(search_str, log_path)
  53. else:
  54. get_step_times_cmd = cmd
  55. sub = subprocess.Popen(args="{}".format(get_step_times_cmd), shell=True,
  56. stdin=subprocess.PIPE, stdout=subprocess.PIPE,
  57. stderr=subprocess.PIPE, universal_newlines=True)
  58. stdout, _ = sub.communicate()
  59. if sub.returncode != 0:
  60. raise RuntimeError("exec {} failed".format(cmd))
  61. logger.info("execute {} success".format(cmd))
  62. stdout = stdout.strip().split("\n")
  63. step_time_list = list(map(float, stdout[1:]))
  64. if not step_time_list:
  65. cmd = "cat {}".format(log_path)
  66. os.system(cmd)
  67. raise RuntimeError("step_time_list is empty")
  68. per_step_time = sum(step_time_list) / len(step_time_list)
  69. return per_step_time
  70. def get_loss_data_list(log_path, search_str="loss is", cmd=None):
  71. if cmd is None:
  72. loss_value_cmd = """ grep -a '{}' {}| awk '{{print $NF}}' """.format(search_str, log_path)
  73. else:
  74. loss_value_cmd = cmd
  75. sub = subprocess.Popen(args="{}".format(loss_value_cmd), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
  76. stderr=subprocess.PIPE, universal_newlines=True)
  77. stdout, _ = sub.communicate()
  78. if sub.returncode != 0:
  79. raise RuntimeError("get loss from {} failed".format(log_path))
  80. logger.info("execute {} success".format(cmd))
  81. stdout = stdout.strip().split("\n")
  82. loss_list = list(map(float, stdout))
  83. if not loss_list:
  84. cmd = "cat {}".format(log_path)
  85. os.system(cmd)
  86. raise RuntimeError("loss_list is empty")
  87. return loss_list
  88. def parse_log_file(pattern, log_path):
  89. value_list = []
  90. with open(log_path, "r") as file:
  91. for line in file.readlines():
  92. match_result = re.search(pattern, line)
  93. if match_result is not None:
  94. value_list.append(float(match_result.group(1)))
  95. if not value_list:
  96. print("pattern is", pattern)
  97. cmd = "cat {}".format(log_path)
  98. os.system(cmd)
  99. return value_list