utils.py

import math
import time

import numpy as np

from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.train.callback import LossMonitor, Callback
from mindspore.common.tensor import Tensor
from mindspore.common import dtype as mstype

class MyLossMonitor(LossMonitor):
    """LossMonitor variant that accumulates per-step losses and prints their mean every 100 steps."""

    def __init__(self, per_print_times=1):
        super(MyLossMonitor, self).__init__()
        self._per_print_times = per_print_times
        self._start_time = time.time()
        self._loss_list = []

    def step_end(self, run_context):
        """Collect the step loss, abort on NaN/Inf, and periodically report the mean."""
        cb_params = run_context.original_args()
        loss = cb_params.net_outputs

        # The network may return a tuple/list of outputs; the loss is the first element.
        if isinstance(loss, (tuple, list)):
            if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray):
                loss = loss[0]

        if isinstance(loss, Tensor) and isinstance(loss.asnumpy(), np.ndarray):
            loss = np.mean(loss.asnumpy())

        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1

        if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)):
            raise ValueError("epoch: {} step: {}. Invalid loss, terminating training.".format(
                cb_params.cur_epoch_num, cur_step_in_epoch))

        if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0:
            self._loss_list.append(loss)
            if cb_params.cur_step_num % 100 == 0:
                print("epoch: %s, steps: [%s], mean loss is: %s" % (cb_params.cur_epoch_num, cur_step_in_epoch,
                                                                    np.array(self._loss_list).mean()), flush=True)
                self._loss_list = []

        self._start_time = time.time()
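
# A minimal usage sketch, assuming the standard mindspore.train.Model API; `train_net`
# and `dataset` are hypothetical placeholders for the compiled network and data pipeline:
#
#   from mindspore import Model
#   model = Model(train_net)
#   model.train(60, dataset, callbacks=[MyLossMonitor(per_print_times=1)])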

class MyScaleSensCallback(Callback):
    """Step the loss-scale value at the epoch milestones given by epoch_list."""

    def __init__(self, loss_scale_list, epoch_list):
        super(MyScaleSensCallback, self).__init__()
        self.loss_scale_list = loss_scale_list
        self.epoch_list = epoch_list
        self.scaling_sens = loss_scale_list[0]

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()
        epoch = cb_params.cur_epoch_num
        # Each milestone that has been passed advances to the next scale value, so
        # loss_scale_list is expected to hold one more entry than epoch_list.
        for i, _ in enumerate(self.epoch_list):
            if epoch >= self.epoch_list[i]:
                self.scaling_sens = self.loss_scale_list[i + 1]
            else:
                break

        scaling_sens_tensor = Tensor(self.scaling_sens, dtype=mstype.float32)
        cb_params.train_network.set_sense_scale(scaling_sens_tensor)
        print("Epoch: set train network scale sens to {}".format(self.scaling_sens))

def _linear_warmup_learning_rate(current_step, warmup_steps, base_lr, init_lr):
    """Linearly ramp the learning rate from init_lr to base_lr over warmup_steps."""
    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    learning_rate = float(init_lr) + lr_inc * current_step
    return learning_rate


def _a_cosine_learning_rate(current_step, base_lr, warmup_steps, decay_steps):
    """Cosine-anneal the learning rate from base_lr once warmup is over."""
    base = float(current_step - warmup_steps) / float(decay_steps)
    learning_rate = (1 + math.cos(base * math.pi)) / 2 * base_lr
    return learning_rate


def _dynamic_lr(base_lr, total_steps, warmup_steps, warmup_ratio=1 / 3):
    """Build a per-step schedule: linear warmup followed by cosine decay."""
    lr = []
    for i in range(total_steps):
        if i < warmup_steps:
            lr.append(_linear_warmup_learning_rate(i, warmup_steps, base_lr, base_lr * warmup_ratio))
        else:
            lr.append(_a_cosine_learning_rate(i, base_lr, warmup_steps, total_steps))
    return lr
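
# Quick sanity check of the schedule shape (illustrative numbers): warmup starts at
# base_lr * warmup_ratio, reaches base_lr at step warmup_steps, then decays toward 0.
#
#   schedule = _dynamic_lr(base_lr=1e-3, total_steps=1000, warmup_steps=100)
#   print(schedule[0], schedule[100], schedule[-1])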

def get_lr(lr, lr_gamma, steps_per_epoch, max_epoch_train, lr_steps, group_size, lr_type='default', warmup_epoch=5):
    """Return per-step schedules for the stage, base, and VGG parameter groups."""
    if lr_type == 'default':
        # Piecewise-constant schedule: multiply by lr_gamma at every milestone in lr_steps.
        lr_stage = np.array([lr] * steps_per_epoch * max_epoch_train).astype('f')
        for step in lr_steps:
            step //= group_size
            lr_stage[step:] *= lr_gamma
    elif lr_type == 'cosine':
        lr_stage = _dynamic_lr(lr, steps_per_epoch * max_epoch_train, warmup_epoch * steps_per_epoch,
                               warmup_ratio=1 / 3)
        lr_stage = np.array(lr_stage).astype('f')
    else:
        raise ValueError("lr type {} is not supported.".format(lr_type))

    # The base network trains at a quarter of the stage learning rate.
    lr_base = lr_stage.copy()
    lr_base = lr_base / 4

    # The VGG backbone stays frozen (zero learning rate) for the first steps.
    lr_vgg = lr_base.copy()
    vgg_freeze_step = 2000 // group_size
    lr_vgg[:vgg_freeze_step] = 0
    return lr_stage, lr_base, lr_vgg
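
# Usage sketch: the three schedules feed MindSpore's grouped-parameter optimizer API.
# The split into vgg/base/stage parameter lists is hypothetical here; in the training
# script it would come from filtering the network's trainable_params() by name.
#
#   lr_stage, lr_base, lr_vgg = get_lr(1e-4, 0.1, steps_per_epoch, max_epoch,
#                                      lr_steps=[100000, 200000], group_size=1)
#   group_params = [{'params': vgg_params, 'lr': lr_vgg},
#                   {'params': base_params, 'lr': lr_base},
#                   {'params': stage_params, 'lr': lr_stage}]
#   opt = nn.Adam(group_params)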

def load_model(test_net, model_path):
    """Load a checkpoint into test_net, skipping optimizer moments and
    stripping the 'network.' prefix from parameter names."""
    if model_path:
        param_dict = load_checkpoint(model_path)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moment'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
        load_param_into_net(test_net, param_dict_new)
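
# Usage sketch (hypothetical network class and checkpoint path):
#
#   net = OpenPoseNet()
#   load_model(net, "/path/to/checkpoint.ckpt")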

class show_loss_list():
    """Accumulate the six per-stage losses and print their running means."""

    def __init__(self, name):
        self.loss_list = np.zeros(6).astype('f')
        self.sums = 0
        self.name = name

    def add(self, list_of_tensor):
        self.sums += 1
        for i, loss_tensor in enumerate(list_of_tensor):
            self.loss_list[i] += loss_tensor.asnumpy()

    def show(self):
        # The small epsilon guards against division by zero when nothing was added.
        print(self.name + ' stage_loss:', self.loss_list / (self.sums + 1e-8), flush=True)
        self.loss_list = np.zeros(6).astype('f')
        self.sums = 0
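
# Usage sketch: `losses` stands in for the list of six per-stage scalar loss Tensors
# produced at each training step.
#
#   tracker = show_loss_list('train')
#   tracker.add(losses)
#   tracker.show()   # prints per-stage means, then resets the accumulators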

class AverageMeter():
    """Track a running mean of scalar loss tensors; meter() reports it and resets."""

    def __init__(self):
        self.loss = 0
        self.sum = 0

    def add(self, tensor):
        self.sum += 1
        self.loss += tensor.asnumpy()

    def meter(self):
        average_loss = self.loss / (self.sum + 1e-8)
        self.loss = 0
        self.sum = 0
        return average_loss
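
# Usage sketch: accumulate step losses, then read and reset the running mean.
# `step_losses` is a hypothetical iterable of scalar loss Tensors.
#
#   meter = AverageMeter()
#   for loss in step_losses:
#       meter.add(loss)
#   print("mean loss:", meter.meter())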