# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import argparse
import os
import time

import numpy as np

from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.train.callback import LossMonitor
from mindspore.common.tensor import Tensor

from src.config import params

class MyLossMonitor(LossMonitor):
    """LossMonitor that collects per-step losses and prints their mean every 100 steps."""

    def __init__(self, per_print_times=1):
        super(MyLossMonitor, self).__init__()
        self._per_print_times = per_print_times
        self._start_time = time.time()
        self._loss_list = []

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        loss = cb_params.net_outputs

        # A cell may return a tuple/list of outputs; take the first entry as the loss.
        if isinstance(loss, (tuple, list)):
            if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray):
                loss = loss[0]

        if isinstance(loss, Tensor) and isinstance(loss.asnumpy(), np.ndarray):
            loss = np.mean(loss.asnumpy())

        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1

        if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)):
            raise ValueError("epoch: {} step: {}. Invalid loss, terminating training.".format(
                cb_params.cur_epoch_num, cur_step_in_epoch))
        if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0:
            self._loss_list.append(loss)
            if cb_params.cur_step_num % 100 == 0:
                print("epoch: %s, steps: [%s] mean loss is: %s" % (cb_params.cur_epoch_num, cur_step_in_epoch,
                                                                   np.array(self._loss_list).mean()), flush=True)
                self._loss_list = []

        self._start_time = time.time()

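# Usage sketch (illustrative only, not used inside this module): the monitor is meant to be
# handed to Model.train() together with the other callbacks built by the training script.
# `train_net`, `dataset` and `epoch_size` below are placeholders for objects the training
# script creates; only the callback wiring is the point of this sketch.
#
#   from mindspore.train.model import Model
#
#   model = Model(train_net)
#   model.train(epoch_size, dataset,
#               callbacks=[MyLossMonitor(per_print_times=1)],
#               dataset_sink_mode=False)
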
def parse_args():
    """Parse training command-line arguments."""
    parser = argparse.ArgumentParser('mindspore openpose training')

    # dataset related
    parser.add_argument('--train_dir', type=str, default='train2017', help='train data dir')
    parser.add_argument('--train_ann', type=str, default='person_keypoints_train2017.json',
                        help='train annotations json')
    parser.add_argument('--group_size', type=int, default=1, help='world size for distributed training')

    args, _ = parser.parse_known_args()

    args.jsonpath_train = os.path.join(params['data_dir'], 'annotations', args.train_ann)
    args.imgpath_train = os.path.join(params['data_dir'], args.train_dir)
    args.maskpath_train = os.path.join(params['data_dir'], 'ignore_mask_train')

    return args

def get_lr(lr, lr_gamma, steps_per_epoch, max_epoch_train, lr_steps, group_size):
    """Build the three per-step learning-rate schedules used for training.

    Returns the main (stage) schedule, a base schedule at a quarter of the stage
    rate, and a VGG schedule that is additionally frozen (zero) for the first
    2000 steps. The rate is multiplied by lr_gamma at every milestone in
    lr_steps; milestones are divided by group_size for distributed training.
    """
    lr_stage = np.array([lr] * steps_per_epoch * max_epoch_train).astype('f')
    for step in lr_steps:
        step //= group_size
        lr_stage[step:] *= lr_gamma

    lr_base = lr_stage / 4

    lr_vgg = lr_base.copy()
    vgg_freeze_step = 2000
    lr_vgg[:vgg_freeze_step] = 0
    return lr_stage, lr_base, lr_vgg

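# Worked example (the values follow directly from the code above): get_lr(1e-4, 0.1, 100, 3, [200], 1)
# returns three arrays of length 300 (100 steps/epoch * 3 epochs). lr_stage holds 1e-4 for
# steps 0-199 and 1e-5 from step 200 on, lr_base is lr_stage / 4, and lr_vgg equals lr_base
# except that its first 2000 entries are zeroed -- here the whole schedule is shorter than the
# VGG freeze window, so lr_vgg is all zeros. The three schedules are intended to serve as
# per-parameter-group learning rates (presumably VGG backbone vs. the remaining layers).
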
def adjust_learning_rate(init_lr, lr_gamma, steps_per_epoch, max_epoch_train, stepvalues):
    """Same schedules as get_lr, but with decay milestones given in epochs instead of steps."""
    lr_stage = np.array([init_lr] * steps_per_epoch * max_epoch_train).astype('f')
    for epoch in stepvalues:
        lr_stage[epoch * steps_per_epoch:] *= lr_gamma

    lr_base = lr_stage / 4

    lr_vgg = lr_base.copy()
    vgg_freeze_step = 2000
    lr_vgg[:vgg_freeze_step] = 0
    return lr_stage, lr_base, lr_vgg

def load_model(test_net, model_path):
    """Load a checkpoint into test_net.

    Optimizer moment parameters are skipped, the 'network.' prefix added by the
    training wrapper is stripped from parameter names, and any remaining keys
    without that prefix are ignored.
    """
    if model_path:
        param_dict = load_checkpoint(model_path)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moment'):
                continue
            if key.startswith('network.'):
                param_dict_new[key[8:]] = values
        load_param_into_net(test_net, param_dict_new)

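# Usage sketch (illustrative): load_model() is typically called right after the network is
# constructed, e.g.
#
#   net = create_network()                 # placeholder for the actual network constructor in src/
#   load_model(net, '/path/to/openpose.ckpt')
#
# Checkpoint keys saved by the training wrapper are expected to carry a 'network.' prefix;
# everything else (for example optimizer 'moment*' entries) is dropped before loading.
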
class show_loss_list():
    """Accumulate a list of per-stage loss tensors and print their running mean."""

    def __init__(self, name):
        self.loss_list = np.zeros(6).astype('f')
        self.sums = 0
        self.name = name

    def add(self, list_of_tensor):
        self.sums += 1
        for i, loss_tensor in enumerate(list_of_tensor):
            self.loss_list[i] += loss_tensor.asnumpy()

    def show(self):
        print(self.name + ' stage_loss:', self.loss_list / (self.sums + 1e-8), flush=True)
        self.loss_list = np.zeros(6).astype('f')
        self.sums = 0

class AverageMeter():
    """Track the running average of a scalar loss tensor."""

    def __init__(self):
        self.loss = 0
        self.sum = 0

    def add(self, tensor):
        self.sum += 1
        self.loss += tensor.asnumpy()

    def meter(self):
        average_loss = self.loss / (self.sum + 1e-8)
        self.loss = 0
        self.sum = 0
        return average_loss
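

# Usage sketch (a hand-rolled training loop, not part of this module): both helpers above are
# meant to be updated once per step and reported periodically. `train_step` is a hypothetical
# function returning a scalar loss Tensor plus the list of six per-stage loss Tensors.
#
#   meter = AverageMeter()
#   stage_losses = show_loss_list('train')
#   for step, data in enumerate(dataset):
#       total_loss, loss_list = train_step(data)
#       meter.add(total_loss)
#       stage_losses.add(loss_list)
#       if (step + 1) % 100 == 0:
#           print('mean loss:', meter.meter(), flush=True)
#           stage_losses.show()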