@@ -317,8 +317,9 @@ You can train your own model based on either pretrained classification model or
 1. Convert your own dataset to COCO or VOC style. Otherwise you have to add your own data preprocess code.
 2. Change config.py according to your own dataset, especially the `num_classes`.
-3. Set argument `filter_weight` to `True` while calling `train.py`, this will filter the final detection box weight from the pretrained model.
-4. Build your own bash scripts using new config and arguments for further convenient.
+3. Prepare a pretrained checkpoint. You can load it via the `pre_trained` argument. Transfer training starts a new training job, so keep `pre_trained_epoch_size` at its default value `0`.
+4. Set argument `filter_weight` to `True` when calling `train.py`; this filters out the final detection box weights from the pretrained model.
+5. Build your own bash scripts using the new config and arguments for further convenience.
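The `filter_weight` step exists because the detection head's parameter shapes depend on `num_classes`, so those weights cannot be reused once the class count changes. A minimal sketch of that kind of checkpoint filtering, assuming a hypothetical head-parameter name pattern (`multi_cls_layers` below is a placeholder, not the repo's actual naming):

```python
from mindspore.train.serialization import load_checkpoint, load_param_into_net

def load_backbone_only(net, ckpt_path, head_keyword="multi_cls_layers"):
    """Load a pretrained checkpoint while dropping detection-head weights
    whose shapes depend on num_classes (hypothetical key pattern)."""
    param_dict = load_checkpoint(ckpt_path)
    filtered = {k: v for k, v in param_dict.items() if head_keyword not in k}
    # Only the surviving backbone parameters are copied into the network;
    # the detection head keeps its fresh random initialization.
    load_param_into_net(net, filtered)
```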
### [Evaluation Process](#contents)
@@ -599,7 +599,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
             scaling_sens = sens
         # alloc status and clear should be right before gradoperation
         init = self.alloc_status()
-        init = F.depend(loss, init)
+        init = F.depend(init, loss)
         clear_status = self.clear_status(init)
         scaling_sens = F.depend(scaling_sens, clear_status)
         # update accumulation parameters
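The swapped argument order is the whole fix: `F.depend(value, expr)` returns `value` and only records `expr` as something that must execute first. So `F.depend(init, loss)` keeps `init` as the status handle while ordering it after the loss computation, whereas the old `F.depend(loss, init)` silently replaced `init` with the loss tensor. A standalone toy illustration of the semantics (not the repo's code):

```python
import numpy as np
from mindspore import Tensor, nn
from mindspore.ops import functional as F

class DependDemo(nn.Cell):
    """F.depend(value, expr) returns `value`; `expr` is only kept
    as an execution-order dependency."""
    def construct(self, x):
        loss = x * 2      # stands in for the computed loss
        init = x + 1      # stands in for the float-status handle
        init = F.depend(init, loss)  # still x + 1, now ordered after loss
        return init

out = DependDemo()(Tensor(np.array([3.0], np.float32)))
print(out)  # [4.] -- depend returned init; the buggy order would yield [6.]
```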
@@ -804,7 +804,8 @@ class BertModel(nn.Cell):
         self.bert_embedding_lookup = nn.Embedding(
             vocab_size=config.vocab_size,
             embedding_size=self.embedding_size,
-            use_one_hot=use_one_hot_embeddings)
+            use_one_hot=use_one_hot_embeddings,
+            embedding_table=TruncatedNormal(config.initializer_range))
         self.bert_embedding_postprocessor = EmbeddingPostprocessor(
             embedding_size=self.embedding_size,
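The added `embedding_table` argument replaces `nn.Embedding`'s default weight initializer with a truncated normal whose sigma comes from `config.initializer_range`, matching how the other BERT weights are initialized. A standalone sketch of the same call, with made-up sizes for illustration:

```python
from mindspore import nn
from mindspore.common.initializer import TruncatedNormal

# Hypothetical sizes for illustration only.
vocab_size, embedding_size, initializer_range = 21128, 768, 0.02

embedding = nn.Embedding(
    vocab_size=vocab_size,
    embedding_size=embedding_size,
    use_one_hot=False,
    # Weights drawn from a normal with sigma=0.02, truncated at two
    # sigmas, instead of the layer's default embedding table.
    embedding_table=TruncatedNormal(initializer_range))
```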
@@ -36,9 +36,9 @@ cfg = edict({
         'warmup_steps': 10000,
     }),
     'Lamb': edict({
-        'learning_rate': 3e-5,
+        'learning_rate': 3e-4,
         'end_learning_rate': 0.0,
-        'power': 5.0,
+        'power': 2.0,
         'warmup_steps': 10000,
         'weight_decay': 0.01,
         'decay_filter': lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower(),
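The `power` value feeds a polynomial decay schedule that shapes how the learning rate falls from `learning_rate` to `end_learning_rate` after warmup. A hedged sketch of that schedule, assuming the standard warmup-plus-polynomial-decay formulation rather than the repo's exact helper:

```python
def poly_decay_lr(step, total_steps, lr=3e-4, end_lr=0.0,
                  power=2.0, warmup_steps=10000):
    """Linear warmup followed by polynomial decay (assumed
    formulation, not necessarily the repo's implementation)."""
    if step < warmup_steps:
        # Linear warmup from 0 up to the peak learning rate.
        return lr * step / warmup_steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return (lr - end_lr) * (1 - progress) ** power + end_lr

# power=2.0 decays more gently early on than power=5.0 did,
# holding the rate nearer its peak for longer.
print(poly_decay_lr(step=60000, total_steps=110000))  # 7.5e-05
```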