diff --git a/model_zoo/research/nlp/gpt2/convert_tf_ckpt/trans_dict.py b/model_zoo/research/nlp/gpt2/convert_tf_ckpt/trans_dict.py
index 554ceb9be3..fb131d52ff 100644
--- a/model_zoo/research/nlp/gpt2/convert_tf_ckpt/trans_dict.py
+++ b/model_zoo/research/nlp/gpt2/convert_tf_ckpt/trans_dict.py
@@ -446,8 +446,8 @@ trans_dict_tf = {
     'h35.mlp.c_fc.w': 'gpt2_decoder.layers.35.feedforward.c_fc.weight',
     'h35.mlp.c_proj.b': 'gpt2_decoder.layers.35.feedforward.c_proj.bias',
     'h35.mlp.c_proj.w': 'gpt2_decoder.layers.35.feedforward.c_proj.weight',
-    'ln_f.b': 'layer_norm.layer_norm.gamma',
-    'ln_f.g': 'layer_norm.layer_norm.beta',
+    'ln_f.b': 'layer_norm.layer_norm.beta',
+    'ln_f.g': 'layer_norm.layer_norm.gamma',
     'wpe': 'gpt2_embedding_postprocess.position_embedding_table',
     'wte': 'gpt2_embedding_lookup.embedding_table'
 } # transfer dictionary
diff --git a/model_zoo/research/nlp/gpt2/create_cbt_data.py b/model_zoo/research/nlp/gpt2/create_cbt_data.py
index ee0a629329..fc31344a84 100644
--- a/model_zoo/research/nlp/gpt2/create_cbt_data.py
+++ b/model_zoo/research/nlp/gpt2/create_cbt_data.py
@@ -89,7 +89,7 @@ def main():
     parser.add_argument("--output_file", type=str, required=True, default="", help='Output MindRecord file. ')
     parser.add_argument("--num_splits", type=int, default=1,
                         help='The MindRecord file will be split into the number of partition. ')
-    parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ')
+    parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ')
     parser.add_argument("--num_choice", type=int, required=True, help='Number of choices. ')
     parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ')
     parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ')
@@ -127,7 +127,7 @@ def main():
         if total_read % 500 == 0:
             logging.info("%d ...", total_read)

-        output = create_instance(tokenizer, line, args.max_seq_length, num_choice)
+        output = create_instance(tokenizer, line, args.max_length, num_choice)
         features = write_instance_to_file(writer, instance=output)
         total_written += 1

diff --git a/model_zoo/research/nlp/gpt2/create_lambada_data.py b/model_zoo/research/nlp/gpt2/create_lambada_data.py
index 17141a3565..0ed368bcff 100644
--- a/model_zoo/research/nlp/gpt2/create_lambada_data.py
+++ b/model_zoo/research/nlp/gpt2/create_lambada_data.py
@@ -84,7 +84,7 @@ def main():
     parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file. ')
     parser.add_argument("--num_splits", type=int, default=1,
                         help='The MindRecord file will be split into the number of partition. ')
-    parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ')
+    parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ')
     parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ')
     parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ')
     args = parser.parse_args()
@@ -119,7 +119,7 @@ def main():
         if total_read % 500 == 0:
             logging.info("%d ...", total_read)

-        output = create_instance(tokenizer, line, args.max_seq_length)
+        output = create_instance(tokenizer, line, args.max_length)
         features = write_instance_to_file(writer, instance=output)
         total_written += 1

diff --git a/model_zoo/research/nlp/gpt2/create_lm_data.py b/model_zoo/research/nlp/gpt2/create_lm_data.py
index 8798f5d5fb..b65123b99a 100644
--- a/model_zoo/research/nlp/gpt2/create_lm_data.py
+++ b/model_zoo/research/nlp/gpt2/create_lm_data.py
@@ -70,7 +70,7 @@ def main():
     parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file. ')
     parser.add_argument("--num_splits", type=int, default=1,
                         help='The MindRecord file will be split into the number of partition. ')
-    parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ')
+    parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ')
     parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ')
     parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ')
     args = parser.parse_args()
@@ -105,7 +105,7 @@ def main():
         if total_read % 500 == 0:
             logging.info("%d ...", total_read)

-        output = create_instance(tokenizer, line, args.max_seq_length)
+        output = create_instance(tokenizer, line, args.max_length)
         features = write_instance_to_file(writer, instance=output)
         total_written += 1

diff --git a/model_zoo/research/nlp/gpt2/create_summary_data.py b/model_zoo/research/nlp/gpt2/create_summary_data.py
index 92b1494e8f..3d0cb80e3d 100644
--- a/model_zoo/research/nlp/gpt2/create_summary_data.py
+++ b/model_zoo/research/nlp/gpt2/create_summary_data.py
@@ -74,7 +74,7 @@ def main():
     parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file.')
     parser.add_argument("--num_splits", type=int, default=1,
                         help='The MindRecord file will be split into the number of partition. ')
-    parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length.')
+    parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length.')
     parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ')
     parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ')
     parser.add_argument("--mode", type=str, required=True, default='cnn_dailymail', help='mode of dataset creation')
@@ -109,7 +109,7 @@ def main():
         if total_read % 500 == 0:
             logging.info("%d ...", total_read)

-        output = create_instance(tokenizer, line, args.max_seq_length)
+        output = create_instance(tokenizer, line, args.max_length)
         features = write_instance_to_file(writer, instance=output)
         total_written += 1

diff --git a/model_zoo/research/nlp/gpt2/run_CBT_task.py b/model_zoo/research/nlp/gpt2/run_CBT_task.py
index 447a7a41ca..512ec3de25 100644
--- a/model_zoo/research/nlp/gpt2/run_CBT_task.py
+++ b/model_zoo/research/nlp/gpt2/run_CBT_task.py
@@ -32,6 +32,7 @@ from src.utils.metric_method import Accuracy
 from src.dataset import create_cbt_dataset, create_language_model_dataset
 from src.utils.lr_schedule import GPT2LearningRate
 from src.utils.task_utils import calculate_choice_prob_for_cbt
+from src.utils.get_config_setting import get_train_setting, get_model_setting


 def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
@@ -248,6 +249,8 @@
                         use_one_hot_embeddings=False)

     if args_opt.do_train.lower() == "true":
+        get_train_setting(cfg)
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============== Start Loading Train Dataset ============")
         print(" | Train Dataset: {}".format(args_opt.train_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path))
@@ -256,6 +259,7 @@
         do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)

     if args_opt.do_eval.lower() == "true":
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============== Start Loading Evaluation Dataset ============")
         print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path))
diff --git a/model_zoo/research/nlp/gpt2/run_ReadComprehension.py b/model_zoo/research/nlp/gpt2/run_ReadComprehension.py
index 207b7af9ee..c2ccc38932 100644
--- a/model_zoo/research/nlp/gpt2/run_ReadComprehension.py
+++ b/model_zoo/research/nlp/gpt2/run_ReadComprehension.py
@@ -33,6 +33,7 @@ from src.dataset import create_language_model_dataset
 from src.utils.lr_schedule import GPT2LearningRate
 from src.utils.tokenization import Tokenizer
 from src.GPT2_generation import GenerateForReadComprehension
+from src.utils.get_config_setting import get_train_setting, get_model_setting


 def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
@@ -121,7 +122,7 @@ def do_eval(dataset=None, network=None, metric=None, load_checkpoint_path="", ev
     if load_checkpoint_path == "":
         raise ValueError("Finetune model missed, evaluation task must load finetune model!")
     if metric.lower() == "f1":
-        print("Prepare to calculate the BLEU score ...")
+        print("Prepare to calculate the F1 score ...")

         gpt2_rc = network(config=gpt2_net_cfg,
                           is_training=False,
@@ -269,6 +270,8 @@ def run_Readcomprehension():
                         use_one_hot_embeddings=False)

     if args_opt.do_train.lower() == "true":
+        get_train_setting(cfg)
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============== Start Loading Translation Train Dataset ==============")
         print(" | Train Dataset: {}".format(args_opt.train_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path))
@@ -277,6 +280,7 @@
         do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)

     if args_opt.do_eval.lower() == "true":
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============ Start Loading Translation Evaluation Dataset ============")
         print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path))
diff --git a/model_zoo/research/nlp/gpt2/run_summarization.py b/model_zoo/research/nlp/gpt2/run_summarization.py
index 21b8241ef8..245e8bb476 100644
--- a/model_zoo/research/nlp/gpt2/run_summarization.py
+++ b/model_zoo/research/nlp/gpt2/run_summarization.py
@@ -36,6 +36,7 @@ from src.utils.lr_schedule import GPT2LearningRate
 from src.utils.tokenization import Tokenizer
 from src.utils.task_utils import clean_hypo, modify_paramdict
 from src.GPT2_generation import GenerateForSummarization
+from src.utils.get_config_setting import get_train_setting, get_model_setting


 def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
@@ -272,6 +273,8 @@ def run_summarization():
         raise Exception("Device target error, Ascend is supported.")

     if args_opt.do_train.lower() == "true":
+        get_train_setting(cfg)
+        get_model_setting(cfg, gpt2_net_cfg)
         train_data_file_path = args_opt.train_data_file_path
         gpt2_loss = GPT2Summarization(config=gpt2_net_cfg,
                                       is_training=True,
@@ -282,6 +285,7 @@
         do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)

     if args_opt.do_eval.lower() == "true":
+        get_model_setting(cfg, gpt2_net_cfg)
         eval_dataset_file_path = args_opt.eval_data_file_path
         print("============== Start Loading Evaluation Dataset ============")
         eval_dataset = create_language_model_dataset(do_shuffle=(args_opt.train_data_shuffle.lower() == "true"),
diff --git a/model_zoo/research/nlp/gpt2/run_translation.py b/model_zoo/research/nlp/gpt2/run_translation.py
index be2c878052..76dfffec5a 100644
--- a/model_zoo/research/nlp/gpt2/run_translation.py
+++ b/model_zoo/research/nlp/gpt2/run_translation.py
@@ -33,6 +33,7 @@ from src.utils.lr_schedule import GPT2LearningRate
 from src.utils.tokenization import Tokenizer
 from src.utils.metric_method import BLEU
 from src.GPT2_generation import GenerateForTranslation
+from src.utils.get_config_setting import get_train_setting, get_model_setting


 def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
@@ -274,6 +275,8 @@
                         use_one_hot_embeddings=False)

     if args_opt.do_train.lower() == "true":
+        get_train_setting(cfg)
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============== Start Loading Translation Train Dataset ==============")
         print(" | Train Dataset: {}".format(args_opt.train_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path))
@@ -282,6 +285,7 @@
         do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)

     if args_opt.do_eval.lower() == "true":
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============ Start Loading Translation Evaluation Dataset ============")
         print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path))
diff --git a/model_zoo/research/nlp/gpt2/src/utils/tensor_manipulations.py b/model_zoo/research/nlp/gpt2/src/utils/tensor_manipulations.py
index caf69f5c64..94fd0989db 100644
--- a/model_zoo/research/nlp/gpt2/src/utils/tensor_manipulations.py
+++ b/model_zoo/research/nlp/gpt2/src/utils/tensor_manipulations.py
@@ -56,7 +56,7 @@ def extract_string_from_tensor(input_ids, mode="single", config=None, tokenizer=

         for batch_idx in range(batch_size):
             sentence_tensor = input_ids_np[batch_idx]
-            sentence_list = sentence_tensor.asnumpy().tolist()[1:]
+            sentence_list = sentence_tensor.tolist()[1:]

             sentence = tokenizer.decode(sentence_list)
             prompt_start = 0
@@ -73,7 +73,7 @@ def extract_string_from_tensor(input_ids, mode="single", config=None, tokenizer=
     if mode == "single":
         for batch_idx in range(batch_size):
             sentence_tensor = input_ids_np[batch_idx]
-            sentence_list = sentence_tensor.asnumpy().tolist()[1:]
+            sentence_list = sentence_tensor.tolist()[1:]

             sentence = tokenizer.decode(sentence_list)
             prompt_start = 0