@@ -446,8 +446,8 @@ trans_dict_tf = {
     'h35.mlp.c_fc.w': 'gpt2_decoder.layers.35.feedforward.c_fc.weight',
     'h35.mlp.c_proj.b': 'gpt2_decoder.layers.35.feedforward.c_proj.bias',
     'h35.mlp.c_proj.w': 'gpt2_decoder.layers.35.feedforward.c_proj.weight',
-    'ln_f.b': 'layer_norm.layer_norm.gamma',
-    'ln_f.g': 'layer_norm.layer_norm.beta',
+    'ln_f.b': 'layer_norm.layer_norm.beta',
+    'ln_f.g': 'layer_norm.layer_norm.gamma',
     'wpe': 'gpt2_embedding_postprocess.position_embedding_table',
     'wte': 'gpt2_embedding_lookup.embedding_table'
 }  # transfer dictionary
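
Note on the hunk above: in the TF GPT-2 checkpoint, 'ln_f.g' is the final LayerNorm scale (gamma) and 'ln_f.b' is the shift (beta), so the corrected mapping sends them to '...gamma' and '...beta' respectively. The sketch below is not the repository's converter; helper and variable names are assumptions. It only illustrates how a name-translation dict like trans_dict_tf is typically consumed when re-saving the weights under MindSpore parameter names:

    # Minimal sketch, assuming tf_weights is a dict {tf_name: numpy array}
    # already read from the GPT-2 checkpoint.
    import numpy as np
    from mindspore import Parameter, Tensor
    from mindspore.train.serialization import save_checkpoint

    def convert_params(tf_weights, trans_dict, output_path):
        param_list = []
        for tf_name, array in tf_weights.items():
            if tf_name not in trans_dict:
                continue  # no MindSpore counterpart for this variable
            ms_name = trans_dict[tf_name]  # e.g. 'ln_f.g' -> '...layer_norm.gamma'
            param_list.append({"name": ms_name,
                               "data": Parameter(Tensor(array.astype(np.float32)), name=ms_name)})
        save_checkpoint(param_list, output_path)
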
@@ -89,7 +89,7 @@ def main():
     parser.add_argument("--output_file", type=str, required=True, default="", help='Output MindRecord file. ')
     parser.add_argument("--num_splits", type=int, default=1,
                         help='The MindRecord file will be split into the number of partition. ')
-    parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ')
+    parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ')
     parser.add_argument("--num_choice", type=int, required=True, help='Number of choices. ')
     parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ')
     parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ')
@@ -127,7 +127,7 @@ def main():
         if total_read % 500 == 0:
             logging.info("%d ...", total_read)
-        output = create_instance(tokenizer, line, args.max_seq_length, num_choice)
+        output = create_instance(tokenizer, line, args.max_length, num_choice)
         features = write_instance_to_file(writer, instance=output)
         total_written += 1
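
The preprocessing hunks above and below all make the same change: the command-line flag --max_seq_length is renamed to --max_length, and the create_instance call sites are updated to read args.max_length so the attribute name matches the flag. In isolation (the value passed here is only illustrative):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--max_length", type=int, required=True, help="Maximum sequence length.")
    args = parser.parse_args(["--max_length", "1024"])
    assert args.max_length == 1024  # args.max_seq_length no longer exists after the rename
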
@@ -84,7 +84,7 @@ def main():
     parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file. ')
     parser.add_argument("--num_splits", type=int, default=1,
                         help='The MindRecord file will be split into the number of partition. ')
-    parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ')
+    parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ')
     parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ')
     parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ')
     args = parser.parse_args()
@@ -119,7 +119,7 @@ def main():
         if total_read % 500 == 0:
             logging.info("%d ...", total_read)
-        output = create_instance(tokenizer, line, args.max_seq_length)
+        output = create_instance(tokenizer, line, args.max_length)
         features = write_instance_to_file(writer, instance=output)
         total_written += 1
@@ -70,7 +70,7 @@ def main():
     parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file. ')
     parser.add_argument("--num_splits", type=int, default=1,
                         help='The MindRecord file will be split into the number of partition. ')
-    parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ')
+    parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ')
     parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ')
     parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ')
     args = parser.parse_args()
@@ -105,7 +105,7 @@ def main():
         if total_read % 500 == 0:
             logging.info("%d ...", total_read)
-        output = create_instance(tokenizer, line, args.max_seq_length)
+        output = create_instance(tokenizer, line, args.max_length)
         features = write_instance_to_file(writer, instance=output)
         total_written += 1
@@ -74,7 +74,7 @@ def main():
     parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file.')
     parser.add_argument("--num_splits", type=int, default=1,
                         help='The MindRecord file will be split into the number of partition. ')
-    parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length.')
+    parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length.')
     parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ')
     parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ')
     parser.add_argument("--mode", type=str, required=True, default='cnn_dailymail', help='mode of dataset creation')
@@ -109,7 +109,7 @@ def main():
         if total_read % 500 == 0:
             logging.info("%d ...", total_read)
-        output = create_instance(tokenizer, line, args.max_seq_length)
+        output = create_instance(tokenizer, line, args.max_length)
         features = write_instance_to_file(writer, instance=output)
         total_written += 1
@@ -32,6 +32,7 @@ from src.utils.metric_method import Accuracy
 from src.dataset import create_cbt_dataset, create_language_model_dataset
 from src.utils.lr_schedule import GPT2LearningRate
 from src.utils.task_utils import calculate_choice_prob_for_cbt
+from src.utils.get_config_setting import get_train_setting, get_model_setting
 def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
@@ -248,6 +249,8 @@ def run_cbt_task():
                         use_one_hot_embeddings=False)
     if args_opt.do_train.lower() == "true":
+        get_train_setting(cfg)
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============== Start Loading Train Dataset ============")
         print(" | Train Dataset: {}".format(args_opt.train_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path))
@@ -256,6 +259,7 @@ def run_cbt_task():
         do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)
     if args_opt.do_eval.lower() == "true":
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============== Start Loading Evaluation Dataset ============")
         print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path))
@@ -33,6 +33,7 @@ from src.dataset import create_language_model_dataset
 from src.utils.lr_schedule import GPT2LearningRate
 from src.utils.tokenization import Tokenizer
 from src.GPT2_generation import GenerateForReadComprehension
+from src.utils.get_config_setting import get_train_setting, get_model_setting
 def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
@@ -121,7 +122,7 @@ def do_eval(dataset=None, network=None, metric=None, load_checkpoint_path="", ev
     if load_checkpoint_path == "":
         raise ValueError("Finetune model missed, evaluation task must load finetune model!")
     if metric.lower() == "f1":
-        print("Prepare to calculate the BLEU score ...")
+        print("Prepare to calculate the F1 score ...")
         gpt2_rc = network(config=gpt2_net_cfg,
                           is_training=False,
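
The BLEU -> F1 change above only fixes the log message: this branch is entered when metric.lower() == "f1", so it should announce the F1 score. For reference, here is a hedged sketch of the token-overlap F1 commonly used for reading-comprehension evaluation (the standard SQuAD-style formula, not necessarily the exact implementation in src.utils.metric_method):

    from collections import Counter

    def token_f1(prediction, reference):
        pred_tokens = prediction.split()
        ref_tokens = reference.split()
        common = Counter(pred_tokens) & Counter(ref_tokens)  # per-token overlap counts
        num_same = sum(common.values())
        if num_same == 0:
            return 0.0
        precision = num_same / len(pred_tokens)
        recall = num_same / len(ref_tokens)
        return 2 * precision * recall / (precision + recall)
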
@@ -269,6 +270,8 @@ def run_Readcomprehension():
                         use_one_hot_embeddings=False)
     if args_opt.do_train.lower() == "true":
+        get_train_setting(cfg)
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============== Start Loading Translation Train Dataset ==============")
         print(" | Train Dataset: {}".format(args_opt.train_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path))
@@ -277,6 +280,7 @@ def run_Readcomprehension():
         do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)
     if args_opt.do_eval.lower() == "true":
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============ Start Loading Translation Evaluation Dataset ============")
         print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path))
@@ -36,6 +36,7 @@ from src.utils.lr_schedule import GPT2LearningRate
 from src.utils.tokenization import Tokenizer
 from src.utils.task_utils import clean_hypo, modify_paramdict
 from src.GPT2_generation import GenerateForSummarization
+from src.utils.get_config_setting import get_train_setting, get_model_setting
 def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
@@ -272,6 +273,8 @@ def run_summarization():
         raise Exception("Device target error, Ascend is supported.")
     if args_opt.do_train.lower() == "true":
+        get_train_setting(cfg)
+        get_model_setting(cfg, gpt2_net_cfg)
         train_data_file_path = args_opt.train_data_file_path
         gpt2_loss = GPT2Summarization(config=gpt2_net_cfg,
                                       is_training=True,
@@ -282,6 +285,7 @@ def run_summarization():
         do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)
     if args_opt.do_eval.lower() == "true":
+        get_model_setting(cfg, gpt2_net_cfg)
         eval_dataset_file_path = args_opt.eval_data_file_path
         print("============== Start Loading Evaluation Dataset ============")
         eval_dataset = create_language_model_dataset(do_shuffle=(args_opt.train_data_shuffle.lower() == "true"),
@@ -33,6 +33,7 @@ from src.utils.lr_schedule import GPT2LearningRate
 from src.utils.tokenization import Tokenizer
 from src.utils.metric_method import BLEU
 from src.GPT2_generation import GenerateForTranslation
+from src.utils.get_config_setting import get_train_setting, get_model_setting
 def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
@@ -274,6 +275,8 @@ def run_translation():
                         use_one_hot_embeddings=False)
     if args_opt.do_train.lower() == "true":
+        get_train_setting(cfg)
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============== Start Loading Translation Train Dataset ==============")
         print(" | Train Dataset: {}".format(args_opt.train_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path))
@@ -282,6 +285,7 @@ def run_translation():
         do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num)
     if args_opt.do_eval.lower() == "true":
+        get_model_setting(cfg, gpt2_net_cfg)
         print("============ Start Loading Translation Evaluation Dataset ============")
         print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path))
         print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path))
@@ -56,7 +56,7 @@ def extract_string_from_tensor(input_ids, mode="single", config=None, tokenizer=
     for batch_idx in range(batch_size):
         sentence_tensor = input_ids_np[batch_idx]
-        sentence_list = sentence_tensor.asnumpy().tolist()[1:]
+        sentence_list = sentence_tensor.tolist()[1:]
         sentence = tokenizer.decode(sentence_list)
         prompt_start = 0
@@ -73,7 +73,7 @@ def extract_string_from_tensor(input_ids, mode="single", config=None, tokenizer=
     if mode == "single":
         for batch_idx in range(batch_size):
             sentence_tensor = input_ids_np[batch_idx]
-            sentence_list = sentence_tensor.asnumpy().tolist()[1:]
+            sentence_list = sentence_tensor.tolist()[1:]
             sentence = tokenizer.decode(sentence_list)
             prompt_start = 0
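
The last two hunks fix an AttributeError: input_ids_np is already a NumPy array (presumably produced once from the incoming Tensor via input_ids.asnumpy() earlier in extract_string_from_tensor), so each row indexed out of it is a numpy.ndarray, which has tolist() but no asnumpy(). A standalone illustration (token values are arbitrary):

    import numpy as np
    from mindspore import Tensor

    input_ids = Tensor(np.array([[50256, 15, 25, 318], [50256, 7, 12, 9]], dtype=np.int32))
    input_ids_np = input_ids.asnumpy()            # convert the Tensor to NumPy once

    sentence_tensor = input_ids_np[0]             # numpy.ndarray row, not a Tensor
    sentence_list = sentence_tensor.tolist()[1:]  # works; .asnumpy() here would raise AttributeError
    print(sentence_list)                          # [15, 25, 318]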