| @@ -446,8 +446,8 @@ trans_dict_tf = { | |||||
| 'h35.mlp.c_fc.w': 'gpt2_decoder.layers.35.feedforward.c_fc.weight', | 'h35.mlp.c_fc.w': 'gpt2_decoder.layers.35.feedforward.c_fc.weight', | ||||
| 'h35.mlp.c_proj.b': 'gpt2_decoder.layers.35.feedforward.c_proj.bias', | 'h35.mlp.c_proj.b': 'gpt2_decoder.layers.35.feedforward.c_proj.bias', | ||||
| 'h35.mlp.c_proj.w': 'gpt2_decoder.layers.35.feedforward.c_proj.weight', | 'h35.mlp.c_proj.w': 'gpt2_decoder.layers.35.feedforward.c_proj.weight', | ||||
| 'ln_f.b': 'layer_norm.layer_norm.gamma', | |||||
| 'ln_f.g': 'layer_norm.layer_norm.beta', | |||||
| 'ln_f.b': 'layer_norm.layer_norm.beta', | |||||
| 'ln_f.g': 'layer_norm.layer_norm.gamma', | |||||
| 'wpe': 'gpt2_embedding_postprocess.position_embedding_table', | 'wpe': 'gpt2_embedding_postprocess.position_embedding_table', | ||||
| 'wte': 'gpt2_embedding_lookup.embedding_table' | 'wte': 'gpt2_embedding_lookup.embedding_table' | ||||
| } # transfer dictionary | } # transfer dictionary | ||||
| @@ -89,7 +89,7 @@ def main(): | |||||
| parser.add_argument("--output_file", type=str, required=True, default="", help='Output MindRecord file. ') | parser.add_argument("--output_file", type=str, required=True, default="", help='Output MindRecord file. ') | ||||
| parser.add_argument("--num_splits", type=int, default=1, | parser.add_argument("--num_splits", type=int, default=1, | ||||
| help='The MindRecord file will be split into the number of partition. ') | help='The MindRecord file will be split into the number of partition. ') | ||||
| parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ') | |||||
| parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ') | |||||
| parser.add_argument("--num_choice", type=int, required=True, help='Number of choices. ') | parser.add_argument("--num_choice", type=int, required=True, help='Number of choices. ') | ||||
| parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ') | parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ') | ||||
| parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ') | parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ') | ||||
| @@ -127,7 +127,7 @@ def main(): | |||||
| if total_read % 500 == 0: | if total_read % 500 == 0: | ||||
| logging.info("%d ...", total_read) | logging.info("%d ...", total_read) | ||||
| output = create_instance(tokenizer, line, args.max_seq_length, num_choice) | |||||
| output = create_instance(tokenizer, line, args.max_length, num_choice) | |||||
| features = write_instance_to_file(writer, instance=output) | features = write_instance_to_file(writer, instance=output) | ||||
| total_written += 1 | total_written += 1 | ||||
| @@ -84,7 +84,7 @@ def main(): | |||||
| parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file. ') | parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file. ') | ||||
| parser.add_argument("--num_splits", type=int, default=1, | parser.add_argument("--num_splits", type=int, default=1, | ||||
| help='The MindRecord file will be split into the number of partition. ') | help='The MindRecord file will be split into the number of partition. ') | ||||
| parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ') | |||||
| parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ') | |||||
| parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ') | parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ') | ||||
| parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ') | parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ') | ||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| @@ -119,7 +119,7 @@ def main(): | |||||
| if total_read % 500 == 0: | if total_read % 500 == 0: | ||||
| logging.info("%d ...", total_read) | logging.info("%d ...", total_read) | ||||
| output = create_instance(tokenizer, line, args.max_seq_length) | |||||
| output = create_instance(tokenizer, line, args.max_length) | |||||
| features = write_instance_to_file(writer, instance=output) | features = write_instance_to_file(writer, instance=output) | ||||
| total_written += 1 | total_written += 1 | ||||
| @@ -70,7 +70,7 @@ def main(): | |||||
| parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file. ') | parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file. ') | ||||
| parser.add_argument("--num_splits", type=int, default=1, | parser.add_argument("--num_splits", type=int, default=1, | ||||
| help='The MindRecord file will be split into the number of partition. ') | help='The MindRecord file will be split into the number of partition. ') | ||||
| parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length. ') | |||||
| parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length. ') | |||||
| parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ') | parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ') | ||||
| parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ') | parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ') | ||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| @@ -105,7 +105,7 @@ def main(): | |||||
| if total_read % 500 == 0: | if total_read % 500 == 0: | ||||
| logging.info("%d ...", total_read) | logging.info("%d ...", total_read) | ||||
| output = create_instance(tokenizer, line, args.max_seq_length) | |||||
| output = create_instance(tokenizer, line, args.max_length) | |||||
| features = write_instance_to_file(writer, instance=output) | features = write_instance_to_file(writer, instance=output) | ||||
| total_written += 1 | total_written += 1 | ||||
| @@ -74,7 +74,7 @@ def main(): | |||||
| parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file.') | parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file.') | ||||
| parser.add_argument("--num_splits", type=int, default=1, | parser.add_argument("--num_splits", type=int, default=1, | ||||
| help='The MindRecord file will be split into the number of partition. ') | help='The MindRecord file will be split into the number of partition. ') | ||||
| parser.add_argument("--max_seq_length", type=int, required=True, help='Maximum sequence length.') | |||||
| parser.add_argument("--max_length", type=int, required=True, help='Maximum sequence length.') | |||||
| parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ') | parser.add_argument("--vocab_file", type=str, required=True, default='', help='url of gpt2-vocab.json ') | ||||
| parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ') | parser.add_argument("--merge_file", type=str, required=True, default='', help='url of gpt2-merges.txt ') | ||||
| parser.add_argument("--mode", type=str, required=True, default='cnn_dailymail', help='mode of dataset creation') | parser.add_argument("--mode", type=str, required=True, default='cnn_dailymail', help='mode of dataset creation') | ||||
| @@ -109,7 +109,7 @@ def main(): | |||||
| if total_read % 500 == 0: | if total_read % 500 == 0: | ||||
| logging.info("%d ...", total_read) | logging.info("%d ...", total_read) | ||||
| output = create_instance(tokenizer, line, args.max_seq_length) | |||||
| output = create_instance(tokenizer, line, args.max_length) | |||||
| features = write_instance_to_file(writer, instance=output) | features = write_instance_to_file(writer, instance=output) | ||||
| total_written += 1 | total_written += 1 | ||||
| @@ -32,6 +32,7 @@ from src.utils.metric_method import Accuracy | |||||
| from src.dataset import create_cbt_dataset, create_language_model_dataset | from src.dataset import create_cbt_dataset, create_language_model_dataset | ||||
| from src.utils.lr_schedule import GPT2LearningRate | from src.utils.lr_schedule import GPT2LearningRate | ||||
| from src.utils.task_utils import calculate_choice_prob_for_cbt | from src.utils.task_utils import calculate_choice_prob_for_cbt | ||||
| from src.utils.get_config_setting import get_train_setting, get_model_setting | |||||
| def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1): | def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1): | ||||
| @@ -248,6 +249,8 @@ def run_cbt_task(): | |||||
| use_one_hot_embeddings=False) | use_one_hot_embeddings=False) | ||||
| if args_opt.do_train.lower() == "true": | if args_opt.do_train.lower() == "true": | ||||
| get_train_setting(cfg) | |||||
| get_model_setting(cfg, gpt2_net_cfg) | |||||
| print("============== Start Loading Train Dataset ============") | print("============== Start Loading Train Dataset ============") | ||||
| print(" | Train Dataset: {}".format(args_opt.train_data_file_path)) | print(" | Train Dataset: {}".format(args_opt.train_data_file_path)) | ||||
| print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path)) | print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path)) | ||||
| @@ -256,6 +259,7 @@ def run_cbt_task(): | |||||
| do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num) | do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num) | ||||
| if args_opt.do_eval.lower() == "true": | if args_opt.do_eval.lower() == "true": | ||||
| get_model_setting(cfg, gpt2_net_cfg) | |||||
| print("============== Start Loading Evaluation Dataset ============") | print("============== Start Loading Evaluation Dataset ============") | ||||
| print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path)) | print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path)) | ||||
| print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path)) | print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path)) | ||||
| @@ -33,6 +33,7 @@ from src.dataset import create_language_model_dataset | |||||
| from src.utils.lr_schedule import GPT2LearningRate | from src.utils.lr_schedule import GPT2LearningRate | ||||
| from src.utils.tokenization import Tokenizer | from src.utils.tokenization import Tokenizer | ||||
| from src.GPT2_generation import GenerateForReadComprehension | from src.GPT2_generation import GenerateForReadComprehension | ||||
| from src.utils.get_config_setting import get_train_setting, get_model_setting | |||||
| def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1): | def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1): | ||||
| @@ -121,7 +122,7 @@ def do_eval(dataset=None, network=None, metric=None, load_checkpoint_path="", ev | |||||
| if load_checkpoint_path == "": | if load_checkpoint_path == "": | ||||
| raise ValueError("Finetune model missed, evaluation task must load finetune model!") | raise ValueError("Finetune model missed, evaluation task must load finetune model!") | ||||
| if metric.lower() == "f1": | if metric.lower() == "f1": | ||||
| print("Prepare to calculate the BLEU score ...") | |||||
| print("Prepare to calculate the F1 score ...") | |||||
| gpt2_rc = network(config=gpt2_net_cfg, | gpt2_rc = network(config=gpt2_net_cfg, | ||||
| is_training=False, | is_training=False, | ||||
| @@ -269,6 +270,8 @@ def run_Readcomprehension(): | |||||
| use_one_hot_embeddings=False) | use_one_hot_embeddings=False) | ||||
| if args_opt.do_train.lower() == "true": | if args_opt.do_train.lower() == "true": | ||||
| get_train_setting(cfg) | |||||
| get_model_setting(cfg, gpt2_net_cfg) | |||||
| print("============== Start Loading Translation Train Dataset ==============") | print("============== Start Loading Translation Train Dataset ==============") | ||||
| print(" | Train Dataset: {}".format(args_opt.train_data_file_path)) | print(" | Train Dataset: {}".format(args_opt.train_data_file_path)) | ||||
| print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path)) | print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path)) | ||||
| @@ -277,6 +280,7 @@ def run_Readcomprehension(): | |||||
| do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num) | do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num) | ||||
| if args_opt.do_eval.lower() == "true": | if args_opt.do_eval.lower() == "true": | ||||
| get_model_setting(cfg, gpt2_net_cfg) | |||||
| print("============ Start Loading Translation Evaluation Dataset ============") | print("============ Start Loading Translation Evaluation Dataset ============") | ||||
| print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path)) | print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path)) | ||||
| print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path)) | print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path)) | ||||
| @@ -36,6 +36,7 @@ from src.utils.lr_schedule import GPT2LearningRate | |||||
| from src.utils.tokenization import Tokenizer | from src.utils.tokenization import Tokenizer | ||||
| from src.utils.task_utils import clean_hypo, modify_paramdict | from src.utils.task_utils import clean_hypo, modify_paramdict | ||||
| from src.GPT2_generation import GenerateForSummarization | from src.GPT2_generation import GenerateForSummarization | ||||
| from src.utils.get_config_setting import get_train_setting, get_model_setting | |||||
| def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1): | def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1): | ||||
| @@ -272,6 +273,8 @@ def run_summarization(): | |||||
| raise Exception("Device target error, Ascend is supported.") | raise Exception("Device target error, Ascend is supported.") | ||||
| if args_opt.do_train.lower() == "true": | if args_opt.do_train.lower() == "true": | ||||
| get_train_setting(cfg) | |||||
| get_model_setting(cfg, gpt2_net_cfg) | |||||
| train_data_file_path = args_opt.train_data_file_path | train_data_file_path = args_opt.train_data_file_path | ||||
| gpt2_loss = GPT2Summarization(config=gpt2_net_cfg, | gpt2_loss = GPT2Summarization(config=gpt2_net_cfg, | ||||
| is_training=True, | is_training=True, | ||||
| @@ -282,6 +285,7 @@ def run_summarization(): | |||||
| do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num) | do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num) | ||||
| if args_opt.do_eval.lower() == "true": | if args_opt.do_eval.lower() == "true": | ||||
| get_model_setting(cfg, gpt2_net_cfg) | |||||
| eval_dataset_file_path = args_opt.eval_data_file_path | eval_dataset_file_path = args_opt.eval_data_file_path | ||||
| print("============== Start Loading Evaluation Dataset ============") | print("============== Start Loading Evaluation Dataset ============") | ||||
| eval_dataset = create_language_model_dataset(do_shuffle=(args_opt.train_data_shuffle.lower() == "true"), | eval_dataset = create_language_model_dataset(do_shuffle=(args_opt.train_data_shuffle.lower() == "true"), | ||||
| @@ -33,6 +33,7 @@ from src.utils.lr_schedule import GPT2LearningRate | |||||
| from src.utils.tokenization import Tokenizer | from src.utils.tokenization import Tokenizer | ||||
| from src.utils.metric_method import BLEU | from src.utils.metric_method import BLEU | ||||
| from src.GPT2_generation import GenerateForTranslation | from src.GPT2_generation import GenerateForTranslation | ||||
| from src.utils.get_config_setting import get_train_setting, get_model_setting | |||||
| def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1): | def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1): | ||||
| @@ -274,6 +275,8 @@ def run_translation(): | |||||
| use_one_hot_embeddings=False) | use_one_hot_embeddings=False) | ||||
| if args_opt.do_train.lower() == "true": | if args_opt.do_train.lower() == "true": | ||||
| get_train_setting(cfg) | |||||
| get_model_setting(cfg, gpt2_net_cfg) | |||||
| print("============== Start Loading Translation Train Dataset ==============") | print("============== Start Loading Translation Train Dataset ==============") | ||||
| print(" | Train Dataset: {}".format(args_opt.train_data_file_path)) | print(" | Train Dataset: {}".format(args_opt.train_data_file_path)) | ||||
| print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path)) | print(" | Checkpoint: {}".format(args_opt.load_pretrain_ckpt_path)) | ||||
| @@ -282,6 +285,7 @@ def run_translation(): | |||||
| do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num) | do_train(train_dataset, gpt2_loss, load_pretrain_ckpt_path, save_finetune_ckpt_path, epoch_num) | ||||
| if args_opt.do_eval.lower() == "true": | if args_opt.do_eval.lower() == "true": | ||||
| get_model_setting(cfg, gpt2_net_cfg) | |||||
| print("============ Start Loading Translation Evaluation Dataset ============") | print("============ Start Loading Translation Evaluation Dataset ============") | ||||
| print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path)) | print(" | Eval Dataset: {}".format(args_opt.eval_data_file_path)) | ||||
| print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path)) | print(" | Checkpoint: {}".format(args_opt.load_finetune_ckpt_path)) | ||||
| @@ -56,7 +56,7 @@ def extract_string_from_tensor(input_ids, mode="single", config=None, tokenizer= | |||||
| for batch_idx in range(batch_size): | for batch_idx in range(batch_size): | ||||
| sentence_tensor = input_ids_np[batch_idx] | sentence_tensor = input_ids_np[batch_idx] | ||||
| sentence_list = sentence_tensor.asnumpy().tolist()[1:] | |||||
| sentence_list = sentence_tensor.tolist()[1:] | |||||
| sentence = tokenizer.decode(sentence_list) | sentence = tokenizer.decode(sentence_list) | ||||
| prompt_start = 0 | prompt_start = 0 | ||||
| @@ -73,7 +73,7 @@ def extract_string_from_tensor(input_ids, mode="single", config=None, tokenizer= | |||||
| if mode == "single": | if mode == "single": | ||||
| for batch_idx in range(batch_size): | for batch_idx in range(batch_size): | ||||
| sentence_tensor = input_ids_np[batch_idx] | sentence_tensor = input_ids_np[batch_idx] | ||||
| sentence_list = sentence_tensor.asnumpy().tolist()[1:] | |||||
| sentence_list = sentence_tensor.tolist()[1:] | |||||
| sentence = tokenizer.decode(sentence_list) | sentence = tokenizer.decode(sentence_list) | ||||
| prompt_start = 0 | prompt_start = 0 | ||||