| @@ -39,7 +39,7 @@ Attention mechanism: uses the standardized Bahdanau attention mechanism. First, | |||||
| Note that you can run the scripts based on the dataset mentioned in the original paper or widely used in the relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related datasets below. | Note that you can run the scripts based on the dataset mentioned in the original paper or widely used in the relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related datasets below. | ||||
| - WMT Englis-German for training. | |||||
| - WMT English-German for training. | |||||
| - WMT newstest2014 for evaluation. | - WMT newstest2014 for evaluation. | ||||
| # [Environment Requirements](#contents) | # [Environment Requirements](#contents) | ||||
| @@ -178,7 +178,7 @@ Almost all required options and parameters can be easily assigned, including the | |||||
| 'hidden_size': 1024 # the output's last dimension of dynamicRNN | 'hidden_size': 1024 # the output's last dimension of dynamicRNN | ||||
| 'initializer_range': 0.1 # initializer range | 'initializer_range': 0.1 # initializer range | ||||
| 'max_decode_length': 50 # max length of decoder | 'max_decode_length': 50 # max length of decoder | ||||
| 'lr': 2e-1 # initial learning rate | |||||
| 'lr': 2e-3 # initial learning rate | |||||
| 'lr_scheduler': 'WarmupMultiStepLR' # learning rate scheduler | 'lr_scheduler': 'WarmupMultiStepLR' # learning rate scheduler | ||||
| 'existed_ckpt': "" # the absolute full path to save the checkpoint file | 'existed_ckpt': "" # the absolute full path to save the checkpoint file | ||||
| ``` | ``` | ||||
| @@ -242,7 +242,7 @@ The `VOCAB_ADDR` is the vocabulary address, `BPE_CODE_ADDR` is the bpe code addr | |||||
| | Resource | Ascend 910 | | | Resource | Ascend 910 | | ||||
| | uploaded Date | 11/06/2020 (month/day/year) | | | uploaded Date | 11/06/2020 (month/day/year) | | ||||
| | MindSpore Version | 1.0.0 | | | MindSpore Version | 1.0.0 | | ||||
| | Dataset | WMT Englis-German for training | | |||||
| | Dataset | WMT English-German for training | | |||||
| | Training Parameters | epoch=6, batch_size=128 | | | Training Parameters | epoch=6, batch_size=128 | | ||||
| | Optimizer | Adam | | | Optimizer | Adam | | ||||
| | Loss Function | Softmax Cross Entropy | | | Loss Function | Softmax Cross Entropy | | ||||
| @@ -4,7 +4,7 @@ | |||||
| "epochs": 6, | "epochs": 6, | ||||
| "batch_size": 128, | "batch_size": 128, | ||||
| "dataset_schema": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json", | "dataset_schema": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json", | ||||
| "pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001", | |||||
| "pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord", | |||||
| "fine_tune_dataset": null, | "fine_tune_dataset": null, | ||||
| "valid_dataset": null, | "valid_dataset": null, | ||||
| "dataset_sink_mode": true | "dataset_sink_mode": true | ||||
| @@ -93,7 +93,7 @@ class GNMTConfig: | |||||
| init_loss_scale (int): Initialized loss scale. | init_loss_scale (int): Initialized loss scale. | ||||
| loss_scale_factor (int): Loss scale factor. | loss_scale_factor (int): Loss scale factor. | ||||
| scale_window (int): Window size of loss scale. | scale_window (int): Window size of loss scale. | ||||
| lr_scheduler (str): Whether use lr_scheduler, only support "ISR" now. | |||||
| lr_scheduler (str): Learning rate scheduler. Please see the Note below. | |||||
| optimizer (str): Optimizer for training, e.g. Adam, Lamb, momentum. Default: Adam. | optimizer (str): Optimizer for training, e.g. Adam, Lamb, momentum. Default: Adam. | ||||
| lr (float): Initial learning rate. | lr (float): Initial learning rate. | ||||
| min_lr (float): Minimum learning rate. | min_lr (float): Minimum learning rate. | ||||
| @@ -6,7 +6,7 @@ | |||||
| "dataset_schema": "/home/workspace/dataset_menu/newstest2014.en.json", | "dataset_schema": "/home/workspace/dataset_menu/newstest2014.en.json", | ||||
| "pre_train_dataset": null, | "pre_train_dataset": null, | ||||
| "fine_tune_dataset": null, | "fine_tune_dataset": null, | ||||
| "test_dataset": "/home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001", | |||||
| "test_dataset": "/home/workspace/dataset_menu/newstest2014.en.mindrecord", | |||||
| "valid_dataset": null, | "valid_dataset": null, | ||||
| "dataset_sink_mode": true | "dataset_sink_mode": true | ||||
| }, | }, | ||||
| @@ -15,13 +15,13 @@ | |||||
| # ============================================================================ | # ============================================================================ | ||||
| echo "==============================================================================================================" | echo "==============================================================================================================" | ||||
| echo "Please run the scipt as: " | |||||
| echo "Please run the script as: " | |||||
| echo "sh run_distributed_train_ascend.sh RANK_TABLE_ADDR DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET" | echo "sh run_distributed_train_ascend.sh RANK_TABLE_ADDR DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET" | ||||
| echo "for example:" | echo "for example:" | ||||
| echo "sh run_distributed_train_ascend.sh \ | echo "sh run_distributed_train_ascend.sh \ | ||||
| /home/workspace/rank_table_8p.json \ | /home/workspace/rank_table_8p.json \ | ||||
| /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \ | /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \ | ||||
| /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001" | |||||
| /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord" | |||||
| echo "It is better to use absolute path." | echo "It is better to use absolute path." | ||||
| echo "==============================================================================================================" | echo "==============================================================================================================" | ||||
| @@ -15,13 +15,13 @@ | |||||
| # ============================================================================ | # ============================================================================ | ||||
| echo "==============================================================================================================" | echo "==============================================================================================================" | ||||
| echo "Please run the scipt as: " | |||||
| echo "Please run the script as: " | |||||
| echo "sh run_standalone_eval_ascend.sh DATASET_SCHEMA_TEST TEST_DATASET EXISTED_CKPT_PATH \ | echo "sh run_standalone_eval_ascend.sh DATASET_SCHEMA_TEST TEST_DATASET EXISTED_CKPT_PATH \ | ||||
| VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET" | VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET" | ||||
| echo "for example:" | echo "for example:" | ||||
| echo "sh run_standalone_eval_ascend.sh \ | echo "sh run_standalone_eval_ascend.sh \ | ||||
| /home/workspace/dataset_menu/newstest2014.en.json \ | /home/workspace/dataset_menu/newstest2014.en.json \ | ||||
| /home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001 \ | |||||
| /home/workspace/dataset_menu/newstest2014.en.mindrecord \ | |||||
| /home/workspace/gnmt_v2/gnmt-6_3452.ckpt \ | /home/workspace/gnmt_v2/gnmt-6_3452.ckpt \ | ||||
| /home/workspace/wmt16_de_en/vocab.bpe.32000 \ | /home/workspace/wmt16_de_en/vocab.bpe.32000 \ | ||||
| /home/workspace/wmt16_de_en/bpe.32000 \ | /home/workspace/wmt16_de_en/bpe.32000 \ | ||||
| @@ -53,7 +53,7 @@ cp ../*.py ./eval | |||||
| cp -r ../src ./eval | cp -r ../src ./eval | ||||
| cp -r ../config ./eval | cp -r ../config ./eval | ||||
| cd ./eval || exit | cd ./eval || exit | ||||
| echo "start eval for device $DEVICE_ID" | |||||
| echo "start for evaluation" | |||||
| env > env.log | env > env.log | ||||
| python eval.py \ | python eval.py \ | ||||
| --config=${current_exec_path}/eval/config/config_test.json \ | --config=${current_exec_path}/eval/config/config_test.json \ | ||||
| @@ -15,12 +15,12 @@ | |||||
| # ============================================================================ | # ============================================================================ | ||||
| echo "==============================================================================================================" | echo "==============================================================================================================" | ||||
| echo "Please run the scipt as: " | |||||
| echo "Please run the script as: " | |||||
| echo "sh run_standalone_train_ascend.sh DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET" | echo "sh run_standalone_train_ascend.sh DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET" | ||||
| echo "for example:" | echo "for example:" | ||||
| echo "sh run_standalone_train_ascend.sh \ | echo "sh run_standalone_train_ascend.sh \ | ||||
| /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \ | /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \ | ||||
| /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001" | |||||
| /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord" | |||||
| echo "It is better to use absolute path." | echo "It is better to use absolute path." | ||||
| echo "==============================================================================================================" | echo "==============================================================================================================" | ||||
| @@ -42,10 +42,10 @@ cp ../*.py ./train | |||||
| cp -r ../src ./train | cp -r ../src ./train | ||||
| cp -r ../config ./train | cp -r ../config ./train | ||||
| cd ./train || exit | cd ./train || exit | ||||
| echo "start training for device $DEVICE_ID" | |||||
| echo "start for training" | |||||
| env > env.log | env > env.log | ||||
| python train.py \ | python train.py \ | ||||
| --config=${current_exec_path}/train/config/config.json \ | --config=${current_exec_path}/train/config/config.json \ | ||||
| --dataset_schema_train=$DATASET_SCHEMA_TRAIN \ | --dataset_schema_train=$DATASET_SCHEMA_TRAIN \ | ||||
| --pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network${i}.log 2>&1 & | |||||
| --pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network.log 2>&1 & | |||||
| cd .. | cd .. | ||||
| @@ -136,7 +136,7 @@ class BiLingualDataLoader(DataLoader): | |||||
| columns = ["src", "src_padding", "prev_opt", "target", "tgt_padding"] | columns = ["src", "src_padding", "prev_opt", "target", "tgt_padding"] | ||||
| with open(self.schema_address, "w", encoding="utf-8") as f: | with open(self.schema_address, "w", encoding="utf-8") as f: | ||||
| f.write("{\n") | f.write("{\n") | ||||
| f.write(' "datasetType":"TF",\n') | |||||
| f.write(' "datasetType":"MS",\n') | |||||
| f.write(' "numRows":%s,\n' % provlist[0]) | f.write(' "numRows":%s,\n' % provlist[0]) | ||||
| f.write(' "columns":{\n') | f.write(' "columns":{\n') | ||||
| t = 1 | t = 1 | ||||
| @@ -211,7 +211,7 @@ class TextDataLoader(DataLoader): | |||||
| columns = ["src", "src_padding"] | columns = ["src", "src_padding"] | ||||
| with open(self.schema_address, "w", encoding="utf-8") as f: | with open(self.schema_address, "w", encoding="utf-8") as f: | ||||
| f.write("{\n") | f.write("{\n") | ||||
| f.write(' "datasetType":"TF",\n') | |||||
| f.write(' "datasetType":"MS",\n') | |||||
| f.write(' "numRows":%s,\n' % provlist[0]) | f.write(' "numRows":%s,\n' % provlist[0]) | ||||
| f.write(' "columns":{\n') | f.write(' "columns":{\n') | ||||
| t = 1 | t = 1 | ||||
| @@ -23,7 +23,7 @@ def _compute_fans(shape): | |||||
| Computes the number of input and output units for a weight shape. | Computes the number of input and output units for a weight shape. | ||||
| Args: | Args: | ||||
| shape (tuple): Integer shape tuple or TF tensor shape. | |||||
| shape (tuple): Integer shape tuple or MS tensor shape. | |||||
| Returns: | Returns: | ||||
| tuple, integer scalars (fan_in, fan_out). | tuple, integer scalars (fan_in, fan_out). | ||||