| @@ -37,6 +37,7 @@ To train and evaluate the model, command as follows: | |||||
| python train_and_eval.py | python train_and_eval.py | ||||
| ``` | ``` | ||||
| Arguments: | Arguments: | ||||
| * `--device_target`: Device on which the code will run, e.g. `Ascend` or `GPU` (Default: Ascend). | |||||
| * `--data_path`: This should be set to the same directory given to the data_download's data_dir argument. | * `--data_path`: This should be set to the same directory given to the data_download's data_dir argument. | ||||
| * `--epochs`: Total train epochs. | * `--epochs`: Total train epochs. | ||||
| * `--batch_size`: Training batch size. | * `--batch_size`: Training batch size. | ||||
| @@ -57,6 +58,7 @@ To train the model in one device, command as follows: | |||||
| python train.py | python train.py | ||||
| ``` | ``` | ||||
| Arguments: | Arguments: | ||||
| * `--device_target`: Device on which the code will run, e.g. `Ascend` or `GPU` (Default: Ascend). | |||||
| * `--data_path`: This should be set to the same directory given to the data_download's data_dir argument. | * `--data_path`: This should be set to the same directory given to the data_download's data_dir argument. | ||||
| * `--epochs`: Total train epochs. | * `--epochs`: Total train epochs. | ||||
| * `--batch_size`: Training batch size. | * `--batch_size`: Training batch size. | ||||
| @@ -87,6 +89,7 @@ To evaluate the model, command as follows: | |||||
| python eval.py | python eval.py | ||||
| ``` | ``` | ||||
| Arguments: | Arguments: | ||||
| * `--device_target`: Device on which the code will run, e.g. `Ascend` or `GPU` (Default: Ascend). | |||||
| * `--data_path`: This should be set to the same directory given to the data_download's data_dir argument. | * `--data_path`: This should be set to the same directory given to the data_download's data_dir argument. | ||||
| * `--epochs`: Total train epochs. | * `--epochs`: Total train epochs. | ||||
| * `--batch_size`: Training batch size. | * `--batch_size`: Training batch size. | ||||
| @@ -26,11 +26,11 @@ from src.datasets import create_dataset | |||||
| from src.metrics import AUCMetric | from src.metrics import AUCMetric | ||||
| from src.config import WideDeepConfig | from src.config import WideDeepConfig | ||||
| context.set_context(mode=context.GRAPH_MODE, device_target="Davinci", | |||||
| save_graphs=True) | |||||
| def get_WideDeep_net(config): | def get_WideDeep_net(config): | ||||
| """ | |||||
| Get network of wide&deep model. | |||||
| """ | |||||
| WideDeep_net = WideDeepModel(config) | WideDeep_net = WideDeepModel(config) | ||||
| loss_net = NetWithLossClass(WideDeep_net, config) | loss_net = NetWithLossClass(WideDeep_net, config) | ||||
| @@ -91,4 +91,5 @@ if __name__ == "__main__": | |||||
| widedeep_config = WideDeepConfig() | widedeep_config = WideDeepConfig() | ||||
| widedeep_config.argparse_init() | widedeep_config.argparse_init() | ||||
| context.set_context(mode=context.GRAPH_MODE, device_target=widedeep_config.device_target) | |||||
| test_eval(widedeep_config) | test_eval(widedeep_config) | ||||
| @@ -14,7 +14,7 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| # bash run_multigpu_train.sh | |||||
| # bash run_multigpu_train.sh RANK_SIZE EPOCH_SIZE DATASET | |||||
| script_self=$(readlink -f "$0") | script_self=$(readlink -f "$0") | ||||
| self_path=$(dirname "${script_self}") | self_path=$(dirname "${script_self}") | ||||
| RANK_SIZE=$1 | RANK_SIZE=$1 | ||||
| @@ -25,4 +25,5 @@ mpirun --allow-run-as-root -n $RANK_SIZE \ | |||||
| python -s ${self_path}/../train_and_eval_distribute.py \ | python -s ${self_path}/../train_and_eval_distribute.py \ | ||||
| --device_target="GPU" \ | --device_target="GPU" \ | ||||
| --data_path=$DATASET \ | --data_path=$DATASET \ | ||||
| --batch_size=8000 \ | |||||
| --epochs=$EPOCH_SIZE > log.txt 2>&1 & | --epochs=$EPOCH_SIZE > log.txt 2>&1 & | ||||
| @@ -0,0 +1,27 @@ | |||||
| #!/bin/bash | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| # bash run_standalone_train_for_gpu.sh EPOCH_SIZE DATASET | |||||
| # Resolve the directory that contains this script so the training entry point | |||||
| # is located relative to the script, not to the caller's working directory. | |||||
| script_self=$(readlink -f "$0") | |||||
| self_path=$(dirname "${script_self}") | |||||
| # Positional arguments: $1 = number of training epochs, $2 = dataset directory. | |||||
| # Fail early instead of launching training with empty --epochs/--data_path values. | |||||
| if [ $# -lt 2 ]; then | |||||
| echo "Usage: bash run_standalone_train_for_gpu.sh EPOCH_SIZE DATASET" >&2 | |||||
| exit 1 | |||||
| fi | |||||
| EPOCH_SIZE=$1 | |||||
| DATASET=$2 | |||||
| # Expansions are quoted so a dataset or install path containing spaces or glob | |||||
| # characters reaches python as a single argument. | |||||
| # The job is started in the background; stdout and stderr are both written to | |||||
| # log.txt in the current working directory. | |||||
| python -s "${self_path}/../train_and_eval.py" \ | |||||
| --device_target="GPU" \ | |||||
| --data_path="$DATASET" \ | |||||
| --batch_size=16000 \ | |||||
| --epochs="$EPOCH_SIZE" > log.txt 2>&1 & | |||||
| @@ -15,16 +15,16 @@ | |||||
| import os | import os | ||||
| from mindspore import Model, context | from mindspore import Model, context | ||||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor | from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor | ||||
| from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel | from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel | ||||
| from src.callbacks import LossCallBack | from src.callbacks import LossCallBack | ||||
| from src.datasets import create_dataset | from src.datasets import create_dataset | ||||
| from src.config import WideDeepConfig | from src.config import WideDeepConfig | ||||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) | |||||
| def get_WideDeep_net(configure): | def get_WideDeep_net(configure): | ||||
| """ | |||||
| Get network of wide&deep model. | |||||
| """ | |||||
| WideDeep_net = WideDeepModel(configure) | WideDeep_net = WideDeepModel(configure) | ||||
| loss_net = NetWithLossClass(WideDeep_net, configure) | loss_net = NetWithLossClass(WideDeep_net, configure) | ||||
| @@ -72,7 +72,7 @@ def test_train(configure): | |||||
| model = Model(train_net) | model = Model(train_net) | ||||
| callback = LossCallBack(config=configure) | callback = LossCallBack(config=configure) | ||||
| ckptconfig = CheckpointConfig(save_checkpoint_steps=1, | |||||
| ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), | |||||
| keep_checkpoint_max=5) | keep_checkpoint_max=5) | ||||
| ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=configure.ckpt_path, config=ckptconfig) | ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=configure.ckpt_path, config=ckptconfig) | ||||
| model.train(epochs, ds_train, callbacks=[TimeMonitor(ds_train.get_dataset_size()), callback, ckpoint_cb]) | model.train(epochs, ds_train, callbacks=[TimeMonitor(ds_train.get_dataset_size()), callback, ckpoint_cb]) | ||||
| @@ -82,4 +82,5 @@ if __name__ == "__main__": | |||||
| config = WideDeepConfig() | config = WideDeepConfig() | ||||
| config.argparse_init() | config.argparse_init() | ||||
| context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) | |||||
| test_train(config) | test_train(config) | ||||
| @@ -15,7 +15,7 @@ | |||||
| import os | import os | ||||
| from mindspore import Model, context | from mindspore import Model, context | ||||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig | |||||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor | |||||
| from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel | from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel | ||||
| from src.callbacks import LossCallBack, EvalCallBack | from src.callbacks import LossCallBack, EvalCallBack | ||||
| @@ -23,10 +23,11 @@ from src.datasets import create_dataset | |||||
| from src.metrics import AUCMetric | from src.metrics import AUCMetric | ||||
| from src.config import WideDeepConfig | from src.config import WideDeepConfig | ||||
| context.set_context(mode=context.GRAPH_MODE, device_target="Davinci") | |||||
| def get_WideDeep_net(config): | def get_WideDeep_net(config): | ||||
| """ | |||||
| Get network of wide&deep model. | |||||
| """ | |||||
| WideDeep_net = WideDeepModel(config) | WideDeep_net = WideDeepModel(config) | ||||
| loss_net = NetWithLossClass(WideDeep_net, config) | loss_net = NetWithLossClass(WideDeep_net, config) | ||||
| @@ -87,11 +88,13 @@ def test_train_eval(config): | |||||
| out = model.eval(ds_eval) | out = model.eval(ds_eval) | ||||
| print("=====" * 5 + "model.eval() initialized: {}".format(out)) | print("=====" * 5 + "model.eval() initialized: {}".format(out)) | ||||
| model.train(epochs, ds_train, callbacks=[eval_callback, callback, ckpoint_cb]) | |||||
| model.train(epochs, ds_train, | |||||
| callbacks=[TimeMonitor(ds_train.get_dataset_size()), eval_callback, callback, ckpoint_cb]) | |||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||
| wide_deep_config = WideDeepConfig() | wide_deep_config = WideDeepConfig() | ||||
| wide_deep_config.argparse_init() | wide_deep_config.argparse_init() | ||||
| context.set_context(mode=context.GRAPH_MODE, device_target=wide_deep_config.device_target) | |||||
| test_train_eval(wide_deep_config) | test_train_eval(wide_deep_config) | ||||
| @@ -40,6 +40,9 @@ init() | |||||
| def get_WideDeep_net(config): | def get_WideDeep_net(config): | ||||
| """ | |||||
| Get network of wide&deep model. | |||||
| """ | |||||
| WideDeep_net = WideDeepModel(config) | WideDeep_net = WideDeepModel(config) | ||||
| loss_net = NetWithLossClass(WideDeep_net, config) | loss_net = NetWithLossClass(WideDeep_net, config) | ||||
| loss_net = VirtualDatasetCellTriple(loss_net) | loss_net = VirtualDatasetCellTriple(loss_net) | ||||
| @@ -33,6 +33,9 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||||
| def get_WideDeep_net(config): | def get_WideDeep_net(config): | ||||
| """ | |||||
| Get network of wide&deep model. | |||||
| """ | |||||
| WideDeep_net = WideDeepModel(config) | WideDeep_net = WideDeepModel(config) | ||||
| loss_net = NetWithLossClass(WideDeep_net, config) | loss_net = NetWithLossClass(WideDeep_net, config) | ||||
| train_net = TrainStepWrap(loss_net) | train_net = TrainStepWrap(loss_net) | ||||
| @@ -90,8 +93,12 @@ def train_and_eval(config): | |||||
| callback = LossCallBack(config=config) | callback = LossCallBack(config=config) | ||||
| ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), keep_checkpoint_max=5) | ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), keep_checkpoint_max=5) | ||||
| ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', | |||||
| directory=config.ckpt_path, config=ckptconfig) | |||||
| if config.device_target == "Ascend": | |||||
| ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', | |||||
| directory=config.ckpt_path, config=ckptconfig) | |||||
| elif config.device_target == "GPU": | |||||
| ckpoint_cb = ModelCheckpoint(prefix='widedeep_train_' + str(get_rank()), | |||||
| directory=config.ckpt_path, config=ckptconfig) | |||||
| out = model.eval(ds_eval) | out = model.eval(ds_eval) | ||||
| print("=====" * 5 + "model.eval() initialized: {}".format(out)) | print("=====" * 5 + "model.eval() initialized: {}".format(out)) | ||||
| model.train(epochs, ds_train, | model.train(epochs, ds_train, | ||||