| @@ -37,6 +37,7 @@ To train and evaluate the model, command as follows: | |||
| python train_and_eval.py | |||
| ``` | |||
| Arguments: | |||
| * `--device_target`: Device on which the code will run (Default: Ascend). | |||
| * `--data_path`: Set this to the same directory that was passed as the `data_dir` argument of data_download. | |||
| * `--epochs`: Total train epochs. | |||
| * `--batch_size`: Training batch size. | |||
| @@ -57,6 +58,7 @@ To train the model in one device, command as follows: | |||
| python train.py | |||
| ``` | |||
| Arguments: | |||
| * `--device_target`: Device on which the code will run (Default: Ascend). | |||
| * `--data_path`: Set this to the same directory that was passed as the `data_dir` argument of data_download. | |||
| * `--epochs`: Total train epochs. | |||
| * `--batch_size`: Training batch size. | |||
| @@ -87,6 +89,7 @@ To evaluate the model, command as follows: | |||
| python eval.py | |||
| ``` | |||
| Arguments: | |||
| * `--device_target`: Device on which the code will run (Default: Ascend). | |||
| * `--data_path`: Set this to the same directory that was passed as the `data_dir` argument of data_download. | |||
| * `--epochs`: Total train epochs. | |||
| * `--batch_size`: Training batch size. | |||
| @@ -26,11 +26,11 @@ from src.datasets import create_dataset | |||
| from src.metrics import AUCMetric | |||
| from src.config import WideDeepConfig | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Davinci", | |||
| save_graphs=True) | |||
| def get_WideDeep_net(config): | |||
| """ | |||
| Get network of wide&deep model. | |||
| """ | |||
| WideDeep_net = WideDeepModel(config) | |||
| loss_net = NetWithLossClass(WideDeep_net, config) | |||
| @@ -91,4 +91,5 @@ if __name__ == "__main__": | |||
| widedeep_config = WideDeepConfig() | |||
| widedeep_config.argparse_init() | |||
| context.set_context(mode=context.GRAPH_MODE, device_target=widedeep_config.device_target) | |||
| test_eval(widedeep_config) | |||
| @@ -14,7 +14,7 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| # bash run_multigpu_train.sh | |||
| # bash run_multigpu_train.sh RANK_SIZE EPOCH_SIZE DATASET | |||
| script_self=$(readlink -f "$0") | |||
| self_path=$(dirname "${script_self}") | |||
| RANK_SIZE=$1 | |||
| @@ -25,4 +25,5 @@ mpirun --allow-run-as-root -n $RANK_SIZE \ | |||
| python -s ${self_path}/../train_and_eval_distribute.py \ | |||
| --device_target="GPU" \ | |||
| --data_path=$DATASET \ | |||
| --batch_size=8000 \ | |||
| --epochs=$EPOCH_SIZE > log.txt 2>&1 & | |||
| @@ -0,0 +1,27 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| # bash run_standalone_train_for_gpu.sh EPOCH_SIZE DATASET | |||
| script_self=$(readlink -f "$0") | |||
| self_path=$(dirname "${script_self}") | |||
| EPOCH_SIZE=$1 | |||
| DATASET=$2 | |||
| python -s ${self_path}/../train_and_eval.py \ | |||
| --device_target="GPU" \ | |||
| --data_path=$DATASET \ | |||
| --batch_size=16000 \ | |||
| --epochs=$EPOCH_SIZE > log.txt 2>&1 & | |||
| @@ -15,16 +15,16 @@ | |||
| import os | |||
| from mindspore import Model, context | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor | |||
| from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel | |||
| from src.callbacks import LossCallBack | |||
| from src.datasets import create_dataset | |||
| from src.config import WideDeepConfig | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) | |||
| def get_WideDeep_net(configure): | |||
| """ | |||
| Get network of wide&deep model. | |||
| """ | |||
| WideDeep_net = WideDeepModel(configure) | |||
| loss_net = NetWithLossClass(WideDeep_net, configure) | |||
| @@ -72,7 +72,7 @@ def test_train(configure): | |||
| model = Model(train_net) | |||
| callback = LossCallBack(config=configure) | |||
| ckptconfig = CheckpointConfig(save_checkpoint_steps=1, | |||
| ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), | |||
| keep_checkpoint_max=5) | |||
| ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=configure.ckpt_path, config=ckptconfig) | |||
| model.train(epochs, ds_train, callbacks=[TimeMonitor(ds_train.get_dataset_size()), callback, ckpoint_cb]) | |||
| @@ -82,4 +82,5 @@ if __name__ == "__main__": | |||
| config = WideDeepConfig() | |||
| config.argparse_init() | |||
| context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) | |||
| test_train(config) | |||
| @@ -15,7 +15,7 @@ | |||
| import os | |||
| from mindspore import Model, context | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor | |||
| from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel | |||
| from src.callbacks import LossCallBack, EvalCallBack | |||
| @@ -23,10 +23,11 @@ from src.datasets import create_dataset | |||
| from src.metrics import AUCMetric | |||
| from src.config import WideDeepConfig | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Davinci") | |||
| def get_WideDeep_net(config): | |||
| """ | |||
| Get network of wide&deep model. | |||
| """ | |||
| WideDeep_net = WideDeepModel(config) | |||
| loss_net = NetWithLossClass(WideDeep_net, config) | |||
| @@ -87,11 +88,13 @@ def test_train_eval(config): | |||
| out = model.eval(ds_eval) | |||
| print("=====" * 5 + "model.eval() initialized: {}".format(out)) | |||
| model.train(epochs, ds_train, callbacks=[eval_callback, callback, ckpoint_cb]) | |||
| model.train(epochs, ds_train, | |||
| callbacks=[TimeMonitor(ds_train.get_dataset_size()), eval_callback, callback, ckpoint_cb]) | |||
| if __name__ == "__main__": | |||
| wide_deep_config = WideDeepConfig() | |||
| wide_deep_config.argparse_init() | |||
| context.set_context(mode=context.GRAPH_MODE, device_target=wide_deep_config.device_target) | |||
| test_train_eval(wide_deep_config) | |||
| @@ -40,6 +40,9 @@ init() | |||
| def get_WideDeep_net(config): | |||
| """ | |||
| Get network of wide&deep model. | |||
| """ | |||
| WideDeep_net = WideDeepModel(config) | |||
| loss_net = NetWithLossClass(WideDeep_net, config) | |||
| loss_net = VirtualDatasetCellTriple(loss_net) | |||
| @@ -33,6 +33,9 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||
| def get_WideDeep_net(config): | |||
| """ | |||
| Get network of wide&deep model. | |||
| """ | |||
| WideDeep_net = WideDeepModel(config) | |||
| loss_net = NetWithLossClass(WideDeep_net, config) | |||
| train_net = TrainStepWrap(loss_net) | |||
| @@ -90,8 +93,12 @@ def train_and_eval(config): | |||
| callback = LossCallBack(config=config) | |||
| ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), keep_checkpoint_max=5) | |||
| ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', | |||
| directory=config.ckpt_path, config=ckptconfig) | |||
| if config.device_target == "Ascend": | |||
| ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', | |||
| directory=config.ckpt_path, config=ckptconfig) | |||
| elif config.device_target == "GPU": | |||
| ckpoint_cb = ModelCheckpoint(prefix='widedeep_train_' + str(get_rank()), | |||
| directory=config.ckpt_path, config=ckptconfig) | |||
| out = model.eval(ds_eval) | |||
| print("=====" * 5 + "model.eval() initialized: {}".format(out)) | |||
| model.train(epochs, ds_train, | |||