| @@ -0,0 +1,28 @@ | |||||
| #!/bin/bash | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| # Usage: bash run_multigpu_train.sh RANK_SIZE EPOCH_SIZE DATASET | |||||
| # Launch distributed training on GPU through mpirun, in the background. | |||||
| # Arguments: | |||||
| #   $1 - RANK_SIZE:  number of processes handed to `mpirun -n` | |||||
| #   $2 - EPOCH_SIZE: value forwarded to the trainer as --epochs | |||||
| #   $3 - DATASET:    value forwarded to the trainer as --data_path | |||||
| # Outputs: stdout and stderr of the mpirun job go to log.txt in the current directory. | |||||
| if [ "$#" -lt 3 ]; then | |||||
|   echo "Usage: bash $0 RANK_SIZE EPOCH_SIZE DATASET" >&2 | |||||
|   exit 1 | |||||
| fi | |||||
| script_self=$(readlink -f "$0") | |||||
| self_path=$(dirname "${script_self}") | |||||
| RANK_SIZE=$1 | |||||
| EPOCH_SIZE=$2 | |||||
| DATASET=$3 | |||||
| # Every expansion is quoted so paths containing spaces or glob characters reach the trainer intact. | |||||
| mpirun --allow-run-as-root -n "${RANK_SIZE}" \ | |||||
|   python -s "${self_path}/../train_and_eval_distribute.py" \ | |||||
|   --device_target="GPU" \ | |||||
|   --data_path="${DATASET}" \ | |||||
|   --epochs="${EPOCH_SIZE}" > log.txt 2>&1 & | |||||
| @@ -31,5 +31,5 @@ do | |||||
| cd ${execute_path}/device_$i/ || exit | cd ${execute_path}/device_$i/ || exit | ||||
| export RANK_ID=$i | export RANK_ID=$i | ||||
| export DEVICE_ID=$i | export DEVICE_ID=$i | ||||
| python -s ${self_path}/../train_and_eval_multinpu.py --data_path=$DATASET --epochs=$EPOCH_SIZE >train_deep$i.log 2>&1 & | |||||
| python -s ${self_path}/../train_and_eval_distribute.py --data_path=$DATASET --epochs=$EPOCH_SIZE >train_deep$i.log 2>&1 & | |||||
| done | done | ||||
| @@ -20,6 +20,8 @@ def argparse_init(): | |||||
| argparse_init | argparse_init | ||||
| """ | """ | ||||
| parser = argparse.ArgumentParser(description='WideDeep') | parser = argparse.ArgumentParser(description='WideDeep') | ||||
| parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "GPU"], | |||||
| help="device where the code will be implemented. (Default: Ascend)") | |||||
| parser.add_argument("--data_path", type=str, default="./test_raw_data/") | parser.add_argument("--data_path", type=str, default="./test_raw_data/") | ||||
| parser.add_argument("--epochs", type=int, default=15) | parser.add_argument("--epochs", type=int, default=15) | ||||
| parser.add_argument("--full_batch", type=bool, default=False) | parser.add_argument("--full_batch", type=bool, default=False) | ||||
| @@ -44,6 +46,7 @@ class WideDeepConfig(): | |||||
| WideDeepConfig | WideDeepConfig | ||||
| """ | """ | ||||
| def __init__(self): | def __init__(self): | ||||
| self.device_target = "Ascend" | |||||
| self.data_path = "./test_raw_data/" | self.data_path = "./test_raw_data/" | ||||
| self.full_batch = False | self.full_batch = False | ||||
| self.epochs = 15 | self.epochs = 15 | ||||
| @@ -72,6 +75,7 @@ class WideDeepConfig(): | |||||
| """ | """ | ||||
| parser = argparse_init() | parser = argparse_init() | ||||
| args, _ = parser.parse_known_args() | args, _ = parser.parse_known_args() | ||||
| self.device_target = args.device_target | |||||
| self.data_path = args.data_path | self.data_path = args.data_path | ||||
| self.epochs = args.epochs | self.epochs = args.epochs | ||||
| self.full_batch = args.full_batch | self.full_batch = args.full_batch | ||||
| @@ -30,10 +30,6 @@ from src.metrics import AUCMetric | |||||
| from src.config import WideDeepConfig | from src.config import WideDeepConfig | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) | |||||
| context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True) | |||||
| init() | |||||
| def get_WideDeep_net(config): | def get_WideDeep_net(config): | ||||
| @@ -105,4 +101,13 @@ def train_and_eval(config): | |||||
| # Script entry point: build the config, set up the device and communication context, then train and evaluate. | |||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||
| wide_deep_config = WideDeepConfig() | wide_deep_config = WideDeepConfig() | ||||
| wide_deep_config.argparse_init() | wide_deep_config.argparse_init() | ||||
| # Graph mode on the device held in wide_deep_config.device_target; save_graphs is enabled. | |||||
| context.set_context(mode=context.GRAPH_MODE, device_target=wide_deep_config.device_target, save_graphs=True) | |||||
| # Call init() with the backend name that matches the device: "hccl" for Ascend, "nccl" for GPU. | |||||
| if wide_deep_config.device_target == "Ascend": | |||||
| init("hccl") | |||||
| elif wide_deep_config.device_target == "GPU": | |||||
| init("nccl") | |||||
| # Data-parallel mode; device_num comes from get_group_size(), which is called after init(). | |||||
| context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, | |||||
| device_num=get_group_size()) | |||||
| train_and_eval(wide_deep_config) | train_and_eval(wide_deep_config) | ||||