|
|
|
@@ -29,6 +29,8 @@ then |
|
|
|
fi |
|
|
|
|
|
|
|
# Before starting distributed training, create the MindRecord files first.

# Absolute path of the directory that contains this script. The `|| exit` is
# placed outside the command substitution on purpose: an `exit` inside
# `$( ... )` only terminates the subshell, which would leave BASE_PATH empty
# and let the script carry on from the wrong directory.
BASE_PATH=$(cd "$(dirname "$0")" && pwd) || exit

# Run from the parent of the script directory, where train.py is invoked.
# Quoted so that paths containing spaces or glob characters are handled.
cd "$BASE_PATH/../" || exit

# Dataset-creation pass that produces the MindRecord files mentioned above.
python train.py --only_create_dataset=1

echo "After running the script, the network runs in the background. The log will be generated in LOGx/log.txt"
|
|
|
@@ -46,8 +48,8 @@ do |
|
|
|
export DEVICE_ID=$i |
|
|
|
rm -rf LOG$i |
|
|
|
mkdir ./LOG$i |
|
|
|
cp ../*.py ./LOG$i |
|
|
|
cp -r ../src ./LOG$i |
|
|
|
cp ./*.py ./LOG$i |
|
|
|
cp -r ./src ./LOG$i |
|
|
|
cd ./LOG$i || exit |
|
|
|
export RANK_ID=$i |
|
|
|
echo "start training for rank $i, device $DEVICE_ID" |
|
|
|
|