You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run_distribute_train.sh 1.6 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
  # Launch one background training process per device.
  #
  # Arguments:
  #   $1  DEVICE_NUM                  number of ranks to start (exported as RANK_SIZE)
  #   $2  EPOCH_SIZE                  forwarded to train.py as --epoch_size
  #   $3  IMAGE_DIR                   forwarded to train.py as --image_dir
  #   $4  ANNO_PATH                   forwarded to train.py as --anno_path
  #   $5  MINDSPORE_HCCL_CONFIG_PATH  exported under that name for the child processes
  #
  # For every rank i in [0, DEVICE_NUM) the script recreates ./LOG<i>, copies
  # the *.py files of the current directory into it, changes into it, dumps
  # the environment to env.log and starts `python ../train.py` in the
  # background with stdout/stderr redirected to log.txt.
  #
  # The script sticks to POSIX sh constructs so that it also works when it is
  # invoked as `sh run_distribute_train.sh ...`, as the banner suggests.

  # Print the invocation banner. It is printed on every run, valid or not.
  print_banner() {
    echo "Please run the script as: "
    echo "sh run_distribute_train.sh DEVICE_NUM EPOCH_SIZE IMAGE_DIR ANNO_PATH MINDSPORE_HCCL_CONFIG_PATH"
    echo "for example: sh run_distribute_train.sh 8 100 ./dataset/coco/train2017 ./dataset/train.txt ./hccl.json"
    echo "After running the script, the network runs in the background. The log will be generated in LOGx/log.txt"
  }

  # Print $1 as an absolute path when it is a relative path that exists
  # relative to the current directory; print it unchanged otherwise (already
  # absolute, empty, or not found from here).
  #
  # The training processes are started from inside ./LOG<i>, so a relative
  # path given on the command line would otherwise be looked up from the
  # wrong directory. Paths that do not exist from here are left alone so that
  # callers who already pass paths relative to ./LOG<i> keep working.
  #
  # The body is a subshell, so the `cd` and the helper variables do not leak.
  resolve_path() (
    case "$1" in
      '' | /*)
        printf '%s\n' "$1"
        exit 0
        ;;
    esac
    # The "./" prefix keeps CDPATH and leading dashes from affecting cd.
    if [ -d "./$1" ]; then
      cd "./$1" > /dev/null 2>&1 && pwd && exit 0
    elif [ -e "./$1" ]; then
      parent=$(dirname "./$1")
      leaf=$(basename "./$1")
      if cd "$parent" > /dev/null 2>&1; then
        parent_abs=$(pwd)
        # Strip a trailing slash so a parent of "/" does not yield "//leaf".
        printf '%s/%s\n' "${parent_abs%/}" "$leaf"
        exit 0
      fi
    fi
    printf '%s\n' "$1"
  )

  # Validate the arguments and start the per-device training processes.
  # Returns 0 on success (or when called without arguments, which only shows
  # the banner) and 1 on invalid arguments or a failed directory change.
  main() {
    print_banner

    # No arguments at all: the banner is the whole output, exit status 0.
    if [ "$#" -eq 0 ]; then
      return 0
    fi
    # Extra arguments beyond the fifth are ignored.
    if [ "$#" -lt 5 ]; then
      echo "error: expected 5 arguments but got $#" >&2
      return 1
    fi
    case "$1" in
      '' | *[!0-9]*)
        echo "error: DEVICE_NUM must be a non-negative integer, got '$1'" >&2
        return 1
        ;;
    esac

    export RANK_SIZE="$1"
    EPOCH_SIZE="$2"
    # Resolve before any cd so relative inputs stay valid inside ./LOG<i>.
    IMAGE_DIR=$(resolve_path "$3")
    ANNO_PATH=$(resolve_path "$4")
    MINDSPORE_HCCL_CONFIG_PATH=$(resolve_path "$5")
    export MINDSPORE_HCCL_CONFIG_PATH

    i=0
    while [ "$i" -lt "$RANK_SIZE" ]; do
      export DEVICE_ID="$i"
      # Start every rank from a fresh working directory.
      rm -rf "LOG$i"
      mkdir "./LOG$i"
      cp ./*.py "./LOG$i"
      cd "./LOG$i" || return 1
      export RANK_ID="$i"
      echo "start training for rank $i, device $DEVICE_ID"
      env > env.log
      # Note: this runs train.py from the parent directory, not the copy
      # that was just placed in ./LOG<i>.
      python ../train.py \
        --distribute=1 \
        --device_num="$RANK_SIZE" \
        --device_id="$DEVICE_ID" \
        --image_dir="$IMAGE_DIR" \
        --epoch_size="$EPOCH_SIZE" \
        --anno_path="$ANNO_PATH" > log.txt 2>&1 &
      cd ../ || return 1
      i=$((i + 1))
    done
  }

  main "$@"

MindSpore is a new open-source deep learning training/inference framework that can be used for mobile, edge and cloud scenarios.