You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run_broadcast_auto_parallel.sh 1.8 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. #!/bin/bash
  2. # Copyright 2020 Huawei Technologies Co., Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # ============================================================================
  # Launches one background pytest run of lenet_broadcast_auto_parallel.py per
  # device (DEVICE_NUM of them), each in its own lenet_broadcast<i> directory
  # next to this script, then waits for every run and reports its result.
  # Exits 0 only when all runs succeed; exits 1 on the first failed run.
  #
  # Abort on any unhandled failure during setup. Stricter options (-u,
  # pipefail) are deliberately not enabled because the sourced env.sh is not
  # visible here and may rely on unset variables.
  set -e

  # Absolute path of the directory that contains this script.
  BASE_PATH=$(
    cd "$(dirname -- "$0")"
    pwd
  )
  CONFIG_PATH=/home/workspace/mindspore_config
  export DEVICE_NUM=8
  export RANK_SIZE=$DEVICE_NUM
  source "${BASE_PATH}/env.sh"
  unset SLOG_PRINT_TO_STDOUT
  # Rank-table JSON matching the number of devices.
  export MINDSPORE_HCCL_CONFIG_PATH="${CONFIG_PATH}/hccl/rank_table_${DEVICE_NUM}p.json"

  # PID of the background pytest process for each device, indexed by device.
  process_pid=()
  for ((i = 0; i < DEVICE_NUM; i++)); do
    # Start every run from a fresh, empty per-device working directory.
    work_dir="${BASE_PATH}/lenet_broadcast${i}"
    rm -rf -- "${work_dir}"
    mkdir -- "${work_dir}"
    cp -r -- "${BASE_PATH}/lenet_broadcast_auto_parallel.py" "${work_dir}/"
    cd "${work_dir}"
    export RANK_ID=${i}
    export DEVICE_ID=${i}
    echo "start training for device $i"
    # Snapshot the environment this device's run is started with.
    env >"env${i}.log"
    pytest -s -v lenet_broadcast_auto_parallel.py >"test_lenet_auto_parallel_broadcast_8p_log${i}.log" 2>&1 &
    process_pid[i]=$!
  done

  for ((i = 0; i < DEVICE_NUM; i++)); do
    # Capture the exit status through `||`: under `set -e` a bare failing
    # `wait` would terminate the script before the error could be reported.
    status=0
    wait "${process_pid[i]}" || status=$?
    if [ "${status}" != "0" ]; then
      echo "[ERROR] test_broadcast_auto_parallel failed. status: ${status}"
      exit 1
    else
      echo "[INFO] test_broadcast_auto_parallel success."
    fi
  done
  exit 0