From: @ttudu Reviewed-by: @c_34,@liangchenghui Signed-off-by: @c_34 tags/v1.1.0
| @@ -178,7 +178,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL] | |||
| # random threshold in data augmentation | |||
| "keep_ratio": True, | |||
| "flip_ratio": 0.5, | |||
| "photo_ratio": 0.5, | |||
| "expand_ratio": 1.0, | |||
| "max_instance_count": 128, # max number of bbox for each image | |||
| @@ -265,7 +264,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL] | |||
| "test_max_per_img": 100, # max number of instance | |||
| "test_batch_size": 2, # batch size | |||
| "rpn_head_loss_type": "CrossEntropyLoss", # loss type in rpn | |||
| "rpn_head_use_sigmoid": True, # whether use sigmoid or not in rpn | |||
| "rpn_head_weight": 1.0, # rpn head weight in loss | |||
| "mask_thr_binary": 0.5, # mask threshold for in rcnn | |||
| @@ -275,7 +273,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL] | |||
| "base_step": 58633, # base step in lr generator | |||
| "total_epoch": 13, # total epoch in lr generator | |||
| "warmup_step": 500, # warm up step in lr generator | |||
| "warmup_mode": "linear", # warm up mode | |||
| "warmup_ratio": 1/3.0, # warm up ratio | |||
| "sgd_momentum": 0.9, # momentum in optimizer | |||
| @@ -14,9 +14,9 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| if [ $# != 2 ] && [ $# != 1 ] | |||
| then | |||
| echo "Usage: sh run_train.sh [RANK_TABLE_FILE] [PRETRAINED_PATH]" | |||
| echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_PATH](optional)" | |||
| exit 1 | |||
| fi | |||
| @@ -31,7 +31,11 @@ PATH1=$(get_real_path $1) | |||
| PATH2=$2 | |||
| echo $PATH1 | |||
| echo $PATH2 | |||
| if [ $# == 2 ] | |||
| then | |||
| echo $PATH2 | |||
| fi | |||
| if [ ! -f $PATH1 ] | |||
| then | |||
| @@ -67,7 +71,16 @@ do | |||
| cd ./train_parallel$i || exit | |||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||
| env > env.log | |||
| taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM \ | |||
| --pre_trained=$PATH2 &> log & | |||
| if [ $# == 2 ] | |||
| then | |||
| taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM \ | |||
| --pre_trained=$PATH2 &> log & | |||
| fi | |||
| if [ $# == 1 ] | |||
| then | |||
| taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM &> log & | |||
| fi | |||
| cd .. | |||
| done | |||
| @@ -14,9 +14,9 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 1 ] | |||
| if [ $# != 1 ] && [ $# != 0 ] | |||
| then | |||
| echo "Usage: sh run_standalone_train.sh [PRETRAINED_PATH]" | |||
| echo "Usage: sh run_standalone_train.sh [PRETRAINED_PATH](optional)" | |||
| exit 1 | |||
| fi | |||
| @@ -27,13 +27,11 @@ get_real_path(){ | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| echo $PATH1 | |||
| if [ ! -f $PATH1 ] | |||
| then | |||
| echo "error: PRETRAINED_PATH=$PATH1 is not a file" | |||
| exit 1 | |||
| if [ $# == 1 ] | |||
| then | |||
| PATH1=$(get_real_path $1) | |||
| echo $PATH1 | |||
| fi | |||
| ulimit -u unlimited | |||
| @@ -53,5 +51,14 @@ cp -r ../src ./train | |||
| cd ./train || exit | |||
| echo "start training for device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log & | |||
| if [ $# == 1 ] | |||
| then | |||
| python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log & | |||
| fi | |||
| if [ $# == 0 ] | |||
| then | |||
| python train.py --do_train=True --device_id=$DEVICE_ID &> log & | |||
| fi | |||
| cd .. | |||
| @@ -22,7 +22,6 @@ config = ed({ | |||
| "img_height": 768, | |||
| "keep_ratio": True, | |||
| "flip_ratio": 0.5, | |||
| "photo_ratio": 0.5, | |||
| "expand_ratio": 1.0, | |||
| "max_instance_count": 128, | |||
| @@ -109,7 +108,6 @@ config = ed({ | |||
| "test_max_per_img": 100, | |||
| "test_batch_size": 2, | |||
| "rpn_head_loss_type": "CrossEntropyLoss", | |||
| "rpn_head_use_sigmoid": True, | |||
| "rpn_head_weight": 1.0, | |||
| "mask_thr_binary": 0.5, | |||
| @@ -119,7 +117,6 @@ config = ed({ | |||
| "base_step": 58633, | |||
| "total_epoch": 13, | |||
| "warmup_step": 500, | |||
| "warmup_mode": "linear", | |||
| "warmup_ratio": 1/3.0, | |||
| "sgd_momentum": 0.9, | |||
| @@ -131,7 +128,7 @@ config = ed({ | |||
| "pretrain_epoch_size": 0, | |||
| "epoch_size": 12, | |||
| "save_checkpoint": True, | |||
| "save_checkpoint_epochs": 1, | |||
| "save_checkpoint_epochs": 12, | |||
| "keep_checkpoint_max": 12, | |||
| "save_checkpoint_path": "./", | |||