From: @ttudu Reviewed-by: @c_34,@liangchenghui Signed-off-by: @c_34 tags/v1.1.0
| @@ -178,7 +178,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL] | |||||
| # random threshold in data augmentation | # random threshold in data augmentation | ||||
| "keep_ratio": True, | "keep_ratio": True, | ||||
| "flip_ratio": 0.5, | "flip_ratio": 0.5, | ||||
| "photo_ratio": 0.5, | |||||
| "expand_ratio": 1.0, | "expand_ratio": 1.0, | ||||
| "max_instance_count": 128, # max number of bbox for each image | "max_instance_count": 128, # max number of bbox for each image | ||||
| @@ -265,7 +264,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL] | |||||
| "test_max_per_img": 100, # max number of instance | "test_max_per_img": 100, # max number of instance | ||||
| "test_batch_size": 2, # batch size | "test_batch_size": 2, # batch size | ||||
| "rpn_head_loss_type": "CrossEntropyLoss", # loss type in rpn | |||||
| "rpn_head_use_sigmoid": True, # whether to use sigmoid or not in rpn | "rpn_head_use_sigmoid": True, # whether to use sigmoid or not in rpn | ||||
| "rpn_head_weight": 1.0, # rpn head weight in loss | "rpn_head_weight": 1.0, # rpn head weight in loss | ||||
| "mask_thr_binary": 0.5, # mask threshold in rcnn | "mask_thr_binary": 0.5, # mask threshold in rcnn | ||||
| @@ -275,7 +273,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL] | |||||
| "base_step": 58633, # base step in lr generator | "base_step": 58633, # base step in lr generator | ||||
| "total_epoch": 13, # total epoch in lr generator | "total_epoch": 13, # total epoch in lr generator | ||||
| "warmup_step": 500, # warm up step in lr generator | "warmup_step": 500, # warm up step in lr generator | ||||
| "warmup_mode": "linear", # warm up mode | |||||
| "warmup_ratio": 1/3.0, # warm up ratio | "warmup_ratio": 1/3.0, # warm up ratio | ||||
| "sgd_momentum": 0.9, # momentum in optimizer | "sgd_momentum": 0.9, # momentum in optimizer | ||||
| @@ -14,9 +14,9 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| if [ $# != 2 ] | |||||
| if [ $# != 2 ] && [ $# != 1 ] | |||||
| then | then | ||||
| echo "Usage: sh run_train.sh [RANK_TABLE_FILE] [PRETRAINED_PATH]" | |||||
| echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_PATH](optional)" | |||||
| exit 1 | exit 1 | ||||
| fi | fi | ||||
| @@ -31,7 +31,11 @@ PATH1=$(get_real_path $1) | |||||
| PATH2=$2 | PATH2=$2 | ||||
| echo $PATH1 | echo $PATH1 | ||||
| echo $PATH2 | |||||
| if [ $# == 2 ] | |||||
| then | |||||
| echo $PATH2 | |||||
| fi | |||||
| if [ ! -f $PATH1 ] | if [ ! -f $PATH1 ] | ||||
| then | then | ||||
| @@ -67,7 +71,16 @@ do | |||||
| cd ./train_parallel$i || exit | cd ./train_parallel$i || exit | ||||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | echo "start training for rank $RANK_ID, device $DEVICE_ID" | ||||
| env > env.log | env > env.log | ||||
| taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM \ | |||||
| --pre_trained=$PATH2 &> log & | |||||
| if [ $# == 2 ] | |||||
| then | |||||
| taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM \ | |||||
| --pre_trained=$PATH2 &> log & | |||||
| fi | |||||
| if [ $# == 1 ] | |||||
| then | |||||
| taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM &> log & | |||||
| fi | |||||
| cd .. | cd .. | ||||
| done | done | ||||
| @@ -14,9 +14,9 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| if [ $# != 1 ] | |||||
| if [ $# != 1 ] && [ $# != 0 ] | |||||
| then | then | ||||
| echo "Usage: sh run_standalone_train.sh [PRETRAINED_PATH]" | |||||
| echo "Usage: sh run_standalone_train.sh [PRETRAINED_PATH](optional)" | |||||
| exit 1 | exit 1 | ||||
| fi | fi | ||||
| @@ -27,13 +27,11 @@ get_real_path(){ | |||||
| echo "$(realpath -m $PWD/$1)" | echo "$(realpath -m $PWD/$1)" | ||||
| fi | fi | ||||
| } | } | ||||
| PATH1=$(get_real_path $1) | |||||
| echo $PATH1 | |||||
| if [ ! -f $PATH1 ] | |||||
| then | |||||
| echo "error: PRETRAINED_PATH=$PATH1 is not a file" | |||||
| exit 1 | |||||
| if [ $# == 1 ] | |||||
| then | |||||
| PATH1=$(get_real_path $1) | |||||
| echo $PATH1 | |||||
| fi | fi | ||||
| ulimit -u unlimited | ulimit -u unlimited | ||||
| @@ -53,5 +51,14 @@ cp -r ../src ./train | |||||
| cd ./train || exit | cd ./train || exit | ||||
| echo "start training for device $DEVICE_ID" | echo "start training for device $DEVICE_ID" | ||||
| env > env.log | env > env.log | ||||
| python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log & | |||||
| if [ $# == 1 ] | |||||
| then | |||||
| python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log & | |||||
| fi | |||||
| if [ $# == 0 ] | |||||
| then | |||||
| python train.py --do_train=True --device_id=$DEVICE_ID &> log & | |||||
| fi | |||||
| cd .. | cd .. | ||||
| @@ -22,7 +22,6 @@ config = ed({ | |||||
| "img_height": 768, | "img_height": 768, | ||||
| "keep_ratio": True, | "keep_ratio": True, | ||||
| "flip_ratio": 0.5, | "flip_ratio": 0.5, | ||||
| "photo_ratio": 0.5, | |||||
| "expand_ratio": 1.0, | "expand_ratio": 1.0, | ||||
| "max_instance_count": 128, | "max_instance_count": 128, | ||||
| @@ -109,7 +108,6 @@ config = ed({ | |||||
| "test_max_per_img": 100, | "test_max_per_img": 100, | ||||
| "test_batch_size": 2, | "test_batch_size": 2, | ||||
| "rpn_head_loss_type": "CrossEntropyLoss", | |||||
| "rpn_head_use_sigmoid": True, | "rpn_head_use_sigmoid": True, | ||||
| "rpn_head_weight": 1.0, | "rpn_head_weight": 1.0, | ||||
| "mask_thr_binary": 0.5, | "mask_thr_binary": 0.5, | ||||
| @@ -119,7 +117,6 @@ config = ed({ | |||||
| "base_step": 58633, | "base_step": 58633, | ||||
| "total_epoch": 13, | "total_epoch": 13, | ||||
| "warmup_step": 500, | "warmup_step": 500, | ||||
| "warmup_mode": "linear", | |||||
| "warmup_ratio": 1/3.0, | "warmup_ratio": 1/3.0, | ||||
| "sgd_momentum": 0.9, | "sgd_momentum": 0.9, | ||||
| @@ -131,7 +128,7 @@ config = ed({ | |||||
| "pretrain_epoch_size": 0, | "pretrain_epoch_size": 0, | ||||
| "epoch_size": 12, | "epoch_size": 12, | ||||
| "save_checkpoint": True, | "save_checkpoint": True, | ||||
| "save_checkpoint_epochs": 1, | |||||
| "save_checkpoint_epochs": 12, | |||||
| "keep_checkpoint_max": 12, | "keep_checkpoint_max": 12, | ||||
| "save_checkpoint_path": "./", | "save_checkpoint_path": "./", | ||||