| @@ -117,13 +117,13 @@ python train.py \ | |||||
| --lr_scheduler=cosine_annealing > log.txt 2>&1 & | --lr_scheduler=cosine_annealing > log.txt 2>&1 & | ||||
| # standalone training example(1p) by shell script | # standalone training example(1p) by shell script | ||||
| sh run_standalone_train.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| bash run_standalone_train.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| # For Ascend device, distributed training example(8p) by shell script | # For Ascend device, distributed training example(8p) by shell script | ||||
| sh run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json | |||||
| bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json | |||||
| # For GPU device, distributed training example(8p) by shell script | # For GPU device, distributed training example(8p) by shell script | ||||
| sh run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| # run evaluation by python command | # run evaluation by python command | ||||
| python eval.py \ | python eval.py \ | ||||
| @@ -132,7 +132,7 @@ python eval.py \ | |||||
| --testing_shape=416 > log.txt 2>&1 & | --testing_shape=416 > log.txt 2>&1 & | ||||
| # run evaluation by shell script | # run evaluation by shell script | ||||
| sh run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt | |||||
| bash run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt | |||||
| ``` | ``` | ||||
| ## [Script Description](#contents) | ## [Script Description](#contents) | ||||
| @@ -269,13 +269,13 @@ The model checkpoint will be saved in outputs directory. | |||||
| For Ascend device, distributed training example(8p) by shell script | For Ascend device, distributed training example(8p) by shell script | ||||
| ```command | ```command | ||||
| sh run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json | |||||
| bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json | |||||
| ``` | ``` | ||||
| For GPU device, distributed training example(8p) by shell script | For GPU device, distributed training example(8p) by shell script | ||||
| ```command | ```command | ||||
| sh run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| ``` | ``` | ||||
| The above shell script will run distributed training in the background. You can view the results through the file `train_parallel[X]/log.txt`. The loss value will be displayed as follows: | The above shell script will run distributed training in the background. You can view the results through the file `train_parallel[X]/log.txt`. The loss value will be displayed as follows: | ||||
| @@ -307,7 +307,7 @@ python eval.py \ | |||||
| --pretrained=yolov3.ckpt \ | --pretrained=yolov3.ckpt \ | ||||
| --testing_shape=416 > log.txt 2>&1 & | --testing_shape=416 > log.txt 2>&1 & | ||||
| OR | OR | ||||
| sh run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt | |||||
| bash run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt | |||||
| ``` | ``` | ||||
| The above python command will run in the background. You can view the results through the file "log.txt". The mAP of the test dataset will be as follows: | The above python command will run in the background. You can view the results through the file "log.txt". The mAP of the test dataset will be as follows: | ||||
| @@ -90,7 +90,7 @@ YOLOv3使用DarkNet53执行特征提取,这是YOLOv2中的Darknet-19和残差 | |||||
| 可以从网站[下载](https://pjreddie.com/media/files/darknet53.conv.74) darknet53.conv.74文件。 | 可以从网站[下载](https://pjreddie.com/media/files/darknet53.conv.74) darknet53.conv.74文件。 | ||||
| 也可以在linux系统中使用指令下载该文件。 | 也可以在linux系统中使用指令下载该文件。 | ||||
| ```command | |||||
| ```command | |||||
| wget https://pjreddie.com/media/files/darknet53.conv.74 | wget https://pjreddie.com/media/files/darknet53.conv.74 | ||||
| ``` | ``` | ||||
| @@ -118,17 +118,17 @@ python train.py \ | |||||
| ```shell script | ```shell script | ||||
| # shell脚本单机训练示例(1卡) | # shell脚本单机训练示例(1卡) | ||||
| sh run_standalone_train.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| bash run_standalone_train.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| ``` | ``` | ||||
| ```shell script | ```shell script | ||||
| # 对于Ascend设备,使用shell脚本分布式训练示例(8卡) | # 对于Ascend设备,使用shell脚本分布式训练示例(8卡) | ||||
| sh run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json | |||||
| bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json | |||||
| ``` | ``` | ||||
| ```shell script | ```shell script | ||||
| # 对于GPU设备,使用shell脚本分布式训练示例(8卡) | # 对于GPU设备,使用shell脚本分布式训练示例(8卡) | ||||
| sh run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| ``` | ``` | ||||
| ```python | ```python | ||||
| @@ -141,7 +141,7 @@ python eval.py \ | |||||
| ```shell script | ```shell script | ||||
| # 通过shell脚本运行评估 | # 通过shell脚本运行评估 | ||||
| sh run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt | |||||
| bash run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt | |||||
| ``` | ``` | ||||
| # 脚本说明 | # 脚本说明 | ||||
| @@ -270,13 +270,13 @@ python train.py \ | |||||
| 对于Ascend设备,使用shell脚本分布式训练示例(8卡) | 对于Ascend设备,使用shell脚本分布式训练示例(8卡) | ||||
| ```shell script | ```shell script | ||||
| sh run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json | |||||
| bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json | |||||
| ``` | ``` | ||||
| 对于GPU设备,使用shell脚本分布式训练示例(8卡) | 对于GPU设备,使用shell脚本分布式训练示例(8卡) | ||||
| ```shell script | ```shell script | ||||
| sh run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt | |||||
| ``` | ``` | ||||
| 上述shell脚本将在后台运行分布式训练。您可以通过`train_parallel[X]/log.txt`文件查看结果。损失值如下: | 上述shell脚本将在后台运行分布式训练。您可以通过`train_parallel[X]/log.txt`文件查看结果。损失值如下: | ||||
| @@ -312,7 +312,7 @@ python eval.py \ | |||||
| 或者 | 或者 | ||||
| ```shell script | ```shell script | ||||
| sh run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt | |||||
| bash run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt | |||||
| ``` | ``` | ||||
| 上述python命令将在后台运行,您可以通过log.txt文件查看结果。测试数据集的mAP如下: | 上述python命令将在后台运行,您可以通过log.txt文件查看结果。测试数据集的mAP如下: | ||||
| @@ -274,10 +274,12 @@ def train(): | |||||
| if i % args.log_interval == 0: | if i % args.log_interval == 0: | ||||
| time_used = time.time() - t_end | time_used = time.time() - t_end | ||||
| epoch = int(i / args.steps_per_epoch) | epoch = int(i / args.steps_per_epoch) | ||||
| per_step_time = time_used/args.log_interval | |||||
| fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used | fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used | ||||
| if args.rank == 0: | if args.rank == 0: | ||||
| args.logger.info( | args.logger.info( | ||||
| 'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i])) | |||||
| 'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{},' | |||||
| ' per_step_time:{}'.format(epoch, i, loss_meter, fps, lr[i], per_step_time)) | |||||
| t_end = time.time() | t_end = time.time() | ||||
| loss_meter.reset() | loss_meter.reset() | ||||
| old_progress = i | old_progress = i | ||||