From eb80f555b7f58d05fdcc4b9bc83501dbc3f50ac9 Mon Sep 17 00:00:00 2001 From: wsq3 <877518222@qq.com> Date: Sun, 25 Apr 2021 12:48:08 +0800 Subject: [PATCH] optimized readme and add per_step_time --- model_zoo/official/cv/yolov3_darknet53/README.md | 14 +++++++------- .../official/cv/yolov3_darknet53/README_CN.md | 16 ++++++++-------- model_zoo/official/cv/yolov3_darknet53/train.py | 4 +++- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/model_zoo/official/cv/yolov3_darknet53/README.md b/model_zoo/official/cv/yolov3_darknet53/README.md index ea9443d0d9..deefb3bab0 100644 --- a/model_zoo/official/cv/yolov3_darknet53/README.md +++ b/model_zoo/official/cv/yolov3_darknet53/README.md @@ -117,13 +117,13 @@ python train.py \ --lr_scheduler=cosine_annealing > log.txt 2>&1 & # standalone training example(1p) by shell script -sh run_standalone_train.sh dataset/coco2014 darknet53_backbone.ckpt +bash run_standalone_train.sh dataset/coco2014 darknet53_backbone.ckpt # For Ascend device, distributed training example(8p) by shell script -sh run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json +bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json # For GPU device, distributed training example(8p) by shell script -sh run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt +bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt # run evaluation by python command python eval.py \ @@ -132,7 +132,7 @@ python eval.py \ --testing_shape=416 > log.txt 2>&1 & # run evaluation by shell script -sh run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt +bash run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt ``` ## [Script Description](#contents) @@ -269,13 +269,13 @@ The model checkpoint will be saved in outputs directory. For Ascend device, distributed training example(8p) by shell script ```command -sh run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json +bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json ``` For GPU device, distributed training example(8p) by shell script ```command -sh run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt +bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt ``` The above shell script will run distribute training in the background. You can view the results through the file `train_parallel[X]/log.txt`. The loss value will be achieved as follows: @@ -307,7 +307,7 @@ python eval.py \ --pretrained=yolov3.ckpt \ --testing_shape=416 > log.txt 2>&1 & OR -sh run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt +bash run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt ``` The above python command will run in the background. You can view the results through the file "log.txt". The mAP of the test dataset will be as follows: diff --git a/model_zoo/official/cv/yolov3_darknet53/README_CN.md b/model_zoo/official/cv/yolov3_darknet53/README_CN.md index e733346743..75c400f73c 100644 --- a/model_zoo/official/cv/yolov3_darknet53/README_CN.md +++ b/model_zoo/official/cv/yolov3_darknet53/README_CN.md @@ -90,7 +90,7 @@ YOLOv3使用DarkNet53执行特征提取,这是YOLOv2中的Darknet-19和残差 可以从网站[下载](https://pjreddie.com/media/files/darknet53.conv.74) darknet53.conv.74文件。 也可以在linux系统中使用指令下载该文件。 - ```command + ```command wget https://pjreddie.com/media/files/darknet53.conv.74 ``` @@ -118,17 +118,17 @@ python train.py \ ```shell script # shell脚本单机训练示例(1卡) -sh run_standalone_train.sh dataset/coco2014 darknet53_backbone.ckpt +bash run_standalone_train.sh dataset/coco2014 darknet53_backbone.ckpt ``` ```shell script # 对于Ascend设备,使用shell脚本分布式训练示例(8卡) -sh run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json +bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json ``` ```shell script # 对于GPU设备,使用shell脚本分布式训练示例(8卡) -sh run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt +bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt ``` ```python @@ -141,7 +141,7 @@ python eval.py \ ```shell script # 通过shell脚本运行评估 -sh run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt +bash run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt ``` # 脚本说明 @@ -270,13 +270,13 @@ python train.py \ 对于Ascend设备,使用shell脚本分布式训练示例(8卡) ```shell script -sh run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json +bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table_8p.json ``` 对于GPU设备,使用shell脚本分布式训练示例(8卡) ```shell script -sh run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt +bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt ``` 上述shell脚本将在后台运行分布训练。您可以通过`train_parallel[X]/log.txt`文件查看结果。损失值的实现如下: @@ -312,7 +312,7 @@ python eval.py \ 或者 ```shell script -sh run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt +bash run_eval.sh dataset/coco2014/ checkpoint/0-319_102400.ckpt ``` 上述python命令将在后台运行,您可以通过log.txt文件查看结果。测试数据集的mAP如下: diff --git a/model_zoo/official/cv/yolov3_darknet53/train.py b/model_zoo/official/cv/yolov3_darknet53/train.py index 7d213acc5b..c0998e8e0b 100644 --- a/model_zoo/official/cv/yolov3_darknet53/train.py +++ b/model_zoo/official/cv/yolov3_darknet53/train.py @@ -274,10 +274,12 @@ def train(): if i % args.log_interval == 0: time_used = time.time() - t_end epoch = int(i / args.steps_per_epoch) + per_step_time = time_used/args.log_interval fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used if args.rank == 0: args.logger.info( - 'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i])) + 'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{},' + ' per_step_time:{}'.format(epoch, i, loss_meter, fps, lr[i], per_step_time)) t_end = time.time() loss_meter.reset() old_progress = i