From 95f178f0fb019c5b5a0da3c4694f19ea634e0b80 Mon Sep 17 00:00:00 2001 From: wukesong Date: Sun, 27 Dec 2020 15:21:25 +0800 Subject: [PATCH] modify tiny-darknet --- model_zoo/official/cv/tinydarknet/README.md | 22 +++++++++---------- .../official/cv/tinydarknet/README_CN.md | 20 ++++++++--------- .../{run_train.sh => run_distribute_train.sh} | 2 +- ...rain_single.sh => run_standalone_train.sh} | 0 4 files changed, 22 insertions(+), 22 deletions(-) rename model_zoo/official/cv/tinydarknet/scripts/{run_train.sh => run_distribute_train.sh} (95%) rename model_zoo/official/cv/tinydarknet/scripts/{run_train_single.sh => run_standalone_train.sh} (100%) diff --git a/model_zoo/official/cv/tinydarknet/README.md b/model_zoo/official/cv/tinydarknet/README.md index 79d5bb4431..f173b0b8fa 100644 --- a/model_zoo/official/cv/tinydarknet/README.md +++ b/model_zoo/official/cv/tinydarknet/README.md @@ -71,10 +71,10 @@ After installing MindSpore via the official website, you can start training and ```python # run training example - bash ./scripts/run_train_single.sh + bash ./scripts/run_standalone_train.sh # run distributed training example - bash ./scripts/run_train.sh rank_table.json + bash ./scripts/run_distribute_train.sh rank_table.json # run evaluation example python eval.py > eval.log 2>&1 & @@ -97,10 +97,11 @@ For more details, please refer the specify script. 
```bash ├── Tiny-DarkNet - ├── README.md // descriptions about Tiny-Darknet + ├── README.md // descriptions about Tiny-Darknet in English + ├── README_CN.md // descriptions about Tiny-Darknet in Chinese ├── scripts - │ ├──run_train_single.sh // shell script for single on Ascend - │ ├──run_train.sh // shell script for distributed on Ascend + │ ├──run_standalone_train.sh // shell script for standalone training on Ascend + │ ├──run_distribute_train.sh // shell script for distributed training on Ascend │ ├──run_eval.sh // shell script for evaluation on Ascend ├── src │ ├──dataset.py // creating dataset @@ -131,7 +132,6 @@ Parameters for both training and evaluation can be set in config.py 'data_path': './ImageNet_Original/train/' # absolute full path to the train datasets 'val_data_path': './ImageNet_Original/val/' # absolute full path to the evaluation datasets 'device_target': 'Ascend' # device running the program - 'device_id': 0 # device ID used to train or evaluate the dataset. Ignore it when you use run_train.sh for distributed training 'keep_checkpoint_max': 10 # only keep the last keep_checkpoint_max checkpoint 'checkpoint_path': '/train_tinydarknet.ckpt' # the absolute full path to save the checkpoint file 'onnx_filename': 'tinydarknet.onnx' # file name of the onnx model used in export.py @@ -157,7 +157,7 @@ For more configuration details, please refer the script config.py. - running on Ascend: ```python - sh scripts/run_train_single.sh + bash scripts/run_standalone_train.sh ``` The command above will run in the background, you can view the results through the file train.log. @@ -184,7 +184,7 @@ For more configuration details, please refer the script config.py. - running on Ascend: ```python - sh scripts/run_train.sh + bash ./scripts/run_distribute_train.sh rank_table.json ``` The above shell script will run distribute training in the background. You can view the results through the file train_parallel[X]/log. 
The loss value will be achieved as follows: @@ -211,7 +211,7 @@ For more configuration details, please refer the script config.py. ```python python eval.py > eval.log 2>&1 & OR - sh scripts/run_eval.sh + bash scripts/run_eval.sh ``` The above python command will run in the background. You can view the results through the file "eval.log". The accuracy of the test dataset will be as follows: @@ -232,7 +232,7 @@ For more configuration details, please refer the script config.py. ## [Performance](#contents) -### [Evaluation Performance](#contents) +### [Training Performance](#contents) | Parameters | Ascend | | -------------------------- | ----------------------------------------------------------- | @@ -260,7 +260,7 @@ For more configuration details, please refer the script config.py. | Dataset | 200k images | | batch_size | 128 | | Outputs | probability | -| Accuracy | 8 pc Top-5: 81.7% | +| Accuracy | 8 pc Top-1: 58.7%; Top-5: 81.7% | | Model for inference | 11.6M (.ckpt file) | # [ModelZoo Homepage](#contents) diff --git a/model_zoo/official/cv/tinydarknet/README_CN.md b/model_zoo/official/cv/tinydarknet/README_CN.md index c273b31ccb..654c0a8220 100644 --- a/model_zoo/official/cv/tinydarknet/README_CN.md +++ b/model_zoo/official/cv/tinydarknet/README_CN.md @@ -79,10 +79,10 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 ```python # 单卡训练 - bash ./scripts/run_train_single.sh + bash ./scripts/run_standalone_train.sh # 分布式训练 - bash ./scripts/run_train.sh rank_table.json + bash ./scripts/run_distribute_train.sh rank_table.json # 评估 python eval.py > eval.log 2>&1 & @@ -105,10 +105,11 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 ```bash ├── Tiny-DarkNet - ├── README.md // Tiny-Darknet相关说明 + ├── README.md // Tiny-Darknet英文说明 + ├── README_CN.md // Tiny-Darknet中文说明 ├── scripts - │ ├──run_train_single.sh // Ascend单卡训练shell脚本 - │ ├──run_train.sh // Ascend分布式训练shell脚本 + │ ├──run_standalone_train.sh // Ascend单卡训练shell脚本 + │ ├──run_distribute_train.sh // 
Ascend分布式训练shell脚本 │ ├──run_eval.sh // Ascend评估shell脚本 ├── src │ ├──dataset.py // 创建数据集 @@ -139,7 +140,6 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 'data_path': './ImageNet_Original/train/' # 训练数据集的绝对路径 'val_data_path': './ImageNet_Original/val/' # 评估数据集的绝对路径 'device_target': 'Ascend' # 程序运行的设备 - 'device_id': 0 # 用来训练和评估的设备编号 'keep_checkpoint_max': 10 # 仅仅保持最新的keep_checkpoint_max个checkpoint文件 'checkpoint_path': '/train_tinydarknet.ckpt' # 保存checkpoint文件的绝对路径 'onnx_filename': 'tinydarknet.onnx' # 用于export.py 文件中的onnx模型的文件名 @@ -165,7 +165,7 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 - 在Ascend资源上运行: ```python - sh scripts/run_train_single.sh + bash ./scripts/run_standalone_train.sh ``` 上述的命令将运行在后台中,可以通过 `train.log` 文件查看运行结果. @@ -192,7 +192,7 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 - 在Ascend资源上运行: ```python - sh scripts/run_train.sh + bash scripts/run_distribute_train.sh rank_table.json ``` 上述的脚本命令将在后台中进行分布式训练,可以通过`train_parallel[X]/log`文件查看运行结果. 训练的损失值将以如下的形式展示: @@ -219,7 +219,7 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 ```python python eval.py > eval.log 2>&1 & OR - sh scripts/run_eval.sh + bash scripts/run_eval.sh ``` 上述的python命令将运行在后台中,可以通过"eval.log"文件查看结果. 
测试数据集的准确率将如下面所列: @@ -268,7 +268,7 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 | 数据集 | 200k张图片 | | batch_size | 128 | | 输出 | 分类概率 | -| 准确率 | 8卡 Top-5: 81.7% | +| 准确率 | 8卡 Top-1: 58.7%; Top-5: 81.7% | | 推理模型 | 11.6M (.ckpt文件) | # [ModelZoo主页](#目录) diff --git a/model_zoo/official/cv/tinydarknet/scripts/run_train.sh b/model_zoo/official/cv/tinydarknet/scripts/run_distribute_train.sh similarity index 95% rename from model_zoo/official/cv/tinydarknet/scripts/run_train.sh rename to model_zoo/official/cv/tinydarknet/scripts/run_distribute_train.sh index bb2b3f896e..7d87109a3c 100644 --- a/model_zoo/official/cv/tinydarknet/scripts/run_train.sh +++ b/model_zoo/official/cv/tinydarknet/scripts/run_distribute_train.sh @@ -18,7 +18,7 @@ echo "$1 $2" if [ $# != 1 ] && [ $# != 2 ] then - echo "Usage: sh run_train.sh [RANK_TABLE_FILE] [cifar10|imagenet]" + echo "Usage: bash run_distribute_train.sh [RANK_TABLE_FILE] [cifar10|imagenet]" exit 1 fi diff --git a/model_zoo/official/cv/tinydarknet/scripts/run_train_single.sh b/model_zoo/official/cv/tinydarknet/scripts/run_standalone_train.sh similarity index 100% rename from model_zoo/official/cv/tinydarknet/scripts/run_train_single.sh rename to model_zoo/official/cv/tinydarknet/scripts/run_standalone_train.sh