fix cnn efficientnet bugs

5 years ago · d72feda64b
--- a/model_zoo/official/cv/cnn_direction_model/scripts/run_distribute_train_ascend.sh
+++ b/model_zoo/official/cv/cnn_direction_model/scripts/run_distribute_train_ascend.sh
@@ -15,7 +15,7 @@
 # ============================================================================
 if [ $# != 2 ] && [ $# != 3 ]
 then 
 	echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
    echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
 exit 1
 fi

@@ -76,13 +76,13 @@ do
    
    if [ $# == 2 ]
    then
        python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log &
        python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 > train.log 2>&1 &
    fi
    
    if [ $# == 3 ]
    then
        python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --pre_trained=$PATH3 &> log &
        python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --pre_trained=$PATH3 > train.log 2>&1 &
    fi

    cd ..
 done
 done
--- a/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh
+++ b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh
@@ -57,6 +57,6 @@ cd ./eval || exit
 echo "start evaluation for device $DEVICE_ID"
 env > env.log

 python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 #&> log &
 python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 > eval.log 2>&1 &

 cd ..
 cd ..
--- a/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh
+++ b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh
@@ -66,7 +66,7 @@ fi

 if [ $# == 2 ]
 then
    python train.py --dataset_path=$PATH1 --pre_trained=$PATH2 &> log &
    python train.py --dataset_path=$PATH1 --pre_trained=$PATH2 > train.log 2>&1 &
 fi

 cd ..
 cd ..
--- a/model_zoo/official/cv/cnn_direction_model/train.py
+++ b/model_zoo/official/cv/cnn_direction_model/train.py
@@ -41,6 +41,7 @@ parser.add_argument('--device_num', type=int, default=1, help='Device num.')
 parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
 parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
 parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
 parser.add_argument('--is_save_on_master', type=int, default=1, help='save ckpt on master or all rank')

 args_opt = parser.parse_args()

@@ -70,6 +71,13 @@ if __name__ == '__main__':
        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL)
        init()

    args_opt.rank_save_ckpt_flag = 0
    if args_opt.is_save_on_master:
        if rank_id == 0:
            args_opt.rank_save_ckpt_flag = 1
    else:
        args_opt.rank_save_ckpt_flag = 1

    # create dataset
    dataset_name = config.dataset_name
    dataset = create_dataset_train(args_opt.dataset_path +  "/" + dataset_name +
@@ -100,10 +108,11 @@ if __name__ == '__main__':
    loss_cb = LossMonitor()
    cb = [time_cb, loss_cb]
    if config.save_checkpoint:
        config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
                                     keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix="cnn_direction_model", directory=ckpt_save_dir, config=config_ck)
        cb += [ckpt_cb]
        if args_opt.rank_save_ckpt_flag == 1:
            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
                                         keep_checkpoint_max=config.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(prefix="cnn_direction_model", directory=ckpt_save_dir, config=config_ck)
            cb += [ckpt_cb]

    # train model
    model.train(config.epoch_size, dataset, callbacks=cb, dataset_sink_mode=False)
--- a/model_zoo/official/cv/efficientnet/README.md
+++ b/model_zoo/official/cv/efficientnet/README.md
@@ -18,7 +18,6 @@

 # [EfficientNet-B0 Description](#contents)


 [Paper](https://arxiv.org/abs/1905.11946): Mingxing Tan, Quoc V. Le. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. 2019.

 # [Model architecture](#contents)
@@ -27,27 +26,25 @@ The overall network architecture of EfficientNet-B0 is show below:

 [Link](https://arxiv.org/abs/1905.11946)


 # [Dataset](#contents)

 Dataset used: [imagenet](http://www.image-net.org/)

 - Dataset size: ~125G, 1.2W colorful images in 1000 classes
  - Train: 120G, 1.2W images
  - Test: 5G, 50000 images
    - Train: 120G, 1.2W images
    - Test: 5G, 50000 images
 - Data format: RGB images.
  - Note: Data will be processed in src/dataset.py

    - Note: Data will be processed in src/dataset.py

 # [Environment Requirements](#contents)

 - Hardware GPU
  - Prepare hardware environment with GPU processor.
    - Prepare hardware environment with GPU processor.
 - Framework
  - [MindSpore](https://www.mindspore.cn/install/en)
    - [MindSpore](https://www.mindspore.cn/install/en)
 - For more information, please check the resources below：
  - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html)
  - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html)
    - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html)
    - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html)

 # [Script description](#contents)

@@ -77,7 +74,7 @@ Dataset used: [imagenet](http://www.image-net.org/)

 Parameters for both training and evaluating can be set in config.py.

 ```
 ```python
 'random_seed': 1,                # fix random seed
 'model': 'efficientnet_b0',      # model name
 'drop': 0.2,                     # dropout rate
@@ -106,17 +103,17 @@ Parameters for both training and evaluating can be set in config.py.

 ## [Training Process](#contents)

 #### Usage
 ### Usage

 ```
 ```python
 GPU:
    # distribute training example(8p)
    sh run_distribute_train_for_gpu.sh 
    sh run_distribute_train_for_gpu.sh
    # standalone training
    sh run_standalone_train_for_gpu.sh DEVICE_ID DATA_DIR
 ```

 #### Launch
 ### Launch

 ```bash
 # distributed training example(8p) for GPU
@@ -133,7 +130,7 @@ You can find checkpoint file together with result in log.

 ### Usage

 ```
 ```bash
 # Evaluation
 sh run_eval_for_gpu.sh DATA_DIR DEVICE_ID PATH_CHECKPOINT
 ```
@@ -148,9 +145,9 @@ sh run_eval_for_gpu.sh /dataset/eval ./checkpoint/efficientnet_b0-600_1251.ckpt

 #### Result

 Evaluation result will be stored in the scripts path. Under this, you can find result like the followings in log.
 Evaluation result will be stored in the scripts path. Under this, you can find result like the following in log.

 ```
 ```python
 acc=76.96%(TOP1)
 ```

@@ -186,7 +183,6 @@ acc=76.96%(TOP1)
 | outputs                    | probability               |
 | Accuracy                   | acc=76.96%(TOP1)          |


 # [ModelZoo Homepage](#contents)
 

 Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
--- a/model_zoo/official/cv/efficientnet/src/transform.py
+++ b/model_zoo/official/cv/efficientnet/src/transform.py
@@ -31,7 +31,7 @@ class RandAugment:
        self.hparams = hparams

    def __call__(self, imgs, labels, batchInfo):
        # assert the imgs objetc are pil_images
        # assert the imgs object are pil_images
        ret_imgs = []
        ret_labels = []
        py_to_pil_op = P.ToPIL()