!7878 Remove generate_hccn_file from cnnctc and psenet.

Merge pull request !7878 from linqingke/cnnctc
5 years ago · b1f88ad439
--- a/model_zoo/official/cv/cnnctc/README.md
+++ b/model_zoo/official/cv/cnnctc/README.md
@@ -150,7 +150,6 @@ The entire code structure is as following:
        |---callback.py    // loss callback file
        |---dataset.py    // process dataset
        |---util.py    // routine operation
        |---generate_hccn_file.py    // generate distribute json file
        |---preprocess_dataset.py    // preprocess dataset

 ```
--- a/model_zoo/official/cv/cnnctc/scripts/run_distribute_train_ascend.sh
+++ b/model_zoo/official/cv/cnnctc/scripts/run_distribute_train_ascend.sh
@@ -31,7 +31,6 @@ echo $PATH1
 PATH2=$(get_real_path $2)
 echo $PATH2

 python ${current_exec_path}/src/generate_hccn_file.py --rank_file=$PATH1
 export RANK_TABLE_FILE=$PATH1
 export RANK_SIZE=8
 ulimit -u unlimited
--- a/model_zoo/official/cv/cnnctc/src/generate_hccn_file.py
+++ b/model_zoo/official/cv/cnnctc/src/generate_hccn_file.py
@@ -1,88 +0,0 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """generate ascend rank file"""

 import os
 import socket
 import argparse

 # Command-line interface: a single optional --rank_file argument naming the
 # rank table JSON path; it defaults to "scripts/rank_table_8p.json".
 parser = argparse.ArgumentParser(description="ascend distribute rank.")
 parser.add_argument("--rank_file", type=str, default="scripts/rank_table_8p.json", help="rank_tabel_file_path.")

 def main(rank_table_file):
    """Build a single-server, 8-device rank table and write it as JSON.

    Device IPs are read from the ``address_<id>=<ip>`` entries of
    ``/etc/hccn.conf``; the server id is the address that the local host
    name resolves to.

    Args:
        rank_table_file: Path of the JSON file to create or overwrite.

    Raises:
        OSError: If ``/etc/hccn.conf`` cannot be opened or the output file
            cannot be written.
        KeyError: If ``/etc/hccn.conf`` lacks an ``address_<id>`` entry for
            one of the devices listed in ``visible_devices``.
        ValueError: If an ``address_`` line does not contain exactly one '='.
    """
    import json

    nproc_per_node = 8
    visible_devices = ['0', '1', '2', '3', '4', '5', '6', '7']

    server_id = socket.gethostbyname(socket.gethostname())

    # Map device id -> device ip. The context manager guarantees the config
    # file is closed even if parsing fails (the handle used to be leaked).
    device_ips = {}
    with open('/etc/hccn.conf', 'r') as hccn_fp:
        for hccn_item in hccn_fp:
            hccn_item = hccn_item.strip()
            if not hccn_item.startswith('address_'):
                continue
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))

    # One instance per device; the rank id is the device's position in
    # visible_devices.
    instance_list = []
    for instance_id, device_id in enumerate(visible_devices[:nproc_per_node]):
        instance_list.append({
            'devices': [{
                'device_id': device_id,
                'device_ip': device_ips[device_id],
            }],
            'rank_id': str(instance_id),
            'server_id': server_id,
        })

    # Key order is kept stable so the emitted JSON layout does not change.
    # '0x002f' was marked "A+K" by the original author, who noted '0x0000'
    # as the "A+X" alternative.
    hccn_table = {
        'board_id': '0x002f',
        'chip_info': '910',
        'deploy_mode': 'lab',
        'group_count': '1',
        'group_list': [{
            'device_num': str(nproc_per_node),
            'server_num': '1',
            'group_name': '',
            'instance_count': str(nproc_per_node),
            'instance_list': instance_list,
        }],
        'para_plane_nic_location': 'device',
        'para_plane_nic_name': [
            'eth{}'.format(eth_id) for eth_id in visible_devices[:nproc_per_node]
        ],
        'para_plane_nic_num': str(nproc_per_node),
        'status': 'completed',
    }

    with open(rank_table_file, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)

 if __name__ == '__main__':
    # Script entry point: create the rank table only when it is not there yet.
    args_opt = parser.parse_args()
    rank_table = args_opt.rank_file
    if not os.path.exists(rank_table):
        print('Generating rank table file.')
        main(rank_table)
        print('Rank table file generated')
    else:
        # Never overwrite an existing rank table.
        print('Rank table file exists.')
--- a/model_zoo/official/cv/psenet/README.md
+++ b/model_zoo/official/cv/psenet/README.md
@@ -58,7 +58,7 @@ A testing set containing about 2000 readable words
 After installing MindSpore via the official website, you can start training and evaluation as follows: 
 ```python
 # run distributed training example
 sh scripts/run_distribute_train.sh pretrained_model.ckpt
 sh scripts/run_distribute_train.sh rank_table_file pretrained_model.ckpt

 #download opencv library
 download pybind11, opencv3.4
@@ -91,7 +91,6 @@ sh scripts/run_eval_ascend.sh
 		└── run_eval_ascend.sh  			// shell script for evaluation 
 	├── src  
 		├── __init__.py  
        ├── generate_hccn_file.py           // creating rank.json
 		├── ETSNET  
 			├── __init__.py  
 			├── base.py                     // convolution and BN operator
@@ -130,7 +129,7 @@ Major parameters in train.py and config.py are:

 ### Distributed Training
 ```
 sh scripts/run_distribute_train.sh pretrained_model.ckpt
 sh scripts/run_distribute_train.sh rank_table_file pretrained_model.ckpt
 ```

 The above shell script will run distribute training in the background. You can view the results through the file 
@@ -169,18 +168,18 @@ Calculated!{"precision": 0.814796668299853, "recall": 0.8006740491092923, "hmean

 | Parameters                 | PSENet                                                   |
 | -------------------------- | ----------------------------------------------------------- |
 | Model Version              | Inception V1                                                |
 | Model Version              | V1                                                |
 | Resource                   | Ascend 910 ；CPU 2.60GHz，192cores；Memory，755G             |
 | uploaded Date              | 09/15/2020 (month/day/year)                                 |
 | MindSpore Version          | 1.0-alpha                                                   |
 | uploaded Date              | 09/30/2020 (month/day/year)                                 |
 | MindSpore Version          | 1.0.0                                                   |
 | Dataset                    | ICDAR2015                                                   |
 | Training Parameters        | start_lr=0.1; lr_scale=0.1                                  |
 | Optimizer                  | SGD                                                         |
 | Loss Function              | LossCallBack                                                |
 | outputs                    | probability                                                 |
 | Loss                       | 0.35                                                        |
 | Speed                      | 1pc: 444 ms/step;  4pcs: 446 ms/step                        |
 | Total time                 | 1pc: 75.48 h;  4pcs: 18.87 h                                |
 | Speed                      | 1pc: 444 ms/step;  8pcs: 446 ms/step                        |
 | Total time                 | 1pc: 75.48 h;  8pcs: 10.01 h                                |
 | Parameters (M)             | 27.36                                                       |
 | Checkpoint for Fine tuning | 109.44M (.ckpt file)                                        |
 | Scripts                    | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/psenet |
@@ -190,13 +189,13 @@ Calculated!{"precision": 0.814796668299853, "recall": 0.8006740491092923, "hmean

 | Parameters          | PSENet                      |
 | ------------------- | --------------------------- |
 | Model Version       | Inception V1                |
 | Model Version       | V1                |
 | Resource            | Ascend 910                  |
 | Uploaded Date       | 09/15/2020 (month/day/year) |
 | MindSpore Version   | 1.0-alpha                   |
 | Uploaded Date       | 09/30/2020 (month/day/year) |
 | MindSpore Version   | 1.0.0                   |
 | Dataset             | ICDAR2015                   |
 | outputs             | probability                 |
 | Accuracy            | 1pc: 81%;  4pcs: 81%   |
 | Accuracy            | 1pc: 81%;  8pcs: 81%   |

 ## [How to use](#contents)

--- a/model_zoo/official/cv/psenet/scripts/run_distribute_train.sh
+++ b/model_zoo/official/cv/psenet/scripts/run_distribute_train.sh
@@ -17,9 +17,9 @@
 current_exec_path=$(pwd)
 echo 'current_exec_path: '${current_exec_path}

 if [ $# != 1 ]
 if [ $# != 2 ]
 then
    echo "Usage: sh run_distribute_train.sh [PRETRAINED_PATH]"
    echo "Usage: sh run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH]"
 exit 1
 fi

@@ -30,20 +30,24 @@ get_real_path(){
    echo "$(realpath -m $PWD/$1)"
  fi
 }
 PATH1=$(get_real_path $1)


 PATH1=$(get_real_path $1)
 if [ ! -f $PATH1 ]
 then
    echo "error: PRETRAINED_PATH=$PATH1 is not a file"
    echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
 exit 1
 fi

 python ${current_exec_path}/src/generate_hccn_file.py
 PATH2=$(get_real_path $2)
 if [ ! -f $PATH2 ]
 then
    echo "error: PRETRAINED_PATH=$PATH2 is not a file"
 exit 1
 fi

 export DEVICE_NUM=8
 export RANK_SIZE=8
 export RANK_TABLE_FILE=${current_exec_path}/rank_table_8p.json
 export RANK_TABLE_FILE=$PATH1

 for((i=0; i<${DEVICE_NUM}; i++))
 do
@@ -70,7 +74,7 @@ do
    cd ${current_exec_path}/device_$i || exit
    export RANK_ID=$i
    export DEVICE_ID=$i
    python ${current_exec_path}/train.py --run_distribute --device_id $i --pre_trained $PATH1 --device_num ${DEVICE_NUM} >test_deep$i.log 2>&1 &
    python ${current_exec_path}/train.py --run_distribute --device_id $i --pre_trained $PATH2 --device_num ${DEVICE_NUM} >test_deep$i.log 2>&1 &
    cd ${current_exec_path} || exit
 done

--- a/model_zoo/official/cv/psenet/src/generate_hccn_file.py
+++ b/model_zoo/official/cv/psenet/src/generate_hccn_file.py
@@ -1,85 +0,0 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================


 import os
 import socket

 # Output location of the generated rank table, relative to the current
 # working directory.
 RANK_TABLE_SAVE_PATH = './rank_table_8p.json'


 def main():
    """Build a single-server rank table and write it to RANK_TABLE_SAVE_PATH.

    Device IPs are read from the ``address_<id>=<ip>`` entries of
    ``/etc/hccn.conf``; the server id is the address that the local host
    name resolves to.

    Raises:
        OSError: If ``/etc/hccn.conf`` cannot be opened or the output file
            cannot be written.
        KeyError: If ``/etc/hccn.conf`` lacks an ``address_<id>`` entry for
            one of the devices listed in ``visible_devices``.
        ValueError: If an ``address_`` line does not contain exactly one '='.
    """
    import json

    # NOTE(review): only 4 devices are described although the output file is
    # named rank_table_8p.json -- confirm the intended device count.
    nproc_per_node = 4
    visible_devices = ['0', '1', '2', '3']

    server_id = socket.gethostbyname(socket.gethostname())

    # Map device id -> device ip. The context manager guarantees the config
    # file is closed even if parsing fails (the handle used to be leaked).
    device_ips = {}
    with open('/etc/hccn.conf', 'r') as hccn_fp:
        for hccn_item in hccn_fp:
            hccn_item = hccn_item.strip()
            if not hccn_item.startswith('address_'):
                continue
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))

    # One instance per device; the rank id is the device's position in
    # visible_devices.
    instance_list = []
    for instance_id, device_id in enumerate(visible_devices[:nproc_per_node]):
        instance_list.append({
            'devices': [{
                'device_id': device_id,
                'device_ip': device_ips[device_id],
            }],
            'rank_id': str(instance_id),
            'server_id': server_id,
        })

    # Key order is kept stable so the emitted JSON layout does not change.
    # '0x002f' was marked "A+K" by the original author.
    hccn_table = {
        'board_id': '0x002f',
        'chip_info': '910',
        'deploy_mode': 'lab',
        'group_count': '1',
        'group_list': [{
            'device_num': str(nproc_per_node),
            'server_num': '1',
            'group_name': '',
            'instance_count': str(nproc_per_node),
            'instance_list': instance_list,
        }],
        'para_plane_nic_location': 'device',
        'para_plane_nic_name': [
            'eth{}'.format(eth_id) for eth_id in visible_devices[:nproc_per_node]
        ],
        'para_plane_nic_num': str(nproc_per_node),
        'status': 'completed',
    }

    with open(RANK_TABLE_SAVE_PATH, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)


 if __name__ == '__main__':
    # Script entry point: create the rank table only when it is not there yet.
    if not os.path.exists(RANK_TABLE_SAVE_PATH):
        print('Generating rank table file.')
        main()
        print('Rank table file generated')
    else:
        # Never overwrite an existing rank table.
        print('Rank table file exists.')