Browse Source

!7878 Remove generate_hccn_file from cnnctc and psenet.

Merge pull request !7878 from linqingke/cnnctc
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
b1f88ad439
6 changed files with 23 additions and 195 deletions
  1. +0
    -1
      model_zoo/official/cv/cnnctc/README.md
  2. +0
    -1
      model_zoo/official/cv/cnnctc/scripts/run_distribute_train_ascend.sh
  3. +0
    -88
      model_zoo/official/cv/cnnctc/src/generate_hccn_file.py
  4. +11
    -12
      model_zoo/official/cv/psenet/README.md
  5. +12
    -8
      model_zoo/official/cv/psenet/scripts/run_distribute_train.sh
  6. +0
    -85
      model_zoo/official/cv/psenet/src/generate_hccn_file.py

+ 0
- 1
model_zoo/official/cv/cnnctc/README.md View File

@@ -150,7 +150,6 @@ The entire code structure is as following:
|---callback.py // loss callback file |---callback.py // loss callback file
|---dataset.py // process dataset |---dataset.py // process dataset
|---util.py // routine operation |---util.py // routine operation
|---generate_hccn_file.py // generate distribute json file
|---preprocess_dataset.py // preprocess dataset |---preprocess_dataset.py // preprocess dataset
``` ```


+ 0
- 1
model_zoo/official/cv/cnnctc/scripts/run_distribute_train_ascend.sh View File

@@ -31,7 +31,6 @@ echo $PATH1
PATH2=$(get_real_path $2) PATH2=$(get_real_path $2)
echo $PATH2 echo $PATH2


python ${current_exec_path}/src/generate_hccn_file.py --rank_file=$PATH1
export RANK_TABLE_FILE=$PATH1 export RANK_TABLE_FILE=$PATH1
export RANK_SIZE=8 export RANK_SIZE=8
ulimit -u unlimited ulimit -u unlimited


+ 0
- 88
model_zoo/official/cv/cnnctc/src/generate_hccn_file.py View File

@@ -1,88 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""generate ascend rank file"""
import os
import socket
import argparse
# Command-line interface of the generator: one option that names the rank
# table JSON file to create.
parser = argparse.ArgumentParser(description="ascend distribute rank.")
parser.add_argument(
    "--rank_file",
    type=str,
    default="scripts/rank_table_8p.json",
    help="rank_table_file_path.",  # help text previously misspelt "tabel"
)
def main(rank_table_file, hccn_conf_file='/etc/hccn.conf', server_id=None):
    """Generate a single-server, 8-device Ascend rank table and save it as JSON.

    Device IPs are taken from the ``address_<id>=<ip>`` lines of the HCCN
    configuration file; every other line of that file is ignored.

    Args:
        rank_table_file (str): Path of the JSON file to write.
        hccn_conf_file (str): HCCN configuration file that lists the device
            IPs. Defaults to ``/etc/hccn.conf``.
        server_id (str): Value stored as ``server_id`` of every instance.
            Defaults to the address the local host name resolves to.

    Raises:
        KeyError: If the configuration has no ``address_<id>`` entry for one
            of the devices 0-7.
    """
    import json

    nproc_per_node = 8
    visible_devices = [str(index) for index in range(nproc_per_node)]
    if server_id is None:
        server_id = socket.gethostbyname(socket.gethostname())

    device_ips = {}
    # The context manager guarantees the configuration file is closed, even
    # when a malformed line makes the parsing below raise.
    with open(hccn_conf_file, 'r') as conf_fp:
        for hccn_item in conf_fp:
            hccn_item = hccn_item.strip()
            if hccn_item.startswith('address_'):
                device_id, device_ip = hccn_item.split('=', 1)
                # Tolerate blanks around '=' so that the key is exactly the
                # device number and the value is a bare IP.
                device_id = device_id.split('_')[1].strip()
                device_ip = device_ip.strip()
                device_ips[device_id] = device_ip
                print('device_id:{}, device_ip:{}'.format(device_id, device_ip))

    # One instance per device; the rank id is the position in visible_devices.
    instance_list = [
        {
            'devices': [{
                'device_id': device_id,
                'device_ip': device_ips[device_id],
            }],
            'rank_id': str(rank_id),
            'server_id': server_id,
        }
        for rank_id, device_id in enumerate(visible_devices)
    ]

    hccn_table = {
        'board_id': '0x002f',  # A+K (the A+X value is '0x0000')
        'chip_info': '910',
        'deploy_mode': 'lab',
        'group_count': '1',
        'group_list': [{
            'device_num': str(nproc_per_node),
            'server_num': '1',
            'group_name': '',
            'instance_count': str(nproc_per_node),
            'instance_list': instance_list,
        }],
        'para_plane_nic_location': 'device',
        'para_plane_nic_name': ['eth{}'.format(eth_id) for eth_id in visible_devices],
        'para_plane_nic_num': str(nproc_per_node),
        'status': 'completed',
    }

    with open(rank_table_file, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
if __name__ == '__main__':
    # Script entry point: create the rank table only when the requested path
    # does not exist yet, so an existing table is never overwritten.
    rank_table = parser.parse_args().rank_file
    if not os.path.exists(rank_table):
        print('Generating rank table file.')
        main(rank_table)
        print('Rank table file generated')
    else:
        print('Rank table file exists.')

+ 11
- 12
model_zoo/official/cv/psenet/README.md View File

@@ -58,7 +58,7 @@ A testing set containing about 2000 readable words
After installing MindSpore via the official website, you can start training and evaluation as follows: After installing MindSpore via the official website, you can start training and evaluation as follows:
```python ```python
# run distributed training example # run distributed training example
sh scripts/run_distribute_train.sh pretrained_model.ckpt
sh scripts/run_distribute_train.sh rank_table_file pretrained_model.ckpt
#download opencv library #download opencv library
download pybind11, opencv3.4
@@ -91,7 +91,6 @@ sh scripts/run_eval_ascend.sh
└── run_eval_ascend.sh // shell script for evaluation └── run_eval_ascend.sh // shell script for evaluation
├── src ├── src
├── __init__.py ├── __init__.py
├── generate_hccn_file.py // creating rank.json
├── ETSNET ├── ETSNET
├── __init__.py ├── __init__.py
├── base.py // convolution and BN operator ├── base.py // convolution and BN operator
@@ -130,7 +129,7 @@ Major parameters in train.py and config.py are:
### Distributed Training ### Distributed Training
``` ```
sh scripts/run_distribute_train.sh pretrained_model.ckpt
sh scripts/run_distribute_train.sh rank_table_file pretrained_model.ckpt
``` ```
The above shell script will run distribute training in the background. You can view the results through the file The above shell script will run distribute training in the background. You can view the results through the file
@@ -169,18 +168,18 @@ Calculated!{"precision": 0.814796668299853, "recall": 0.8006740491092923, "hmean
| Parameters | PSENet | | Parameters | PSENet |
| -------------------------- | ----------------------------------------------------------- | | -------------------------- | ----------------------------------------------------------- |
| Model Version | Inception V1 |
| Model Version | V1 |
| Resource | Ascend 910 ;CPU 2.60GHz,192cores;Memory,755G | | Resource | Ascend 910 ;CPU 2.60GHz,192cores;Memory,755G |
| uploaded Date | 09/15/2020 (month/day/year) |
| MindSpore Version | 1.0-alpha |
| uploaded Date | 09/30/2020 (month/day/year) |
| MindSpore Version | 1.0.0 |
| Dataset | ICDAR2015 | | Dataset | ICDAR2015 |
| Training Parameters | start_lr=0.1; lr_scale=0.1 | | Training Parameters | start_lr=0.1; lr_scale=0.1 |
| Optimizer | SGD | | Optimizer | SGD |
| Loss Function | LossCallBack | | Loss Function | LossCallBack |
| outputs | probability | | outputs | probability |
| Loss | 0.35 | | Loss | 0.35 |
| Speed | 1pc: 444 ms/step; 4pcs: 446 ms/step |
| Total time | 1pc: 75.48 h; 4pcs: 18.87 h |
| Speed | 1pc: 444 ms/step; 8pcs: 446 ms/step |
| Total time | 1pc: 75.48 h; 8pcs: 10.01 h |
| Parameters (M) | 27.36 | | Parameters (M) | 27.36 |
| Checkpoint for Fine tuning | 109.44M (.ckpt file) | | Checkpoint for Fine tuning | 109.44M (.ckpt file) |
| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/psenet | | Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/psenet |
@@ -190,13 +189,13 @@ Calculated!{"precision": 0.814796668299853, "recall": 0.8006740491092923, "hmean
| Parameters | PSENet | | Parameters | PSENet |
| ------------------- | --------------------------- | | ------------------- | --------------------------- |
| Model Version | Inception V1 |
| Model Version | V1 |
| Resource | Ascend 910 | | Resource | Ascend 910 |
| Uploaded Date | 09/15/2020 (month/day/year) |
| MindSpore Version | 1.0-alpha |
| Uploaded Date | 09/30/2020 (month/day/year) |
| MindSpore Version | 1.0.0 |
| Dataset | ICDAR2015 | | Dataset | ICDAR2015 |
| outputs | probability | | outputs | probability |
| Accuracy | 1pc: 81%; 4pcs: 81% |
| Accuracy | 1pc: 81%; 8pcs: 81% |
## [How to use](#contents) ## [How to use](#contents)


+ 12
- 8
model_zoo/official/cv/psenet/scripts/run_distribute_train.sh View File

@@ -17,9 +17,9 @@
current_exec_path=$(pwd) current_exec_path=$(pwd)
echo 'current_exec_path: '${current_exec_path} echo 'current_exec_path: '${current_exec_path}


if [ $# != 1 ]
if [ $# != 2 ]
then then
echo "Usage: sh run_distribute_train.sh [PRETRAINED_PATH]"
echo "Usage: sh run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH]"
exit 1 exit 1
fi fi


@@ -30,20 +30,24 @@ get_real_path(){
echo "$(realpath -m $PWD/$1)" echo "$(realpath -m $PWD/$1)"
fi fi
} }
PATH1=$(get_real_path $1)



PATH1=$(get_real_path $1)
if [ ! -f $PATH1 ] if [ ! -f $PATH1 ]
then then
echo "error: PRETRAINED_PATH=$PATH1 is not a file"
echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
exit 1 exit 1
fi fi


python ${current_exec_path}/src/generate_hccn_file.py
PATH2=$(get_real_path $2)
if [ ! -f $PATH2 ]
then
echo "error: PRETRAINED_PATH=$PATH2 is not a file"
exit 1
fi


export DEVICE_NUM=8 export DEVICE_NUM=8
export RANK_SIZE=8 export RANK_SIZE=8
export RANK_TABLE_FILE=${current_exec_path}/rank_table_8p.json
export RANK_TABLE_FILE=$PATH1


for((i=0; i<${DEVICE_NUM}; i++)) for((i=0; i<${DEVICE_NUM}; i++))
do do
@@ -70,7 +74,7 @@ do
cd ${current_exec_path}/device_$i || exit cd ${current_exec_path}/device_$i || exit
export RANK_ID=$i export RANK_ID=$i
export DEVICE_ID=$i export DEVICE_ID=$i
python ${current_exec_path}/train.py --run_distribute --device_id $i --pre_trained $PATH1 --device_num ${DEVICE_NUM} >test_deep$i.log 2>&1 &
python ${current_exec_path}/train.py --run_distribute --device_id $i --pre_trained $PATH2 --device_num ${DEVICE_NUM} >test_deep$i.log 2>&1 &
cd ${current_exec_path} || exit cd ${current_exec_path} || exit
done done



+ 0
- 85
model_zoo/official/cv/psenet/src/generate_hccn_file.py View File

@@ -1,85 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import socket
# Default output location of the generated rank table.
RANK_TABLE_SAVE_PATH = './rank_table_8p.json'


def main(rank_table_file=None, nproc_per_node=8, hccn_conf_file='/etc/hccn.conf', server_id=None):
    """Generate a single-server Ascend rank table and save it as JSON.

    Device IPs are taken from the ``address_<id>=<ip>`` lines of the HCCN
    configuration file; every other line of that file is ignored.

    Args:
        rank_table_file (str): Path of the JSON file to write. Defaults to
            ``RANK_TABLE_SAVE_PATH``.
        nproc_per_node (int): Number of devices (0 .. nproc_per_node - 1) to
            put into the table. Defaults to 8, matching the ``8p`` file name.
        hccn_conf_file (str): HCCN configuration file that lists the device
            IPs. Defaults to ``/etc/hccn.conf``.
        server_id (str): Value stored as ``server_id`` of every instance.
            Defaults to the address the local host name resolves to.

    Raises:
        KeyError: If the configuration has no ``address_<id>`` entry for one
            of the requested devices.
    """
    import json

    if rank_table_file is None:
        rank_table_file = RANK_TABLE_SAVE_PATH
    visible_devices = [str(index) for index in range(nproc_per_node)]
    if server_id is None:
        server_id = socket.gethostbyname(socket.gethostname())

    device_ips = {}
    # The context manager guarantees the configuration file is closed, even
    # when a malformed line makes the parsing below raise.
    with open(hccn_conf_file, 'r') as conf_fp:
        for hccn_item in conf_fp:
            hccn_item = hccn_item.strip()
            if hccn_item.startswith('address_'):
                device_id, device_ip = hccn_item.split('=', 1)
                # Tolerate blanks around '=' so that the key is exactly the
                # device number and the value is a bare IP.
                device_id = device_id.split('_')[1].strip()
                device_ip = device_ip.strip()
                device_ips[device_id] = device_ip
                print('device_id:{}, device_ip:{}'.format(device_id, device_ip))

    # One instance per device; the rank id is the position in visible_devices.
    instance_list = [
        {
            'devices': [{
                'device_id': device_id,
                'device_ip': device_ips[device_id],
            }],
            'rank_id': str(rank_id),
            'server_id': server_id,
        }
        for rank_id, device_id in enumerate(visible_devices)
    ]

    hccn_table = {
        'board_id': '0x002f',  # A+K
        'chip_info': '910',
        'deploy_mode': 'lab',
        'group_count': '1',
        'group_list': [{
            'device_num': str(nproc_per_node),
            'server_num': '1',
            'group_name': '',
            'instance_count': str(nproc_per_node),
            'instance_list': instance_list,
        }],
        'para_plane_nic_location': 'device',
        'para_plane_nic_name': ['eth{}'.format(eth_id) for eth_id in visible_devices],
        'para_plane_nic_num': str(nproc_per_node),
        'status': 'completed',
    }

    with open(rank_table_file, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
if __name__ == '__main__':
    # Script entry point: generate the rank table only when no file is
    # present at RANK_TABLE_SAVE_PATH, so an existing table is kept as is.
    if not os.path.exists(RANK_TABLE_SAVE_PATH):
        print('Generating rank table file.')
        main()
        print('Rank table file generated')
    else:
        print('Rank table file exists.')

Loading…
Cancel
Save