delete launch file

5 years ago · 1221ea9349
--- a/model_zoo/official/cv/mobilenetv2/README.md
+++ b/model_zoo/official/cv/mobilenetv2/README.md
@@ -70,7 +70,6 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
  │   ├──args.py        # parse args
  │   ├──config.py      # parameter configuration
  │   ├──dataset.py     # creating dataset
  │   ├──launch.py      # start python script
  │   ├──lr_generator.py     # learning rate config
  │   ├──mobilenetV2.py      # MobileNetV2 architecture
  │   ├──models.py      # contain define_net and Loss, Monitor
--- a/model_zoo/official/cv/mobilenetv2/scripts/run_train.sh
+++ b/model_zoo/official/cv/mobilenetv2/scripts/run_train.sh
@@ -31,21 +31,32 @@ run_ascend()
    BASEPATH=$(cd "`dirname $0`" || exit; pwd)
    export PYTHONPATH=${BASEPATH}:$PYTHONPATH
    export RANK_TABLE_FILE=$4
    DEVICE_NUM=$2
    if [ -d "../train" ];
    then
        rm -rf ../train
    fi
    mkdir ../train
    cd ../train || exit
    python ${BASEPATH}/../src/launch.py \
    for((i=0; i<${DEVICE_NUM}; i++))
    do
        export DEVICE_ID=$i
        export RANK_ID=$i
        rm -rf ./rank$i
        mkdir ./rank$i
        cp ../*.py ./rank$i
        cp -r ../src ./rank$i
        cd ./rank$i || exit
        echo "start training for rank $RANK_ID, device $DEVICE_ID"
        env > env.log
        python train.py \
            --platform=$1 \
            --nproc_per_node=$2 \
            --visible_devices=$3 \
            --training_script=${BASEPATH}/../train.py \
            --dataset_path=$5 \
            --pretrain_ckpt=$6 \
            --freeze_layer=$7 \
            &> ../train.log &  # dataset train folder
            &> log$i.log & 
        cd ..
    done
 }

 run_gpu()
--- a/model_zoo/official/cv/mobilenetv2/src/launch.py
+++ b/model_zoo/official/cv/mobilenetv2/src/launch.py
@@ -1,64 +0,0 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """launch train script"""
 import os
 import sys
 import subprocess
 import shutil
 from args import launch_parse_args

 def main():
    """Launch one training process per rank and wait for all of them.

    Options come from ``launch_parse_args()``. For every rank in
    ``range(args.nproc_per_node)`` a fresh ``rank<N>`` directory is created
    under the current working directory and ``args.training_script`` is
    started from inside it with ``RANK_SIZE``, ``RANK_ID`` and ``DEVICE_ID``
    set in the child environment. Each child's stdout and stderr go to
    ``rank<N>/log<N>.log``.

    Raises:
        AssertionError: if ``args.training_script`` is not an existing file
            or fewer visible devices than processes were supplied.
        subprocess.CalledProcessError: if a child exits with a non-zero
            status; raised for the first such child in rank order.
    """
    print("start", __file__)
    args = launch_parse_args()
    print(args)
    visible_devices = args.visible_devices.split(',')
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))

    # Resolve the script before any chdir below; a relative path would no
    # longer point at the validated file once we are inside a rank directory.
    training_script = os.path.abspath(args.training_script)

    # spawn the processes
    processes = []
    cmds = []
    log_files = []
    env = os.environ.copy()
    env['RANK_SIZE'] = str(args.nproc_per_node)
    cur_path = os.getcwd()
    try:
        for rank_id in range(args.nproc_per_node):
            os.chdir(cur_path)
            device_id = visible_devices[rank_id]
            rank_dir = os.path.join(cur_path, 'rank{}'.format(rank_id))
            env['RANK_ID'] = str(rank_id)
            env['DEVICE_ID'] = str(device_id)
            # Always start from an empty per-rank working directory.
            if os.path.exists(rank_dir):
                shutil.rmtree(rank_dir)
            os.mkdir(rank_dir)
            os.chdir(rank_dir)
            cmd = [sys.executable, '-u', training_script]
            cmd.extend(args.training_script_args)
            log_file = open(f'{rank_dir}/log{rank_id}.log', 'w')
            # Track the handle first so it is closed even if Popen raises.
            log_files.append(log_file)
            process = subprocess.Popen(cmd, stdout=log_file, stderr=log_file, env=env)
            processes.append(process)
            cmds.append(cmd)
        for process, cmd in zip(processes, cmds):
            process.wait()
            if process.returncode != 0:
                # CalledProcessError expects the integer exit status, not the
                # Popen object.
                raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
    finally:
        # Closing our copies does not affect children still writing to theirs.
        for log_file in log_files:
            log_file.close()


 if __name__ == "__main__":
    main()
--- a/model_zoo/official/cv/mobilenetv3/Readme.md
+++ b/model_zoo/official/cv/mobilenetv3/Readme.md
@@ -64,7 +64,6 @@ Dataset used: [imagenet](http://www.image-net.org/)
  ├── src                              
  │   ├──config.py      # parameter configuration               
  │   ├──dataset.py     # creating dataset
  │   ├──launch.py      # start python script
  │   ├──lr_generator.py     # learning rate config                            
  │   ├──mobilenetV3.py      # MobileNetV3 architecture
  ├── train.py      # training script
--- a/model_zoo/official/cv/mobilenetv3/src/launch.py
+++ b/model_zoo/official/cv/mobilenetv3/src/launch.py
@@ -1,162 +0,0 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """launch train script"""
 import os
 import sys
 import json
 import subprocess
 import shutil
 from argparse import ArgumentParser

 def parse_args():
    """
    Parse the launcher's command-line options.

    Options recognised by the launcher itself are ``--nproc_per_node``,
    ``--visible_devices``, ``--server_id`` and ``--training_script``. Every
    argument the launcher does not recognise is collected, in order, into
    ``training_script_args`` on the returned namespace.

    Returns:
        argparse.Namespace, the parsed options plus ``training_script_args``.

    Examples:
        >>> parse_args()
    """
    description = ("mindspore distributed training launch "
                   "helper utilty that will spawn up "
                   "multiple distributed processes")
    nproc_help = ("The number of processes to launch on each node, "
                  "for D training, this is recommended to be set "
                  "to the number of D in your system so that "
                  "each process can be bound to a single D.")
    script_help = ("The full path to the single D training "
                   "program/script to be launched in parallel, "
                   "followed by all the arguments for the "
                   "training script")

    cli = ArgumentParser(description=description)
    cli.add_argument("--nproc_per_node", type=int, default=1, help=nproc_help)
    cli.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
                     help="will use the visible devices sequentially")
    cli.add_argument("--server_id", type=str, default="", help="server ip")
    cli.add_argument("--training_script", type=str, help=script_help)

    # Anything not declared above belongs to the training program.
    namespace, passthrough = cli.parse_known_args()
    namespace.training_script_args = passthrough
    return namespace


 def main():
    """Write a rank table for the selected devices and launch the training ranks.

    Steps, in order:

    1. Parse options with ``parse_args()``; exit with status 0 after printing
       a message when ``--server_id`` is empty.
    2. Read ``address_<id>=<ip>`` entries from ``/etc/hccn.conf``.
    3. Write ``rank_table_<n>p_<devices>_<server_id>.json`` into the current
       working directory, describing the first ``nproc_per_node`` visible
       devices.
    4. For each rank, recreate ``device<N>`` under the current working
       directory and start ``args.training_script`` from inside it with
       ``RANK_SIZE``, ``RANK_ID`` and ``DEVICE_ID`` set in the child
       environment, plus ``RANK_TABLE_FILE`` when more than one process is
       launched. Child stdout and stderr go to ``device<N>/log<N>.log``.
    5. Wait for every child.

    Raises:
        AssertionError: if ``args.training_script`` is not an existing file
            or fewer visible devices than processes were supplied.
        KeyError: if a selected device has no ``address_<id>`` entry in
            ``/etc/hccn.conf``.
        subprocess.CalledProcessError: if a child exits with a non-zero
            status; raised for the first such child in rank order.
    """
    print("start", __file__)
    args = parse_args()
    print(args)
    visible_devices = args.visible_devices.split(',')
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))
    if not args.server_id:
        print('pleaser input server ip!!!')
        # sys.exit rather than the site-provided exit(); status kept at 0.
        sys.exit(0)
    print('server_id:{}'.format(args.server_id))

    # Resolve the script before any chdir below; a relative path would no
    # longer point at the validated file once we are inside a device directory.
    training_script = os.path.abspath(args.training_script)

    # construct hccn_table
    device_ips = {}
    with open('/etc/hccn.conf', 'r') as hccn_file:
        for hccn_item in hccn_file:
            hccn_item = hccn_item.strip()
            if hccn_item.startswith('address_'):
                device_id, device_ip = hccn_item.split('=', 1)
                device_id = device_id.split('_')[1]
                device_ips[device_id] = device_ip
                print('device_id:{}, device_ip:{}'.format(device_id, device_ip))

    # Devices actually used: the first nproc_per_node visible ones, in order.
    used_devices = [visible_devices[rank_id] for rank_id in range(args.nproc_per_node)]
    usable_dev = ''.join(str(device_id) for device_id in used_devices)

    instance_list = []
    for instance_id, device_id in enumerate(used_devices):
        instance_list.append({
            'devices': [{
                'device_id': device_id,
                'device_ip': device_ips[device_id],
            }],
            'rank_id': str(instance_id),
            'server_id': args.server_id,
        })

    # Key order is kept as-is because it determines the layout of the JSON file.
    hccn_table = {
        'board_id': '0x0000',
        'chip_info': '910',
        'deploy_mode': 'lab',
        'group_count': '1',
        'group_list': [{
            'device_num': str(args.nproc_per_node),
            'server_num': '1',
            'group_name': '',
            'instance_count': str(args.nproc_per_node),
            'instance_list': instance_list,
        }],
        'para_plane_nic_location': 'device',
        'para_plane_nic_name': ['eth{}'.format(eth_id) for eth_id in used_devices],
        'para_plane_nic_num': str(args.nproc_per_node),
        'status': 'completed',
    }

    # save hccn_table to file
    cur_path = os.getcwd()
    table_fn = os.path.join(cur_path,
                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
    with open(table_fn, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
    sys.stdout.flush()

    # spawn the processes
    processes = []
    cmds = []
    log_files = []
    env = os.environ.copy()
    env['RANK_SIZE'] = str(args.nproc_per_node)
    if args.nproc_per_node > 1:
        env['RANK_TABLE_FILE'] = table_fn
    try:
        for rank_id, device_id in enumerate(used_devices):
            os.chdir(cur_path)
            device_dir = os.path.join(cur_path, 'device{}'.format(rank_id))
            env['RANK_ID'] = str(rank_id)
            env['DEVICE_ID'] = str(device_id)
            # Always start from an empty per-rank working directory.
            if os.path.exists(device_dir):
                shutil.rmtree(device_dir)
            os.mkdir(device_dir)
            os.chdir(device_dir)
            cmd = [sys.executable, '-u', training_script]
            cmd.extend(args.training_script_args)
            log_file = open('{dir}/log{id}.log'.format(dir=device_dir, id=rank_id), 'w')
            # Track the handle first so it is closed even if Popen raises.
            log_files.append(log_file)
            process = subprocess.Popen(cmd, stdout=log_file, stderr=log_file, env=env)
            processes.append(process)
            cmds.append(cmd)
        for process, cmd in zip(processes, cmds):
            process.wait()
            if process.returncode != 0:
                # CalledProcessError expects the integer exit status, not the
                # Popen object.
                raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
    finally:
        # Closing our copies does not affect children still writing to theirs.
        for log_file in log_files:
            log_file.close()


 if __name__ == "__main__":
    main()
--- a/model_zoo/official/cv/ssd/src/box_utils.py
+++ b/model_zoo/official/cv/ssd/src/box_utils.py
@@ -66,7 +66,7 @@ default_boxes_ltrb = GeneratDefaultBoxes().default_boxes_ltrb
 default_boxes = GeneratDefaultBoxes().default_boxes
 y1, x1, y2, x2 = np.split(default_boxes_ltrb[:, :4], 4, axis=-1)
 vol_anchors = (x2 - x1) * (y2 - y1)
 matching_threshold = config.match_thershold
 matching_threshold = config.match_threshold


 def ssd_bboxes_encode(boxes):
--- a/model_zoo/official/cv/ssd/src/coco_eval.py
+++ b/model_zoo/official/cv/ssd/src/coco_eval.py
@@ -100,7 +100,7 @@ def metrics(pred_data):
            class_boxes = pred_boxes[score_mask] * [h, w, h, w]

            if score_mask.any():
                nms_index = apply_nms(class_boxes, class_box_scores, config.nms_thershold, config.max_boxes)
                nms_index = apply_nms(class_boxes, class_box_scores, config.nms_threshold, config.max_boxes)
                class_boxes = class_boxes[nms_index]
                class_box_scores = class_box_scores[nms_index]

--- a/model_zoo/official/cv/ssd/src/config.py
+++ b/model_zoo/official/cv/ssd/src/config.py
@@ -21,8 +21,8 @@ config = ed({
    "img_shape": [300, 300],
    "num_ssd_boxes": 1917,
    "neg_pre_positive": 3,
    "match_thershold": 0.5,
    "nms_thershold": 0.6,
    "match_threshold": 0.5,
    "nms_threshold": 0.6,
    "min_score": 0.1,
    "max_boxes": 100,

@@ -38,7 +38,7 @@ config = ed({
    "num_default": [3, 6, 6, 6, 6, 6],
    "extras_in_channels": [256, 576, 1280, 512, 256, 256],
    "extras_out_channels": [576, 1280, 512, 256, 256, 128],
    "extras_srides": [1, 1, 2, 2, 2, 2],
    "extras_strides": [1, 1, 2, 2, 2, 2],
    "extras_ratio": [0.2, 0.2, 0.2, 0.25, 0.5, 0.25],
    "feature_size": [19, 10, 5, 3, 2, 1],
    "min_scale": 0.2,
--- a/model_zoo/official/cv/ssd/src/ssd.py
+++ b/model_zoo/official/cv/ssd/src/ssd.py
@@ -228,7 +228,7 @@ class SSD300(nn.Cell):
        in_channels = config.extras_in_channels
        out_channels = config.extras_out_channels
        ratios = config.extras_ratio
        strides = config.extras_srides
        strides = config.extras_strides
        residual_list = []
        for i in range(2, len(in_channels)):
            residual = InvertedResidual(in_channels[i], out_channels[i], stride=strides[i],