Browse Source

cloud

pull/15789/head
wwx691809 huchunmei 4 years ago
parent
commit
5efb0ae940
24 changed files with 1078 additions and 196 deletions
  1. +61
    -0
      model_zoo/official/cv/alexnet/config_imagenet.yaml
  2. +56
    -0
      model_zoo/official/cv/alexnet/default_config.yaml
  3. +44
    -35
      model_zoo/official/cv/alexnet/eval.py
  4. +19
    -31
      model_zoo/official/cv/alexnet/export.py
  5. +35
    -0
      model_zoo/official/cv/alexnet/scripts/run_eval_standalone_ascend.sh
  6. +35
    -0
      model_zoo/official/cv/alexnet/scripts/run_train_standalone_ascend.sh
  7. +0
    -1
      model_zoo/official/cv/alexnet/src/dataset.py
  8. +67
    -64
      model_zoo/official/cv/alexnet/train.py
  9. +0
    -0
      model_zoo/official/cv/alexnet/utils/__init__.py
  10. +127
    -0
      model_zoo/official/cv/alexnet/utils/config.py
  11. +27
    -0
      model_zoo/official/cv/alexnet/utils/device_adapter.py
  12. +36
    -0
      model_zoo/official/cv/alexnet/utils/local_adapter.py
  13. +122
    -0
      model_zoo/official/cv/alexnet/utils/moxing_adapter.py
  14. +56
    -0
      model_zoo/official/cv/lenet/default_config.yaml
  15. +26
    -18
      model_zoo/official/cv/lenet/eval.py
  16. +18
    -20
      model_zoo/official/cv/lenet/export.py
  17. +4
    -3
      model_zoo/official/cv/lenet/scripts/run_standalone_eval_ascend.sh
  18. +4
    -3
      model_zoo/official/cv/lenet/scripts/run_standalone_train_ascend.sh
  19. +29
    -21
      model_zoo/official/cv/lenet/train.py
  20. +0
    -0
      model_zoo/official/cv/lenet/utils/__init__.py
  21. +127
    -0
      model_zoo/official/cv/lenet/utils/config.py
  22. +27
    -0
      model_zoo/official/cv/lenet/utils/device_adapter.py
  23. +36
    -0
      model_zoo/official/cv/lenet/utils/local_adapter.py
  24. +122
    -0
      model_zoo/official/cv/lenet/utils/moxing_adapter.py

+ 61
- 0
model_zoo/official/cv/alexnet/config_imagenet.yaml View File

@@ -0,0 +1,61 @@
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
data_url: ""
train_url: ""
checkpoint_url: ""
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
checkpoint_path: './checkpoint/'
checkpoint_file: './checkpoint/checkpoint_alexnet-30_1562.ckpt'
device_target: Ascend
enable_profiling: False

data_path_local: '/data/hcm/data/ImageNet_Original/'
ckpt_path_local: '/data/hcm/data/ckpt_alexnet/checkpoint_alexnet-30_1562.ckpt'

# ==============================================================================
# Training options
num_classes: 1000
learning_rate: 0.13
momentum: 0.9
epoch_size: 150
batch_size: 256
buffer_size: None  # NOTE(review): YAML parses bare "None" as the string "None", not null — confirm intended value (the cifar10 config uses 1000)
image_height: 224
image_width: 224
save_checkpoint_steps: 625
keep_checkpoint_max: 10
air_name: 'alexnet.air'

weight_decay: 0.0001
loss_scale: 1024
is_dynamic_loss_scale: 0

# Model Description
model_name: alexnet
file_name: 'alexnet'
file_format: 'AIR'

dataset_name: 'imagenet'
sink_size: -1
dataset_sink_mode: True
device_id: 0
save_checkpoint: True
save_checkpoint_epochs: 2
lr: 0.01


---
# Config description for each option
enable_modelarts: 'Whether training on modelarts, default: False'
data_url: 'Dataset url for obs'
train_url: 'Training output url for obs'
data_path: 'Dataset path for local'
output_path: 'Training output path for local'

device_target: 'Target device type'
enable_profiling: 'Whether enable profiling while training, default: False'

---
device_target: ['Ascend', 'GPU', 'CPU']

+ 56
- 0
model_zoo/official/cv/alexnet/default_config.yaml View File

@@ -0,0 +1,56 @@
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
data_url: ""
train_url: ""
checkpoint_url: ""
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
checkpoint_path: './checkpoint/'
checkpoint_file: './checkpoint/checkpoint_alexnet-30_1562.ckpt'
device_target: Ascend
enable_profiling: False

data_path_local: '/data/hcm/data/cifar-10-batches-bin/'
ckpt_path_local: '/data/hcm/data/ckpt_alexnet/checkpoint_alexnet-30_1562.ckpt'
# ==============================================================================
# Training options
epoch_size: 30
keep_checkpoint_max: 10
num_classes: 10
learning_rate: 0.002
momentum: 0.9
batch_size: 32
buffer_size: 1000
image_height: 227
image_width: 227
save_checkpoint_steps: 1562
air_name: 'alexnet.air'

dataset_name: 'cifar10'
sink_size: -1
dataset_sink_mode: True
device_id: 0
save_checkpoint: True
save_checkpoint_epochs: 2
lr: 0.01

# Model Description
model_name: alexnet
file_name: 'alexnet'
file_format: 'AIR'


---
# Config description for each option
enable_modelarts: 'Whether training on modelarts, default: False'
data_url: 'Dataset url for obs'
train_url: 'Training output url for obs'
data_path: 'Dataset path for local'
output_path: 'Training output path for local'

device_target: 'Target device type'
enable_profiling: 'Whether enable profiling while training, default: False'

---
device_target: ['Ascend', 'GPU', 'CPU']

+ 44
- 35
model_zoo/official/cv/alexnet/eval.py View File

@@ -18,9 +18,13 @@ eval alexnet according to model file:
python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt
""" """


import ast
import argparse
from src.config import alexnet_cifar10_cfg, alexnet_imagenet_cfg
import os
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config
from utils.moxing_adapter import moxing_wrapper
from utils.device_adapter import get_device_id, get_device_num

from src.dataset import create_dataset_cifar10, create_dataset_imagenet from src.dataset import create_dataset_cifar10, create_dataset_imagenet
from src.alexnet import AlexNet from src.alexnet import AlexNet
import mindspore.nn as nn import mindspore.nn as nn
@@ -28,51 +32,52 @@ from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.train import Model from mindspore.train import Model
from mindspore.nn.metrics import Accuracy from mindspore.nn.metrics import Accuracy
from mindspore.communication.management import init




if __name__ == "__main__":
parser = argparse.ArgumentParser(description='MindSpore AlexNet Example')
parser.add_argument('--dataset_name', type=str, default='cifar10', choices=['imagenet', 'cifar10'],
help='dataset name.')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved')
parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\
path where the trained ckpt file')
parser.add_argument('--dataset_sink_mode', type=ast.literal_eval,
default=True, help='dataset_sink_mode is False or True')
parser.add_argument('--device_id', type=int, default=0, help='device id of GPU or Ascend. (Default: 0)')
args = parser.parse_args()
if os.path.exists(config.data_path_local):
config.data_path = config.data_path_local
load_path = config.ckpt_path_local
else:
load_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt')


context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
def modelarts_process():
pass


@moxing_wrapper(pre_process=modelarts_process)
def eval_alexnet():
print("============== Starting Testing ==============") print("============== Starting Testing ==============")


if args.dataset_name == 'cifar10':
cfg = alexnet_cifar10_cfg
network = AlexNet(cfg.num_classes, phase='test')
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
ds_eval = create_dataset_cifar10(args.data_path, cfg.batch_size, status="test", target=args.device_target)
device_num = get_device_num()
if device_num > 1:
# context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
context.set_context(mode=context.GRAPH_MODE, device_target='Davinci', save_graphs=False)
if config.device_target == "Ascend":
context.set_context(device_id=get_device_id())
init()
elif config.device_target == "GPU":
init()


param_dict = load_checkpoint(args.ckpt_path)
print("load checkpoint from [{}].".format(args.ckpt_path))
if config.dataset_name == 'cifar10':
network = AlexNet(config.num_classes, phase='test')
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
opt = nn.Momentum(network.trainable_params(), config.learning_rate, config.momentum)
ds_eval = create_dataset_cifar10(config.data_path, config.batch_size, status="test", \
target=config.device_target)
param_dict = load_checkpoint(load_path)
print("load checkpoint from [{}].".format(load_path))
load_param_into_net(network, param_dict) load_param_into_net(network, param_dict)
network.set_train(False) network.set_train(False)

model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})


elif args.dataset_name == 'imagenet':
cfg = alexnet_imagenet_cfg
network = AlexNet(cfg.num_classes, phase='test')
elif config.dataset_name == 'imagenet':
network = AlexNet(config.num_classes, phase='test')
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
ds_eval = create_dataset_imagenet(args.data_path, cfg.batch_size, training=False)

param_dict = load_checkpoint(args.ckpt_path)
print("load checkpoint from [{}].".format(args.ckpt_path))
ds_eval = create_dataset_imagenet(config.data_path, config.batch_size, training=False)
param_dict = load_checkpoint(load_path)
print("load checkpoint from [{}].".format(load_path))
load_param_into_net(network, param_dict) load_param_into_net(network, param_dict)
network.set_train(False) network.set_train(False)

model = Model(network, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) model = Model(network, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})


else: else:
@@ -81,5 +86,9 @@ if __name__ == "__main__":
if ds_eval.get_dataset_size() == 0: if ds_eval.get_dataset_size() == 0:
raise ValueError("Please check dataset size > 0 and batch_size <= dataset size") raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")


result = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
result = model.eval(ds_eval, dataset_sink_mode=config.dataset_sink_mode)
print("result : {}".format(result)) print("result : {}".format(result))


if __name__ == "__main__":
eval_alexnet()

+ 19
- 31
model_zoo/official/cv/alexnet/export.py View File

@@ -16,44 +16,32 @@
##############export checkpoint file into air, onnx, mindir models################# ##############export checkpoint file into air, onnx, mindir models#################
python export.py python export.py
""" """
import argparse
import numpy as np


import os
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config

import numpy as np
import mindspore as ms import mindspore as ms
from mindspore import context, Tensor, load_checkpoint, load_param_into_net, export from mindspore import context, Tensor, load_checkpoint, load_param_into_net, export

from src.config import alexnet_cifar10_cfg, alexnet_imagenet_cfg
from src.alexnet import AlexNet from src.alexnet import AlexNet


parser = argparse.ArgumentParser(description='Classification')
parser.add_argument("--device_id", type=int, default=0, help="Device id")
parser.add_argument("--batch_size", type=int, default=1, help="batch size")
parser.add_argument('--dataset_name', type=str, default='cifar10', choices=['imagenet', 'cifar10'],
help='please choose dataset: imagenet or cifar10.')
parser.add_argument('--device_target', type=str, default="Ascend",
choices=['Ascend', 'GPU', 'CPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument("--ckpt_file", type=str, required=True, help="Checkpoint file path.")
parser.add_argument("--file_name", type=str, default="alexnet", help="output file name.")
parser.add_argument("--file_format", type=str, choices=["AIR", "ONNX", "MINDIR"], default="AIR", help="file format")
args_opt = parser.parse_args()

context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
if args_opt.device_target == "Ascend":
context.set_context(device_id=args_opt.device_id)


if __name__ == '__main__':
if args_opt.dataset_name == 'cifar10':
cfg = alexnet_cifar10_cfg
elif args_opt.dataset_name == 'imagenet':
cfg = alexnet_imagenet_cfg
else:
raise ValueError("dataset is not support.")
if os.path.exists(config.data_path_local):
ckpt_path = config.ckpt_path_local
else:
ckpt_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt')

context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
if config.device_target == "Ascend":
context.set_context(device_id=config.device_id)


net = AlexNet(num_classes=cfg.num_classes)
if __name__ == '__main__':
net = AlexNet(num_classes=config.num_classes)


param_dict = load_checkpoint(args_opt.ckpt_file)
param_dict = load_checkpoint(ckpt_path)
load_param_into_net(net, param_dict) load_param_into_net(net, param_dict)


input_arr = Tensor(np.zeros([args_opt.batch_size, 3, cfg.image_height, cfg.image_width]), ms.float32)
export(net, input_arr, file_name=args_opt.file_name, file_format=args_opt.file_format)
input_arr = Tensor(np.zeros([config.batch_size, 3, config.image_height, config.image_width]), ms.float32)
export(net, input_arr, file_name=config.file_name, file_format=config.file_format)

+ 35
- 0
model_zoo/official/cv/alexnet/scripts/run_eval_standalone_ascend.sh View File

@@ -0,0 +1,35 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# a simple tutorial as follows; more parameters can be set
# echo "Usage: sh run_standalone_eval_ascend.sh [cifar10|imagenet] [DATA_PATH] [CKPT_PATH] [DEVICE_ID]"

BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)

# Pick the config that matches the requested dataset (default: cifar10).
if [ $# -ge 1 ]; then
    if [ "$1" == 'imagenet' ]; then
        CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
    elif [ "$1" == 'cifar10' ]; then
        CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
    else
        echo "Unrecognized parameter"
        exit 1
    fi
else
    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi

# python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH --device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
# Resolve eval.py relative to this script so the launcher works from any cwd
# (the original "python ../eval.py" only resolved when run inside scripts/).
python "${BASE_PATH}/../eval.py" --config_path="$CONFIG_FILE" > eval_log 2>&1 &

+ 35
- 0
model_zoo/official/cv/alexnet/scripts/run_train_standalone_ascend.sh View File

@@ -0,0 +1,35 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# a simple tutorial as follows; more parameters can be set
# echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"

BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)

# Pick the config that matches the requested dataset (default: cifar10).
if [ $# -ge 1 ]; then
    if [ "$1" == 'imagenet' ]; then
        CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
    elif [ "$1" == 'cifar10' ]; then
        CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
    else
        echo "Unrecognized parameter"
        exit 1
    fi
else
    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi

# python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
# Resolve train.py relative to this script so the launcher works from any cwd
# (the original "python ../train.py" only resolved when run inside scripts/).
python "${BASE_PATH}/../train.py" --config_path="$CONFIG_FILE" > log 2>&1 &

+ 0
- 1
model_zoo/official/cv/alexnet/src/dataset.py View File

@@ -17,7 +17,6 @@ Produce the dataset
""" """


import os import os

import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV


+ 67
- 64
model_zoo/official/cv/alexnet/train.py View File

@@ -18,10 +18,14 @@ train alexnet and get network model files(.ckpt) :
python train.py --data_path /YourDataPath python train.py --data_path /YourDataPath
""" """


import ast
import argparse
import os import os
from src.config import alexnet_cifar10_cfg, alexnet_imagenet_cfg
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config
from utils.moxing_adapter import moxing_wrapper
from utils.device_adapter import get_device_id, get_device_num, get_rank_id, get_job_id

# from src.config import alexnet_cifar10_config, alexnet_imagenet_config
from src.dataset import create_dataset_cifar10, create_dataset_imagenet from src.dataset import create_dataset_cifar10, create_dataset_imagenet
from src.generator_lr import get_lr_cifar10, get_lr_imagenet from src.generator_lr import get_lr_cifar10, get_lr_imagenet
from src.alexnet import AlexNet from src.alexnet import AlexNet
@@ -40,88 +44,84 @@ from mindspore.common import set_seed
set_seed(1) set_seed(1)
de.config.set_seed(1) de.config.set_seed(1)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='MindSpore AlexNet Example')
parser.add_argument('--dataset_name', type=str, default='cifar10', choices=['imagenet', 'cifar10'],
help='dataset name.')
parser.add_argument('--sink_size', type=int, default=-1, help='control the amount of data in each sink')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved')
parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\
path where the trained ckpt file')
parser.add_argument('--dataset_sink_mode', type=ast.literal_eval,
default=True, help='dataset_sink_mode is False or True')
parser.add_argument('--device_id', type=int, default=0, help='device id of GPU or Ascend. (Default: 0)')
args = parser.parse_args()

device_num = int(os.environ.get("DEVICE_NUM", 1))
if args.dataset_name == "cifar10":
cfg = alexnet_cifar10_cfg
if os.path.exists(config.data_path_local):
config.data_path = config.data_path_local
config.checkpoint_path = os.path.join(config.checkpoint_path, str(get_rank_id()))
else:
config.checkpoint_path = os.path.join(config.output_path, config.checkpoint_path, str(get_rank_id()))

def modelarts_pre_process():
pass

@moxing_wrapper(pre_process=modelarts_pre_process)
def train_alexnet():
print(config)
print('device id:', get_device_id())
print('device num:', get_device_num())
print('rank id:', get_rank_id())
print('job id:', get_job_id())

device_target = config.device_target
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
context.set_context(save_graphs=False)

device_num = get_device_num()
if config.dataset_name == "cifar10":
if device_num > 1: if device_num > 1:
cfg.learning_rate = cfg.learning_rate * device_num
cfg.epoch_size = cfg.epoch_size * 2
elif args.dataset_name == "imagenet":
cfg = alexnet_imagenet_cfg
config.learning_rate = config.learning_rate * device_num
config.epoch_size = config.epoch_size * 2
elif config.dataset_name == "imagenet":
pass
else: else:
raise ValueError("Unsupported dataset.") raise ValueError("Unsupported dataset.")


device_target = args.device_target
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
context.set_context(save_graphs=False)

if device_target == "Ascend":
context.set_context(device_id=args.device_id)

if device_num > 1:
context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
gradients_mean=True)
if device_num > 1:
context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=device_num, \
parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
if device_target == "Ascend":
context.set_context(device_id=get_device_id())
init() init()
elif device_target == "GPU":
if device_num > 1:
elif device_target == "GPU":
init() init()
context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
gradients_mean=True)
else: else:
raise ValueError("Unsupported platform.")
context.set_context(device_id=get_device_id())


if args.dataset_name == "cifar10":
ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, target=args.device_target)
elif args.dataset_name == "imagenet":
ds_train = create_dataset_imagenet(args.data_path, cfg.batch_size)
if config.dataset_name == "cifar10":
ds_train = create_dataset_cifar10(config.data_path, config.batch_size, target=config.device_target)
elif config.dataset_name == "imagenet":
ds_train = create_dataset_imagenet(config.data_path, config.batch_size)
else: else:
raise ValueError("Unsupported dataset.") raise ValueError("Unsupported dataset.")


if ds_train.get_dataset_size() == 0: if ds_train.get_dataset_size() == 0:
raise ValueError("Please check dataset size > 0 and batch_size <= dataset size") raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")


network = AlexNet(cfg.num_classes, phase='train')
network = AlexNet(config.num_classes, phase='train')


loss_scale_manager = None loss_scale_manager = None
metrics = None metrics = None
step_per_epoch = ds_train.get_dataset_size() if args.sink_size == -1 else args.sink_size
if args.dataset_name == 'cifar10':
step_per_epoch = ds_train.get_dataset_size() if config.sink_size == -1 else config.sink_size
if config.dataset_name == 'cifar10':
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
lr = Tensor(get_lr_cifar10(0, cfg.learning_rate, cfg.epoch_size, step_per_epoch))
opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum)
lr = Tensor(get_lr_cifar10(0, config.learning_rate, config.epoch_size, step_per_epoch))
opt = nn.Momentum(network.trainable_params(), lr, config.momentum)
metrics = {"Accuracy": Accuracy()} metrics = {"Accuracy": Accuracy()}


elif args.dataset_name == 'imagenet':
elif config.dataset_name == 'imagenet':
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
lr = Tensor(get_lr_imagenet(cfg.learning_rate, cfg.epoch_size, step_per_epoch))
lr = Tensor(get_lr_imagenet(config.learning_rate, config.epoch_size, step_per_epoch))
opt = nn.Momentum(params=get_param_groups(network), opt = nn.Momentum(params=get_param_groups(network),
learning_rate=lr, learning_rate=lr,
momentum=cfg.momentum,
weight_decay=cfg.weight_decay,
loss_scale=cfg.loss_scale)
momentum=config.momentum,
weight_decay=config.weight_decay,
loss_scale=config.loss_scale)


from mindspore.train.loss_scale_manager import DynamicLossScaleManager, FixedLossScaleManager from mindspore.train.loss_scale_manager import DynamicLossScaleManager, FixedLossScaleManager
if cfg.is_dynamic_loss_scale == 1:
if config.is_dynamic_loss_scale == 1:
loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536, scale_factor=2, scale_window=2000) loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536, scale_factor=2, scale_window=2000)
else: else:
loss_scale_manager = FixedLossScaleManager(cfg.loss_scale, drop_overflow_update=False)
loss_scale_manager = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)


else: else:
raise ValueError("Unsupported dataset.") raise ValueError("Unsupported dataset.")
@@ -135,15 +135,18 @@ if __name__ == "__main__":
raise ValueError("Unsupported platform.") raise ValueError("Unsupported platform.")


if device_num > 1: if device_num > 1:
ckpt_save_dir = os.path.join(args.ckpt_path + "_" + str(get_rank()))
ckpt_save_dir = os.path.join(config.checkpoint_path + "_" + str(get_rank()))
else: else:
ckpt_save_dir = args.ckpt_path
ckpt_save_dir = config.checkpoint_path


time_cb = TimeMonitor(data_size=step_per_epoch) time_cb = TimeMonitor(data_size=step_per_epoch)
config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
keep_checkpoint_max=cfg.keep_checkpoint_max)
config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
keep_checkpoint_max=config.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet", directory=ckpt_save_dir, config=config_ck) ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet", directory=ckpt_save_dir, config=config_ck)


print("============== Starting Training ==============") print("============== Starting Training ==============")
model.train(cfg.epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()],
dataset_sink_mode=args.dataset_sink_mode, sink_size=args.sink_size)
model.train(config.epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()],
dataset_sink_mode=config.dataset_sink_mode, sink_size=config.sink_size)

if __name__ == "__main__":
train_alexnet()

+ 0
- 0
model_zoo/official/cv/alexnet/utils/__init__.py View File


+ 127
- 0
model_zoo/official/cv/alexnet/utils/config.py View File

@@ -0,0 +1,127 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Parse arguments"""

import os
import ast
import argparse
from pprint import pprint, pformat
import yaml

class Config:
    """Configuration namespace that exposes dictionary keys as attributes.

    Nested dictionaries become nested Config objects; lists and tuples are
    converted element-wise (dict elements wrapped, everything else kept),
    and always stored as plain lists.
    """

    def __init__(self, cfg_dict):
        for key, value in cfg_dict.items():
            if isinstance(value, dict):
                value = Config(value)
            elif isinstance(value, (list, tuple)):
                value = [Config(item) if isinstance(item, dict) else item
                         for item in value]
            setattr(self, key, value)

    def __str__(self):
        # Pretty-print the attribute dict for readable logging.
        return pformat(self.__dict__)

    def __repr__(self):
        return self.__str__()


def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="default_config.yaml"):
    """
    Parse command line arguments to the configuration according to the default yaml.

    Every scalar entry in *cfg* becomes a `--<name>` flag whose default is the
    yaml value; nested lists/dicts stay yaml-only.

    Args:
        parser: Parent parser.
        cfg: Base configuration.
        helper: Helper description per option.
        choices: Allowed values per option.
        cfg_path: Path to the default yaml config.

    Returns:
        Parsed argparse namespace.
    """
    full_parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]",
                                          parents=[parser])
    if helper is None:
        helper = {}
    if choices is None:
        choices = {}
    for name, default in cfg.items():
        if isinstance(default, (list, dict)):
            continue  # nested structures cannot be expressed as a single flag
        desc = helper.get(name, "Please reference to {}".format(cfg_path))
        # Booleans need ast.literal_eval: bool("False") would be truthy.
        arg_type = ast.literal_eval if isinstance(default, bool) else type(default)
        full_parser.add_argument("--" + name, type=arg_type, default=default,
                                 choices=choices.get(name), help=desc)
    return full_parser.parse_args()


def parse_yaml(yaml_path):
    """
    Parse the yaml config file.

    The file may contain one to three YAML documents: the base config,
    per-option help descriptions, and per-option choices.

    Args:
        yaml_path: Path to the yaml config.

    Returns:
        Tuple of (cfg, cfg_helper, cfg_choices); missing docs default to {}.

    Raises:
        ValueError: If the yaml is malformed or contains more than 3 documents.
    """
    with open(yaml_path, 'r') as fin:
        try:
            cfgs = list(yaml.load_all(fin.read(), Loader=yaml.FullLoader))
        except yaml.YAMLError as err:
            # Narrow except + chaining: the original bare `except:` swallowed
            # every error (including the "at most 3 docs" ValueError below)
            # and hid the underlying parser message.
            raise ValueError("Failed to parse yaml") from err
    cfg_helper = {}
    cfg_choices = {}
    if len(cfgs) == 1:
        cfg = cfgs[0]
    elif len(cfgs) == 2:
        cfg, cfg_helper = cfgs
    elif len(cfgs) == 3:
        cfg, cfg_helper, cfg_choices = cfgs
    else:
        raise ValueError("At most 3 docs (config, description for help, choices) are supported in config yaml")
    print(cfg_helper)
    return cfg, cfg_helper, cfg_choices


def merge(args, cfg):
    """
    Merge the base config from yaml file and command line arguments.

    Command line values overwrite the yaml defaults; *cfg* is updated in place.

    Args:
        args: Command line arguments (argparse namespace).
        cfg: Base configuration dictionary.

    Returns:
        The merged configuration dictionary.
    """
    cfg.update(vars(args))
    return cfg


def get_config():
    """
    Get Config according to the yaml file and cli arguments.

    A pre-parser first extracts --config_path (defaulting to the sibling
    default_config.yaml); the yaml defaults are then exposed as cli flags,
    parsed, and merged back into the final Config object.
    """
    pre_parser = argparse.ArgumentParser(description="default name", add_help=False)
    default_cfg_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                    "../default_config.yaml")
    pre_parser.add_argument("--config_path", type=str, default=default_cfg_path,
                            help="Config file path")
    path_args, _ = pre_parser.parse_known_args()
    default, helper, choices = parse_yaml(path_args.config_path)
    pprint(default)
    cli_args = parse_cli_to_yaml(parser=pre_parser, cfg=default, helper=helper,
                                 choices=choices, cfg_path=path_args.config_path)
    return Config(merge(cli_args, default))

# Module-level singleton: the configuration is parsed once at import time.
config = get_config()

+ 27
- 0
model_zoo/official/cv/alexnet/utils/device_adapter.py View File

@@ -0,0 +1,27 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Device adapter for ModelArts"""

from .config import config

if config.enable_modelarts:
from .moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
else:
from .local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id

__all__ = [
"get_device_id", "get_device_num", "get_rank_id", "get_job_id"
]

+ 36
- 0
model_zoo/official/cv/alexnet/utils/local_adapter.py View File

@@ -0,0 +1,36 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Local adapter"""

import os

def get_device_id():
    """Return the local device id from the DEVICE_ID env var (default 0)."""
    return int(os.environ.get('DEVICE_ID', '0'))


def get_device_num():
    """Return the number of devices from the RANK_SIZE env var (default 1)."""
    return int(os.environ.get('RANK_SIZE', '1'))


def get_rank_id():
    """Return the global rank id from the RANK_ID env var (default 0)."""
    return int(os.environ.get('RANK_ID', '0'))


def get_job_id():
    """Return a fixed job identifier for local (non-ModelArts) runs."""
    return "Local Job"

+ 122
- 0
model_zoo/official/cv/alexnet/utils/moxing_adapter.py View File

@@ -0,0 +1,122 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Moxing adapter for ModelArts"""

import os
import functools
from mindspore import context
from mindspore.profiler import Profiler
from .config import config

_global_sync_count = 0

def get_device_id():
    """Return the device id from the DEVICE_ID env var (default 0)."""
    return int(os.environ.get('DEVICE_ID', '0'))


def get_device_num():
    """Return the device count from the RANK_SIZE env var (default 1)."""
    return int(os.environ.get('RANK_SIZE', '1'))


def get_rank_id():
    """Return the global rank id from the RANK_ID env var (default 0)."""
    return int(os.environ.get('RANK_ID', '0'))


def get_job_id():
    """Return the ModelArts job id from the JOB_ID env var.

    Falls back to "default" when JOB_ID is unset or empty.  The original
    compared only against "" so an unset variable leaked None to callers
    instead of the intended default.
    """
    job_id = os.getenv('JOB_ID')
    return job_id if job_id else "default"

def sync_data(from_path, to_path):
    """
    Copy a directory between OBS and the local filesystem via moxing.

    Direction is determined by the URLs: remote->local downloads, local->remote
    uploads.  Only one device per server performs the copy; the others block on
    a shared lock file until it finishes.
    """
    # moxing is only available inside ModelArts, so import lazily here.
    import moxing as mox
    import time
    global _global_sync_count
    # Each call gets its own lock file so successive syncs don't collide.
    sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count)
    _global_sync_count += 1

    # Each server contains 8 devices as most.
    # Only the first device on each server copies; device_id is per-server,
    # so modulo min(device_num, 8) selects one copier per machine.
    if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
        print("from path: ", from_path)
        print("to path: ", to_path)
        mox.file.copy_parallel(from_path, to_path)
        print("===finish data synchronization===")
        try:
            # Create the lock file to signal completion to the other devices.
            os.mknod(sync_lock)
        except IOError:
            # Another process may have created it first; that's fine.
            pass
        print("===save flag===")

    # Non-copier devices (and the copier itself) wait for the completion flag.
    while True:
        if os.path.exists(sync_lock):
            break
        time.sleep(1)

    print("Finish sync data from {} to {}.".format(from_path, to_path))


def moxing_wrapper(pre_process=None, post_process=None):
    """
    Moxing wrapper to download dataset and upload outputs.

    Args:
        pre_process: Optional callable invoked after inputs are synchronized
            and before the wrapped function runs.
        post_process: Optional callable invoked after the wrapped function
            returns and before outputs are uploaded (ModelArts only).

    Returns:
        A decorator.  The wrapped function now propagates the return value
        of ``run_func`` — the original wrapper silently discarded it.
    """
    def wrapper(run_func):
        @functools.wraps(run_func)
        def wrapped_func(*args, **kwargs):
            # Download data from data_url
            if config.enable_modelarts:
                if config.data_url:
                    sync_data(config.data_url, config.data_path)
                    print("Dataset downloaded: ", os.listdir(config.data_path))
                if config.checkpoint_url:
                    sync_data(config.checkpoint_url, config.load_path)
                    print("Preload downloaded: ", os.listdir(config.load_path))
                if config.train_url:
                    sync_data(config.train_url, config.output_path)
                    print("Workspace downloaded: ", os.listdir(config.output_path))

                context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
                config.device_num = get_device_num()
                config.device_id = get_device_id()
                if not os.path.exists(config.output_path):
                    os.makedirs(config.output_path)

            if pre_process:
                pre_process()

            # Bind to None up front so the post-run check cannot hit an
            # unbound local if enable_profiling changes mid-run.
            profiler = Profiler() if config.enable_profiling else None

            result = run_func(*args, **kwargs)

            if profiler is not None:
                profiler.analyse()

            # Upload outputs to train_url (ModelArts only).
            if config.enable_modelarts:
                if post_process:
                    post_process()

                if config.train_url:
                    print("Start to copy output directory")
                    sync_data(config.output_path, config.train_url)
            return result
        return wrapped_func
    return wrapper

+ 56
- 0
model_zoo/official/cv/lenet/default_config.yaml View File

@@ -0,0 +1,56 @@
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
data_url: ""
train_url: ""
checkpoint_url: ""
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
checkpoint_path: './checkpoint/'
checkpoint_file: './checkpoint/checkpoint_lenet-10_1875.ckpt'
device_target: Ascend
enable_profiling: False

data_path_local: '/data/hcm/data/MNIST_Data/'
ckpt_path_local: '/data/hcm/data/ckpt_lenet/checkpoint_lenet-10_1875.ckpt'
# ==============================================================================
# Training options
num_classes: 10
lr: 0.01
momentum: 0.9
epoch_size: 10
batch_size: 15 # 32
buffer_size: 1000
image_height: 32
image_width: 32
save_checkpoint_steps: 1875
keep_checkpoint_max: 10
air_name: "lenet"
device_id: 0
file_name: "lenet"
file_format: "AIR"

model_name: lenet
learning_rate: 0.002
dataset_name: 'mnist'
sink_size: -1
dataset_sink_mode: True
save_checkpoint: True
save_checkpoint_epochs: 2

---
# Config description for each option
enable_modelarts: 'Whether training on modelarts, default: False'
data_url: 'Dataset url for obs'
train_url: 'Training output url for obs'
data_path: 'Dataset path for local'
output_path: 'Training output path for local'

device_target: 'Target device type'
enable_profiling: 'Whether enable profiling while training, default: False'
file_name: 'output file name.'
file_format: 'file format'

---
device_target: ['Ascend', 'GPU', 'CPU']
file_format: ['AIR', 'ONNX', 'MINDIR']

+ 26
- 18
model_zoo/official/cv/lenet/eval.py View File

@@ -19,43 +19,51 @@ python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt
""" """


import os import os
import argparse
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config
from utils.moxing_adapter import moxing_wrapper

import mindspore.nn as nn import mindspore.nn as nn
from mindspore import context from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.train import Model from mindspore.train import Model
from mindspore.nn.metrics import Accuracy from mindspore.nn.metrics import Accuracy
from src.dataset import create_dataset from src.dataset import create_dataset
from src.config import mnist_cfg as cfg
from src.lenet import LeNet5 from src.lenet import LeNet5


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='MindSpore Lenet Example')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--data_path', type=str, default="./Data",
help='path where the dataset is saved')
parser.add_argument('--ckpt_path', type=str, default="", help='if mode is test, must provide\
path where the trained ckpt file')
if os.path.exists(config.data_path_local):
config.data_path = config.data_path_local
ckpt_path = config.ckpt_path_local
else:
ckpt_path = os.path.join(config.data_path, 'checkpoint_lenet-10_1875.ckpt')
def modelarts_process():
pass


args = parser.parse_args()
@moxing_wrapper(pre_process=modelarts_process)
def eval_lenet():


context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)


network = LeNet5(cfg.num_classes)
network = LeNet5(config.num_classes)
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
repeat_size = cfg.epoch_size
net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
# repeat_size = config.epoch_size
net_opt = nn.Momentum(network.trainable_params(), config.lr, config.momentum)
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})


print("============== Starting Testing ==============") print("============== Starting Testing ==============")
param_dict = load_checkpoint(args.ckpt_path)
param_dict = load_checkpoint(ckpt_path)
load_param_into_net(network, param_dict) load_param_into_net(network, param_dict)
ds_eval = create_dataset(os.path.join(args.data_path, "test"),
cfg.batch_size,
ds_eval = create_dataset(os.path.join(config.data_path, "test"),
config.batch_size,
1) 1)
if ds_eval.get_dataset_size() == 0: if ds_eval.get_dataset_size() == 0:
raise ValueError("Please check dataset size > 0 and batch_size <= dataset size") raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")


acc = model.eval(ds_eval) acc = model.eval(ds_eval)
print("============== {} ==============".format(acc)) print("============== {} ==============".format(acc))


if __name__ == "__main__":
eval_lenet()

+ 18
- 20
model_zoo/official/cv/lenet/export.py View File

@@ -14,37 +14,35 @@
# ============================================================================ # ============================================================================
"""export checkpoint file into air, onnx, mindir models""" """export checkpoint file into air, onnx, mindir models"""


import argparse
import numpy as np
import os
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config
from utils.device_adapter import get_device_id


import numpy as np
import mindspore import mindspore
from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export

from src.config import mnist_cfg as cfg
from src.lenet import LeNet5 from src.lenet import LeNet5


parser = argparse.ArgumentParser(description='MindSpore MNIST Example')
parser.add_argument("--device_id", type=int, default=0, help="Device id")
parser.add_argument("--batch_size", type=int, default=1, help="batch size")
parser.add_argument("--ckpt_file", type=str, required=True, help="Checkpoint file path.")
parser.add_argument("--file_name", type=str, default="lenet", help="output file name.")
parser.add_argument("--file_format", type=str, choices=["AIR", "ONNX", "MINDIR"], default="AIR", help="file format")
parser.add_argument("--device_target", type=str, choices=["Ascend", "GPU", "CPU"], default="Ascend",
help="device target")
args = parser.parse_args()


context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
if args.device_target == "Ascend":
context.set_context(device_id=args.device_id)
if os.path.exists(config.data_path_local):
ckpt_file = config.ckpt_path_local
else:
ckpt_file = os.path.join(config.data_path, 'checkpoint_lenet-10_1875.ckpt')

context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
if config.device_target == "Ascend":
context.set_context(device_id=get_device_id())


if __name__ == "__main__": if __name__ == "__main__":


# define fusion network # define fusion network
network = LeNet5(cfg.num_classes)
network = LeNet5(config.num_classes)
# load network checkpoint # load network checkpoint
param_dict = load_checkpoint(args.ckpt_file)
param_dict = load_checkpoint(ckpt_file)
load_param_into_net(network, param_dict) load_param_into_net(network, param_dict)


# export network # export network
inputs = Tensor(np.ones([args.batch_size, 1, cfg.image_height, cfg.image_width]), mindspore.float32)
export(network, inputs, file_name=args.file_name, file_format=args.file_format)
inputs = Tensor(np.ones([config.batch_size, 1, config.image_height, config.image_width]), mindspore.float32)
export(network, inputs, file_name=config.file_name, file_format=config.file_format)

+ 4
- 3
model_zoo/official/cv/lenet/scripts/run_standalone_eval_ascend.sh View File

@@ -17,6 +17,7 @@
# a simple tutorial follows; more parameters can be set # a simple tutorial follows; more parameters can be set
script_self=$(readlink -f "$0") script_self=$(readlink -f "$0")
self_path=$(dirname "${script_self}") self_path=$(dirname "${script_self}")
DATA_PATH=$1
CKPT_PATH=$2
python -s ${self_path}/../eval.py --data_path=$DATA_PATH --device_target="Ascend" --ckpt_path=$CKPT_PATH > log.txt 2>&1 &
# DATA_PATH=$1
# CKPT_PATH=$2
# --data_path=$DATA_PATH --device_target="Ascend" --ckpt_path=$CKPT_PATH
python -s ${self_path}/../eval.py > log_eval.txt 2>&1 &

+ 4
- 3
model_zoo/official/cv/lenet/scripts/run_standalone_train_ascend.sh View File

@@ -17,6 +17,7 @@
# a simple tutorial follows; more parameters can be set # a simple tutorial follows; more parameters can be set
script_self=$(readlink -f "$0") script_self=$(readlink -f "$0")
self_path=$(dirname "${script_self}") self_path=$(dirname "${script_self}")
DATA_PATH=$1
CKPT_PATH=$2
python -s ${self_path}/../train.py --data_path=$DATA_PATH --device_target="Ascend" --ckpt_path=$CKPT_PATH > log.txt 2>&1 &
# DATA_PATH=$1
# CKPT_PATH=$2
# --data_path=$DATA_PATH --device_target="Ascend" --ckpt_path=$CKPT_PATH
python -s ${self_path}/../train.py > log.txt 2>&1 &

+ 29
- 21
model_zoo/official/cv/lenet/train.py View File

@@ -19,8 +19,12 @@ python train.py --data_path /YourDataPath
""" """


import os import os
import argparse
from src.config import mnist_cfg as cfg
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config
from utils.moxing_adapter import moxing_wrapper
from utils.device_adapter import get_rank_id

from src.dataset import create_dataset from src.dataset import create_dataset
from src.lenet import LeNet5 from src.lenet import LeNet5
import mindspore.nn as nn import mindspore.nn as nn
@@ -30,36 +34,40 @@ from mindspore.train import Model
from mindspore.nn.metrics import Accuracy from mindspore.nn.metrics import Accuracy
from mindspore.common import set_seed from mindspore.common import set_seed



parser = argparse.ArgumentParser(description='MindSpore Lenet Example')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--data_path', type=str, default="./Data",
help='path where the dataset is saved')
parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\
path where the trained ckpt file')
args = parser.parse_args()
set_seed(1) set_seed(1)


if os.path.exists(config.data_path_local):
config.data_path = config.data_path_local
config.checkpoint_path = os.path.join(config.checkpoint_path, str(get_rank_id()))
else:
config.checkpoint_path = os.path.join(config.output_path, config.checkpoint_path, str(get_rank_id()))


if __name__ == "__main__":
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
ds_train = create_dataset(os.path.join(args.data_path, "train"), cfg.batch_size)
def modelarts_pre_process():
pass

@moxing_wrapper(pre_process=modelarts_pre_process)
def train_lenet():

context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
ds_train = create_dataset(os.path.join(config.data_path, "train"), config.batch_size)
if ds_train.get_dataset_size() == 0: if ds_train.get_dataset_size() == 0:
raise ValueError("Please check dataset size > 0 and batch_size <= dataset size") raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")


network = LeNet5(cfg.num_classes)
network = LeNet5(config.num_classes)
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
net_opt = nn.Momentum(network.trainable_params(), config.lr, config.momentum)
time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
keep_checkpoint_max=cfg.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", directory=args.ckpt_path, config=config_ck)
config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
keep_checkpoint_max=config.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", directory=config.checkpoint_path, config=config_ck)


if args.device_target != "Ascend":
if config.device_target != "Ascend":
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
else: else:
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}, amp_level="O2") model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}, amp_level="O2")


print("============== Starting Training ==============") print("============== Starting Training ==============")
model.train(cfg['epoch_size'], ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()])
model.train(config.epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()])

if __name__ == "__main__":
train_lenet()

+ 0
- 0
model_zoo/official/cv/lenet/utils/__init__.py View File


+ 127
- 0
model_zoo/official/cv/lenet/utils/config.py View File

@@ -0,0 +1,127 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Parse arguments"""

import os
import ast
import argparse
from pprint import pprint, pformat
import yaml

class Config:
    """
    Configuration namespace exposing dictionary entries as attributes.

    Nested dictionaries become nested Config objects; dictionaries found
    inside lists/tuples are converted element-wise (sequences come back
    as lists).
    """

    def __init__(self, cfg_dict):
        for key, value in cfg_dict.items():
            if isinstance(value, (list, tuple)):
                converted = [Config(item) if isinstance(item, dict) else item
                             for item in value]
                setattr(self, key, converted)
            else:
                setattr(self, key, Config(value) if isinstance(value, dict) else value)

    def __str__(self):
        return pformat(self.__dict__)

    def __repr__(self):
        return self.__str__()


def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="default_config.yaml"):
    """
    Parse command line arguments to the configuration according to the default yaml.

    Builds one --<key> flag per scalar yaml entry, using the yaml value as the
    default, so any CLI flag overrides the file.  NOTE(review): calls
    parser.parse_args(), so this consumes sys.argv when invoked.

    Args:
        parser: Parent parser.
        cfg: Base configuration.
        helper: Helper description.
        choices: Mapping of option name to its allowed values.
        cfg_path: Path to the default yaml config.
    """
    parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]",
                                     parents=[parser])
    helper = {} if helper is None else helper
    choices = {} if choices is None else choices
    for item in cfg:
        # Lists/dicts stay yaml-only; only scalars become CLI flags.
        if not isinstance(cfg[item], list) and not isinstance(cfg[item], dict):
            help_description = helper[item] if item in helper else "Please reference to {}".format(cfg_path)
            choice = choices[item] if item in choices else None
            if isinstance(cfg[item], bool):
                # bool("False") is truthy, so booleans are parsed with
                # ast.literal_eval instead of type=bool.
                parser.add_argument("--" + item, type=ast.literal_eval, default=cfg[item], choices=choice,
                                    help=help_description)
            else:
                parser.add_argument("--" + item, type=type(cfg[item]), default=cfg[item], choices=choice,
                                    help=help_description)
    args = parser.parse_args()
    return args


def parse_yaml(yaml_path):
    """
    Parse the yaml config file.

    The file may hold up to three YAML documents: the base config, an
    optional help-text document and an optional choices document.

    Args:
        yaml_path: Path to the yaml config.

    Returns:
        Tuple (cfg, cfg_helper, cfg_choices); missing documents default
        to empty dicts.

    Raises:
        ValueError: If the file is not valid yaml, or contains more than
            three documents.
    """
    with open(yaml_path, 'r') as fin:
        try:
            cfgs = list(yaml.load_all(fin.read(), Loader=yaml.FullLoader))
        except yaml.YAMLError as err:
            # Narrowed from a bare `except:`, which also swallowed the
            # "At most 3 docs" ValueError below and hid the real parse error.
            raise ValueError("Failed to parse yaml") from err
    if len(cfgs) == 1:
        cfg, cfg_helper, cfg_choices = cfgs[0], {}, {}
    elif len(cfgs) == 2:
        cfg, cfg_helper = cfgs
        cfg_choices = {}
    elif len(cfgs) == 3:
        cfg, cfg_helper, cfg_choices = cfgs
    else:
        raise ValueError("At most 3 docs (config, description for help, choices) are supported in config yaml")
    print(cfg_helper)
    return cfg, cfg_helper, cfg_choices


def merge(args, cfg):
    """
    Merge command line arguments into the base yaml config.

    Args:
        args: Parsed command line arguments (an argparse.Namespace).
        cfg: Base configuration dict; updated in place, CLI values win.

    Returns:
        The updated configuration dict.
    """
    cfg.update(vars(args))
    return cfg


def get_config():
    """
    Get Config according to the yaml file and cli arguments.

    The yaml at --config_path (default: ../default_config.yaml relative to
    this file) supplies base values; matching command line flags override
    them.  NOTE(review): parses sys.argv, so importing a module that calls
    this at import time consumes the process arguments.
    """
    parser = argparse.ArgumentParser(description="default name", add_help=False)
    current_dir = os.path.dirname(os.path.abspath(__file__))
    parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../default_config.yaml"),
                        help="Config file path")
    # parse_known_args: only --config_path is needed here; the remaining
    # flags are defined from the yaml contents in parse_cli_to_yaml.
    path_args, _ = parser.parse_known_args()
    default, helper, choices = parse_yaml(path_args.config_path)
    pprint(default)
    args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=path_args.config_path)
    final_config = merge(args, default)
    return Config(final_config)

config = get_config()

+ 27
- 0
model_zoo/official/cv/lenet/utils/device_adapter.py View File

@@ -0,0 +1,27 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Device adapter for ModelArts"""

from .config import config

# Choose the adapter implementation at import time: on ModelArts the ids
# come from the moxing environment, otherwise from local env variables.
if config.enable_modelarts:
    from .moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
else:
    from .local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id

__all__ = [
    "get_device_id", "get_device_num", "get_rank_id", "get_job_id"
]

+ 36
- 0
model_zoo/official/cv/lenet/utils/local_adapter.py View File

@@ -0,0 +1,36 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Local adapter"""

import os

def get_device_id():
    """Return the local device id from DEVICE_ID (default 0)."""
    raw = os.environ.get('DEVICE_ID', '0')
    return int(raw)


def get_device_num():
    """Return the device count from RANK_SIZE (default 1)."""
    raw = os.environ.get('RANK_SIZE', '1')
    return int(raw)


def get_rank_id():
    """Return the global rank id from RANK_ID (default 0)."""
    raw = os.environ.get('RANK_ID', '0')
    return int(raw)


def get_job_id():
    """Return a constant job identifier for local runs."""
    return "Local Job"

+ 122
- 0
model_zoo/official/cv/lenet/utils/moxing_adapter.py View File

@@ -0,0 +1,122 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Moxing adapter for ModelArts"""

import os
import functools
from mindspore import context
from mindspore.profiler import Profiler
from .config import config

_global_sync_count = 0

def get_device_id():
    """Return the device id from DEVICE_ID (default 0)."""
    return int(os.environ.get('DEVICE_ID', '0'))


def get_device_num():
    """Return the device count from RANK_SIZE (default 1)."""
    return int(os.environ.get('RANK_SIZE', '1'))


def get_rank_id():
    """Return the global rank id from RANK_ID (default 0)."""
    return int(os.environ.get('RANK_ID', '0'))


def get_job_id():
    """Return the ModelArts job id from the JOB_ID env var.

    Falls back to "default" when JOB_ID is unset or empty.  The original
    compared only against "" so an unset variable returned None instead
    of the intended default.
    """
    job_id = os.getenv('JOB_ID')
    return job_id if job_id else "default"

def sync_data(from_path, to_path):
    """
    Copy a directory between OBS and the local filesystem via moxing.

    Direction follows the URLs: remote->local downloads, local->remote
    uploads.  One device per server performs the copy while the rest wait
    on a shared lock file.
    """
    # moxing exists only inside ModelArts, hence the lazy import.
    import moxing as mox
    import time
    global _global_sync_count
    # A fresh lock file per call keeps successive syncs independent.
    sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count)
    _global_sync_count += 1

    # Each server contains 8 devices as most.
    # device_id is per-server, so modulo min(device_num, 8) elects one
    # copier per machine.
    if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
        print("from path: ", from_path)
        print("to path: ", to_path)
        mox.file.copy_parallel(from_path, to_path)
        print("===finish data synchronization===")
        try:
            # Signal completion to waiting devices.
            os.mknod(sync_lock)
        except IOError:
            # Lock may already exist if another process beat us to it.
            pass
        print("===save flag===")

    # All devices block here until the completion flag appears.
    while True:
        if os.path.exists(sync_lock):
            break
        time.sleep(1)

    print("Finish sync data from {} to {}.".format(from_path, to_path))


def moxing_wrapper(pre_process=None, post_process=None):
    """
    Moxing wrapper to download dataset and upload outputs.

    Args:
        pre_process: Optional callable run after input sync, before run_func.
        post_process: Optional callable run after run_func, before output
            upload (ModelArts only).

    Returns:
        A decorator.  The wrapped function now propagates the return value
        of ``run_func`` — the original wrapper silently discarded it.
    """
    def wrapper(run_func):
        @functools.wraps(run_func)
        def wrapped_func(*args, **kwargs):
            # Download data from data_url
            if config.enable_modelarts:
                if config.data_url:
                    sync_data(config.data_url, config.data_path)
                    print("Dataset downloaded: ", os.listdir(config.data_path))
                if config.checkpoint_url:
                    sync_data(config.checkpoint_url, config.load_path)
                    print("Preload downloaded: ", os.listdir(config.load_path))
                if config.train_url:
                    sync_data(config.train_url, config.output_path)
                    print("Workspace downloaded: ", os.listdir(config.output_path))

                context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
                config.device_num = get_device_num()
                config.device_id = get_device_id()
                if not os.path.exists(config.output_path):
                    os.makedirs(config.output_path)

            if pre_process:
                pre_process()

            # None-initialised so the post-run check never sees an unbound
            # local even if enable_profiling flips during the run.
            profiler = Profiler() if config.enable_profiling else None

            result = run_func(*args, **kwargs)

            if profiler is not None:
                profiler.analyse()

            # Upload outputs to train_url (ModelArts only).
            if config.enable_modelarts:
                if post_process:
                    post_process()

                if config.train_url:
                    print("Start to copy output directory")
                    sync_data(config.output_path, config.train_url)
            return result
        return wrapped_func
    return wrapper

Loading…
Cancel
Save