| @@ -0,0 +1,115 @@ | |||
| # Inception-v3 Example | |||
| ## Description | |||
| This is an example of training Inception-v3 in MindSpore. | |||
| ## Requirements | |||
| - Install [Mindspore](http://www.mindspore.cn/install/en). | |||
| - Downlaod the dataset. | |||
| ## Structure | |||
| ```shell | |||
| . | |||
| └─Inception-v3 | |||
| ├─README.md | |||
| ├─scripts | |||
| ├─run_standalone_train_for_gpu.sh # launch standalone training with gpu platform(1p) | |||
| ├─run_distribute_train_for_gpu.sh # launch distributed training with gpu platform(8p) | |||
| └─run_eval_for_gpu.sh # launch evaluating with gpu platform | |||
| ├─src | |||
| ├─config.py # parameter configuration | |||
| ├─dataset.py # data preprocessing | |||
| ├─inception_v3.py # network definition | |||
| ├─loss.py # Customized CrossEntropy loss function | |||
| ├─lr_generator.py # learning rate generator | |||
| ├─eval.py # eval net | |||
| ├─export.py # convert checkpoint | |||
| └─train.py # train net | |||
| ``` | |||
| ## Parameter Configuration | |||
| Parameters for both training and evaluating can be set in config.py | |||
| ``` | |||
| 'random_seed': 1, # fix random seed | |||
| 'rank': 0, # local rank of distributed | |||
| 'group_size': 1, # world size of distributed | |||
| 'work_nums': 8, # number of workers to read the data | |||
| 'decay_method': 'cosine', # learning rate scheduler mode | |||
| "loss_scale": 1, # loss scale | |||
| 'batch_size': 128, # input batchsize | |||
| 'epoch_size': 250, # total epoch numbers | |||
| 'num_classes': 1000, # dataset class numbers | |||
| 'smooth_factor': 0.1, # label smoothing factor | |||
| 'aux_factor': 0.2, # loss factor of aux logit | |||
| 'lr_init': 0.00004, # initiate learning rate | |||
| 'lr_max': 0.4, # max bound of learning rate | |||
| 'lr_end': 0.000004, # min bound of learning rate | |||
| 'warmup_epochs': 1, # warmup epoch numbers | |||
| 'weight_decay': 0.00004, # weight decay | |||
| 'momentum': 0.9, # momentum | |||
| 'opt_eps': 1.0, # epsilon | |||
| 'keep_checkpoint_max': 100, # max numbers to keep checkpoints | |||
| 'ckpt_path': './checkpoint/', # save checkpoint path | |||
| 'is_save_on_master': 1 # save checkpoint on rank0, distributed parameters | |||
| ``` | |||
| ## Running the example | |||
| ### Train | |||
| #### Usage | |||
| ``` | |||
| # distribute training example(8p) | |||
| sh run_distribute_train_for_gpu.sh DATA_DIR | |||
| # standalone training | |||
| sh run_standalone_train_for_gpu.sh DEVICE_ID DATA_DIR | |||
| ``` | |||
| #### Launch | |||
| ```bash | |||
| # distributed training example(8p) for GPU | |||
| sh scripts/run_distribute_train_for_gpu.sh /dataset/train | |||
| # standalone training example for GPU | |||
| sh scripts/run_standalone_train_for_gpu.sh 0 /dataset/train | |||
| ``` | |||
| #### Result | |||
| You can find checkpoint file together with result in log. | |||
| ### Evaluation | |||
| #### Usage | |||
| ``` | |||
| # Evaluation | |||
| sh run_eval_for_gpu.sh DEVICE_ID DATA_DIR PATH_CHECKPOINT | |||
| ``` | |||
| #### Launch | |||
| ```bash | |||
| # Evaluation with checkpoint | |||
| sh scripts/run_eval_for_gpu.sh 0 /dataset/val ./checkpoint/inceptionv3-rank3-247_1251.ckpt | |||
| ``` | |||
| > checkpoint can be produced in training process. | |||
| #### Result | |||
| Evaluation result will be stored in the scripts path. Under this, you can find result like the followings in log. | |||
| ``` | |||
| acc=78.75%(TOP1) | |||
| acc=94.07%(TOP5) | |||
| ``` | |||
| @@ -0,0 +1,53 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """evaluate_imagenet""" | |||
| import argparse | |||
| import os | |||
| import mindspore.nn as nn | |||
| from mindspore import context | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from src.config import config_gpu as cfg | |||
| from src.dataset import create_dataset | |||
| from src.inception_v3 import InceptionV3 | |||
| from src.loss import CrossEntropy_Val | |||
| if __name__ == '__main__': | |||
| parser = argparse.ArgumentParser(description='image classification evaluation') | |||
| parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of inception-v3 (Default: None)') | |||
| parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') | |||
| parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform') | |||
| args_opt = parser.parse_args() | |||
| if args_opt.platform == 'Ascend': | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| context.set_context(device_id=device_id) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.platform) | |||
| net = InceptionV3(num_classes=cfg.num_classes, is_training=False) | |||
| ckpt = load_checkpoint(args_opt.checkpoint) | |||
| load_param_into_net(net, ckpt) | |||
| net.set_train(False) | |||
| dataset = create_dataset(args_opt.dataset_path, False, 0, 1) | |||
| loss = CrossEntropy_Val(smooth_factor=0.1, num_classes=cfg.num_classes) | |||
| eval_metrics = {'Loss': nn.Loss(), | |||
| 'Top1-Acc': nn.Top1CategoricalAccuracy(), | |||
| 'Top5-Acc': nn.Top5CategoricalAccuracy()} | |||
| model = Model(net, loss, optimizer=None, metrics=eval_metrics) | |||
| metrics = model.eval(dataset) | |||
| print("metric: ", metrics) | |||
| @@ -0,0 +1,40 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| ##############export checkpoint file into geir and onnx models################# | |||
| """ | |||
| import argparse | |||
| import numpy as np | |||
| import mindspore as ms | |||
| from mindspore import Tensor | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net, export | |||
| from src.config import config_gpu as cfg | |||
| from src.inception_v3 import InceptionV3 | |||
| if __name__ == '__main__': | |||
| parser = argparse.ArgumentParser(description='checkpoint export') | |||
| parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of inception-v3 (Default: None)') | |||
| args_opt = parser.parse_args() | |||
| net = InceptionV3(num_classes=cfg.num_classes, is_training=False) | |||
| param_dict = load_checkpoint(args_opt.checkpoint) | |||
| load_param_into_net(net, param_dict) | |||
| input_arr = Tensor(np.random.uniform(0.0, 1.0, size=[1, 3, 299, 299]), ms.float32) | |||
| export(net, input_arr, file_name=cfg.onnx_filename, file_format="ONNX") | |||
| export(net, input_arr, file_name=cfg.geir_filename, file_format="GEIR") | |||
| @@ -0,0 +1,17 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| DATA_DIR=$1 | |||
| mpirun --allow-run-as-root -n 8 python ./train.py --is_distributed --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 & | |||
| @@ -0,0 +1,19 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| DEVICE_ID=$1 | |||
| DATA_DIR=$2 | |||
| PATH_CHECKPOINT=$3 | |||
| CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./eval.py --platform 'GPU' --dataset_path $DATA_DIR --checkpoint $PATH_CHECKPOINT > eval.log 2>&1 & | |||
| @@ -0,0 +1,19 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| DEVICE_ID=$1 | |||
| DATA_DIR=$2 | |||
| CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./train.py --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 & | |||
| @@ -0,0 +1,43 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting, will be used in main.py | |||
| """ | |||
| from easydict import EasyDict as edict | |||
| config_gpu = edict({ | |||
| 'random_seed': 1, | |||
| 'rank': 0, | |||
| 'group_size': 1, | |||
| 'work_nums': 8, | |||
| 'decay_method': 'cosine', | |||
| "loss_scale": 1, | |||
| 'batch_size': 128, | |||
| 'epoch_size': 250, | |||
| 'num_classes': 1000, | |||
| 'smooth_factor': 0.1, | |||
| 'aux_factor': 0.2, | |||
| 'lr_init': 0.00004, | |||
| 'lr_max': 0.4, | |||
| 'lr_end': 0.000004, | |||
| 'warmup_epochs': 1, | |||
| 'weight_decay': 0.00004, | |||
| 'momentum': 0.9, | |||
| 'opt_eps': 1.0, | |||
| 'keep_checkpoint_max': 100, | |||
| 'ckpt_path': './checkpoint/', | |||
| 'is_save_on_master': 0 | |||
| }) | |||
| @@ -0,0 +1,69 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Data operations, will be used in train.py and eval.py | |||
| """ | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.dataset.transforms.c_transforms as C2 | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| from src.config import config_gpu as cfg | |||
| def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1): | |||
| """ | |||
| create a train or eval dataset | |||
| Args: | |||
| dataset_path(string): the path of dataset. | |||
| do_train(bool): whether dataset is used for train or eval. | |||
| rank (int): The shard ID within num_shards (default=None). | |||
| group_size (int): Number of shards that the dataset should be divided into (default=None). | |||
| repeat_num(int): the repeat times of dataset. Default: 1. | |||
| Returns: | |||
| dataset | |||
| """ | |||
| if group_size == 1: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True, | |||
| num_shards=group_size, shard_id=rank) | |||
| # define map operations | |||
| if do_train: | |||
| trans = [ | |||
| C.RandomCropDecodeResize(299, scale=(0.08, 1.0), ratio=(0.75, 1.333)), | |||
| C.RandomHorizontalFlip(prob=0.5), | |||
| C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4) | |||
| ] | |||
| else: | |||
| trans = [ | |||
| C.Decode(), | |||
| C.Resize(299), | |||
| C.CenterCrop(299) | |||
| ] | |||
| trans += [ | |||
| C.Rescale(1.0 / 255.0, 0.0), | |||
| C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), | |||
| C.HWC2CHW() | |||
| ] | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums) | |||
| # apply batch operations | |||
| ds = ds.batch(cfg.batch_size, drop_remainder=True) | |||
| # apply dataset repeat operation | |||
| ds = ds.repeat(repeat_num) | |||
| return ds | |||
| @@ -0,0 +1,257 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Inception-v3 model definition""" | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from mindspore.common.initializer import XavierUniform | |||
| class BasicConv2d(nn.Cell): | |||
| def __init__(self, in_channel, out_channel, kernel_size, stride=1, pad_mode='same', padding=0): | |||
| super(BasicConv2d, self).__init__() | |||
| self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size, stride=stride, | |||
| pad_mode=pad_mode, padding=padding, weight_init=XavierUniform(), has_bias=True) | |||
| self.bn = nn.BatchNorm2d(out_channel, eps=0.001, momentum=0.9997) | |||
| self.relu = nn.ReLU() | |||
| def construct(self, x): | |||
| x = self.conv(x) | |||
| x = self.bn(x) | |||
| x = self.relu(x) | |||
| return x | |||
| class Inception_A(nn.Cell): | |||
| def __init__(self, in_channels, pool_features): | |||
| super(Inception_A, self).__init__() | |||
| self.concat = P.Concat(axis=1) | |||
| self.branch0 = BasicConv2d(in_channels, 64, kernel_size=1) | |||
| self.branch1 = nn.SequentialCell([ | |||
| BasicConv2d(in_channels, 48, kernel_size=1), | |||
| BasicConv2d(48, 64, kernel_size=5) | |||
| ]) | |||
| self.branch2 = nn.SequentialCell([ | |||
| BasicConv2d(in_channels, 64, kernel_size=1), | |||
| BasicConv2d(64, 96, kernel_size=3), | |||
| BasicConv2d(96, 96, kernel_size=3) | |||
| ]) | |||
| self.branch_pool = nn.SequentialCell([ | |||
| nn.AvgPool2d(kernel_size=3, pad_mode='same'), | |||
| BasicConv2d(in_channels, pool_features, kernel_size=1) | |||
| ]) | |||
| def construct(self, x): | |||
| x0 = self.branch0(x) | |||
| x1 = self.branch1(x) | |||
| x2 = self.branch2(x) | |||
| branch_pool = self.branch_pool(x) | |||
| out = self.concat((x0, x1, x2, branch_pool)) | |||
| return out | |||
| class Inception_B(nn.Cell): | |||
| def __init__(self, in_channels): | |||
| super(Inception_B, self).__init__() | |||
| self.concat = P.Concat(axis=1) | |||
| self.branch0 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2, pad_mode='valid') | |||
| self.branch1 = nn.SequentialCell([ | |||
| BasicConv2d(in_channels, 64, kernel_size=1), | |||
| BasicConv2d(64, 96, kernel_size=3), | |||
| BasicConv2d(96, 96, kernel_size=3, stride=2, pad_mode='valid') | |||
| ]) | |||
| self.branch_pool = nn.MaxPool2d(kernel_size=3, stride=2) | |||
| def construct(self, x): | |||
| x0 = self.branch0(x) | |||
| x1 = self.branch1(x) | |||
| branch_pool = self.branch_pool(x) | |||
| out = self.concat((x0, x1, branch_pool)) | |||
| return out | |||
| class Inception_C(nn.Cell): | |||
| def __init__(self, in_channels, channels_7x7): | |||
| super(Inception_C, self).__init__() | |||
| self.concat = P.Concat(axis=1) | |||
| self.branch0 = BasicConv2d(in_channels, 192, kernel_size=1) | |||
| self.branch1 = nn.SequentialCell([ | |||
| BasicConv2d(in_channels, channels_7x7, kernel_size=1), | |||
| BasicConv2d(channels_7x7, channels_7x7, kernel_size=(1, 7)), | |||
| BasicConv2d(channels_7x7, 192, kernel_size=(7, 1)) | |||
| ]) | |||
| self.branch2 = nn.SequentialCell([ | |||
| BasicConv2d(in_channels, channels_7x7, kernel_size=1), | |||
| BasicConv2d(channels_7x7, channels_7x7, kernel_size=(7, 1)), | |||
| BasicConv2d(channels_7x7, channels_7x7, kernel_size=(1, 7)), | |||
| BasicConv2d(channels_7x7, channels_7x7, kernel_size=(7, 1)), | |||
| BasicConv2d(channels_7x7, 192, kernel_size=(1, 7)) | |||
| ]) | |||
| self.branch_pool = nn.SequentialCell([ | |||
| nn.AvgPool2d(kernel_size=3, pad_mode='same'), | |||
| BasicConv2d(in_channels, 192, kernel_size=1) | |||
| ]) | |||
| def construct(self, x): | |||
| x0 = self.branch0(x) | |||
| x1 = self.branch1(x) | |||
| x2 = self.branch2(x) | |||
| branch_pool = self.branch_pool(x) | |||
| out = self.concat((x0, x1, x2, branch_pool)) | |||
| return out | |||
| class Inception_D(nn.Cell): | |||
| def __init__(self, in_channels): | |||
| super(Inception_D, self).__init__() | |||
| self.concat = P.Concat(axis=1) | |||
| self.branch0 = nn.SequentialCell([ | |||
| BasicConv2d(in_channels, 192, kernel_size=1), | |||
| BasicConv2d(192, 320, kernel_size=3, stride=2, pad_mode='valid') | |||
| ]) | |||
| self.branch1 = nn.SequentialCell([ | |||
| BasicConv2d(in_channels, 192, kernel_size=1), | |||
| BasicConv2d(192, 192, kernel_size=(1, 7)), # check | |||
| BasicConv2d(192, 192, kernel_size=(7, 1)), | |||
| BasicConv2d(192, 192, kernel_size=3, stride=2, pad_mode='valid') | |||
| ]) | |||
| self.branch_pool = nn.MaxPool2d(kernel_size=3, stride=2) | |||
| def construct(self, x): | |||
| x0 = self.branch0(x) | |||
| x1 = self.branch1(x) | |||
| branch_pool = self.branch_pool(x) | |||
| out = self.concat((x0, x1, branch_pool)) | |||
| return out | |||
| class Inception_E(nn.Cell): | |||
| def __init__(self, in_channels): | |||
| super(Inception_E, self).__init__() | |||
| self.concat = P.Concat(axis=1) | |||
| self.branch0 = BasicConv2d(in_channels, 320, kernel_size=1) | |||
| self.branch1 = BasicConv2d(in_channels, 384, kernel_size=1) | |||
| self.branch1_a = BasicConv2d(384, 384, kernel_size=(1, 3)) | |||
| self.branch1_b = BasicConv2d(384, 384, kernel_size=(3, 1)) | |||
| self.branch2 = nn.SequentialCell([ | |||
| BasicConv2d(in_channels, 448, kernel_size=1), | |||
| BasicConv2d(448, 384, kernel_size=3) | |||
| ]) | |||
| self.branch2_a = BasicConv2d(384, 384, kernel_size=(1, 3)) | |||
| self.branch2_b = BasicConv2d(384, 384, kernel_size=(3, 1)) | |||
| self.branch_pool = nn.SequentialCell([ | |||
| nn.AvgPool2d(kernel_size=3, pad_mode='same'), | |||
| BasicConv2d(in_channels, 192, kernel_size=1) | |||
| ]) | |||
| def construct(self, x): | |||
| x0 = self.branch0(x) | |||
| x1 = self.branch1(x) | |||
| x1 = self.concat((self.branch1_a(x1), self.branch1_b(x1))) | |||
| x2 = self.branch2(x) | |||
| x2 = self.concat((self.branch2_a(x2), self.branch2_b(x2))) | |||
| branch_pool = self.branch_pool(x) | |||
| out = self.concat((x0, x1, x2, branch_pool)) | |||
| return out | |||
| class Logits(nn.Cell): | |||
| def __init__(self, num_classes=10, dropout_keep_prob=0.8): | |||
| super(Logits, self).__init__() | |||
| self.avg_pool = nn.AvgPool2d(8, pad_mode='valid') | |||
| self.dropout = nn.Dropout(keep_prob=dropout_keep_prob) | |||
| self.flatten = P.Flatten() | |||
| self.fc = nn.Dense(2048, num_classes) | |||
| def construct(self, x): | |||
| x = self.avg_pool(x) | |||
| x = self.dropout(x) | |||
| x = self.flatten(x) | |||
| x = self.fc(x) | |||
| return x | |||
| class AuxLogits(nn.Cell): | |||
| def __init__(self, in_channels, num_classes=10): | |||
| super(AuxLogits, self).__init__() | |||
| self.avg_pool = nn.AvgPool2d(5, stride=3, pad_mode='valid') | |||
| self.conv2d_0 = nn.Conv2d(in_channels, 128, kernel_size=1) | |||
| self.conv2d_1 = nn.Conv2d(128, 768, kernel_size=5, pad_mode='valid') | |||
| self.flatten = P.Flatten() | |||
| self.fc = nn.Dense(in_channels, num_classes) | |||
| def construct(self, x): | |||
| x = self.avg_pool(x) | |||
| x = self.conv2d_0(x) | |||
| x = self.conv2d_1(x) | |||
| x = self.flatten(x) | |||
| x = self.fc(x) | |||
| return x | |||
| class InceptionV3(nn.Cell): | |||
| def __init__(self, num_classes=10, is_training=True): | |||
| super(InceptionV3, self).__init__() | |||
| self.is_training = is_training | |||
| self.Conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2, pad_mode='valid') | |||
| self.Conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1, pad_mode='valid') | |||
| self.Conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1) | |||
| self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2) | |||
| self.Conv2d_3b = BasicConv2d(64, 80, kernel_size=1) | |||
| self.Conv2d_4a = BasicConv2d(80, 192, kernel_size=3, pad_mode='valid') | |||
| self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2) | |||
| self.Mixed_5b = Inception_A(192, pool_features=32) | |||
| self.Mixed_5c = Inception_A(256, pool_features=64) | |||
| self.Mixed_5d = Inception_A(288, pool_features=64) | |||
| self.Mixed_6a = Inception_B(288) | |||
| self.Mixed_6b = Inception_C(768, channels_7x7=128) | |||
| self.Mixed_6c = Inception_C(768, channels_7x7=160) | |||
| self.Mixed_6d = Inception_C(768, channels_7x7=160) | |||
| self.Mixed_6e = Inception_C(768, channels_7x7=192) | |||
| self.Mixed_7a = Inception_D(768) | |||
| self.Mixed_7b = Inception_E(1280) | |||
| self.Mixed_7c = Inception_E(2048) | |||
| if is_training: | |||
| self.aux_logits = AuxLogits(768, num_classes) | |||
| self.logits = Logits(num_classes, dropout_keep_prob=0.5) | |||
| def construct(self, x): | |||
| x = self.Conv2d_1a(x) | |||
| x = self.Conv2d_2a(x) | |||
| x = self.Conv2d_2b(x) | |||
| x = self.maxpool1(x) | |||
| x = self.Conv2d_3b(x) | |||
| x = self.Conv2d_4a(x) | |||
| x = self.maxpool2(x) | |||
| x = self.Mixed_5b(x) | |||
| x = self.Mixed_5c(x) | |||
| x = self.Mixed_5d(x) | |||
| x = self.Mixed_6a(x) | |||
| x = self.Mixed_6b(x) | |||
| x = self.Mixed_6c(x) | |||
| x = self.Mixed_6d(x) | |||
| x = self.Mixed_6e(x) | |||
| if self.is_training: | |||
| aux_logits = self.aux_logits(x) | |||
| else: | |||
| aux_logits = None | |||
| x = self.Mixed_7a(x) | |||
| x = self.Mixed_7b(x) | |||
| x = self.Mixed_7c(x) | |||
| logits = self.logits(x) | |||
| if self.is_training: | |||
| return logits, aux_logits | |||
| return logits | |||
| @@ -0,0 +1,60 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """define loss function for network.""" | |||
| from mindspore.nn.loss.loss import _Loss | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import functional as F | |||
| from mindspore import Tensor | |||
| from mindspore.common import dtype as mstype | |||
| import mindspore.nn as nn | |||
| class CrossEntropy(_Loss): | |||
| """the redefined loss function with SoftmaxCrossEntropyWithLogits""" | |||
| def __init__(self, smooth_factor=0, num_classes=1000, factor=0.4): | |||
| super(CrossEntropy, self).__init__() | |||
| self.factor = factor | |||
| self.onehot = P.OneHot() | |||
| self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) | |||
| self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) | |||
| self.ce = nn.SoftmaxCrossEntropyWithLogits() | |||
| self.mean = P.ReduceMean(False) | |||
| def construct(self, logits, label): | |||
| logit, aux = logits | |||
| one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) | |||
| loss_logit = self.ce(logit, one_hot_label) | |||
| loss_logit = self.mean(loss_logit, 0) | |||
| one_hot_label_aux = self.onehot(label, F.shape(aux)[1], self.on_value, self.off_value) | |||
| loss_aux = self.ce(aux, one_hot_label_aux) | |||
| loss_aux = self.mean(loss_aux, 0) | |||
| return loss_logit + self.factor*loss_aux | |||
| class CrossEntropy_Val(_Loss): | |||
| """the redefined loss function with SoftmaxCrossEntropyWithLogits, will be used in inference process""" | |||
| def __init__(self, smooth_factor=0, num_classes=1000): | |||
| super(CrossEntropy_Val, self).__init__() | |||
| self.onehot = P.OneHot() | |||
| self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) | |||
| self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) | |||
| self.ce = nn.SoftmaxCrossEntropyWithLogits() | |||
| self.mean = P.ReduceMean(False) | |||
| def construct(self, logits, label): | |||
| one_hot_label = self.onehot(label, F.shape(logits)[1], self.on_value, self.off_value) | |||
| loss_logit = self.ce(logits, one_hot_label) | |||
| loss_logit = self.mean(loss_logit, 0) | |||
| return loss_logit | |||
| @@ -0,0 +1,87 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """learning rate generator""" | |||
| import math | |||
| import numpy as np | |||
| def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode): | |||
| """ | |||
| generate learning rate array | |||
| Args: | |||
| lr_init(float): init learning rate | |||
| lr_end(float): end learning rate | |||
| lr_max(float): max learning rate | |||
| warmup_epochs(int): number of warmup epochs | |||
| total_epochs(int): total epoch of training | |||
| steps_per_epoch(int): steps of one epoch | |||
| lr_decay_mode(string): learning rate decay mode, including steps, poly, cosine or default | |||
| Returns: | |||
| np.array, learning rate array | |||
| """ | |||
| lr_each_step = [] | |||
| total_steps = steps_per_epoch * total_epochs | |||
| warmup_steps = steps_per_epoch * warmup_epochs | |||
| if lr_decay_mode == 'steps': | |||
| decay_epoch_index = [0.3 * total_steps, 0.6 * total_steps, 0.8 * total_steps] | |||
| for i in range(total_steps): | |||
| if i < warmup_steps: | |||
| lr = lr_init + (lr_max - lr_init) * i / warmup_steps | |||
| else: | |||
| if i < decay_epoch_index[0]: | |||
| lr = lr_max | |||
| elif i < decay_epoch_index[1]: | |||
| lr = lr_max * 0.1 | |||
| elif i < decay_epoch_index[2]: | |||
| lr = lr_max * 0.01 | |||
| else: | |||
| lr = lr_max * 0.001 | |||
| lr_each_step.append(lr) | |||
| elif lr_decay_mode == 'steps_decay': | |||
| if warmup_steps != 0: | |||
| inc_each_step = (float(lr_max) - float(lr_init)) / float(warmup_steps) | |||
| else: | |||
| inc_each_step = 0 | |||
| for i in range(total_steps): | |||
| if i < warmup_steps: | |||
| lr = float(lr_init) + inc_each_step * float(i) | |||
| else: | |||
| decay_nums = math.floor((float(i-warmup_steps)/steps_per_epoch) / 2) | |||
| decay_rate = pow(0.94, decay_nums) | |||
| lr = float(lr_max)*decay_rate | |||
| if lr < 0.0: | |||
| lr = 0.0 | |||
| lr_each_step.append(lr) | |||
| elif lr_decay_mode == 'cosine': | |||
| decay_steps = total_steps - warmup_steps | |||
| for i in range(total_steps): | |||
| if i < warmup_steps: | |||
| lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps) | |||
| lr = float(lr_init) + lr_inc * (i + 1) | |||
| else: | |||
| cosine_decay = 0.5 * (1 + math.cos(math.pi * (i-warmup_steps) / decay_steps)) | |||
| lr = (lr_max-lr_end)*cosine_decay + lr_end | |||
| lr_each_step.append(lr) | |||
| else: | |||
| for i in range(total_steps): | |||
| if i < warmup_steps: | |||
| lr = lr_init + (lr_max - lr_init) * i / warmup_steps | |||
| else: | |||
| lr = lr_max - (lr_max - lr_end) * (i - warmup_steps) / (total_steps - warmup_steps) | |||
| lr_each_step.append(lr) | |||
| learning_rate = np.array(lr_each_step).astype(np.float32) | |||
| return learning_rate | |||
| @@ -0,0 +1,116 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train_imagenet.""" | |||
| import argparse | |||
| import os | |||
| import random | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore import context | |||
| from mindspore import ParallelMode | |||
| from mindspore.communication.management import init, get_rank, get_group_size | |||
| from mindspore.nn.optim.rmsprop import RMSProp | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore import dataset as de | |||
| from src.config import config_gpu as cfg | |||
| from src.dataset import create_dataset | |||
| from src.inception_v3 import InceptionV3 | |||
| from src.lr_generator import get_lr | |||
| from src.loss import CrossEntropy | |||
| random.seed(cfg.random_seed) | |||
| np.random.seed(cfg.random_seed) | |||
| de.config.set_seed(cfg.random_seed) | |||
| if __name__ == '__main__': | |||
| parser = argparse.ArgumentParser(description='image classification training') | |||
| parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') | |||
| parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint') | |||
| parser.add_argument('--is_distributed', action='store_true', default=False, | |||
| help='distributed training') | |||
| parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform') | |||
| args_opt = parser.parse_args() | |||
| context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.platform, save_graphs=False) | |||
| if os.getenv('DEVICE_ID', "not_set").isdigit(): | |||
| context.set_context(device_id=int(os.getenv('DEVICE_ID'))) | |||
| # init distributed | |||
| if args_opt.is_distributed: | |||
| if args_opt.platform == "Ascend": | |||
| init() | |||
| else: | |||
| init("nccl") | |||
| cfg.rank = get_rank() | |||
| cfg.group_size = get_group_size() | |||
| parallel_mode = ParallelMode.DATA_PARALLEL | |||
| context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=cfg.group_size, | |||
| parameter_broadcast=True, mirror_mean=True) | |||
| else: | |||
| cfg.rank = 0 | |||
| cfg.group_size = 1 | |||
| # dataloader | |||
| dataset = create_dataset(args_opt.dataset_path, True, cfg.rank, cfg.group_size) | |||
| batches_per_epoch = dataset.get_dataset_size() | |||
| # network | |||
| net = InceptionV3(num_classes=cfg.num_classes) | |||
| # loss | |||
| loss = CrossEntropy(smooth_factor=cfg.smooth_factor, num_classes=cfg.num_classes, factor=cfg.aux_factor) | |||
| # learning rate schedule | |||
| lr = get_lr(lr_init=cfg.lr_init, lr_end=cfg.lr_end, lr_max=cfg.lr_max, warmup_epochs=cfg.warmup_epochs, | |||
| total_epochs=cfg.epoch_size, steps_per_epoch=batches_per_epoch, lr_decay_mode=cfg.decay_method) | |||
| lr = Tensor(lr) | |||
| # optimizer | |||
| decayed_params = list(filter(lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias' not in x.name, net.trainable_params())) | |||
| no_decayed_params = [param for param in net.trainable_params() if param not in decayed_params] | |||
| group_params = [{'params': decayed_params, 'weight_decay': cfg.weight_decay}, | |||
| {'params': no_decayed_params}, | |||
| {'order_params': net.trainable_params()}] | |||
| optimizer = RMSProp(group_params, lr, decay=0.9, weight_decay=cfg.weight_decay, | |||
| momentum=cfg.momentum, epsilon=cfg.opt_eps, loss_scale=cfg.loss_scale) | |||
| eval_metrics = {'Loss': nn.Loss(), | |||
| 'Top1-Acc': nn.Top1CategoricalAccuracy(), | |||
| 'Top5-Acc': nn.Top5CategoricalAccuracy()} | |||
| if args_opt.resume: | |||
| ckpt = load_checkpoint(args_opt.resume) | |||
| load_param_into_net(net, ckpt) | |||
| model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={'acc'}) | |||
| print("============== Starting Training ==============") | |||
| loss_cb = LossMonitor(per_print_times=batches_per_epoch) | |||
| time_cb = TimeMonitor(data_size=batches_per_epoch) | |||
| callbacks = [loss_cb, time_cb] | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=batches_per_epoch, keep_checkpoint_max=cfg.keep_checkpoint_max) | |||
| ckpoint_cb = ModelCheckpoint(prefix=f"inceptionv3-rank{cfg.rank}", directory=cfg.ckpt_path, config=config_ck) | |||
| if args_opt.is_distributed & cfg.is_save_on_master: | |||
| if cfg.rank == 0: | |||
| callbacks.append(ckpoint_cb) | |||
| model.train(cfg.epoch_size, dataset, callbacks=callbacks, dataset_sink_mode=True) | |||
| else: | |||
| callbacks.append(ckpoint_cb) | |||
| model.train(cfg.epoch_size, dataset, callbacks=callbacks, dataset_sink_mode=True) | |||
| print("train success") | |||