| @@ -9,19 +9,11 @@ If there are Chinese comments in the code,please add at the beginning: | |||
| 数据集结构是: | |||
| MnistDataset_torch.zip | |||
| ├── test | |||
| │ ├── MNIST/processed/test.pt | |||
| │ └── MNIST/processed/training.pt | |||
| │ ├── MNIST/raw/train-images-idx3-ubyte | |||
| │ └── MNIST/raw/train-labels-idx1-ubyte | |||
| │ ├── MNIST/raw/t10k-images-idx3-ubyte | |||
| │ └── MNIST/raw/t10k-labels-idx1-ubyte | |||
| ├── train | |||
| │ ├── MNIST/processed/test.pt | |||
| │ └── MNIST/processed/training.pt | |||
| │ ├── MNIST/raw/train-images-idx3-ubyte | |||
| │ └── MNIST/raw/train-labels-idx1-ubyte | |||
| │ ├── MNIST/raw/t10k-images-idx3-ubyte | |||
| │ └── MNIST/raw/t10k-labels-idx1-ubyte | |||
| └── train | |||
| 预训练模型文件夹结构是: | |||
| Torch_MNIST_Example_Model | |||
| ├── mnist_epoch1_0.76.pkl | |||
| ''' | |||
| @@ -5,11 +5,16 @@ If there are Chinese comments in the code,please add at the beginning: | |||
| #!/usr/bin/python | |||
| #coding=utf-8 | |||
| 1. The structure of the single dataset used in this example | |||
| 示例选用的数据集是MnistDataset_torch.zip | |||
| 数据集结构是: | |||
| MnistDataset_torch.zip | |||
| ├── test | |||
| └── train | |||
| 预训练模型文件夹结构是: | |||
| Torch_MNIST_Example_Model | |||
| ├── mnist_epoch1_0.76.pkl | |||
| ''' | |||
| from model import Model | |||
| import numpy as np | |||
| @@ -5,11 +5,16 @@ If there are Chinese comments in the code,please add at the beginning: | |||
| #!/usr/bin/python | |||
| #coding=utf-8 | |||
| 1. The structure of the single dataset used in this example | |||
| 示例选用的数据集是MnistDataset_torch.zip | |||
| 数据集结构是: | |||
| MnistDataset_torch.zip | |||
| ├── test | |||
| └── train | |||
| 预训练模型文件夹结构是: | |||
| Torch_MNIST_Example_Model | |||
| ├── mnist_epoch1_0.76.pkl | |||
| ''' | |||
| from model import Model | |||
| import numpy as np | |||
| @@ -5,11 +5,15 @@ If there are Chinese comments in the code,please add at the beginning: | |||
| #!/usr/bin/python | |||
| #coding=utf-8 | |||
| 1. The structure of the single dataset used in this example | |||
| 数据集结构是: | |||
| MnistDataset_torch.zip | |||
| ├── test | |||
| └── train | |||
| 预训练模型文件夹结构是: | |||
| Torch_MNIST_Example_Model | |||
| ├── mnist_epoch1_0.76.pkl | |||
| ''' | |||
| @@ -85,6 +85,7 @@ upload_output() | |||
| - 训练任务单卡示例请参考示例中[train.py](./train.py)的代码注释 | |||
| - 训练任务多卡示例请参考示例中[train_multi_card.py](./train_multi_card.py)的代码注释 | |||
| - 训练任务在每个epoch结束后就上传文件,可参考[train_epoch_upload.py](./train_epoch_upload.py)的代码注释 | |||
| - 推理任务示例请参考示例中[inference.py](./inference.py)的代码注释 | |||
| - 启智集群训练任务已经将ImageNet-1k数据集挂载到训练镜像,具体使用方法请参考示例中[read_imagenet.py](./read_imagenet.py)的代码注释 | |||
| - 继续训练示例参考示例中[train_continue.py](./train_continue.py)的代码注释 | |||
| @@ -39,9 +39,6 @@ import mindspore.dataset.vision.c_transforms as transforms | |||
| from c2net.context import upload_output | |||
| parser = argparse.ArgumentParser(description='Read big dataset ImageNet Example') | |||
| parser.add_argument('--train_url', | |||
| help='output folder to save/load', | |||
| default= '/cache/output/') | |||
| if __name__ == "__main__": | |||
| args, unknown = parser.parse_known_args() | |||
| @@ -11,6 +11,10 @@ | |||
| ├── train-images-idx3-ubyte | |||
| └── train-labels-idx1-ubyte | |||
| 模型文件夹结构是: | |||
| Mindspore_MNIST_Example_Model | |||
| ├── checkpoint_lenet-1_1875.ckpt | |||
| 使用注意事项: | |||
| 1、在代码中加入args, unknown = parser.parse_known_args(),可忽略掉--ckpt_url参数报错等参数问题 | |||
| 2、用户需要调用c2net的python sdk包 | |||
| @@ -0,0 +1,115 @@ | |||
| """ | |||
| 示例选用的数据集是MnistDataset_mindspore.zip | |||
| 数据集结构是: | |||
| MnistDataset_mindspore.zip | |||
| ├── test | |||
| │ ├── t10k-images-idx3-ubyte | |||
| │ └── t10k-labels-idx1-ubyte | |||
| └── train | |||
| ├── train-images-idx3-ubyte | |||
| └── train-labels-idx1-ubyte | |||
| 模型文件夹结构是: | |||
| Mindspore_MNIST_Example_Model | |||
| ├── checkpoint_lenet-1_1875.ckpt | |||
| 使用注意事项: | |||
| 1、在代码中加入args, unknown = parser.parse_known_args(),可忽略掉--ckpt_url参数报错等参数问题 | |||
| 2、用户需要调用c2net的python sdk包 | |||
| """ | |||
| import os | |||
| import argparse | |||
| from config import mnist_cfg as cfg | |||
| from dataset import create_dataset | |||
| from lenet import LeNet5 | |||
| import mindspore.nn as nn | |||
| from mindspore import context | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||
| from mindspore import load_checkpoint, load_param_into_net | |||
| from mindspore.train import Model | |||
| from mindspore.train.callback import Callback | |||
| #导入c2net包 | |||
| from c2net.context import prepare, upload_output | |||
class EnvToOpenIEpochEnd(Callback):
    """Training callback that uploads the output to OpenI at the end of each epoch."""

    def epoch_end(self, run_context):
        """Trigger an upload of the training output once an epoch has finished.

        Args:
            run_context: Context object handed over by the training loop;
                it is not inspected here.
        """
        upload_output()
# Command-line interface of the LeNet training example.
parser = argparse.ArgumentParser(description='MindSpore Lenet Example')

# Execution backend; only the two listed choices are accepted.
parser.add_argument(
    '--device_target',
    choices=['Ascend', 'CPU'],
    default="Ascend",
    type=str,
    help=('device where the code will be implemented (default: Ascend),'
          'if to use the CPU on the Qizhi platform:device_target=CPU'),
)

# Number of training epochs.
parser.add_argument('--epoch_size', type=int, default=5, help='Training epochs.')
if __name__ == "__main__":
    # parse_known_args() is used on purpose: unrecognised options injected by
    # the platform (e.g. --ckpt_url) are collected in `unknown` instead of
    # aborting the run.
    args, unknown = parser.parse_known_args()

    # Initialise c2net so the dataset and the pretrained model are brought
    # into the container.
    c2net_context = prepare()
    # Dataset location.
    MnistDataset_mindspore_path = c2net_context.dataset_path+"/"+"MnistDataset_mindspore"
    # Pretrained-model location.
    Mindspore_MNIST_Example_Model_path = c2net_context.pretrain_model_path+"/"+"Mindspore_MNIST_Example_Model"
    # Output location.
    output_path = c2net_context.output_path

    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)

    # Build the training dataset from the "train" sub-folder.
    ds_train = create_dataset(os.path.join(MnistDataset_mindspore_path, "train"), cfg.batch_size)

    network = LeNet5(cfg.num_classes)
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
    time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())

    # Load the pretrained checkpoint into the network before training.
    load_param_into_net(
        network,
        load_checkpoint(os.path.join(Mindspore_MNIST_Example_Model_path,
                                     "checkpoint_lenet-1_1875.ckpt")))

    # amp_level="O2" is only passed when running on Ascend; every other
    # target gets the Model defaults.
    model_kwargs = {"metrics": {"accuracy"}}
    if args.device_target == "Ascend":
        model_kwargs["amp_level"] = "O2"
    model = Model(network, net_loss, net_opt, **model_kwargs)

    config_ck = CheckpointConfig(
        save_checkpoint_steps=cfg.save_checkpoint_steps,
        keep_checkpoint_max=cfg.keep_checkpoint_max)
    # Checkpoints are written to c2net_context.output_path.
    outputDirectory = output_path + "/"
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
                                 directory=outputDirectory,
                                 config=config_ck)

    print("============== Starting Training ==============")
    # A truthy --epoch_size overrides the value from the config.
    epoch_size = cfg['epoch_size']
    if (args.epoch_size):
        epoch_size = args.epoch_size
    print('epoch_size is: ', epoch_size)

    # set callback functions
    callback = [time_cb, LossMonitor()]

    # RANK_ID may be absent (or empty) when the script is not started by a
    # distributed launcher, e.g. a single-process CPU run; int(None) would
    # raise TypeError, so fall back to rank 0 in that case.
    local_rank = int(os.getenv('RANK_ID') or 0)

    # Optional: manually upload the training results to the OpenI platform
    # after every epoch. This uses a lot of memory, so enable it only for
    # jobs that really need a manual upload.
    uploadOutput = EnvToOpenIEpochEnd()
    callback.append(uploadOutput)

    # for data parallel, only save checkpoint on rank 0
    if local_rank == 0:
        callback.append(ckpoint_cb)

    model.train(epoch_size, ds_train, callbacks=callback)
| @@ -10,7 +10,7 @@ | |||
| └── train | |||
| ├── train-images-idx3-ubyte | |||
| └── train-labels-idx1-ubyte | |||
| 使用注意事项: | |||
| 1、在代码中加入args, unknown = parser.parse_known_args(),可忽略掉--ckpt_url参数报错等参数问题 | |||
| 2、用户需要调用c2net的python sdk包 | |||
| @@ -19,13 +19,11 @@ | |||
| import os | |||
| import argparse | |||
| from config import mnist_cfg as cfg | |||
| from dataset import create_dataset | |||
| from dataset_distributed import create_dataset_parallel | |||
| from lenet import LeNet5 | |||
| import mindspore.nn as nn | |||
| from mindspore import context | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||
| from mindspore import load_checkpoint, load_param_into_net | |||
| from mindspore.train import Model | |||
| from mindspore.context import ParallelMode | |||
| from mindspore.communication.management import init, get_rank | |||
| @@ -64,15 +62,13 @@ if __name__ == "__main__": | |||
| c2net_context = prepare() | |||
| #获取数据集路径 | |||
| MnistDataset_mindspore_path = c2net_context.dataset_path+"/"+"MnistDataset_mindspore" | |||
| #获取预训练模型路径 | |||
| Mindspore_MNIST_Example_Model_path = c2net_context.pretrain_model_path+"/"+"Mindspore_MNIST_Example_Model" | |||
| output_path = c2net_context.output_path | |||
| ds_train = create_dataset_parallel(os.path.join(MnistDataset_mindspore_path, "train"), cfg.batch_size) | |||
| network = LeNet5(cfg.num_classes) | |||
| net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") | |||
| net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) | |||
| time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) | |||
| #load_param_into_net(network, load_checkpoint(os.path.join(Mindspore_MNIST_Example_Model_path, "checkpoint_lenet-1_1875.ckpt"))) | |||
| if args.device_target != "Ascend": | |||
| model = Model(network, | |||
| net_loss, | |||