| @@ -0,0 +1,93 @@ | |||||
| #!/usr/bin/python | |||||
| #coding=utf-8 | |||||
| ''' | |||||
| If the code contains Chinese comments, add the following two lines at the top of the file: | |||||
| #!/usr/bin/python | |||||
| #coding=utf-8 | |||||
| Because of A100 compatibility requirements, please use the platform's recommended CUDA 11 image before using | |||||
| the training environment. Then adjust the code and submit the image. | |||||
| The image of this example is: dockerhub.pcl.ac.cn:5000/user-images/openi:cuda111_python37_pytorch191 | |||||
| In the training environment, the uploaded dataset will be automatically placed in the /dataset directory. | |||||
| If it is a single dataset: | |||||
| If MnistDataset_torch.zip is selected, then the dataset directories are /dataset/train and /dataset/test; | |||||
| If it is a multiple dataset: | |||||
| If MnistDataset_torch.zip and checkpoint_epoch1_0.73.zip are selected, | |||||
| the dataset directory is /dataset/MnistDataset_torch/train, /dataset/MnistDataset_torch/test | |||||
| and /dataset/checkpoint_epoch1_0.73/mnist_epoch1_0.73.pkl | |||||
| The model download path is under /model by default. Please specify the model output location to /model, | |||||
| and the Qizhi platform will provide file downloads under the /model directory. | |||||
| ''' | |||||
| from model import Model | |||||
| import numpy as np | |||||
| import torch | |||||
| from torchvision.datasets import mnist | |||||
| from torch.nn import CrossEntropyLoss | |||||
| from torch.optim import SGD | |||||
| from torch.utils.data import DataLoader | |||||
| from torchvision.transforms import ToTensor | |||||
| import argparse | |||||
| import datetime | |||||
# Training settings: command-line options for the dataset locations and the
# training schedule. Unrecognised options are tolerated by the caller, which
# uses parse_known_args().
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
# The dataset location is placed under /dataset.
parser.add_argument('--traindata', default="/dataset/train", help='path to train dataset')
parser.add_argument('--testdata', default="/dataset/test", help='path to test dataset')
parser.add_argument('--epoch_size', type=int, default=1, help='number of epochs to train')
parser.add_argument('--batch_size', type=int, default=256, help='number of samples per batch')
def gettime(fmt='%Y-%m-%d %H:%M:%S'):
    """Return the current local time as a formatted string.

    Used as a timestamp prefix for the log lines printed by this script.

    Args:
        fmt: ``strftime`` format string. The default yields
            ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        The current time (naive, from ``datetime.datetime.now()``) rendered
        with ``fmt``.
    """
    return datetime.datetime.now().strftime(fmt)
def train_one_epoch(model, loader, optimizer, criterion, device, log_every=10):
    """Run one optimisation pass over ``loader``.

    Args:
        model: network being trained; switched to training mode here.
        loader: iterable yielding ``(inputs, labels)`` batches.
        optimizer: optimiser whose ``zero_grad``/``step`` are called per batch.
        criterion: loss callable taking ``(predictions, labels)``.
        device: device the batches are moved to before the forward pass.
        log_every: print the loss every this many batches.
    """
    model.train()
    for idx, (train_x, train_label) in enumerate(loader):
        train_x = train_x.to(device)
        train_label = train_label.to(device)
        optimizer.zero_grad()
        predict_y = model(train_x.float())
        loss = criterion(predict_y, train_label.long())
        if idx % log_every == 0:
            print(gettime(), 'idx: {}, loss: {}'.format(idx, loss.item()))
        loss.backward()
        optimizer.step()


def evaluate(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The predicted class is the argmax over the last dimension of the model
    output. Gradients are disabled because nothing is back-propagated here.

    Args:
        model: network to evaluate; switched to evaluation mode here.
        loader: iterable yielding ``(inputs, labels)`` batches.
        device: device the inputs are moved to before the forward pass.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when ``loader`` yields no
        samples (avoids a division by zero).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for test_x, test_label in loader:
            predict_y = model(test_x.to(device).float())
            # Compare on the CPU so labels never need to leave the host.
            predict_ys = predict_y.argmax(dim=-1).cpu()
            correct += (predict_ys == test_label.cpu()).sum().item()
            total += test_label.shape[0]
    return correct / total if total else 0.0


if __name__ == '__main__':
    # Unknown options are ignored rather than rejected.
    args, unknown = parser.parse_known_args()
    # log output
    print(gettime(), 'cuda is available:{}'.format(torch.cuda.is_available()))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    # download=False: the data must already exist under the given roots.
    train_dataset = mnist.MNIST(root=args.traindata, train=True, transform=ToTensor(), download=False)
    test_dataset = mnist.MNIST(root=args.testdata, train=False, transform=ToTensor(), download=False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    model = Model().to(device)
    sgd = SGD(model.parameters(), lr=1e-1)
    cost = CrossEntropyLoss()
    epoch = args.epoch_size
    print(gettime(), 'epoch_size is:{}'.format(epoch))
    for _epoch in range(epoch):
        print(gettime(), 'the {} epoch_size begin'.format(_epoch + 1))
        train_one_epoch(model, train_loader, sgd, cost, device)

        accuracy = evaluate(model, test_loader, device)
        print(gettime(), 'accuracy: {:.2f}'.format(accuracy))
        # The model output location is placed under /model.
        # NOTE: this pickles the whole module object (not just a state_dict),
        # so loading it later requires the Model class to be importable.
        torch.save(model, '/model/mnist_epoch{}_{:.2f}.pkl'.format(_epoch + 1, accuracy))
    print("----------this is the end--------")