|
|
|
@@ -11,6 +11,13 @@ |
|
|
|
"在LeNet5提出的时候,没有 GPU 帮助训练,甚至 CPU 的速度也很慢,因此,LeNet5的规模并不大。其包含七个处理层,每一层都包含可训练参数(权重),当时使用的输入数据是 $32 \\times 32$ 像素的图像。LeNet-5 这个网络虽然很小,但是它包含了深度学习的基本模块:卷积层,池化层,全连接层。它是其他深度学习模型的基础,这里对LeNet5进行深入分析和讲解,通过实例分析,加深对于卷积层和池化层的理解。"
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"定义网络为:" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 1, |
|
|
|
@@ -26,30 +33,57 @@ |
|
|
|
"import torch\n", |
|
|
|
"from torch import nn\n", |
|
|
|
"from torch.autograd import Variable\n", |
|
|
|
"import torch.nn.functional as F\n", |
|
|
|
"from torchvision.datasets import CIFAR10\n", |
|
|
|
"from torchvision import transforms as tfs" |
|
|
|
"from torchvision import transforms as tfs\n", |
|
|
|
"\n", |
|
|
|
"\n", |
|
|
|
"class LeNet5(nn.Module):\n", |
|
|
|
" def __init__(self):\n", |
|
|
|
" super(LeNet5, self).__init__()\n", |
|
|
|
" # 1-input channel, 6-output channels, 5x5-conv\n", |
|
|
|
" self.conv1 = nn.Conv2d(1, 6, 5)\n", |
|
|
|
" # 6-input channel, 16-output channels, 5x5-conv\n", |
|
|
|
" self.conv2 = nn.Conv2d(6, 16, 5)\n", |
|
|
|
" # 16x5x5-input, 120-output\n", |
|
|
|
" self.fc1 = nn.Linear(16 * 5 * 5, 120) \n", |
|
|
|
" # 120-input, 84-output\n", |
|
|
|
" self.fc2 = nn.Linear(120, 84)\n", |
|
|
|
" # 84-input, 10-output\n", |
|
|
|
" self.fc3 = nn.Linear(84, 10)\n", |
|
|
|
"\n", |
|
|
|
" def forward(self, x):\n", |
|
|
|
" x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))\n", |
|
|
|
" x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))\n", |
|
|
|
"        x = torch.flatten(x, 1) # 将结果展平成1维向量,除了批次的维度\n",
|
|
|
" x = F.relu(self.fc1(x))\n", |
|
|
|
" x = F.relu(self.fc2(x))\n", |
|
|
|
" x = self.fc3(x)\n", |
|
|
|
" return x" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 1, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"execution_count": 4, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"LeNet5(\n", |
|
|
|
" (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))\n", |
|
|
|
" (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n", |
|
|
|
" (fc1): Linear(in_features=400, out_features=120, bias=True)\n", |
|
|
|
" (fc2): Linear(in_features=120, out_features=84, bias=True)\n", |
|
|
|
" (fc3): Linear(in_features=84, out_features=10, bias=True)\n", |
|
|
|
")\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"import torch\n", |
|
|
|
"from torch import nn\n", |
|
|
|
"\n", |
|
|
|
"lenet5 = nn.Sequential(\n", |
|
|
|
" nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),\n", |
|
|
|
" nn.AvgPool2d(kernel_size=2, stride=2),\n", |
|
|
|
" nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),\n", |
|
|
|
" nn.AvgPool2d(kernel_size=2, stride=2),\n", |
|
|
|
" nn.Flatten(),\n", |
|
|
|
" nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),\n", |
|
|
|
" nn.Linear(120, 84), nn.Sigmoid(),\n", |
|
|
|
" nn.Linear(84, 10) )" |
|
|
|
"net = LeNet5()\n", |
|
|
|
"print(net)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
@@ -60,34 +94,27 @@ |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from torchvision.datasets import mnist\n", |
|
|
|
"from torch.utils.data import DataLoader\n", |
|
|
|
"from utils import train\n", |
|
|
|
"\n", |
|
|
|
"# 使用数据增强\n", |
|
|
|
"def train_tf(x):\n", |
|
|
|
" im_aug = tfs.Compose([\n", |
|
|
|
" tfs.Resize(224),\n", |
|
|
|
" tfs.ToTensor(),\n", |
|
|
|
" tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n", |
|
|
|
" ])\n", |
|
|
|
" x = im_aug(x)\n", |
|
|
|
" return x\n", |
|
|
|
"\n", |
|
|
|
"def test_tf(x):\n", |
|
|
|
"def data_tf(x):\n", |
|
|
|
" im_aug = tfs.Compose([\n", |
|
|
|
" tfs.Resize(224),\n", |
|
|
|
" tfs.Resize(32),\n", |
|
|
|
" tfs.ToTensor(),\n", |
|
|
|
" tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n", |
|
|
|
" ])\n", |
|
|
|
" x = im_aug(x)\n", |
|
|
|
" return x\n", |
|
|
|
" \n", |
|
|
|
"train_set = CIFAR10('../../data', train=True, transform=train_tf)\n", |
|
|
|
"train_set = CIFAR10('../../data', train=True, transform=data_tf)\n", |
|
|
|
"train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n", |
|
|
|
"test_set = CIFAR10('../../data', train=False, transform=test_tf)\n", |
|
|
|
"test_set = CIFAR10('../../data', train=False, transform=data_tf)\n", |
|
|
|
"test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n", |
|
|
|
"\n", |
|
|
|
"net = lenet5\n", |
|
|
|
"optimizer = torch.optim.SGD(net.parameters(), lr=1e-1)\n", |
|
|
|
"net = LeNet5()\n", |
|
|
|
"optimizer = torch.optim.Adam(net.parameters(), lr=1e-1)\n", |
|
|
|
"criterion = nn.CrossEntropyLoss()" |
|
|
|
] |
|
|
|
}, |
|
|
|
@@ -99,11 +126,9 @@ |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"(l_train_loss, l_train_acc, l_valid_loss, l_valid_acc) = train(net, \n", |
|
|
|
" train_data, test_data, \n", |
|
|
|
" 20, \n", |
|
|
|
" optimizer, criterion,\n", |
|
|
|
" use_cuda=False)" |
|
|
|
"res = train(net, train_data, test_data, 20, \n", |
|
|
|
" optimizer, criterion,\n", |
|
|
|
" use_cuda=False)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
@@ -117,15 +142,15 @@ |
|
|
|
"import matplotlib.pyplot as plt\n", |
|
|
|
"%matplotlib inline\n", |
|
|
|
"\n", |
|
|
|
"plt.plot(l_train_loss, label='train')\n", |
|
|
|
"plt.plot(l_valid_loss, label='valid')\n", |
|
|
|
"plt.plot(res[0], label='train')\n", |
|
|
|
"plt.plot(res[2], label='valid')\n", |
|
|
|
"plt.xlabel('epoch')\n", |
|
|
|
"plt.legend(loc='best')\n", |
|
|
|
"plt.savefig('fig-res-lenet5-train-validate-loss.pdf')\n", |
|
|
|
"plt.show()\n", |
|
|
|
"\n", |
|
|
|
"plt.plot(l_train_acc, label='train')\n", |
|
|
|
"plt.plot(l_valid_acc, label='valid')\n", |
|
|
|
"plt.plot(res[1], label='train')\n", |
|
|
|
"plt.plot(res[3], label='valid')\n", |
|
|
|
"plt.xlabel('epoch')\n", |
|
|
|
"plt.legend(loc='best')\n", |
|
|
|
"plt.savefig('fig-res-lenet5-train-validate-acc.pdf')\n", |
|
|
|
|