diff --git a/7_deep_learning/1_CNN/02-LeNet5.ipynb b/7_deep_learning/1_CNN/02-LeNet5.ipynb
index a4c66a2..3512f55 100644
--- a/7_deep_learning/1_CNN/02-LeNet5.ipynb
+++ b/7_deep_learning/1_CNN/02-LeNet5.ipynb
@@ -11,6 +11,13 @@
 "When LeNet5 was proposed there was no GPU to help with training, and even CPUs were slow, so LeNet5 is not a large network. It has seven processing layers, each with trainable parameters (weights), and the input used at the time was a $32 \\times 32$ pixel image. Although LeNet-5 is a small network, it already contains the basic building blocks of deep learning: convolutional layers, pooling layers, and fully connected layers. It is the foundation of later deep learning models; here we analyze LeNet5 in depth and use a worked example to deepen the understanding of convolutional and pooling layers."
 ]
 },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Define the network as:"
+ ]
+ },
 {
 "cell_type": "code",
 "execution_count": 1,
@@ -26,30 +33,57 @@
 "import torch\n",
 "from torch import nn\n",
 "from torch.autograd import Variable\n",
+ "import torch.nn.functional as F\n",
 "from torchvision.datasets import CIFAR10\n",
- "from torchvision import transforms as tfs"
+ "from torchvision import transforms as tfs\n",
+ "\n",
+ "\n",
+ "class LeNet5(nn.Module):\n",
+ "    def __init__(self):\n",
+ "        super(LeNet5, self).__init__()\n",
+ "        # 1 input channel, 6 output channels, 5x5 conv\n",
+ "        self.conv1 = nn.Conv2d(1, 6, 5)\n",
+ "        # 6 input channels, 16 output channels, 5x5 conv\n",
+ "        self.conv2 = nn.Conv2d(6, 16, 5)\n",
+ "        # 16*5*5 inputs, 120 outputs\n",
+ "        self.fc1 = nn.Linear(16 * 5 * 5, 120)\n",
+ "        # 120 inputs, 84 outputs\n",
+ "        self.fc2 = nn.Linear(120, 84)\n",
+ "        # 84 inputs, 10 outputs (class scores)\n",
+ "        self.fc3 = nn.Linear(84, 10)\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        # 1x32x32 -> conv1 -> 6x28x28 -> max pool -> 6x14x14\n",
+ "        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))\n",
+ "        # 6x14x14 -> conv2 -> 16x10x10 -> max pool -> 16x5x5\n",
+ "        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))\n",
+ "        # flatten to a 1-D vector, keeping the batch dimension\n",
+ "        x = torch.flatten(x, 1)\n",
+ "        x = F.relu(self.fc1(x))\n",
+ "        x = F.relu(self.fc2(x))\n",
+ "        x = self.fc3(x)\n",
+ "        return x"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "LeNet5(\n",
+ "  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))\n",
+ "  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
+ "  (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
+ "  (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
+ "  (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
+ ")\n"
+ ]
+ }
+ ],
 "source": [
- "import torch\n",
- "from torch import nn\n",
- "\n",
- "lenet5 = nn.Sequential(\n",
- "    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),\n",
- "    nn.AvgPool2d(kernel_size=2, stride=2),\n",
- "    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),\n",
- "    nn.AvgPool2d(kernel_size=2, stride=2),\n",
- "    nn.Flatten(),\n",
- "    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),\n",
- "    nn.Linear(120, 84), nn.Sigmoid(),\n",
- "    nn.Linear(84, 10) )"
+ "net = LeNet5()\n",
+ "print(net)"
 ]
 },
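+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check, we can push a dummy input through the network (a minimal sketch; any random tensor of the right shape works): conv1 maps $32 \\to 28$, pooling halves that to $14$, conv2 maps $14 \\to 10$, pooling halves it to $5$, so fc1 indeed expects $16 \\times 5 \\times 5 = 400$ features and the output carries 10 class scores."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = torch.randn(1, 1, 32, 32)  # dummy batch: one 1-channel 32x32 image\n",
+ "print(net(x).shape)  # torch.Size([1, 10])"
+ ]
+ },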
 {
 "cell_type": "code",
@@ -60,34 +94,27 @@
 },
 "outputs": [],
 "source": [
+ "from torchvision.datasets import mnist\n",
+ "from torch.utils.data import DataLoader\n",
 "from utils import train\n",
 "\n",
 "# data preprocessing: resize to the 32x32 input LeNet5 expects, then normalize\n",
- "def train_tf(x):\n",
- "    im_aug = tfs.Compose([\n",
- "        tfs.Resize(224),\n",
- "        tfs.ToTensor(),\n",
- "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
- "    ])\n",
- "    x = im_aug(x)\n",
- "    return x\n",
- "\n",
- "def test_tf(x):\n",
+ "def data_tf(x):\n",
 "    im_aug = tfs.Compose([\n",
- "        tfs.Resize(224),\n",
+ "        tfs.Resize(32),\n",
 "        tfs.ToTensor(),\n",
- "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
+ "        tfs.Normalize([0.5], [0.5])\n",
 "    ])\n",
 "    x = im_aug(x)\n",
 "    return x\n",
 " \n",
- "train_set = CIFAR10('../../data', train=True, transform=train_tf)\n",
- "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
- "test_set = CIFAR10('../../data', train=False, transform=test_tf)\n",
- "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
+ "train_set = mnist.MNIST('../../data/mnist', train=True, transform=data_tf, download=True)\n",
+ "train_data = DataLoader(train_set, batch_size=64, shuffle=True)\n",
+ "test_set = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True)\n",
+ "test_data = DataLoader(test_set, batch_size=128, shuffle=False)\n",
 "\n",
- "net = lenet5\n",
- "optimizer = torch.optim.SGD(net.parameters(), lr=1e-1)\n",
+ "net = LeNet5()\n",
+ "# Adam needs a much smaller learning rate than SGD; 1e-1 would diverge\n",
+ "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
 "criterion = nn.CrossEntropyLoss()"
 ]
 },
@@ -99,11 +126,9 @@
 },
 "outputs": [],
 "source": [
- "(l_train_loss, l_train_acc, l_valid_loss, l_valid_acc) = train(net, \n",
- "                                                               train_data, test_data, \n",
- "                                                               20, \n",
- "                                                               optimizer, criterion,\n",
- "                                                               use_cuda=False)"
+ "# res = (train_loss, train_acc, valid_loss, valid_acc)\n",
+ "res = train(net, train_data, test_data, 20, \n",
+ "            optimizer, criterion,\n",
+ "            use_cuda=False)"
 ]
 },
@@ -117,15 +142,15 @@
 "import matplotlib.pyplot as plt\n",
 "%matplotlib inline\n",
 "\n",
- "plt.plot(l_train_loss, label='train')\n",
- "plt.plot(l_valid_loss, label='valid')\n",
+ "plt.plot(res[0], label='train')\n",
+ "plt.plot(res[2], label='valid')\n",
 "plt.xlabel('epoch')\n",
 "plt.legend(loc='best')\n",
 "plt.savefig('fig-res-lenet5-train-validate-loss.pdf')\n",
 "plt.show()\n",
 "\n",
- "plt.plot(l_train_acc, label='train')\n",
- "plt.plot(l_valid_acc, label='valid')\n",
+ "plt.plot(res[1], label='train')\n",
+ "plt.plot(res[3], label='valid')\n",
 "plt.xlabel('epoch')\n",
 "plt.legend(loc='best')\n",
 "plt.savefig('fig-res-lenet5-train-validate-acc.pdf')\n",
diff --git a/7_deep_learning/1_CNN/03-AlexNet.ipynb b/7_deep_learning/1_CNN/03-AlexNet.ipynb
index aeaf5ac..3ea3fd8 100644
--- a/7_deep_learning/1_CNN/03-AlexNet.ipynb
+++ b/7_deep_learning/1_CNN/03-AlexNet.ipynb
@@ -73,6 +73,44 @@
 "            nn.init.normal_(m.weight, 0, 0.01) \n",
 "            nn.init.constant_(m.bias, 0)"
 ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "sys.path.append('..')\n",
+ "\n",
+ "from torchvision.datasets import CIFAR10\n",
+ "from torchvision import transforms as tfs\n",
+ "from utils import train\n",
+ "\n",
+ "\n",
+ "# data preprocessing: resize CIFAR10 images to the 227x227 input AlexNet expects, then normalize\n",
+ "def data_tf(x):\n",
+ "    im_aug = tfs.Compose([\n",
+ "        tfs.Resize(227),\n",
+ "        tfs.ToTensor(),\n",
+ "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
+ "    ])\n",
+ "    x = im_aug(x)\n",
+ "    return x\n",
+ " \n",
+ "train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
+ "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
+ "test_set = CIFAR10('../../data', train=False, transform=data_tf)\n",
+ "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
+ "\n",
+ "net = AlexNet(num_classes=10)\n",
+ "# again, Adam works best with a small learning rate\n",
+ "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
+ "criterion = nn.CrossEntropyLoss()\n",
+ "\n",
+ "res = train(net, train_data, test_data, 20, optimizer, criterion, use_cuda=False)"
+ ]
+ },
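+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before committing to the slow 20-epoch CPU run, a quick shape check helps; this is a sketch that assumes the AlexNet defined above accepts the $227 \\times 227$ RGB input that Resize(227) produces:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = torch.randn(1, 3, 227, 227)  # dummy batch: one RGB 227x227 image\n",
+ "print(net(x).shape)  # expect torch.Size([1, 10])"
+ ]
+ }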
DataLoader\n", "from torch import nn\n", - "from torch.autograd import Variable" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# 使用内置函数下载 mnist 数据集\n", - "train_set = mnist.MNIST('../../data/mnist', train=True)\n", - "test_set = mnist.MNIST('../../data/mnist', train=False)\n", + "from torch.autograd import Variable\n", "\n", "def data_tf(x):\n", " x = np.array(x, dtype='float32') / 255\n", @@ -236,7 +223,7 @@ " x = torch.from_numpy(x)\n", " return x\n", "\n", - "# 重新载入数据集,申明定义的数据变换\n", + "# 下载 MNIST 数据集,载入数据集,申明定义的数据变换\n", "train_set = mnist.MNIST('../../data/mnist', train=True, \n", " transform=data_tf, download=True) \n", "test_set = mnist.MNIST('../../data/mnist', train=False, \n",