| @@ -645,7 +645,7 @@ | |||||
| "name": "python", | "name": "python", | ||||
| "nbconvert_exporter": "python", | "nbconvert_exporter": "python", | ||||
| "pygments_lexer": "ipython3", | "pygments_lexer": "ipython3", | ||||
| "version": "3.6.3" | |||||
| "version": "3.5.2" | |||||
| } | } | ||||
| }, | }, | ||||
| "nbformat": 4, | "nbformat": 4, | ||||
| @@ -0,0 +1,220 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| # --- | |||||
| # jupyter: | |||||
| # jupytext_format_version: '1.2' | |||||
| # kernelspec: | |||||
| # display_name: Python 3 | |||||
| # language: python | |||||
| # name: python3 | |||||
| # language_info: | |||||
| # codemirror_mode: | |||||
| # name: ipython | |||||
| # version: 3 | |||||
| # file_extension: .py | |||||
| # mimetype: text/x-python | |||||
| # name: python | |||||
| # nbconvert_exporter: python | |||||
| # pygments_lexer: ipython3 | |||||
| # version: 3.5.2 | |||||
| # --- | |||||
| # # Automatic Differentiation | |||||
| # In this lesson we look at PyTorch's automatic differentiation (autograd) mechanism. Autograd is one of PyTorch's most important features: it spares us from computing complicated derivatives by hand, which greatly reduces the time needed to build a model, and it is something its predecessor, the Torch framework, did not offer. Below we explore what autograd can do through a few examples and look at some of its more advanced uses. | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| # ## Autograd in the simple case | |||||
| # Below we demonstrate autograd in some simple cases. "Simple" means the result of the computation is a scalar, i.e. a single number, and we differentiate that scalar. | |||||
| x = Variable(torch.Tensor([2]), requires_grad=True) | |||||
| y = x + 2 | |||||
| z = y ** 2 + 3 | |||||
| print(z) | |||||
| # Through the series of operations above we obtained the final result z from x, which we can write as the formula | |||||
| # | |||||
| # $$ | |||||
| # z = (x + 2)^2 + 3 | |||||
| # $$ | |||||
| # | |||||
| # Then differentiating z with respect to x gives | |||||
| # | |||||
| # $$ | |||||
| # \frac{\partial z}{\partial x} = 2 (x + 2) = 2 (2 + 2) = 8 | |||||
| # $$ | |||||
| # If you are not familiar with derivatives, you can [review them here](https://baike.baidu.com/item/%E5%AF%BC%E6%95%B0#1) | |||||
| # use automatic differentiation | |||||
| z.backward() | |||||
| print(x.grad) | |||||
| # For a simple example like this we can verify what autograd returns, and we can see how convenient it is. For a more complicated expression, differentiating by hand becomes very tedious, so autograd saves us that painful calculation. Let's look at a more complex example. | |||||
| # + | |||||
| x = Variable(torch.randn(10, 20), requires_grad=True) | |||||
| y = Variable(torch.randn(10, 5), requires_grad=True) | |||||
| w = Variable(torch.randn(20, 5), requires_grad=True) | |||||
| out = torch.mean(y - torch.matmul(x, w)) # torch.matmul performs matrix multiplication | |||||
| out.backward() | |||||
| # - | |||||
| # If you are not familiar with matrix multiplication, you can [review it here](https://baike.baidu.com/item/%E7%9F%A9%E9%98%B5%E4%B9%98%E6%B3%95/5446029?fr=aladdin) | |||||
| # gradient of x | |||||
| print(x.grad) | |||||
| # gradient of y | |||||
| print(y.grad) | |||||
| # gradient of w | |||||
| print(w.grad) | |||||
| # The math here is more involved: after the matrix multiplication the result is subtracted element-wise from y, and then all the elements are averaged. Interested readers can try to compute the gradients by hand. With PyTorch's autograd we obtain the derivatives with respect to x, y and w very easily. Deep learning is full of large matrix operations whose derivatives we cannot realistically work out by hand, so autograd makes updating the network very convenient. | |||||
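| # As a quick sanity check, the sketch below (my own addition; it rebuilds the same kind of random tensors rather than reusing the ones above) compares the gradient autograd returns for x with the hand-derived formula: since out is the mean of y - x·w over 10 × 5 entries, every row of x.grad equals -w.sum(dim=1) / 50. | |||||
| # + | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| x = Variable(torch.randn(10, 20), requires_grad=True) | |||||
| y = Variable(torch.randn(10, 5), requires_grad=True) | |||||
| w = Variable(torch.randn(20, 5), requires_grad=True) | |||||
| out = torch.mean(y - torch.matmul(x, w)) | |||||
| out.backward() | |||||
| print(x.grad.data[0])                   # first row of the autograd result | |||||
| print((-w.sum(dim=1) / (10 * 5)).data)  # hand-derived value; every row of x.grad should equal this | |||||
| # - | |||||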
| # | |||||
| # | |||||
| # ## Autograd in more complex cases | |||||
| # Above we showed autograd in the simple case where the quantity being differentiated is a scalar. You may wonder how to differentiate a vector or a matrix automatically. Feel free to try it yourself first; below we introduce autograd for multi-dimensional arrays. | |||||
| m = Variable(torch.FloatTensor([[2, 3]]), requires_grad=True) # build a 1 x 2 matrix | |||||
| n = Variable(torch.zeros(1, 2)) # build a zero matrix of the same size | |||||
| print(m) | |||||
| print(n) | |||||
| # compute the values in n from the values in m | |||||
| n[0, 0] = m[0, 0] ** 2 | |||||
| n[0, 1] = m[0, 1] ** 3 | |||||
| print(n) | |||||
| # Writing the expressions above as a formula, we get | |||||
| # $$ | |||||
| # n = (n_0,\ n_1) = (m_0^2,\ m_1^3) = (2^2,\ 3^3) | |||||
| # $$ | |||||
| # Next we backpropagate through n directly, i.e. we take the derivative of n with respect to m. | |||||
| # | |||||
| # At this point we need to be precise about what this derivative means, i.e. how to define | |||||
| # | |||||
| # $$ | |||||
| # \frac{\partial n}{\partial m} = \frac{\partial (n_0,\ n_1)}{\partial (m_0,\ m_1)} | |||||
| # $$ | |||||
| # | |||||
| # In PyTorch, to call autograd in this case we need to pass an argument to `backward()` with the same shape as n, say $(w_0,\ w_1)$; the result of automatic differentiation is then: | |||||
| # $$ | |||||
| # \frac{\partial n}{\partial m_0} = w_0 \frac{\partial n_0}{\partial m_0} + w_1 \frac{\partial n_1}{\partial m_0} | |||||
| # $$ | |||||
| # $$ | |||||
| # \frac{\partial n}{\partial m_1} = w_0 \frac{\partial n_0}{\partial m_1} + w_1 \frac{\partial n_1}{\partial m_1} | |||||
| # $$ | |||||
| n.backward(torch.ones_like(n)) # take (w0, w1) to be (1, 1) | |||||
| print(m.grad) | |||||
| # Autograd gives gradients of 4 and 27, which we can check by hand | |||||
| # $$ | |||||
| # \frac{\partial n}{\partial m_0} = w_0 \frac{\partial n_0}{\partial m_0} + w_1 \frac{\partial n_1}{\partial m_0} = 2 m_0 + 0 = 2 \times 2 = 4 | |||||
| # $$ | |||||
| # $$ | |||||
| # \frac{\partial n}{\partial m_1} = w_0 \frac{\partial n_0}{\partial m_1} + w_1 \frac{\partial n_1}{\partial m_1} = 0 + 3 m_1^2 = 3 \times 3^2 = 27 | |||||
| # $$ | |||||
| # The hand calculation gives the same result | |||||
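| # The weight vector passed to `backward()` does not have to be all ones. The sketch below (my own addition, with arbitrarily chosen weights) repeats the example with the weights (2, 0.5); by the formula above the gradient should be $(2 \cdot 2 m_0,\ 0.5 \cdot 3 m_1^2) = (8,\ 13.5)$. | |||||
| # + | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| m = Variable(torch.FloatTensor([[2, 3]]), requires_grad=True) | |||||
| n = Variable(torch.zeros(1, 2)) | |||||
| n[0, 0] = m[0, 0] ** 2 | |||||
| n[0, 1] = m[0, 1] ** 3 | |||||
| n.backward(torch.FloatTensor([[2, 0.5]]))  # (w0, w1) = (2, 0.5) | |||||
| print(m.grad)  # expected: 8 and 13.5 | |||||
| # - | |||||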
| # | |||||
| # | |||||
| # ## Running backward more than once | |||||
| # Calling backward runs automatic differentiation once. If we call backward a second time, the program raises an error and refuses to run. This is because, by default, PyTorch discards the computation graph after one backward pass, so differentiating twice requires keeping the graph explicitly with `retain_graph=True`, as the small example below shows. | |||||
| x = Variable(torch.FloatTensor([3]), requires_grad=True) | |||||
| y = x * 2 + x ** 2 + 3 | |||||
| print(y) | |||||
| y.backward(retain_graph=True) # set retain_graph to True to keep the computation graph | |||||
| print(x.grad) | |||||
| y.backward() # run backward once more; this time the graph is not kept | |||||
| print(x.grad) | |||||
| # The gradient of x has become 16: backward was run twice, so the first gradient of 8 and the second gradient of 8 were added together, giving 16. | |||||
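| # Because gradients accumulate, if you want each backward pass to produce a fresh gradient you must clear `x.grad` in between. A small sketch (my own addition) of the same example with the gradient zeroed between the two calls: | |||||
| # + | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| x = Variable(torch.FloatTensor([3]), requires_grad=True) | |||||
| y = x * 2 + x ** 2 + 3 | |||||
| y.backward(retain_graph=True) | |||||
| print(x.grad)        # 8 | |||||
| x.grad.data.zero_()  # clear the accumulated gradient | |||||
| y.backward() | |||||
| print(x.grad)        # 8 again, instead of 16 | |||||
| # - | |||||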
| # | |||||
| # | |||||
| # **Exercise** | |||||
| # | |||||
| # Define | |||||
| # | |||||
| # $$ | |||||
| # x = | |||||
| # \left[ | |||||
| # \begin{matrix} | |||||
| # x_0 \\ | |||||
| # x_1 | |||||
| # \end{matrix} | |||||
| # \right] = | |||||
| # \left[ | |||||
| # \begin{matrix} | |||||
| # 2 \\ | |||||
| # 3 | |||||
| # \end{matrix} | |||||
| # \right] | |||||
| # $$ | |||||
| # | |||||
| # $$ | |||||
| # k = (k_0,\ k_1) = (x_0^2 + 3 x_1,\ 2 x_0 + x_1^2) | |||||
| # $$ | |||||
| # | |||||
| # We want to find | |||||
| # | |||||
| # $$ | |||||
| # j = \left[ | |||||
| # \begin{matrix} | |||||
| # \frac{\partial k_0}{\partial x_0} & \frac{\partial k_0}{\partial x_1} \\ | |||||
| # \frac{\partial k_1}{\partial x_0} & \frac{\partial k_1}{\partial x_1} | |||||
| # \end{matrix} | |||||
| # \right] | |||||
| # $$ | |||||
| # | |||||
| # Reference answer: | |||||
| # | |||||
| # $$ | |||||
| # \left[ | |||||
| # \begin{matrix} | |||||
| # 4 & 3 \\ | |||||
| # 2 & 6 \\ | |||||
| # \end{matrix} | |||||
| # \right] | |||||
| # $$ | |||||
| # + | |||||
| x = Variable(torch.FloatTensor([2, 3]), requires_grad=True) | |||||
| k = Variable(torch.zeros(2)) | |||||
| k[0] = x[0] ** 2 + 3 * x[1] | |||||
| k[1] = x[1] ** 2 + 2 * x[0] | |||||
| # - | |||||
| print(k) | |||||
| # + | |||||
| j = torch.zeros(2, 2) | |||||
| k.backward(torch.FloatTensor([1, 0]), retain_graph=True) | |||||
| j[0] = x.grad.data | |||||
| x.grad.data.zero_() # zero out the previously computed gradient | |||||
| k.backward(torch.FloatTensor([0, 1])) | |||||
| j[1] = x.grad.data | |||||
| # - | |||||
| print(j) | |||||
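| # As a cross-check, newer PyTorch releases (1.5 and later; this is an assumption about the installed version, since the notebook itself targets an older API) can compute the same Jacobian in one call with `torch.autograd.functional.jacobian`: | |||||
| # + | |||||
| import torch | |||||
| from torch.autograd.functional import jacobian | |||||
| def k_func(x): | |||||
|     return torch.stack([x[0] ** 2 + 3 * x[1], 2 * x[0] + x[1] ** 2]) | |||||
| print(jacobian(k_func, torch.FloatTensor([2, 3])))  # expected [[4, 3], [2, 6]] | |||||
| # - | |||||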
| # In the next lesson we will introduce two styles of programming neural networks: dynamic graphs and static graphs | |||||
| @@ -0,0 +1,355 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| # --- | |||||
| # jupyter: | |||||
| # jupytext_format_version: '1.2' | |||||
| # kernelspec: | |||||
| # display_name: Python 3 | |||||
| # language: python | |||||
| # name: python3 | |||||
| # language_info: | |||||
| # codemirror_mode: | |||||
| # name: ipython | |||||
| # version: 3 | |||||
| # file_extension: .py | |||||
| # mimetype: text/x-python | |||||
| # name: python | |||||
| # nbconvert_exporter: python | |||||
| # pygments_lexer: ipython3 | |||||
| # version: 3.5.2 | |||||
| # --- | |||||
| # # Linear Models and Gradient Descent | |||||
| # This is the first lesson on neural networks. We will study a very simple model, linear regression, together with an optimization algorithm, gradient descent, used to optimize it. Linear regression is a very simple supervised learning model, and gradient descent is the most widely used optimization algorithm in deep learning, so this is where our deep learning journey begins | |||||
| # | |||||
| # | |||||
| # ## Linear regression with one variable | |||||
| # The one-variable linear model is very simple. Suppose we have inputs $x_i$ and targets $y_i$, where each i corresponds to one data point, and we want to build the model | |||||
| # | |||||
| # $$ | |||||
| # \hat{y}_i = w x_i + b | |||||
| # $$ | |||||
| # | |||||
| # $\hat{y}_i$ is the prediction; we want $\hat{y}_i$ to fit the target $y_i$. In plain words, we look for the function that fits $y_i$ with the smallest error, i.e. we minimize | |||||
| # | |||||
| # $$ | |||||
| # \frac{1}{n} \sum_{i=1}^n(\hat{y}_i - y_i)^2 | |||||
| # $$ | |||||
| # How do we minimize this error? | |||||
| # | |||||
| # This is where **gradient descent** comes in. It is the first optimization algorithm we encounter: very simple, yet very powerful, and used heavily in deep learning, so let's start from a simple example to understand how it works | |||||
| # ## Gradient descent | |||||
| # For gradient descent we first need to be clear about what the gradient is, and then see how to use it to descend. | |||||
| # ### The gradient | |||||
| # Mathematically, the gradient is built from derivatives: for a function of one variable it is simply the derivative, and for a function of several variables it is the vector of partial derivatives. For example, for a function f(x, y), the gradient of f is | |||||
| # | |||||
| # $$ | |||||
| # (\frac{\partial f}{\partial x},\ \frac{\partial f}{\partial y}) | |||||
| # $$ | |||||
| # | |||||
| # written grad f(x, y) or $\nabla f(x, y)$. The gradient at a specific point $(x_0,\ y_0)$ is $\nabla f(x_0,\ y_0)$. | |||||
| # | |||||
| # The picture below shows the gradient of the function $f(x) = x^2$ at x = 1 | |||||
| # | |||||
| #  | |||||
| # What does the gradient mean? Geometrically, the gradient at a point gives the direction in which the function changes fastest: for f(x, y) at a point $(x_0, y_0)$, the function increases fastest along the direction of $\nabla f(x_0,\ y_0)$, so following the gradient brings us toward a maximum faster, and conversely, following the opposite direction of the gradient brings us toward a minimum faster. | |||||
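| # To make this concrete, here is a tiny sketch (my own example, not part of the original lesson) that uses PyTorch's autograd to evaluate the gradient of $f(x, y) = x^2 + y^2$ at the point (1, 2); by hand it is (2x, 2y) = (2, 4). | |||||
| # + | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| x = Variable(torch.FloatTensor([1]), requires_grad=True) | |||||
| y = Variable(torch.FloatTensor([2]), requires_grad=True) | |||||
| f = x ** 2 + y ** 2 | |||||
| f.backward() | |||||
| print(x.grad, y.grad)  # the gradient of f at (1, 2), i.e. (2, 4) | |||||
| # - | |||||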
| # ### Gradient descent | |||||
| # With this understanding of the gradient we can see how gradient descent works. Above we want to minimize the error, i.e. find its minimum point, so we move in the direction opposite to the gradient to reach that minimum. | |||||
| # | |||||
| # Here is an intuitive picture. Suppose we are somewhere on a large mountain and do not know the way down, so we decide to take it one step at a time: at each position we compute the gradient there and take one step along the negative gradient, i.e. downhill in the steepest direction, then compute the gradient at the new position and take another step in the steepest downhill direction from there. Walking like this step by step, we keep going until we feel we have reached the foot of the mountain. Of course, walking this way we may not reach the actual foot of the mountain but only some local low point. | |||||
| # | |||||
| # In our problem, this means repeatedly changing w and b along the negative gradient direction until we find the pair of w and b that makes the error smallest. | |||||
| # | |||||
| # When updating, we have to decide how large each step is. In the mountain analogy this is the length of each downhill step; it is called the learning rate, written $\eta$. The learning rate matters a lot: different learning rates give different results, a learning rate that is too small makes the descent very slow, and one that is too large makes the iterates jump around, as the example below shows | |||||
| # | |||||
| #  | |||||
| # | |||||
| # The upper plot uses a reasonable learning rate, while the lower one uses a learning rate that is too large, which makes the iterates keep jumping around | |||||
| # | |||||
| # Finally, the update rule is | |||||
| # | |||||
| # $$ | |||||
| # w := w - \eta \frac{\partial f(w,\ b)}{\partial w} \\ | |||||
| # b := b - \eta \frac{\partial f(w,\ b)}{\partial b} | |||||
| # $$ | |||||
| # | |||||
| # By updating iteratively we eventually find an optimal pair of w and b; this is the principle of gradient descent. | |||||
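| # Before moving on, here is a minimal numerical sketch (my own toy example, not part of the original lesson) of this update rule applied to $f(x) = x^2$, whose minimum is at x = 0; with a modest learning rate the iterate shrinks toward 0, while a learning rate above 1 would make it jump back and forth and diverge. | |||||
| # + | |||||
| x = 5.0    # starting point | |||||
| eta = 0.3  # learning rate | |||||
| for i in range(10): | |||||
|     grad = 2 * x        # f'(x) = 2x for f(x) = x^2 | |||||
|     x = x - eta * grad  # x := x - eta * f'(x) | |||||
| print(x)  # close to the minimum at 0 | |||||
| # - | |||||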
| # | |||||
| # Finally, this figure gives a visual illustration of the method | |||||
| # | |||||
| #  | |||||
| # | |||||
| # | |||||
| # That was the theory; below we study the linear model further through an example | |||||
| # + | |||||
| import torch | |||||
| import numpy as np | |||||
| from torch.autograd import Variable | |||||
| torch.manual_seed(2017) | |||||
| # + | |||||
| # load the data x and y | |||||
| x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168], | |||||
| [9.779], [6.182], [7.59], [2.167], [7.042], | |||||
| [10.791], [5.313], [7.997], [3.1]], dtype=np.float32) | |||||
| y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573], | |||||
| [3.366], [2.596], [2.53], [1.221], [2.827], | |||||
| [3.465], [1.65], [2.904], [1.3]], dtype=np.float32) | |||||
| # + | |||||
| # plot the data | |||||
| import matplotlib.pyplot as plt | |||||
| # %matplotlib inline | |||||
| plt.plot(x_train, y_train, 'bo') | |||||
| # + | |||||
| # convert to Tensor | |||||
| x_train = torch.from_numpy(x_train) | |||||
| y_train = torch.from_numpy(y_train) | |||||
| # define the parameters w and b | |||||
| w = Variable(torch.randn(1), requires_grad=True) # random initialization | |||||
| b = Variable(torch.zeros(1), requires_grad=True) # initialize with 0 | |||||
| # + | |||||
| # build the linear regression model | |||||
| x_train = Variable(x_train) | |||||
| y_train = Variable(y_train) | |||||
| def linear_model(x): | |||||
| return x * w + b | |||||
| # - | |||||
| y_ = linear_model(x_train) | |||||
| # With the steps above the model is defined. Before updating the parameters, let's see what the model output looks like | |||||
| plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label='real') | |||||
| plt.plot(x_train.data.numpy(), y_.data.numpy(), 'ro', label='estimated') | |||||
| plt.legend() | |||||
| # **Think about it: the red points are the model's predictions and they appear to line up. Are these points really on one straight line?** | |||||
| # Now we need to compute the error function, namely | |||||
| # | |||||
| # $$ | |||||
| # \frac{1}{n} \sum_{i=1}^n(\hat{y}_i - y_i)^2 | |||||
| # $$ | |||||
| # + | |||||
| # compute the error | |||||
| def get_loss(y_, y): | |||||
| return torch.mean((y_ - y) ** 2) | |||||
| loss = get_loss(y_, y_train) | |||||
| # - | |||||
| # print the loss to see how large it is | |||||
| print(loss) | |||||
| # With the error function defined, we now need the gradients of w and b. Thanks to PyTorch's autograd we do not have to compute them by hand; if you are interested you can derive them yourself. The gradients of w and b are | |||||
| # | |||||
| # $$ | |||||
| # \frac{\partial}{\partial w} = \frac{2}{n} \sum_{i=1}^n x_i(w x_i + b - y_i) \\ | |||||
| # \frac{\partial}{\partial b} = \frac{2}{n} \sum_{i=1}^n (w x_i + b - y_i) | |||||
| # $$ | |||||
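| # These two formulas can be checked directly against autograd. Below is a self-contained sketch (my own, with made-up data rather than the training set above) that computes the loss, calls backward, and compares the result with the hand-derived expressions: | |||||
| # + | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| x = Variable(torch.FloatTensor([1.0, 2.0, 3.0])) | |||||
| y = Variable(torch.FloatTensor([2.0, 4.0, 6.0])) | |||||
| w = Variable(torch.FloatTensor([0.5]), requires_grad=True) | |||||
| b = Variable(torch.FloatTensor([0.1]), requires_grad=True) | |||||
| loss = torch.mean((w * x + b - y) ** 2) | |||||
| loss.backward() | |||||
| grad_w = 2 * torch.mean(x * (w * x + b - y))  # (2/n) sum x_i (w x_i + b - y_i) | |||||
| grad_b = 2 * torch.mean(w * x + b - y)        # (2/n) sum (w x_i + b - y_i) | |||||
| print(w.grad.data, grad_w.data)  # the two values should agree | |||||
| print(b.grad.data, grad_b.data) | |||||
| # - | |||||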
| # automatic differentiation | |||||
| loss.backward() | |||||
| # look at the gradients of w and b | |||||
| print(w.grad) | |||||
| print(b.grad) | |||||
| # update the parameters once | |||||
| w.data = w.data - 1e-2 * w.grad.data | |||||
| b.data = b.data - 1e-2 * b.grad.data | |||||
| # After updating the parameters, let's look at the model output once more | |||||
| y_ = linear_model(x_train) | |||||
| plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label='real') | |||||
| plt.plot(x_train.data.numpy(), y_.data.numpy(), 'ro', label='estimated') | |||||
| plt.legend() | |||||
| # From the plot above we can see that after the update the red line has moved below the blue points and does not fit the true values particularly well yet, so we need to update a few more times | |||||
| for e in range(10): # run 10 updates | |||||
| y_ = linear_model(x_train) | |||||
| loss = get_loss(y_, y_train) | |||||
| w.grad.zero_() # remember to zero the gradients | |||||
| b.grad.zero_() # remember to zero the gradients | |||||
| loss.backward() | |||||
| w.data = w.data - 1e-2 * w.grad.data # update w | |||||
| b.data = b.data - 1e-2 * b.grad.data # update b | |||||
| print('epoch: {}, loss: {}'.format(e, loss.data[0])) | |||||
| y_ = linear_model(x_train) | |||||
| plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label='real') | |||||
| plt.plot(x_train.data.numpy(), y_.data.numpy(), 'ro', label='estimated') | |||||
| plt.legend() | |||||
| # After 10 updates, the red predictions already fit the blue true values fairly well. | |||||
| # | |||||
| # You have now built your first machine learning model. Keep it up and finish the small exercise below. | |||||
| # **Exercise:** | |||||
| # | |||||
| # Restart the notebook and run the linear regression model above, but vary the number of training iterations and the learning rate and compare the results | |||||
| # ## Polynomial regression | |||||
| # Let's go one step further and talk about polynomial regression. What is it? Very simple: starting from the linear regression model above | |||||
| # | |||||
| # $$ | |||||
| # \hat{y} = w x + b | |||||
| # $$ | |||||
| # | |||||
| # this is a first-degree polynomial in x. It is rather limited and cannot fit more complicated functions, so we can use a higher-degree model such as | |||||
| # | |||||
| # $$ | |||||
| # \hat{y} = w_0 + w_1 x + w_2 x^2 + w_3 x^3 + \cdots | |||||
| # $$ | |||||
| # | |||||
| # which can fit more complicated functions. This is the polynomial model, which uses higher powers of x. Multivariate regression models have the same form, except that besides x they use additional variables such as y, z and so on; their loss function is the same as that of simple linear regression. | |||||
| # | |||||
| # | |||||
| # First let's define the target function to fit, which is a cubic polynomial | |||||
| # + | |||||
| # define the target function (a cubic polynomial) | |||||
| w_target = np.array([0.5, 3, 2.4]) # define the parameters | |||||
| b_target = np.array([0.9]) # define the parameters | |||||
| f_des = 'y = {:.2f} + {:.2f} * x + {:.2f} * x^2 + {:.2f} * x^3'.format( | |||||
| b_target[0], w_target[0], w_target[1], w_target[2]) # the formula of the function as a string, printed below | |||||
| print(f_des) | |||||
| # - | |||||
| # Let's first plot this polynomial | |||||
| # + | |||||
| # plot the curve of this function | |||||
| x_sample = np.arange(-3, 3.1, 0.1) | |||||
| y_sample = b_target[0] + w_target[0] * x_sample + w_target[1] * x_sample ** 2 + w_target[2] * x_sample ** 3 | |||||
| plt.plot(x_sample, y_sample, label='real curve') | |||||
| plt.legend() | |||||
| # - | |||||
| # Next we build the dataset. We need x and y, and since the target is a cubic polynomial we use the features $x,\ x^2, x^3$ | |||||
| # + | |||||
| # build the data x and y | |||||
| # x is a matrix of the form [x, x^2, x^3] | |||||
| # y is the value of the function [y] | |||||
| x_train = np.stack([x_sample ** i for i in range(1, 4)], axis=1) | |||||
| x_train = torch.from_numpy(x_train).float() # convert to a float tensor | |||||
| y_train = torch.from_numpy(y_sample).float().unsqueeze(1) # convert to a float tensor | |||||
| # - | |||||
| # Next we define the parameters to optimize, namely the $w_i$ in the function above | |||||
| # + | |||||
| # define the parameters and the model | |||||
| w = Variable(torch.randn(3, 1), requires_grad=True) | |||||
| b = Variable(torch.zeros(1), requires_grad=True) | |||||
| # convert x and y to Variable | |||||
| x_train = Variable(x_train) | |||||
| y_train = Variable(y_train) | |||||
| def multi_linear(x): | |||||
| return torch.mm(x, w) + b | |||||
| # - | |||||
| # We can compare the model before any update with the true model | |||||
| # + | |||||
| # plot the model before any update | |||||
| y_pred = multi_linear(x_train) | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label='fitting curve', color='r') | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_sample, label='real curve', color='b') | |||||
| plt.legend() | |||||
| # - | |||||
| # The two curves clearly differ, so let's compute the error between them | |||||
| # compute the error; it is the same as for the one-variable linear model, and get_loss was already defined above | |||||
| loss = get_loss(y_pred, y_train) | |||||
| print(loss) | |||||
| # automatic differentiation | |||||
| loss.backward() | |||||
| # look at the gradients of w and b | |||||
| print(w.grad) | |||||
| print(b.grad) | |||||
| # update the parameters | |||||
| w.data = w.data - 0.001 * w.grad.data | |||||
| b.data = b.data - 0.001 * b.grad.data | |||||
| # + | |||||
| # plot the model after one update | |||||
| y_pred = multi_linear(x_train) | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label='fitting curve', color='r') | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_sample, label='real curve', color='b') | |||||
| plt.legend() | |||||
| # - | |||||
| # Since we have only updated once, the two curves still differ; let's run 100 iterations | |||||
| # run 100 parameter updates | |||||
| for e in range(100): | |||||
| y_pred = multi_linear(x_train) | |||||
| loss = get_loss(y_pred, y_train) | |||||
| w.grad.data.zero_() | |||||
| b.grad.data.zero_() | |||||
| loss.backward() | |||||
| # update the parameters | |||||
| w.data = w.data - 0.001 * w.grad.data | |||||
| b.data = b.data - 0.001 * b.grad.data | |||||
| if (e + 1) % 20 == 0: | |||||
| print('epoch {}, Loss: {:.5f}'.format(e+1, loss.data[0])) | |||||
| # After the updates the loss is already very small; let's plot the curves again to compare | |||||
| # + | |||||
| # plot the result after the updates | |||||
| y_pred = multi_linear(x_train) | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label='fitting curve', color='r') | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_sample, label='real curve', color='b') | |||||
| plt.legend() | |||||
| # - | |||||
| # After 100 updates, the fitted curve and the true curve almost completely coincide | |||||
| # **Exercise: the example above uses a cubic polynomial; try fitting it with a quadratic polynomial and see how well you can do** | |||||
| # | |||||
| # **Hint: use the parameter `w = torch.randn(2, 1)` and rebuild the x dataset accordingly** | |||||
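| # One possible sketch of this exercise (my own; it regenerates the target data so the cell stands alone) fits the cubic curve with only the features x and x^2: | |||||
| # + | |||||
| import numpy as np | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| w_target = np.array([0.5, 3, 2.4])  # same cubic target as above | |||||
| b_target = np.array([0.9]) | |||||
| x_sample = np.arange(-3, 3.1, 0.1) | |||||
| y_sample = b_target[0] + w_target[0] * x_sample + w_target[1] * x_sample ** 2 + w_target[2] * x_sample ** 3 | |||||
| x_quad = Variable(torch.from_numpy(np.stack([x_sample ** i for i in range(1, 3)], axis=1)).float())  # features [x, x^2] | |||||
| y_quad = Variable(torch.from_numpy(y_sample).float().unsqueeze(1)) | |||||
| w = Variable(torch.randn(2, 1), requires_grad=True) | |||||
| b = Variable(torch.zeros(1), requires_grad=True) | |||||
| for e in range(100): | |||||
|     y_pred = torch.mm(x_quad, w) + b | |||||
|     loss = torch.mean((y_pred - y_quad) ** 2) | |||||
|     if w.grad is not None: | |||||
|         w.grad.data.zero_() | |||||
|         b.grad.data.zero_() | |||||
|     loss.backward() | |||||
|     w.data = w.data - 0.001 * w.grad.data | |||||
|     b.data = b.data - 0.001 * b.grad.data | |||||
| print(loss)  # typically stays well above zero, since x^3 cannot be represented by x and x^2 | |||||
| # - | |||||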
| @@ -0,0 +1,72 @@ | |||||
| import numpy as np | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| import matplotlib.pyplot as plt | |||||
| """ | |||||
| Using pytorch to do linear regression | |||||
| """ | |||||
| torch.manual_seed(2018) | |||||
| # model's real-parameters | |||||
| w_target = 3 | |||||
| b_target = 10 | |||||
| # generate data | |||||
| n_data = 100 | |||||
| x_train = np.random.rand(n_data, 1)*20 - 10 | |||||
| y_train = w_target*x_train + b_target + (np.random.randn(n_data, 1)*10-5.0) | |||||
| # draw the data | |||||
| plt.plot(x_train, y_train, 'bo') | |||||
| plt.show() | |||||
| # convert to tensor | |||||
| x_train = torch.from_numpy(x_train).float() | |||||
| y_train = torch.from_numpy(y_train).float() | |||||
| # define model parameters | |||||
| w = Variable(torch.randn(1).float(), requires_grad=True) | |||||
| b = Variable(torch.zeros(1).float(), requires_grad=True) | |||||
| # construct the linear model | |||||
| x_train = Variable(x_train) | |||||
| y_train = Variable(y_train) | |||||
| # define model's function | |||||
| def linear_model(x): | |||||
| return x*w + b | |||||
| # define the loss function | |||||
| def get_loss(y_pred, y): | |||||
| return torch.mean((y_pred - y)**2) | |||||
| # upgrade parameters | |||||
| eta = 1e-2 | |||||
| for i in range(100): | |||||
| y_pred = linear_model(x_train) | |||||
| loss = get_loss(y_pred, y_train) | |||||
| loss.backward() | |||||
| w.data = w.data - eta*w.grad.data | |||||
| b.data = b.data - eta*b.grad.data | |||||
| w.grad.zero_() | |||||
| b.grad.zero_() | |||||
| if i % 10 == 0: | |||||
| print("epoch: %3d, loss: %f" % (i, loss.data[0])) | |||||
| # draw the results | |||||
| plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label="Real") | |||||
| plt.plot(x_train.data.numpy(), y_pred.data.numpy(), 'ro', label="Estimated") | |||||
| plt.legend() | |||||
| plt.show() | |||||
| @@ -0,0 +1,92 @@ | |||||
| import numpy as np | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| import matplotlib.pyplot as plt | |||||
| """ | |||||
| Using pytorch to do linear regression | |||||
| """ | |||||
| torch.manual_seed(2018) | |||||
| # generate data | |||||
| x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168], | |||||
| [9.779], [6.182], [7.59], [2.167], [7.042], | |||||
| [10.791], [5.313], [7.997], [3.1]], dtype=np.float32) | |||||
| y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573], | |||||
| [3.366], [2.596], [2.53], [1.221], [2.827], | |||||
| [3.465], [1.65], [2.904], [1.3]], dtype=np.float32) | |||||
| # draw the data | |||||
| plt.plot(x_train, y_train, 'bo') | |||||
| plt.show() | |||||
| # convert to tensor | |||||
| x_train = torch.from_numpy(x_train) | |||||
| y_train = torch.from_numpy(y_train) | |||||
| # define model parameters | |||||
| w = Variable(torch.randn(1), requires_grad=True) | |||||
| b = Variable(torch.zeros(1), requires_grad=True) | |||||
| # construct the linear model | |||||
| x_train = Variable(x_train) | |||||
| y_train = Variable(y_train) | |||||
| def linear_model(x): | |||||
| return x*w + b | |||||
| # first prediction | |||||
| y_pred = linear_model(x_train) | |||||
| # draw the real & predicted data | |||||
| plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label="Real") | |||||
| plt.plot(x_train.data.numpy(), y_pred.data.numpy(), 'ro', label="Estimated") | |||||
| plt.legend() | |||||
| plt.show() | |||||
| # define the loss function | |||||
| def get_loss(y_pred, y): | |||||
| return torch.mean((y_pred - y)**2) | |||||
| loss = get_loss(y_pred, y_train) | |||||
| print("loss = %f" % float(loss)) | |||||
| # auto-grad | |||||
| loss.backward() | |||||
| print("w.grad = %f" % float(w.grad)) | |||||
| print("b.grad = %f" % float(b.grad)) | |||||
| # upgrade parameters | |||||
| eta = 1e-2 | |||||
| w.data = w.data - eta*w.grad.data | |||||
| b.data = b.data - eta*b.grad.data | |||||
| y_pred = linear_model(x_train) | |||||
| plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label="Real") | |||||
| plt.plot(x_train.data.numpy(), y_pred.data.numpy(), 'ro', label="Estimated") | |||||
| plt.legend() | |||||
| plt.show() | |||||
| for i in range(10): | |||||
| y_pred = linear_model(x_train) | |||||
| loss = get_loss(y_pred, y_train) | |||||
| w.grad.zero_() | |||||
| b.grad.zero_() | |||||
| loss.backward() | |||||
| w.data = w.data - eta*w.grad.data | |||||
| b.data = b.data - eta*b.grad.data | |||||
| print("epoch: %3d, loss: %f" % (i, loss.data[0])) | |||||
| @@ -0,0 +1,77 @@ | |||||
| import numpy as np | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| import matplotlib.pyplot as plt | |||||
| """ | |||||
| Polynomial fitting by pytorch | |||||
| """ | |||||
| # define the real model's parameters | |||||
| w_target = np.array([0.5, 3, 2.4]) | |||||
| b_target = np.array([0.9]) | |||||
| f_des = "y = %f + %f * x + %f * x^2 + %f * x^3" % ( | |||||
| b_target[0], | |||||
| w_target[0], w_target[1], w_target[2]) | |||||
| print(f_des) | |||||
| # draw the data | |||||
| x_sample = np.arange(-3, 3.1, 0.1) | |||||
| y_sample = b_target[0] + w_target[0]*x_sample + w_target[1]*x_sample**2 + w_target[2]*x_sample**3 | |||||
| plt.plot(x_sample, y_sample, label="Real") | |||||
| plt.legend() | |||||
| plt.show() | |||||
| # construct variables | |||||
| x_train = np.stack([x_sample**i for i in range(1, 4)], axis=1) | |||||
| x_train = torch.from_numpy(x_train).float() | |||||
| y_train = torch.from_numpy(y_sample).float().unsqueeze(1) | |||||
| # define model parameters | |||||
| w = Variable(torch.randn(3, 1).float(), requires_grad=True) | |||||
| b = Variable(torch.zeros(1).float(), requires_grad=True) | |||||
| x_train = Variable(x_train) | |||||
| y_train = Variable(y_train) | |||||
| # define the model function & loss function | |||||
| def polynomial(x): | |||||
| return torch.mm(x, w) + b | |||||
| def get_loss(y_pred, y): | |||||
| return torch.mean((y_pred-y)**2) | |||||
| # begin iterative optimization | |||||
| eta = 0.001 | |||||
| for i in range(100): | |||||
| y_pred = polynomial(x_train) | |||||
| loss = get_loss(y_pred, y_train) | |||||
| loss.backward() | |||||
| w.data = w.data - eta*w.grad.data | |||||
| b.data = b.data - eta*b.grad.data | |||||
| w.grad.data.zero_() | |||||
| b.grad.data.zero_() | |||||
| if i % 10 == 0: | |||||
| print("epoch: %4d, loss: %f" % (i, loss.data[0])) | |||||
| # draw the results | |||||
| y_pred = polynomial(x_train) | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_sample, label="Real", color='b') | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label="Fitting", color='r') | |||||
| plt.legend() | |||||
| plt.show() | |||||
| @@ -0,0 +1,105 @@ | |||||
| import numpy as np | |||||
| import torch | |||||
| from torch.autograd import Variable | |||||
| import matplotlib.pyplot as plt | |||||
| """ | |||||
| Polynomial fitting by pytorch | |||||
| """ | |||||
| # define the model's parameters | |||||
| w_target = np.array([0.5, 3, 2.4]) | |||||
| b_target = np.array([0.9]) | |||||
| f_des = "y = %f + %f * x + %f * x^2 + %f * x^3" % ( | |||||
| b_target[0], | |||||
| w_target[0], w_target[1], w_target[2]) | |||||
| print(f_des) | |||||
| # draw the data | |||||
| x_sample = np.arange(-3, 3.1, 0.1) | |||||
| y_sample = b_target[0] + w_target[0]*x_sample + w_target[1]*x_sample**2 + w_target[2]*x_sample**3 | |||||
| plt.plot(x_sample, y_sample, label="Real") | |||||
| plt.legend() | |||||
| plt.show() | |||||
| # construct variables | |||||
| x_train = np.stack([x_sample**i for i in range(1, 4)], axis=1) | |||||
| x_train = torch.from_numpy(x_train).float() | |||||
| y_train = torch.from_numpy(y_sample).float().unsqueeze(1) | |||||
| # define model parameters | |||||
| w = Variable(torch.randn(3, 1).float(), requires_grad=True) | |||||
| b = Variable(torch.zeros(1).float(), requires_grad=True) | |||||
| x_train = Variable(x_train) | |||||
| y_train = Variable(y_train) | |||||
| print(w.shape) | |||||
| print(b.shape) | |||||
| print(x_train.shape) | |||||
| print(y_train.shape) | |||||
| def polynomial(x): | |||||
| return torch.mm(x, w) + b | |||||
| def get_loss(y_pred, y): | |||||
| return torch.mean((y_pred-y)**2) | |||||
| # draw initial graph | |||||
| y_pred = polynomial(x_train) | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_sample, label="Real", color='b') | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label="Fitting", color='r') | |||||
| plt.legend() | |||||
| plt.show() | |||||
| # compute loss | |||||
| loss = get_loss(y_pred, y_train) | |||||
| print("Loss = %f" % loss) | |||||
| loss.backward() | |||||
| print(w.grad) | |||||
| print(b.grad) | |||||
| eta = 0.001 | |||||
| w.data = w.data - eta*w.grad.data | |||||
| b.data = b.data - eta*b.grad.data | |||||
| # second draw | |||||
| y_pred = polynomial(x_train) | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_sample, label="Real", color='b') | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label="Fitting", color='r') | |||||
| plt.legend() | |||||
| plt.show() | |||||
| for i in range(100): | |||||
| y_pred = polynomial(x_train) | |||||
| loss = get_loss(y_pred, y_train) | |||||
| w.grad.data.zero_() | |||||
| b.grad.data.zero_() | |||||
| loss.backward() | |||||
| w.data = w.data - eta*w.grad.data | |||||
| b.data = b.data - eta*b.grad.data | |||||
| print("epoch: %4d, loss: %f" % (i, loss.data[0])) | |||||
| # second draw | |||||
| y_pred = polynomial(x_train) | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_sample, label="Real", color='b') | |||||
| plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label="Fitting", color='r') | |||||
| plt.legend() | |||||
| plt.show() | |||||
| @@ -34,7 +34,7 @@ trainloader = t.utils.data.DataLoader( | |||||
| # 测试集 | # 测试集 | ||||
| testset = tv.datasets.CIFAR10( | testset = tv.datasets.CIFAR10( | ||||
| dataset_path, train=False, download=True, transform=transform) | |||||
| root=dataset_path, train=False, download=True, transform=transform) | |||||
| testloader = t.utils.data.DataLoader( | testloader = t.utils.data.DataLoader( | ||||
| testset, | testset, | ||||
| @@ -69,7 +69,7 @@ class Net(nn.Module): | |||||
| net = Net() | net = Net() | ||||
| print(net) | print(net) | ||||
| criterion = nn.CrossEntropyLoss() # 交叉熵损失函数 | |||||
| criterion = nn.CrossEntropyLoss() | |||||
| optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) | optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) | ||||
| t.set_num_threads(8) | t.set_num_threads(8) | ||||
| @@ -1,16 +1,18 @@ | |||||
| import torch | import torch | ||||
| from torch import nn, optim | from torch import nn, optim | ||||
| from torch.autograd import Variable | from torch.autograd import Variable | ||||
| from torch.utils.data import DataLoader | from torch.utils.data import DataLoader | ||||
| import torch.nn.functional as F | |||||
| from torchvision import transforms | from torchvision import transforms | ||||
| from torchvision import datasets | from torchvision import datasets | ||||
| # set parameters | |||||
| batch_size = 32 | batch_size = 32 | ||||
| learning_rate = 1e-2 | learning_rate = 1e-2 | ||||
| num_epoches = 50 | num_epoches = 50 | ||||
| # 下载训练集 MNIST 手写数字训练集 | |||||
| # download & load MNIST dataset | |||||
| dataset_path = "../data/mnist" | dataset_path = "../data/mnist" | ||||
| train_dataset = datasets.MNIST( | train_dataset = datasets.MNIST( | ||||
| @@ -23,70 +25,62 @@ train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) | |||||
| test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) | test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) | ||||
| # 定义简单的前馈神经网络 | |||||
| class Neuralnetwork(nn.Module): | |||||
| # Define the network | |||||
| class NeuralNetwork(nn.Module): | |||||
| def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim): | def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim): | ||||
| super(Neuralnetwork, self).__init__() | |||||
| super(NeuralNetwork, self).__init__() | |||||
| self.layer1 = nn.Linear(in_dim, n_hidden_1) | self.layer1 = nn.Linear(in_dim, n_hidden_1) | ||||
| self.layer2 = nn.Linear(n_hidden_1, n_hidden_2) | self.layer2 = nn.Linear(n_hidden_1, n_hidden_2) | ||||
| self.layer3 = nn.Linear(n_hidden_2, out_dim) | self.layer3 = nn.Linear(n_hidden_2, out_dim) | ||||
| def forward(self, x): | def forward(self, x): | ||||
| x = self.layer1(x) | |||||
| x = self.layer2(x) | |||||
| x = F.relu(self.layer1(x)) | |||||
| x = F.relu(self.layer2(x)) | |||||
| x = self.layer3(x) | x = self.layer3(x) | ||||
| return x | return x | ||||
| model = Neuralnetwork(28 * 28, 300, 100, 10) | |||||
| if torch.cuda.is_available(): | |||||
| model = model.cuda() | |||||
| # create network & define loss function | |||||
| model = NeuralNetwork(28 * 28, 300, 100, 10) | |||||
| criterion = nn.CrossEntropyLoss() | criterion = nn.CrossEntropyLoss() | ||||
| optimizer = optim.SGD(model.parameters(), lr=learning_rate) | optimizer = optim.SGD(model.parameters(), lr=learning_rate) | ||||
| # train | |||||
| for epoch in range(num_epoches): | for epoch in range(num_epoches): | ||||
| print('epoch {}'.format(epoch + 1)) | |||||
| print('*' * 10) | |||||
| print("epoch %6d" % int(epoch+1)) | |||||
| print('-' * 40) | |||||
| running_loss = 0.0 | running_loss = 0.0 | ||||
| running_acc = 0.0 | running_acc = 0.0 | ||||
| for i, data in enumerate(train_loader, 1): | for i, data in enumerate(train_loader, 1): | ||||
| # FIXME: label need to change one-hot coding | |||||
| img, label = data | img, label = data | ||||
| img = img.view(img.size(0), -1) | |||||
| target = torch.zeros(label.size(0), 10) | |||||
| target = target.scatter_(1, label.data, 1) | |||||
| if torch.cuda.is_available(): | |||||
| img = Variable(img).cuda() | |||||
| label = Variable(label).cuda() | |||||
| else: | |||||
| img = Variable(img) | |||||
| label = Variable(label) | |||||
| img = Variable(img.view(img.size(0), -1)) | |||||
| label = Variable(label) | |||||
| # 向前传播 | # 向前传播 | ||||
| optimizer.zero_grad() | |||||
| out = model(img) | out = model(img) | ||||
| loss = criterion(out, label) | loss = criterion(out, label) | ||||
| running_loss += loss.data[0] * label.size(0) | running_loss += loss.data[0] * label.size(0) | ||||
| _, pred = torch.max(out, 1) | |||||
| num_correct = (pred == label).sum() | |||||
| running_acc += num_correct.data[0] | |||||
| pred = out.data.max(1, keepdim=True)[1] | |||||
| running_acc += float(pred.eq(label.data.view_as(pred)).cpu().sum()) | |||||
| # 向后传播 | # 向后传播 | ||||
| optimizer.zero_grad() | |||||
| loss.backward() | loss.backward() | ||||
| optimizer.step() | optimizer.step() | ||||
| if i % 300 == 0: | if i % 300 == 0: | ||||
| print('[{}/{}] Loss: {:.6f}, Acc: {:.6f}'.format( | |||||
| epoch + 1, num_epoches, running_loss / (batch_size * i), | |||||
| running_acc / (batch_size * i))) | |||||
| print('Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format( | |||||
| epoch + 1, running_loss / (len(train_dataset)), running_acc / (len( | |||||
| train_dataset)))) | |||||
| print('[{}/{}] Loss: {:.6f}, Acc: {:.2f}%'.format( | |||||
| epoch + 1, num_epoches, | |||||
| 1.0*running_loss / (batch_size * i), | |||||
| 100.0*running_acc / (batch_size * i))) | |||||
| # do test | |||||
| model.eval() | model.eval() | ||||
| eval_loss = 0. | eval_loss = 0. | ||||
| eval_acc = 0. | eval_acc = 0. | ||||
| @@ -94,22 +88,23 @@ for epoch in range(num_epoches): | |||||
| for data in test_loader: | for data in test_loader: | ||||
| img, label = data | img, label = data | ||||
| img = img.view(img.size(0), -1) | img = img.view(img.size(0), -1) | ||||
| if torch.cuda.is_available(): | |||||
| img = Variable(img, volatile=True).cuda() | |||||
| label = Variable(label, volatile=True).cuda() | |||||
| else: | |||||
| img = Variable(img, volatile=True) | |||||
| label = Variable(label, volatile=True) | |||||
| img = Variable(img) | |||||
| label = Variable(label) | |||||
| out = model(img) | out = model(img) | ||||
| loss = criterion(out, label) | loss = criterion(out, label) | ||||
| eval_loss += loss.data[0] * label.size(0) | eval_loss += loss.data[0] * label.size(0) | ||||
| _, pred = torch.max(out, 1) | |||||
| num_correct = (pred == label).sum() | |||||
| eval_acc += num_correct.data[0] | |||||
| pred = out.data.max(1, keepdim=True)[1] | |||||
| eval_acc += float(pred.eq(label.data.view_as(pred)).cpu().sum()) | |||||
| print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len( | |||||
| test_dataset)), eval_acc / (len(test_dataset)))) | |||||
| print('\nTest Loss: {:.6f}, Acc: {:.2f}%'.format( | |||||
| 1.0*eval_loss / (len(test_dataset)), | |||||
| 100.0*eval_acc / (len(test_dataset)))) | |||||
| print() | print() | ||||
| # 保存模型 | |||||
| # save model | |||||
| torch.save(model.state_dict(), './model_Neural_Network.pth') | torch.save(model.state_dict(), './model_Neural_Network.pth') | ||||
| @@ -1,5 +1,3 @@ | |||||
| from __future__ import print_function | |||||
| import torch | import torch | ||||
| import torch.nn as nn | import torch.nn as nn | ||||
| import torch.nn.functional as F | import torch.nn.functional as F | ||||
| @@ -58,7 +56,8 @@ optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) | |||||
| def train(epoch): | def train(epoch): | ||||
| #model.train() | |||||
| model.train() | |||||
| for batch_idx, (data, target) in enumerate(train_loader): | for batch_idx, (data, target) in enumerate(train_loader): | ||||
| data, target = Variable(data), Variable(target) | data, target = Variable(data), Variable(target) | ||||
| optimizer.zero_grad() | optimizer.zero_grad() | ||||
| @@ -66,30 +65,33 @@ def train(epoch): | |||||
| loss = criterion(output, target) | loss = criterion(output, target) | ||||
| loss.backward() | loss.backward() | ||||
| optimizer.step() | optimizer.step() | ||||
| if batch_idx % 10 == 0: | |||||
| print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( | |||||
| if batch_idx % 100 == 0: | |||||
| print("Train epoch: %6d [%6d/%6d (%.0f %%)] \t Loss: %.6f" % ( | |||||
| epoch, batch_idx * len(data), len(train_loader.dataset), | epoch, batch_idx * len(data), len(train_loader.dataset), | ||||
| 100. * batch_idx / len(train_loader), loss.data[0])) | |||||
| 100. * batch_idx / len(train_loader), loss.data[0]) ) | |||||
| def test(): | def test(): | ||||
| model.eval() | model.eval() | ||||
| test_loss = 0 | |||||
| correct = 0 | |||||
| test_loss = 0.0 | |||||
| correct = 0.0 | |||||
| for data, target in test_loader: | for data, target in test_loader: | ||||
| data, target = Variable(data, volatile=True), Variable(target) | |||||
| data, target = Variable(data), Variable(target) | |||||
| output = model(data) | output = model(data) | ||||
| # sum up batch loss | # sum up batch loss | ||||
| test_loss += criterion(output, target).data[0] | test_loss += criterion(output, target).data[0] | ||||
| # get the index of the max | # get the index of the max | ||||
| pred = output.data.max(1, keepdim=True)[1] | pred = output.data.max(1, keepdim=True)[1] | ||||
| correct += pred.eq(target.data.view_as(pred)).cpu().sum() | |||||
| correct += float(pred.eq(target.data.view_as(pred)).cpu().sum()) | |||||
| test_loss /= len(test_loader.dataset) | test_loss /= len(test_loader.dataset) | ||||
| print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( | |||||
| test_loss, correct, len(test_loader.dataset), | |||||
| 100. * correct / len(test_loader.dataset))) | |||||
| print("\nTest set: Average loss: %.4f, Accuracy: %6d/%6d (%4.2f %%)\n" % | |||||
| (test_loss, | |||||
| correct, len(test_loader.dataset), | |||||
| 100.0*correct / len(test_loader.dataset)) ) | |||||
| for epoch in range(1, 10): | for epoch in range(1, 10): | ||||
| train(epoch) | train(epoch) | ||||
| @@ -0,0 +1,6 @@ | |||||
| import torch | |||||
| a = torch.tensor([1, 2, 3, 4, 3.5]) | |||||
| f = 1.0 * a.sum() / 10.0 | |||||
| print("f = %f" % f) | |||||