@@ -113,6 +113,78 @@ plt.legend()
plt.show()
# -
# ## How to use an iterative method to estimate the parameters?
#
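# Gradient descent minimizes the same squared loss $L = \sum_{i=1}^{N} (y_i - a x_i - b)^2$.
# For a single sample $(x_j, y_j)$ the partial derivatives are
#
# $$\frac{\partial L_j}{\partial a} = -2 (y_j - a x_j - b)\, x_j, \qquad
# \frac{\partial L_j}{\partial b} = -2 (y_j - a x_j - b),$$
#
# so stepping against the gradient with learning rate $\epsilon$ gives exactly the
# per-sample updates used in the code below.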
# +
n_epoch = 3000   # number of epochs
a, b = 1, 1      # initial parameters
epsilon = 0.001  # learning rate

for i in range(n_epoch):
    for j in range(N):
        a = a + epsilon*2*(Y[j] - a*X[j] - b)*X[j]
        b = b + epsilon*2*(Y[j] - a*X[j] - b)
    L = 0
    for j in range(N):
        L = L + (Y[j]-a*X[j]-b)**2
    print("epoch %4d: loss = %f, a = %f, b = %f" % (i, L, a, b))

x_min = np.min(X)
x_max = np.max(X)
y_min = a * x_min + b
y_max = a * x_max + b
plt.scatter(X, Y, label='original data')
plt.plot([x_min, x_max], [y_min, y_max], 'r', label='model')
plt.legend()
plt.show()
# -
# ## How to show the iterative process?
# +
# %matplotlib nbagg
import matplotlib.pyplot as plt
import matplotlib.animation as animation

n_epoch = 3000   # number of epochs
a, b = 1, 1      # initial parameters
epsilon = 0.001  # learning rate

fig = plt.figure()
imgs = []

for i in range(n_epoch):
    for j in range(N):
        a = a + epsilon*2*(Y[j] - a*X[j] - b)*X[j]
        b = b + epsilon*2*(Y[j] - a*X[j] - b)
    L = 0
    for j in range(N):
        L = L + (Y[j]-a*X[j]-b)**2
    #print("epoch %4d: loss = %f, a = %f, b = %f" % (i, L, a, b))

    if i % 50 == 0:
        x_min = np.min(X)
        x_max = np.max(X)
        y_min = a * x_min + b
        y_max = a * x_max + b
        # ArtistAnimation expects a list of artists per frame; collect both
        # the scatter and the fitted line (plt.plot returns a list of lines).
        scat = plt.scatter(X, Y, label='original data')
        line, = plt.plot([x_min, x_max], [y_min, y_max], 'r', label='model')
        imgs.append([scat, line])

ani = animation.ArtistAnimation(fig, imgs)
plt.show()
# -
# ## How to use the batch update method?
#
# If some data points are outliers, the per-sample (stochastic) update above can be pulled far off course by a single bad sample, because each step follows the gradient of one observation only. The batch update instead averages the gradient over all samples before changing the parameters, which smooths out the influence of individual outliers; see the sketch below.
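# A minimal sketch of the batch version, assuming the same `X`, `Y`, and `N` as above
# (the gradient is averaged over all samples, so a larger learning rate stays stable):
# +
a, b = 1, 1      # initial parameters
epsilon = 0.1    # learning rate
n_epoch = 3000   # number of epochs
for i in range(n_epoch):
    err = Y - a*X - b                        # residuals of all samples at once
    a = a + epsilon * 2 * np.mean(err * X)   # averaged gradient step for a
    b = b + epsilon * 2 * np.mean(err)       # averaged gradient step for b
L = np.sum((Y - a*X - b)**2)
print("batch: loss = %f, a = %f, b = %f" % (L, a, b))
# -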
# ## How to fit polynomial function?
#
# If we observe a missile at several points in time, how can we estimate its trajectory? According to physics, the trajectory can be formulated as:
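# (A projectile trajectory is a quadratic in time, $y(t) = y_0 + v_0 t - \frac{1}{2} g t^2$,
# so it can be recovered by polynomial least squares. A minimal sketch with `np.polyfit`,
# where `t_obs` and `y_obs` are synthetic observations generated only for illustration:)
# +
t_obs = np.linspace(0, 4, 20)             # hypothetical observation times
y_obs = 1.0 + 30.0*t_obs - 0.5*9.8*t_obs**2 + 0.5*np.random.randn(t_obs.size)  # noisy synthetic heights
c2, c1, c0 = np.polyfit(t_obs, y_obs, 2)  # least-squares quadratic fit
print("y(t) = %.2f t^2 + %.2f t + %.2f" % (c2, c1, c0))
# -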
@@ -217,8 +289,9 @@ Y_est = regr.predict(X_test)
print("Y_est = ", Y_est)
print("Y_test = ", Y_test)
err = (Y_est - Y_test)**2
err2 = sklearn.metrics.mean_squared_error(Y_test, Y_est)
score = regr.score(X_test, Y_test)
print("err = %f (%f), score = %f" % (np.sqrt(np.sum(err))/N_test, np.sqrt(err2), score))
# plot data
@@ -5,12 +5,28 @@
"metadata": {},
"source": [
"# Logistic Regression\n",
"\n",
| "逻辑回归(Logistic Regression, LR)模型其实仅在线性回归的基础上,套用了一个逻辑函数,但也就由于这个逻辑函数,使得逻辑回归模型成为了机器学习领域一颗耀眼的明星,更是计算广告学的核心。本节主要详述逻辑回归模型的基础。\n", | |||||
| "\n", | |||||
| "\n", | |||||
| "## 1 逻辑回归模型\n", | |||||
| "回归是一种比较容易理解的模型,就相当于$y=f(x)$,表明自变量$x$与因变量$y$的关系。最常见问题有如医生治病时的望、闻、问、切,之后判定病人是否生病或生了什么病,其中的望闻问切就是获取自变量$x$,即特征数据,判断是否生病就相当于获取因变量$y$,即预测分类。\n", | |||||
| "\n", | |||||
| "最简单的回归是线性回归,在此借用Andrew NG的讲义,有如图所示,$X$为数据点——肿瘤的大小,$Y$为观测值——是否是恶性肿瘤。通过构建线性回归模型,如$h_\\theta(x)$所示,构建线性回归模型后,即可以根据肿瘤大小,预测是否为恶性肿瘤$h_\\theta(x)) \\ge 0.5$为恶性,$h_\\theta(x) \\lt 0.5$为良性。\n", | |||||
| "\n", | |||||
| "\n", | |||||
| "\n", | |||||
| "然而线性回归的鲁棒性很差,例如在上图的数据集上建立回归,因最右边噪点的存在,使回归模型在训练集上表现都很差。这主要是由于线性回归在整个实数域内敏感度一致,而分类范围,需要在$[0,1]$。\n", | |||||
| "\n", | |||||
| "逻辑回归就是一种减小预测范围,将预测值限定为$[0,1]$间的一种回归模型,其回归方程与回归曲线如图2所示。逻辑曲线在$z=0$时,十分敏感,在$z>>0$或$z<<0$处,都不敏感,将预测值限定为$(0,1)$。\n", | |||||
| "\n", | |||||
| "\n", | |||||
| "\n" | "\n" | ||||
]
},
{
"cell_type": "code",
| "execution_count": 23, | |||||
| "execution_count": 2, | |||||
| "metadata": {}, | "metadata": {}, | ||||
| "outputs": [], | "outputs": [], | ||||
| "source": [ | "source": [ | ||||
@@ -171,6 +187,16 @@
"logistic.train(200)\n",
"plot_decision_boundary(lambda x: logistic.predict(x), data, label)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## References\n",
"\n",
"* [逻辑回归模型(Logistic Regression, LR)基础](https://www.cnblogs.com/sparkwen/p/3441197.html)\n",
"* [逻辑回归(Logistic Regression)](http://www.cnblogs.com/BYRans/p/4713624.html)"
]
}
],
"metadata": {
@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---
# # Logistic Regression
#
# The logistic regression (LR) model simply applies a logistic function on top of linear regression, but it is precisely this logistic function that has made logistic regression a shining star of machine learning and the core of computational advertising. This section covers the fundamentals of the logistic regression model.
#
#
# ## 1 The Logistic Regression Model
# Regression is an easy model to understand: it is essentially $y=f(x)$, expressing the relationship between the independent variable $x$ and the dependent variable $y$. A familiar example is a doctor who observes, listens, questions, and examines a patient and then judges whether, and with what, the patient is ill: the examination yields the independent variable $x$ (the feature data), and the diagnosis corresponds to the dependent variable $y$ (the predicted class).
#
# The simplest regression is linear regression. Borrowing a figure from Andrew Ng's lecture notes: $X$ is the data point (tumor size) and $Y$ is the observation (whether the tumor is malignant). After building the linear regression model $h_\theta(x)$, we can predict malignancy from tumor size: $h_\theta(x) \ge 0.5$ means malignant, $h_\theta(x) \lt 0.5$ means benign.
#
# ![]()
#
# However, linear regression is not robust: if we fit a regression on the data set in the figure above, the noisy point on the far right makes the model perform poorly even on the training set. This is because linear regression is equally sensitive over the whole real line, whereas a classification output needs to lie in $[0,1]$.
#
# Logistic regression is a regression model that shrinks the prediction range, restricting the predicted value to $[0,1]$; its regression equation and curve are shown in Figure 2. The logistic curve is very sensitive near $z=0$ and insensitive for $z \gg 0$ or $z \ll 0$, confining the prediction to $(0,1)$.
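#
# (In case the figure does not render, the logistic function in question is the sigmoid $g(z) = \frac{1}{1+e^{-z}}$, and the full model is $h_\theta(x) = g(\theta^T x)$.)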
#
# ![]()
#
#
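# The `Logistic` class defined below trains by stochastic gradient ascent on the
# log-likelihood: for a sample $(x, y)$ the update is
# $w \leftarrow w + \alpha (y - \sigma(w \cdot x + b)) x$ and
# $b \leftarrow b + \alpha (y - \sigma(w \cdot x + b))$, since the gradient of the
# log-likelihood of the logistic model with respect to the weights is $(y - h(x))\, x$.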
# +
# %matplotlib inline
from __future__ import division
import numpy as np
import sklearn.datasets
import matplotlib.pyplot as plt

np.random.seed(0)
# +
# load sample data
data, label = sklearn.datasets.make_moons(200, noise=0.30)
print("data = ", data[:10, :])
print("label = ", label[:10])

plt.scatter(data[:,0], data[:,1], c=label)
plt.title("Original Data")
# +
def plot_decision_boundary(predict_func, data, label):
    """Plot the classification result.

    Args:
        predict_func (callable): prediction function
        data (numpy.ndarray): training data
        label (numpy.ndarray): training labels
    """
    x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
    y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
    h = 0.01

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = predict_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(data[:, 0], data[:, 1], c=label, cmap=plt.cm.Spectral)
    plt.show()
# +
def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))


class Logistic(object):
    """Logistic regression model."""

    def __init__(self, data, label):
        self.data = data
        self.label = label
        self.data_num, n = np.shape(data)
        self.weights = np.ones(n)
        self.b = 1

    def train(self, num_iteration=150):
        """Stochastic gradient ascent.

        Args:
            num_iteration (int): number of iterations
        """
        for j in range(num_iteration):
            data_index = list(range(self.data_num))
            for i in range(self.data_num):
                # learning rate
                alpha = 0.01
                # draw a sample without replacement within this pass
                rand_index = int(np.random.uniform(0, len(data_index)))
                sample = data_index[rand_index]
                # note: the bias is added once, outside the sum over features
                error = self.label[sample] - sigmoid(np.sum(self.data[sample] * self.weights) + self.b)
                self.weights += alpha * error * self.data[sample]
                self.b += alpha * error
                del(data_index[rand_index])

    def predict(self, predict_data):
        """Prediction function."""
        result = list(map(lambda x: 1 if np.sum(self.weights * x) + self.b > 0 else 0,
                          predict_data))
        return np.array(result)
# -
logistic = Logistic(data, label)
logistic.train(200)
plot_decision_boundary(lambda x: logistic.predict(x), data, label)

# ## References
#
# * [逻辑回归模型(Logistic Regression, LR)基础](https://www.cnblogs.com/sparkwen/p/3441197.html)
# * [逻辑回归(Logistic Regression)](http://www.cnblogs.com/BYRans/p/4713624.html)
@@ -1,66 +0,0 @@
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import datasets

# load data
d = datasets.load_diabetes()
X = d.data[:, 2]
Y = d.target

# draw original data
plt.scatter(X, Y)
plt.show()

###############################################################################
# Least squares
###############################################################################
# L = \sum_{i=1, N} (y_i - a*x_i - b)^2
N = X.shape[0]
S_X2 = np.sum(X*X)
S_X = np.sum(X)
S_XY = np.sum(X*Y)
S_Y = np.sum(Y)

A1 = np.array([[S_X2, S_X], [S_X, N]])
B1 = np.array([S_XY, S_Y])
coeff = np.linalg.inv(A1).dot(B1)

x_min = np.min(X)
x_max = np.max(X)
y_min = coeff[0] * x_min + coeff[1]
y_max = coeff[0] * x_max + coeff[1]

plt.scatter(X, Y)
plt.plot([x_min, x_max], [y_min, y_max], 'r')
plt.show()

###############################################################################
# Linear regression
###############################################################################
# the loss function
# L = \sum_{i=1, N} (y_i - a*x_i - b)^2
n_train = 1000
a, b = 1, 1
epsilon = 0.001

for i in range(n_train):
    for j in range(N):
        a = a + epsilon*2*(Y[j] - a*X[j] - b)*X[j]
        b = b + epsilon*2*(Y[j] - a*X[j] - b)
    L = 0
    for j in range(N):
        L = L + (Y[j]-a*X[j]-b)**2
    print("epoch %4d: loss = %f" % (i, L))
@@ -1,70 +0,0 @@
# -*- coding=utf8 -*-
from __future__ import division
import numpy as np
import sklearn.datasets
import matplotlib.pyplot as plt

np.random.seed(0)
data, label = sklearn.datasets.make_moons(200, noise=0.30)


def plot_decision_boundary(predict_func, data, label):
    """Plot the classification result.

    Args:
        predict_func (callable): prediction function
        data (numpy.ndarray): training data
        label (numpy.ndarray): training labels
    """
    x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
    y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
    h = 0.01

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = predict_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(data[:, 0], data[:, 1], c=label, cmap=plt.cm.Spectral)
    plt.show()


def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))


class Logistic(object):
    """Logistic regression model."""

    def __init__(self, data, label):
        self.data = data
        self.label = label
        self.data_num, n = np.shape(data)
        self.weights = np.ones(n)
        self.b = 1

    def train(self, num_iteration=150):
        """Stochastic gradient ascent.

        Args:
            num_iteration (int): number of iterations
        """
        for j in range(num_iteration):
            data_index = list(range(self.data_num))
            for i in range(self.data_num):
                # learning rate
                alpha = 0.01
                rand_index = int(np.random.uniform(0, len(data_index)))
                error = self.label[rand_index] - sigmoid(sum(self.data[rand_index] * self.weights + self.b))
                self.weights += alpha * error * self.data[rand_index]
                self.b += alpha * error
                del(data_index[rand_index])

    def predict(self, predict_data):
        """Prediction function."""
        result = list(map(lambda x: 1 if sum(self.weights * x + self.b) > 0 else 0,
                          predict_data))
        return np.array(result)


if __name__ == '__main__':
    logistic = Logistic(data, label)
    logistic.train(200)
    plot_decision_boundary(lambda x: logistic.predict(x), data, label)
@@ -1,72 +0,0 @@
# -*- coding=utf8 -*-
from __future__ import division
import numpy as np
import sklearn.datasets
import matplotlib.pyplot as plt

np.random.seed(0)
data, label = sklearn.datasets.make_moons(200, noise=0.30)


def plot_decision_boundary(predict_func, data, label):
    """Plot the classification result.

    Args:
        predict_func (callable): prediction function
        data (numpy.ndarray): training data
        label (numpy.ndarray): training labels
    """
    x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
    y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
    h = 0.01

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = predict_func(np.c_[xx.ravel(), yy.ravel()])
    print(Z.shape)
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(data[:, 0], data[:, 1], c=label, cmap=plt.cm.Spectral)
    plt.show()


def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))


class Logistic(object):
    """Logistic regression model."""

    def __init__(self, data, label):
        self.data = data
        self.label = label
        self.data_num, n = np.shape(data)
        self.weights = np.ones(n)
        self.b = 1

    def train(self, num_iteration=150):
        """Stochastic gradient ascent.

        Args:
            num_iteration (int): number of iterations
        """
        for j in range(num_iteration):
            data_index = range(self.data_num)
            for i in range(self.data_num):
                # learning rate
                alpha = 0.01
                rand_index = int(np.random.uniform(0, len(data_index)))
                error = self.label[rand_index] - sigmoid(sum(self.data[rand_index] * self.weights + self.b))
                self.weights += alpha * error * self.data[rand_index]
                self.b += alpha * error

    def predict(self, predict_data):
        """Prediction function."""
        result = map(lambda x: 1 if sum(self.weights * x + self.b) > 0 else 0,
                     predict_data)
        print(result)
        return np.array(result)


if __name__ == '__main__':
    logistic = Logistic(data, label)
    logistic.train(200)
    plot_decision_boundary(lambda x: logistic.predict(x), data, label)