You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

std_model.py 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. import numpy as np
  2. def format_shape(shape):
  3. return "x".join(map(str, shape)) if shape else "()"
  4. class Node(object):
  5. def __repr__(self):
  6. return "<{} shape={} at {}>".format(
  7. type(self).__name__, format_shape(self.data.shape), hex(id(self)))
  8. class DataNode(Node):
  9. """
  10. DataNode is the parent class for Parameter and Constant nodes.
  11. You should not need to use this class directly.
  12. """
  13. def __init__(self, data):
  14. self.parents = []
  15. self.data = data
  16. def _forward(self, *inputs):
  17. return self.data
  18. @staticmethod
  19. def _backward(gradient, *inputs):
  20. return []
  21. class Parameter(DataNode):
  22. """
  23. A Parameter node stores parameters used in a neural network (or perceptron).
  24. Use the the `update` method to update parameters when training the
  25. perceptron or neural network.
  26. """
  27. def __init__(self, *shape):
  28. assert len(shape) == 2, (
  29. "Shape must have 2 dimensions, instead has {}".format(len(shape)))
  30. assert all(isinstance(dim, int) and dim > 0 for dim in shape), (
  31. "Shape must consist of positive integers, got {!r}".format(shape))
  32. limit = np.sqrt(3.0 / np.mean(shape))
  33. data = np.random.uniform(low=-limit, high=limit, size=shape)
  34. super().__init__(data)
  35. def update(self, direction, multiplier):
  36. assert isinstance(direction, Constant), (
  37. "Update direction must be a {} node, instead has type {!r}".format(
  38. Constant.__name__, type(direction).__name__))
  39. assert direction.data.shape == self.data.shape, (
  40. "Update direction shape {} does not match parameter shape "
  41. "{}".format(
  42. format_shape(direction.data.shape),
  43. format_shape(self.data.shape)))
  44. assert isinstance(multiplier, (int, float)), (
  45. "Multiplier must be a Python scalar, instead has type {!r}".format(
  46. type(multiplier).__name__))
  47. self.data += multiplier * direction.data
  48. assert np.all(np.isfinite(self.data)), (
  49. "Parameter contains NaN or infinity after update, cannot continue")
  50. class Constant(DataNode):
  51. """
  52. A Constant node is used to represent:
  53. * Input features
  54. * Output labels
  55. * Gradients computed by back-propagation
  56. You should not need to construct any Constant nodes directly; they will
  57. instead be provided by either the dataset or when you call `nn.gradients`.
  58. """
  59. def __init__(self, data):
  60. assert isinstance(data, np.ndarray), (
  61. "Data should be a numpy array, instead has type {!r}".format(
  62. type(data).__name__))
  63. assert np.issubdtype(data.dtype, np.floating), (
  64. "Data should be a float array, instead has data type {!r}".format(
  65. data.dtype))
  66. super().__init__(data)
  67. class FunctionNode(Node):
  68. """
  69. A FunctionNode represents a value that is computed based on other nodes.
  70. The FunctionNode class performs necessary book-keeping to compute gradients.
  71. """
  72. def __init__(self, *parents):
  73. assert all(isinstance(parent, Node) for parent in parents), (
  74. "Inputs must be node objects, instead got types {!r}".format(
  75. tuple(type(parent).__name__ for parent in parents)))
  76. self.parents = parents
  77. self.data = self._forward(*(parent.data for parent in parents))
  78. class Add(FunctionNode):
  79. """
  80. Adds matrices element-wise.
  81. Usage: nn.Add(x, y)
  82. Inputs:
  83. x: a Node with shape (batch_size x num_features)
  84. y: a Node with the same shape as x
  85. Output:
  86. a Node with shape (batch_size x num_features)
  87. """
  88. @staticmethod
  89. def _forward(*inputs):
  90. assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
  91. assert inputs[0].ndim == 2, (
  92. "First input should have 2 dimensions, instead has {}".format(
  93. inputs[0].ndim))
  94. assert inputs[1].ndim == 2, (
  95. "Second input should have 2 dimensions, instead has {}".format(
  96. inputs[1].ndim))
  97. assert inputs[0].shape == inputs[1].shape, (
  98. "Input shapes should match, instead got {} and {}".format(
  99. format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
  100. return inputs[0] + inputs[1]
  101. @staticmethod
  102. def _backward(gradient, *inputs):
  103. assert gradient.shape == inputs[0].shape
  104. return [gradient, gradient]
  105. class AddBias(FunctionNode):
  106. """
  107. Adds a bias vector to each feature vector
  108. Usage: nn.AddBias(features, bias)
  109. Inputs:
  110. features: a Node with shape (batch_size x num_features)
  111. bias: a Node with shape (1 x num_features)
  112. Output:
  113. a Node with shape (batch_size x num_features)
  114. """
  115. @staticmethod
  116. def _forward(*inputs):
  117. assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
  118. assert inputs[0].ndim == 2, (
  119. "First input should have 2 dimensions, instead has {}".format(
  120. inputs[0].ndim))
  121. assert inputs[1].ndim == 2, (
  122. "Second input should have 2 dimensions, instead has {}".format(
  123. inputs[1].ndim))
  124. assert inputs[1].shape[0] == 1, (
  125. "First dimension of second input should be 1, instead got shape "
  126. "{}".format(format_shape(inputs[1].shape)))
  127. assert inputs[0].shape[1] == inputs[1].shape[1], (
  128. "Second dimension of inputs should match, instead got shapes {} "
  129. "and {}".format(
  130. format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
  131. return inputs[0] + inputs[1]
  132. @staticmethod
  133. def _backward(gradient, *inputs):
  134. assert gradient.shape == inputs[0].shape
  135. return [gradient, np.sum(gradient, axis=0, keepdims=True)]
  136. class DotProduct(FunctionNode):
  137. """
  138. Batched dot product
  139. Usage: nn.DotProduct(features, weights)
  140. Inputs:
  141. features: a Node with shape (batch_size x num_features)
  142. weights: a Node with shape (1 x num_features)
  143. Output: a Node with shape (batch_size x 1)
  144. """
  145. @staticmethod
  146. def _forward(*inputs):
  147. assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
  148. assert inputs[0].ndim == 2, (
  149. "First input should have 2 dimensions, instead has {}".format(
  150. inputs[0].ndim))
  151. assert inputs[1].ndim == 2, (
  152. "Second input should have 2 dimensions, instead has {}".format(
  153. inputs[1].ndim))
  154. assert inputs[1].shape[0] == 1, (
  155. "First dimension of second input should be 1, instead got shape "
  156. "{}".format(format_shape(inputs[1].shape)))
  157. assert inputs[0].shape[1] == inputs[1].shape[1], (
  158. "Second dimension of inputs should match, instead got shapes {} "
  159. "and {}".format(
  160. format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
  161. return np.dot(inputs[0], inputs[1].T)
  162. @staticmethod
  163. def _backward(gradient, *inputs):
  164. # assert gradient.shape[0] == inputs[0].shape[0]
  165. # assert gradient.shape[1] == 1
  166. # return [np.dot(gradient, inputs[1]), np.dot(gradient.T, inputs[0])]
  167. raise NotImplementedError(
  168. "Backpropagation through DotProduct nodes is not needed in this "
  169. "assignment")
  170. class Linear(FunctionNode):
  171. """
  172. Applies a linear transformation (matrix multiplication) to the input
  173. Usage: nn.Linear(features, weights)
  174. Inputs:
  175. features: a Node with shape (batch_size x input_features)
  176. weights: a Node with shape (input_features x output_features)
  177. Output: a node with shape (batch_size x input_features)
  178. """
  179. @staticmethod
  180. def _forward(*inputs):
  181. assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
  182. assert inputs[0].ndim == 2, (
  183. "First input should have 2 dimensions, instead has {}".format(
  184. inputs[0].ndim))
  185. assert inputs[1].ndim == 2, (
  186. "Second input should have 2 dimensions, instead has {}".format(
  187. inputs[1].ndim))
  188. assert inputs[0].shape[1] == inputs[1].shape[0], (
  189. "Second dimension of first input should match first dimension of "
  190. "second input, instead got shapes {} and {}".format(
  191. format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
  192. return np.dot(inputs[0], inputs[1])
  193. @staticmethod
  194. def _backward(gradient, *inputs):
  195. assert gradient.shape[0] == inputs[0].shape[0]
  196. assert gradient.shape[1] == inputs[1].shape[1]
  197. return [np.dot(gradient, inputs[1].T), np.dot(inputs[0].T, gradient)]
  198. class ReLU(FunctionNode):
  199. """
  200. An element-wise Rectified Linear Unit nonlinearity: max(x, 0).
  201. This nonlinearity replaces all negative entries in its input with zeros.
  202. Usage: nn.ReLU(x)
  203. Input:
  204. x: a Node with shape (batch_size x num_features)
  205. Output: a Node with the same shape as x, but no negative entries
  206. """
  207. @staticmethod
  208. def _forward(*inputs):
  209. assert len(inputs) == 1, "Expected 1 input, got {}".format(len(inputs))
  210. assert inputs[0].ndim == 2, (
  211. "Input should have 2 dimensions, instead has {}".format(
  212. inputs[0].ndim))
  213. return np.maximum(inputs[0], 0)
  214. @staticmethod
  215. def _backward(gradient, *inputs):
  216. assert gradient.shape == inputs[0].shape
  217. return [gradient * np.where(inputs[0] > 0, 1.0, 0.0)]
  218. class SquareLoss(FunctionNode):
  219. """
  220. This node first computes 0.5 * (a[i,j] - b[i,j])**2 at all positions (i,j)
  221. in the inputs, which creates a (batch_size x dim) matrix. It then calculates
  222. and returns the mean of all elements in this matrix.
  223. Usage: nn.SquareLoss(a, b)
  224. Inputs:
  225. a: a Node with shape (batch_size x dim)
  226. b: a Node with shape (batch_size x dim)
  227. Output: a scalar Node (containing a single floating-point number)
  228. """
  229. @staticmethod
  230. def _forward(*inputs):
  231. assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
  232. assert inputs[0].ndim == 2, (
  233. "First input should have 2 dimensions, instead has {}".format(
  234. inputs[0].ndim))
  235. assert inputs[1].ndim == 2, (
  236. "Second input should have 2 dimensions, instead has {}".format(
  237. inputs[1].ndim))
  238. assert inputs[0].shape == inputs[1].shape, (
  239. "Input shapes should match, instead got {} and {}".format(
  240. format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
  241. return np.mean(np.square(inputs[0] - inputs[1]) / 2)
  242. @staticmethod
  243. def _backward(gradient, *inputs):
  244. assert np.asarray(gradient).ndim == 0
  245. return [
  246. gradient * (inputs[0] - inputs[1]) / inputs[0].size,
  247. gradient * (inputs[1] - inputs[0]) / inputs[0].size
  248. ]
  249. class SoftmaxLoss(FunctionNode):
  250. """
  251. A batched softmax loss, used for classification problems.
  252. IMPORTANT: do not swap the order of the inputs to this node!
  253. Usage: nn.SoftmaxLoss(logits, labels)
  254. Inputs:
  255. logits: a Node with shape (batch_size x num_classes). Each row
  256. represents the scores associated with that example belonging to a
  257. particular class. A score can be an arbitrary real number.
  258. labels: a Node with shape (batch_size x num_classes) that encodes the
  259. correct labels for the examples. All entries must be non-negative
  260. and the sum of values along each row should be 1.
  261. Output: a scalar Node (containing a single floating-point number)
  262. """
  263. @staticmethod
  264. def log_softmax(logits):
  265. log_probs = logits - np.max(logits, axis=1, keepdims=True)
  266. log_probs -= np.log(np.sum(np.exp(log_probs), axis=1, keepdims=True))
  267. return log_probs
  268. @staticmethod
  269. def _forward(*inputs):
  270. assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
  271. assert inputs[0].ndim == 2, (
  272. "First input should have 2 dimensions, instead has {}".format(
  273. inputs[0].ndim))
  274. assert inputs[1].ndim == 2, (
  275. "Second input should have 2 dimensions, instead has {}".format(
  276. inputs[1].ndim))
  277. assert inputs[0].shape == inputs[1].shape, (
  278. "Input shapes should match, instead got {} and {}".format(
  279. format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
  280. assert np.all(inputs[1] >= 0), (
  281. "All entries in the labels input must be non-negative")
  282. assert np.allclose(np.sum(inputs[1], axis=1), 1), (
  283. "Labels input must sum to 1 along each row")
  284. log_probs = SoftmaxLoss.log_softmax(inputs[0])
  285. return np.mean(-np.sum(inputs[1] * log_probs, axis=1))
  286. @staticmethod
  287. def _backward(gradient, *inputs):
  288. assert np.asarray(gradient).ndim == 0
  289. log_probs = SoftmaxLoss.log_softmax(inputs[0])
  290. return [
  291. gradient * (np.exp(log_probs) - inputs[1]) / inputs[0].shape[0],
  292. gradient * -log_probs / inputs[0].shape[0]
  293. ]
  294. def gradients(loss, parameters):
  295. """
  296. Computes and returns the gradient of the loss with respect to the provided
  297. parameters.
  298. Usage: nn.gradients(loss, parameters)
  299. Inputs:
  300. loss: a SquareLoss or SoftmaxLoss node
  301. parameters: a list (or iterable) containing Parameter nodes
  302. Output: a list of Constant objects, representing the gradient of the loss
  303. with respect to each provided parameter.
  304. """
  305. assert isinstance(loss, (SquareLoss, SoftmaxLoss)), (
  306. "Loss must be a loss node, instead has type {!r}".format(
  307. type(loss).__name__))
  308. assert all(isinstance(parameter, Parameter) for parameter in parameters), (
  309. "Parameters must all have type {}, instead got types {!r}".format(
  310. Parameter.__name__,
  311. tuple(type(parameter).__name__ for parameter in parameters)))
  312. assert not hasattr(loss, "used"), (
  313. "Loss node has already been used for backpropagation, cannot reuse")
  314. loss.used = True
  315. nodes = set()
  316. tape = []
  317. def visit(node):
  318. if node not in nodes:
  319. for parent in node.parents:
  320. visit(parent)
  321. nodes.add(node)
  322. tape.append(node)
  323. visit(loss)
  324. nodes |= set(parameters)
  325. grads = {node: np.zeros_like(node.data) for node in nodes}
  326. grads[loss] = 1.0
  327. for node in reversed(tape):
  328. parent_grads = node._backward(
  329. grads[node], *(parent.data for parent in node.parents))
  330. for parent, parent_grad in zip(node.parents, parent_grads):
  331. grads[parent] += parent_grad
  332. return [Constant(grads[parameter]) for parameter in parameters]
  333. def as_scalar(node):
  334. """
  335. Returns the value of a Node as a standard Python number. This only works
  336. for nodes with one element (e.g. SquareLoss and SoftmaxLoss, as well as
  337. DotProduct with a batch size of 1 element).
  338. """
  339. assert isinstance(node, Node), (
  340. "Input must be a node object, instead has type {!r}".format(
  341. type(node).__name__))
  342. assert node.data.size == 1, (
  343. "Node has shape {}, cannot convert to a scalar".format(
  344. format_shape(node.data.shape)))
  345. node.data = node.data.flatten()
  346. return node.data.tolist()[0]

计算机大作业

Contributors (1)