You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

05_training_test.py 5.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. import uctc.nn as nn
  2. import std_model as stdnn
  3. import numpy as np
  # Pin NumPy's global random generator so repeated runs of this script draw
  # the same random numbers.
  np.random.seed(seed=42)
  class LinearTestModel:
      """Two-layer regression network assembled from ``uctc.nn`` primitives.

      The prediction is
      ``AddBias(Linear(ReLU(AddBias(Linear(x, w1), b1)), w2), b2)`` and the
      loss is ``nn.SquareLoss`` of that prediction against the target.
      """

      def __init__(self, input_features, hidden_features, output_features):
          """Create the weights and biases of both layers.

          Each ``nn.Parameter`` receives a two-element list; presumably this
          is the parameter's shape -- TODO confirm against ``uctc.nn``.
          """
          self.w1 = nn.Parameter([input_features, hidden_features])
          self.b1 = nn.Parameter([1, hidden_features])
          self.w2 = nn.Parameter([hidden_features, output_features])
          self.b2 = nn.Parameter([1, output_features])

      def forward(self, x):
          """Return the network's prediction node for the input node ``x``."""
          layer_1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.w1), self.b1))
          return nn.AddBias(nn.Linear(layer_1, self.w2), self.b2)

      def get_loss(self, x, y):
          """Return the ``nn.SquareLoss`` node of ``forward(x)`` against ``y``."""
          return nn.SquareLoss(self.forward(x), y)

      def _gradients(self, x, y):
          """Return the loss gradients in the order ``w1, b1, w2, b2``."""
          loss = self.get_loss(x, y)
          return nn.gradients(loss, [self.w1, self.b1, self.w2, self.b2])

      def backward(self, x, y):
          """Return ``.data()`` of the gradients for ``w1, b1, w2, b2``."""
          g_w1, g_b1, g_w2, g_b2 = self._gradients(x, y)
          return g_w1.data(), g_b1.data(), g_w2.data(), g_b2.data()

      def update(self, x, y, lr):
          """Compute the gradients for one batch and apply them to all parameters.

          Every parameter's ``update`` is called with its gradient and ``lr``.
          NOTE(review): ``StdLinerTestModel.update`` passes ``-lr`` to its own
          library; confirm that ``uctc.nn`` applies the descent sign itself.
          """
          g_w1, g_b1, g_w2, g_b2 = self._gradients(x, y)
          self.w1.update(g_w1, lr)
          self.b1.update(g_b1, lr)
          self.w2.update(g_w2, lr)
          self.b2.update(g_b2, lr)

      def train(self):
          """Fit the model to ``sin(x)`` on 200 points spanning ``[-2*pi, 2*pi]``.

          The epoch count and mini-batch size come from the module-level
          ``epoch`` and ``batch_size`` variables; the learning rate is fixed
          at 0.01. After each epoch the loss of the last mini-batch is printed.
          """
          self.x = np.expand_dims(np.linspace(-2 * np.pi, 2 * np.pi, num=200), axis=1)
          self.argsort_x = np.argsort(self.x.flatten())
          self.y = np.sin(self.x)

          sample_count = self.x.shape[0]
          rng = np.random.RandomState(0)  # fixed seed keeps the batch order reproducible
          for _ in range(epoch):
              # Index inputs and targets with the same permutation so every x
              # row keeps its sin(x) target. Shuffling self.x in place would
              # detach it from self.y and invalidate self.argsort_x.
              order = rng.permutation(sample_count)
              shuffled_x = self.x[order]
              shuffled_y = self.y[order]
              for start in range(0, sample_count, batch_size):
                  cx = nn.Constant(shuffled_x[start:start + batch_size])
                  cy = nn.Constant(shuffled_y[start:start + batch_size])
                  self.update(cx, cy, 0.01)
              # Report the loss on the final mini-batch of this epoch.
              loss = self.get_loss(cx, cy)
              print(loss.data())
  class StdLinerTestModel:
      """Reference two-layer network assembled from ``std_model`` primitives.

      It has the same layer structure as ``LinearTestModel`` (Linear, AddBias,
      ReLU, then Linear, AddBias, with a SquareLoss), built on ``stdnn``.
      """

      def __init__(self, input_features, hidden_features, output_features, tmodel: LinearTestModel):
          """Create the four ``stdnn`` parameters.

          ``tmodel`` is accepted but not used: the code that copied its
          weights into this model is currently disabled.
          """
          self.w1 = stdnn.Parameter(input_features, hidden_features)
          self.b1 = stdnn.Parameter(1, hidden_features)
          self.w2 = stdnn.Parameter(hidden_features, output_features)
          self.b2 = stdnn.Parameter(1, output_features)

      def forward(self, x):
          """Return the prediction node for the input node ``x``."""
          hidden = stdnn.Linear(x, self.w1)
          hidden = stdnn.AddBias(hidden, self.b1)
          hidden = stdnn.ReLU(hidden)
          output = stdnn.Linear(hidden, self.w2)
          return stdnn.AddBias(output, self.b2)

      def get_loss(self, x, y):
          """Return the ``stdnn.SquareLoss`` node of ``forward(x)`` against ``y``."""
          prediction = self.forward(x)
          return stdnn.SquareLoss(prediction, y)

      def backward(self, x, y):
          """Return the gradients of ``w1, b1, w2, b2`` as flat Python lists."""
          params = [self.w1, self.b1, self.w2, self.b2]
          grad_w1, grad_b1, grad_w2, grad_b2 = stdnn.gradients(self.get_loss(x, y), params)
          return tuple(
              grad.data.flatten().tolist()
              for grad in (grad_w1, grad_b1, grad_w2, grad_b2)
          )

      def update(self, x, y, lr):
          """Compute the gradients for one batch and update every parameter.

          Each parameter's ``update`` is called with its gradient and ``-lr``.
          """
          params = [self.w1, self.b1, self.w2, self.b2]
          grad_w1, grad_b1, grad_w2, grad_b2 = stdnn.gradients(self.get_loss(x, y), params)
          for param, grad in zip(params, (grad_w1, grad_b1, grad_w2, grad_b2)):
              param.update(grad, -lr)

      def train(self):
          """Run the training loop on ``sin(x)`` over 200 points in ``[-2*pi, 2*pi]``.

          The epoch count and mini-batch size come from the module-level
          ``epoch`` and ``batch_size`` variables; the learning rate is fixed
          at 0.01. The loss of the last processed batch is printed per epoch.
          """
          grid = np.linspace(-2 * np.pi, 2 * np.pi, num=200)
          self.x = np.expand_dims(grid, axis=1)
          self.argsort_x = self.x.flatten().argsort()
          self.y = np.sin(self.x)

          for _ in range(epoch):
              offset = 0
              while offset < self.x.shape[0]:
                  stop = offset + batch_size
                  cx = stdnn.Constant(self.x[offset:stop])
                  cy = stdnn.Constant(self.y[offset:stop])
                  self.update(cx, cy, 0.01)
                  offset = stop
                  # NOTE(review): the loop exits after the first mini-batch,
                  # so each epoch performs a single update. Looks like a
                  # debugging leftover -- confirm before relying on it.
                  break
              loss = self.get_loss(cx, cy)
              print(loss.data)
  # Layer sizes passed to both model constructors below.
  input_features, hidden_features, output_features = 1, 50, 1

  # Read as module-level globals by the train() methods of both models.
  batch_size = 10
  epoch = 1

  model = LinearTestModel(input_features, hidden_features, output_features)
  smodel = StdLinerTestModel(input_features, hidden_features, output_features, model)

  # Only the std_model reference network is trained; the uctc.nn run is disabled.
  # model.train()
  smodel.train()