@@ -1,36 +0,0 @@
-# -*- coding: utf-8 -*-
-import numpy as np
-
-import megengine
-import megengine.autodiff as ad
-import megengine.optimizer as optimizer
-from megengine import Parameter, tensor
-from megengine.module import Module
-
-
-class Simple(Module):
-    def __init__(self):
-        super().__init__()
-        self.a = Parameter([1.0], dtype=np.float32)
-
-    def forward(self, x):
-        x = x[:, 0] * self.a
-        return x
-
-
-def test_ai():
-    net = Simple()
-    gm = ad.GradManager().attach(net.parameters())
-    optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.clear_grad()
-
-    dshape = (10, 10)
-    data = tensor(np.ones(dshape).astype(np.float32))
-    with gm:
-        loss = net(data).sum()
-        gm.backward(loss)
-    optim.step()
-
-    np.testing.assert_almost_equal(
-        net.a.numpy(), np.array([1.0 - dshape[0]]).astype(np.float32)
-    )
@@ -7,7 +7,9 @@ import pytest
 import megengine as mge
 import megengine.autodiff as ad
 import megengine.functional as F
+import megengine.optimizer as optim
 from megengine import Tensor
+from megengine.core import set_option
 from megengine.module import Linear, Module
 from megengine.optimizer import SGD
 from megengine.traced_module import trace_module
@@ -66,8 +68,13 @@ class XORNet(Module):
         return x


-@pytest.mark.parametrize("test_traced_module", [True, False])
-def test_training_converge(test_traced_module):
+@pytest.mark.parametrize(
+    "test_traced_module, with_drop, grad_clip",
+    [(False, False, False), (True, True, True)],
+)
+def test_training_converge(test_traced_module, with_drop, grad_clip):
+    if with_drop:
+        set_option("enable_drop", 1)
     net = XORNet()
     if test_traced_module:
         inp = Tensor(np.random.random((14, 2)))
@@ -81,6 +88,8 @@ def test_training_converge(test_traced_module):
             pred = net(data)
             loss = F.nn.cross_entropy(pred, label)
             gm.backward(loss)
+            if grad_clip:
+                optim.clip_grad_norm(net.parameters(), max_norm=0.2, ord=2.0)
         return loss

     def infer(data):
@@ -89,11 +98,13 @@ def test_training_converge(test_traced_module):
     train_dataset = minibatch_generator()
     losses = []
-    for data, label in itertools.islice(train_dataset, 2000):
+    for data, label in itertools.islice(train_dataset, 1500):
         data = Tensor(data, dtype=np.float32)
         label = Tensor(label, dtype=np.int32)
         opt.clear_grad()
         loss = train(data, label)
+        if grad_clip:
+            optim.clip_grad_value(net.parameters(), lower=-0.1, upper=0.1)
         opt.step()
         losses.append(loss.numpy())
@@ -110,3 +121,6 @@ def test_training_converge(test_traced_module):
     assert precision == 1.0, "Test precision must be high enough, get {}".format(
         precision
     )
+
+    if with_drop:
+        set_option("enable_drop", 0)
@@ -1,112 +0,0 @@
-# -*- coding: utf-8 -*-
-import itertools
-
-import numpy as np
-
-import megengine as mge
-import megengine.autodiff as ad
-import megengine.functional as F
-from megengine import Tensor
-from megengine.core import get_option, set_option
-from megengine.module import Linear, Module
-from megengine.optimizer import SGD
-
-batch_size = 64
-data_shape = (batch_size, 2)
-label_shape = (batch_size,)
-
-
-def minibatch_generator():
-    while True:
-        inp_data = np.zeros((batch_size, 2))
-        label = np.zeros(batch_size, dtype=np.int32)
-        for i in range(batch_size):
-            # [x0, x1], sampled from U[-1, 1]
-            inp_data[i, :] = np.random.rand(2) * 2 - 1
-            label[i] = 0 if np.prod(inp_data[i]) < 0 else 1
-        yield inp_data.astype(np.float32), label.astype(np.int32)
-
-
-def calculate_precision(data: np.ndarray, pred: np.ndarray) -> float:
-    """ Calculate precision for given data and prediction.
-
-    :type data: [[x, y], ...]
-    :param data: Input data
-    :type pred: [[x_pred, y_pred], ...]
-    :param pred: Network output data
-    """
-    correct = 0
-    assert len(data) == len(pred)
-    for inp_data, pred_output in zip(data, pred):
-        label = 0 if np.prod(inp_data) < 0 else 1
-        pred_label = np.argmax(pred_output)
-        if pred_label == label:
-            correct += 1
-    return float(correct) / len(data)
-
-
-class XORNet(Module):
-    def __init__(self):
-        self.mid_layers = 14
-        self.num_class = 2
-        super().__init__()
-
-        self.fc0 = Linear(self.num_class, self.mid_layers, bias=True)
-        self.fc1 = Linear(self.mid_layers, self.mid_layers, bias=True)
-        self.fc2 = Linear(self.mid_layers, self.num_class, bias=True)
-
-    def forward(self, x):
-        y = self.fc0(x)
-        x = F.tanh(y)
-        y = self.fc1(x)
-        x = F.tanh(y)
-        x = self.fc2(x)
-        y = (x + x) / 2  # in order to test drop()
-        y._drop()
-        return y
-
-
-def test_training_converge_with_drop():
-    set_option("enable_drop", 1)
-    net = XORNet()
-    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
-    gm = ad.GradManager().attach(net.parameters())
-
-    def train(data, label):
-        with gm:
-            pred = net(data)
-            loss = F.nn.cross_entropy(pred, label)
-            gm.backward(loss)
-        return loss
-
-    def infer(data):
-        return net(data)
-
-    train_dataset = minibatch_generator()
-    losses = []
-
-    for data, label in itertools.islice(train_dataset, 2000):
-        data = Tensor(data, dtype=np.float32)
-        label = Tensor(label, dtype=np.int32)
-        opt.clear_grad()
-        loss = train(data, label)
-        opt.step()
-        losses.append(loss.numpy())
-    assert np.mean(losses[-100:]) < 0.1, "Final training Loss must be low enough"
-
-    ngrid = 10
-    x = np.linspace(-1.0, 1.0, ngrid)
-    xx, yy = np.meshgrid(x, x)
-    xx = xx.reshape((ngrid * ngrid, 1))
-    yy = yy.reshape((ngrid * ngrid, 1))
-    data = mge.tensor(np.concatenate((xx, yy), axis=1).astype(np.float32))
-    pred = infer(Tensor(data)).numpy()
-    precision = calculate_precision(data.numpy(), pred)
-
-    assert precision == 1.0, "Test precision must be high enough, get {}".format(
-        precision
-    )
-
-    set_option("enable_drop", 0)
@@ -1,117 +0,0 @@
-# -*- coding: utf-8 -*-
-import itertools
-
-import numpy as np
-import pytest
-
-import megengine as mge
-import megengine.autodiff as ad
-import megengine.functional as F
-import megengine.optimizer as optim
-from megengine import Tensor
-from megengine.jit import trace
-from megengine.module import Linear, Module
-from megengine.optimizer import SGD
-from megengine.traced_module import trace_module
-
-batch_size = 64
-data_shape = (batch_size, 2)
-label_shape = (batch_size,)
-
-
-def minibatch_generator():
-    while True:
-        inp_data = np.zeros((batch_size, 2))
-        label = np.zeros(batch_size, dtype=np.int32)
-        for i in range(batch_size):
-            # [x0, x1], sampled from U[-1, 1]
-            inp_data[i, :] = np.random.rand(2) * 2 - 1
-            label[i] = 0 if np.prod(inp_data[i]) < 0 else 1
-        yield inp_data.astype(np.float32), label.astype(np.int32)
-
-
-def calculate_precision(data: np.ndarray, pred: np.ndarray) -> float:
-    """ Calculate precision for given data and prediction.
-
-    :type data: [[x, y], ...]
-    :param data: Input data
-    :type pred: [[x_pred, y_pred], ...]
-    :param pred: Network output data
-    """
-    correct = 0
-    assert len(data) == len(pred)
-    for inp_data, pred_output in zip(data, pred):
-        label = 0 if np.prod(inp_data) < 0 else 1
-        pred_label = np.argmax(pred_output)
-        if pred_label == label:
-            correct += 1
-    return float(correct) / len(data)
-
-
-class XORNet(Module):
-    def __init__(self):
-        self.mid_layers = 14
-        self.num_class = 2
-        super().__init__()
-
-        self.fc0 = Linear(self.num_class, self.mid_layers, bias=True)
-        self.fc1 = Linear(self.mid_layers, self.mid_layers, bias=True)
-        self.fc2 = Linear(self.mid_layers, self.num_class, bias=True)
-
-    def forward(self, x):
-        x = self.fc0(x)
-        x = F.tanh(x)
-        x = self.fc1(x)
-        x = F.tanh(x)
-        x = self.fc2(x)
-        return x
-
-
-@pytest.mark.parametrize("test_traced_module", [True, False])
-def test_training_converge(test_traced_module):
-    net = XORNet()
-    if test_traced_module:
-        inp = Tensor(np.random.random((14, 2)))
-        net = trace_module(net, inp)
-    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
-    gm = ad.GradManager().attach(net.parameters())
-
-    @trace(symbolic=False)
-    def train(data, label):
-        with gm:
-            pred = net(data)
-            loss = F.nn.cross_entropy(pred, label)
-            gm.backward(loss)
-            optim.clip_grad_norm(net.parameters(), max_norm=0.2, ord=2.0)
-        return loss
-
-    def infer(data):
-        return net(data)
-
-    train_dataset = minibatch_generator()
-    losses = []
-
-    for data, label in itertools.islice(train_dataset, 2000):
-        data = Tensor(data, dtype=np.float32)
-        label = Tensor(label, dtype=np.int32)
-        opt.clear_grad()
-        loss = train(data, label)
-        optim.clip_grad_value(net.parameters(), lower=-0.1, upper=0.1)
-        opt.step()
-        losses.append(loss.numpy())
-
-    assert (
-        np.mean(losses[-100:]) < 0.1
-    ), "Final training Loss must be low enough, get {}".format(np.mean(losses[-100:]))
-
-    ngrid = 10
-    x = np.linspace(-1.0, 1.0, ngrid)
-    xx, yy = np.meshgrid(x, x)
-    xx = xx.reshape((ngrid * ngrid, 1))
-    yy = yy.reshape((ngrid * ngrid, 1))
-    data = mge.tensor(np.concatenate((xx, yy), axis=1).astype(np.float32))
-    pred = infer(data)
-    precision = calculate_precision(data.numpy(), pred.numpy())
-
-    assert precision == 1.0, "Test precision must be high enough, get {}".format(
-        precision
-    )
@@ -1,38 +0,0 @@
-# -*- coding: utf-8 -*-
-import subprocess
-
-import numpy as np
-import pytest
-
-import megengine
-import megengine.autodiff as ad
-import megengine.optimizer as optimizer
-from megengine import Parameter, tensor
-from megengine.module import Module
-
-
-class Simple(Module):
-    def __init__(self):
-        super().__init__()
-        self.a = Parameter([1.23], dtype=np.float32)
-
-    def forward(self, x):
-        x = x * self.a
-        return x
-
-
-def test_hello_world():
-    net = Simple()
-    optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.clear_grad()
-    gm = ad.GradManager().attach(net.parameters())
-
-    data = tensor([2.34])
-    with gm:
-        loss = net(data)
-        gm.backward(loss)
-    optim.step()
-
-    np.testing.assert_almost_equal(
-        net.a.numpy(), np.array([1.23 - 2.34]).astype(np.float32)
-    )
@@ -1,72 +0,0 @@
-# -*- coding: utf-8 -*-
-import itertools
-import os
-
-import numpy as np
-import pytest
-
-import megengine
-import megengine.autodiff as ad
-import megengine.optimizer as optimizer
-from megengine import Parameter, tensor
-from megengine.jit import trace
-from megengine.module import Module
-
-
-class Simple(Module):
-    def __init__(self):
-        super().__init__()
-        self.a = Parameter([1.23], dtype="float32")
-
-    def forward(self, x):
-        x = x * self.a
-        return x
-
-
-@pytest.mark.parametrize("trace_mode", [True, False, None])
-@pytest.mark.parametrize("inplace_mode", [True, False])
-def test_sgd_momentum(monkeypatch, trace_mode, inplace_mode):
-    with monkeypatch.context() as mk:
-        mk.setenv("MEGENGINE_INPLACE_UPDATE", str(int(inplace_mode)))
-
-        def train_func(data, *, model=None, optim=None, gm=None):
-            optim.clear_grad()
-            with gm:
-                loss = net(data)
-                gm.backward(loss)
-            optim.step()
-            return loss
-
-        if trace_mode is not None:
-            train_func = trace(symbolic=trace_mode)(train_func)
-
-        def eval_func(data, *, model=None, optim=None, gm=None):
-            loss = net(data)
-            return loss
-
-        if trace_mode is not None:
-            eval_func = trace(symbolic=trace_mode)(eval_func)
-
-        net = Simple()
-        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
-        gm = ad.GradManager().attach(net.parameters())
-        data = tensor([2.34])
-        train_func(data, model=net, optim=optim, gm=gm)
-        np.testing.assert_almost_equal(
-            optim._state[net.a]["momentum_buffer"].numpy(), 2.34
-        )
-
-        # do 3 steps of infer
-        for _ in range(3):
-            loss = eval_func(data)
-            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
-            np.testing.assert_almost_equal(
-                optim._state[net.a]["momentum_buffer"].numpy(), 2.34
-            )
-
-        # do a step of train
-        train_func(data, model=net, optim=optim, gm=gm)
-        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
-        np.testing.assert_almost_equal(
-            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34, 5
-        )