@@ -14,7 +14,6 @@ import sys
 import time

 import numpy as np
-from resnet50 import Resnet50

 import megengine as mge
 import megengine.distributed as dist
@@ -70,6 +69,9 @@ def run_perf(
     eager=False,
 ):
+    # pylint: disable = import-outside-toplevel
+    from resnet50 import Resnet50
+
     if conv_fastrun:
         set_conv_execution_strategy("PROFILE")
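
The two hunks above move the resnet50 import from module scope into run_perf,
deferring it until the benchmark actually runs. A minimal sketch of the same
lazy-import pattern (build_model is an illustrative name, not part of this
patch):

    def build_model():
        # pylint: disable=import-outside-toplevel
        from resnet50 import Resnet50  # resolved only when the function runs
        return Resnet50()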
@@ -1,141 +0,0 @@
-# -*- coding: utf-8 -*-
-# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
-#
-# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-import os
-import subprocess
-import sys
-
-import numpy as np
-
-
-def fwd_test(backend):
-    model_path = "../examples/cifar10/resnet_example/checkpoint/pretrained_model_82.mge"
-    # Update the reference value only if the difference comes from numerical rounding.
-    # FIXME! Need to use a different reference depending on CPU/GPU
-    loss_ref = np.array([7.315978]).astype(np.float32)
-    if backend == "megengine-dynamic":
-        # Must be set before megengine is imported; see run_func below.
-        os.environ["MGE_DISABLE_TRACE"] = "true"
-
-    import megengine
-    from megengine.functional.debug_param import set_conv_execution_strategy
-    from megengine.test import assertTensorClose
-    from megengine.core import Graph
-
-    sys.path.append(
-        os.path.join(os.path.dirname(__file__), "..", "..", "..", "examples")
-    )
-    from cifar10.resnet_example.main import Example as resnet18_config
-    from cifar10.resnet_example.main import eval_one_iter_mge
-
-    mge_root = os.path.dirname(megengine.__file__)
-    model_path = os.path.join(mge_root, model_path)
-    run_case = resnet18_config(backend=backend, mode="eval")
-    run_case.init_net()
-    run_case.load_model(model_path)
-
-    np.random.seed(0)
-    inputs = np.random.rand(run_case.train_batch_size, 3, 32, 32)
-    targets = np.random.randint(10, size=(run_case.train_batch_size,))
-    max_err = 0.0
-
-    run_case.net_context["net"].eval()
-    loss, _ = eval_one_iter_mge(inputs, targets, config=run_case)
-    try:
-        loss = loss.numpy()
-        assertTensorClose(loss, loss_ref, max_err=max_err)
-    except Exception:
-        print("calculated loss:", loss)
-        print("expect:", loss_ref)
-        sys.exit(1)
-
-
-def train_test(backend):
-    model_path = "../examples/cifar10/resnet_example/checkpoint/pretrained_model_82.mge"
-    # Update the reference values only if the difference comes from numerical rounding.
-    # FIXME! Need to use different references depending on CPU/GPU
-    if backend == "megengine-dynamic":
-        # Must be set before megengine is imported; see run_func below.
-        os.environ["MGE_DISABLE_TRACE"] = "true"
-        loss_ref = np.array([3.4709125, 12.46342]).astype(np.float32)
-    else:
-        loss_ref = np.array([3.4709125, 12.463419]).astype(np.float32)
-
-    import megengine
-    from megengine.functional.debug_param import set_conv_execution_strategy
-    from megengine.test import assertTensorClose
-    from megengine.core import Graph
-
-    sys.path.append(
-        os.path.join(os.path.dirname(__file__), "..", "..", "..", "examples")
-    )
-    from cifar10.resnet_example.main import Example as resnet18_config
-    from cifar10.resnet_example.main import train_one_iter_mge
-
-    mge_root = os.path.dirname(megengine.__file__)
-    model_path = os.path.join(mge_root, model_path)
-    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")
-
-    run_case = resnet18_config(backend=backend, mode="train")
-    run_case.init_net()
-    run_case.load_model(model_path)
-
-    max_err = 0.0
-    loss = []
-    np.random.seed(0)
-    inputs = np.random.rand(run_case.train_batch_size, 3, 32, 32)
-    targets = np.random.randint(10, size=(run_case.train_batch_size,))
-
-    run_case.set_optimizer(0.0)
-    opt = run_case.net_context["optimizer"]
-    for lr in (1.0, 1.0):
-        run_case.set_optimizer(lr)
-        opt.zero_grad()
-        loss_batch, _ = train_one_iter_mge(inputs, targets, config=run_case)
-        opt.step()
-        loss.append(loss_batch.numpy()[0])
-
-    try:
-        assertTensorClose(np.array(loss).astype(np.float32), loss_ref, max_err=1e-5)
-    except Exception:
-        print("calculated loss:", loss)
-        print("expect:", loss_ref)
-        sys.exit(1)
-
-
-def run_func(func):
-    # Run each case in a fresh interpreter, so megengine is imported anew
-    # after MGE_DISABLE_TRACE has (or has not) been set.
-    cmd_start = ["python3", "-c"]
-    cmd_head = "from verify_correctness import fwd_test, train_test\n"
-    cmd = cmd_start + [cmd_head + func]
-    ret = subprocess.run(
-        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
-    )
-    if ret.returncode != 0:
-        print("Failed!!!")
-        print(ret.stdout)
-        print(ret.stderr)
-        raise RuntimeError("correctness check failed: {}".format(func))
-    print("Success")
-
-
-if __name__ == "__main__":
-    print("Running fwd static ...")
-    run_func('fwd_test(backend="megengine-static")')
-    print("Running fwd dynamic ...")
-    run_func('fwd_test(backend="megengine-dynamic")')
-    print("Running train static ...")
-    run_func('train_test(backend="megengine-static")')
-    print("Running train dynamic ...")
-    run_func('train_test(backend="megengine-dynamic")')
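
This deleted script ran one interpreter per case, apparently because
MGE_DISABLE_TRACE is only honored when it is set before megengine is first
imported. A minimal standalone sketch of that isolation pattern (the version
print is illustrative):

    import os
    import subprocess
    import sys

    # Fresh interpreter with the flag already in its environment.
    env = dict(os.environ, MGE_DISABLE_TRACE="true")
    ret = subprocess.run(
        [sys.executable, "-c", "import megengine; print(megengine.__version__)"],
        env=env,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    print(ret.stdout)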
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import os
+import sys
+
+import numpy as np
+
+import megengine as mge
+import megengine.functional as F
+from megengine import jit, tensor
+from megengine.functional.debug_param import set_conv_execution_strategy
+from megengine.module import BatchNorm2d, Conv2d, Linear, MaxPool2d, Module
+from megengine.optimizer import SGD
+from megengine.test import assertTensorClose
+
+
+class MnistNet(Module):
+    def __init__(self, has_bn=False):
+        super().__init__()
+        self.conv0 = Conv2d(1, 20, kernel_size=5, bias=True)
+        self.pool0 = MaxPool2d(2)
+        self.conv1 = Conv2d(20, 20, kernel_size=5, bias=True)
+        self.pool1 = MaxPool2d(2)
+        self.fc0 = Linear(20 * 4 * 4, 500, bias=True)
+        self.fc1 = Linear(500, 10, bias=True)
+        self.bn0 = None
+        self.bn1 = None
+        if has_bn:
+            self.bn0 = BatchNorm2d(20)
+            self.bn1 = BatchNorm2d(20)
+
+    def forward(self, x):
+        x = self.conv0(x)
+        if self.bn0:
+            x = self.bn0(x)
+        x = F.relu(x)
+        x = self.pool0(x)
+        x = self.conv1(x)
+        if self.bn1:
+            x = self.bn1(x)
+        x = F.relu(x)
+        x = self.pool1(x)
+        x = F.flatten(x, 1)
+        x = self.fc0(x)
+        x = F.relu(x)
+        x = self.fc1(x)
+        return x
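+
+# Shape walk-through for a 1x1x28x28 MNIST input (annotation, not part of the
+# original patch): conv0 (5x5, valid) -> 20x24x24, pool0 -> 20x12x12,
+# conv1 (5x5) -> 20x8x8, pool1 -> 20x4x4, flatten -> 320 = 20*4*4,
+# fc0 -> 500, fc1 -> 10 logits; this is why fc0 takes 20 * 4 * 4 inputs.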
+
+
+def train(data, label, net, opt):
+    pred = net(data)
+    loss = F.cross_entropy_with_softmax(pred, label)
+    opt.backward(loss)  # in this MegEngine version, the optimizer drives backprop
+    return loss
+
+
+def update_model(model_path):
+    """
+    Update the dumped model with test cases so that it carries new reference values.
+    """
+    net = MnistNet(has_bn=True)
+    checkpoint = mge.load(model_path)
+    net.load_state_dict(checkpoint["net_init"])
+    lr = checkpoint["sgd_lr"]
+    opt = SGD(net.parameters(), lr=lr)
+
+    data = tensor(dtype=np.float32)
+    label = tensor(dtype=np.int32)
+    data.set_value(checkpoint["data"])
+    label.set_value(checkpoint["label"])
+
+    opt.zero_grad()
+    loss = train(data, label, net=net, opt=opt)
+    opt.step()
+
+    checkpoint.update({"net_updated": net.state_dict(), "loss": loss.numpy()})
+    mge.save(checkpoint, model_path)
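+
+# Annotation (not part of the original patch): regenerate the references only
+# for an intentional numerical change, e.g.
+#     update_model("mnist_model_with_test.mge")  # path illustrative
+# and ship the regenerated .mge file together with that change.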
+
+
+def run_test(model_path, use_jit, use_symbolic):
+    """
+    Load the model with test cases and run the training for one iteration.
+    The loss and updated weights are compared with reference values to verify
+    correctness.
+
+    The reference values were produced by training the pre-trained model for
+    one iteration and dumping the resulting net state dict; the test cases are
+    appended to the model file.
+
+    Dump a new file with updated references by calling update_model if you are
+    sure the test fails due to numerical rounding errors rather than a bug.
+    Please think twice before you do so.
+    """
+    net = MnistNet(has_bn=True)
+    checkpoint = mge.load(model_path)
+    net.load_state_dict(checkpoint["net_init"])
+    lr = checkpoint["sgd_lr"]
+    opt = SGD(net.parameters(), lr=lr)
+
+    data = tensor(dtype=np.float32)
+    label = tensor(dtype=np.int32)
+    data.set_value(checkpoint["data"])
+    label.set_value(checkpoint["label"])
+
+    max_err = 0.0  # bitwise-exact match is expected under HEURISTIC_REPRODUCIBLE
+    train_func = train
+    if use_jit:
+        train_func = jit.trace(train_func, symbolic=use_symbolic)
+
+    opt.zero_grad()
+    loss = train_func(data, label, net=net, opt=opt)
+    opt.step()
+
+    assertTensorClose(loss.numpy(), checkpoint["loss"], max_err=max_err)
+    for param, param_ref in zip(
+        net.state_dict().items(), checkpoint["net_updated"].items()
+    ):
+        assert param[0] == param_ref[0]
+        assertTensorClose(param[1], param_ref[1], max_err=max_err)
+
+
+def test_correctness():
+    if mge.is_cuda_available():
+        model_name = "mnist_model_with_test.mge"
+    else:
+        model_name = "mnist_model_with_test_cpu.mge"
+    model_path = os.path.join(os.path.dirname(__file__), model_name)
+
+    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")
+
+    run_test(model_path, False, False)
+    run_test(model_path, True, False)
+    run_test(model_path, True, True)
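
The three run_test calls cover eager execution, imperative tracing, and
symbolic (static-graph) tracing. A minimal sketch of the switch they toggle
(double is an illustrative function, not part of this patch):

    import numpy as np
    from megengine import jit, tensor

    def double(x):
        return x * 2

    traced = jit.trace(double, symbolic=True)  # symbolic=False replays imperatively
    x = tensor(dtype=np.float32)
    x.set_value(np.ones((2, 3), dtype=np.float32))
    print(traced(x).numpy())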
@@ -94,9 +94,7 @@ def test_pytorch_mixed():
         def __init__(self):
             super().__init__()
             self.torch_module = PyTorchModule(self.SubModule())
-            a = list(self.SubModule().named_parameters(recurse=True))
-            a = list(self.SubModule().parameters())
-            self.multiplier = Parameter(np.array(init_param[1]), dtype=np.float32)
+            self.multiplier = Parameter(init_param[1], dtype=np.float32)

         def forward(self, inp):
             return self.torch_module(inp) * self.multiplier
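
As annotation: the dropped `a = ...` lines were unused debug assignments, and
the multiplier change reads as Parameter accepting the raw value directly,
making the explicit np.array(...) wrap redundant. A hedged example (the value
is a stand-in for init_param[1], which this hunk does not show):

    import numpy as np
    from megengine import Parameter

    value = np.random.rand(1).astype(np.float32)     # stand-in for init_param[1]
    m_old = Parameter(np.array(value), dtype=np.float32)  # old: extra wrap
    m_new = Parameter(value, dtype=np.float32)             # new: pass directly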