From 9386d0682e56e05073f8a7e48a85857bbc84296d Mon Sep 17 00:00:00 2001 From: Frozenmad <351549709@sjtu.edu.cn> Date: Sun, 17 Apr 2022 14:44:58 +0800 Subject: [PATCH 01/10] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- .github/ISSUE_TEMPLATE/feature_request.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index ec87ad7..644d7e8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -3,7 +3,7 @@ name: Bug report about: Create a report to help us improve title: "[BUG]" labels: bug -assignees: Frozenmad +assignees: general502570 --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index c6b696d..68b86de 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -3,7 +3,7 @@ name: Feature request about: Suggest an idea for this project title: "[FEATURE]" labels: enhancement -assignees: Frozenmad +assignees: general502570 --- From 487f2b2f798b9b1363ad5dc100fb410b12222e06 Mon Sep 17 00:00:00 2001 From: Generall Date: Tue, 19 Apr 2022 11:40:55 +0800 Subject: [PATCH 02/10] v0.3.1 --- README.md | 1 + README_cn.md | 1 + autogl/__init__.py | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 81d5d8f..9c76bdf 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ Feel free to open issues ## News! +- 2022.4.19 New version v0.3.1!We have released Chinese tutorial for the first time! - 2021.12.31 New Version! v0.3.0-pre is here! - AutoGL now support [__Deep Graph Library (DGL)__](https://www.dgl.ai/) backend to be interface-friendly for DGL users! All the homogeneous node classification task, link prediction task, and graph classification task are currently supported under DGL backend. AutoGL is also compatible with PyG 2.0 now. - The __heterogeneous__ node classification tasks are now supported! See [hetero tutorial](http://mn.cs.tsinghua.edu.cn/autogl/documentation/docfile/tutorial/t_hetero_node_clf.html) for more details. diff --git a/README_cn.md b/README_cn.md index ad98901..f52b34f 100644 --- a/README_cn.md +++ b/README_cn.md @@ -13,6 +13,7 @@ ## 最新消息 +- 2022.4.19 v0.3.1版本更新!首次更新中文教程! - 2021.12.31 v0.3.0-pre版本更新! - 智图目前支持[__Deep Graph Library (DGL)__](https://www.dgl.ai/)作为后端,以方便DGL的用户使用。目前在DGL后端已经支持同构图的节点分类、链接预测以及图分类等任务。智图现在也可兼容PyG 2.0版本。 - 智图可以支持__异构图__节点分类任务!详情请参考[异构图教程](http://mn.cs.tsinghua.edu.cn/autogl/documentation/docfile/tutorial/t_hetero_node_clf.html)。 diff --git a/autogl/__init__.py b/autogl/__init__.py index 00a5ff9..f5c0239 100644 --- a/autogl/__init__.py +++ b/autogl/__init__.py @@ -16,4 +16,4 @@ from .module import ( train, ) -__version__ = "0.3.0-pre" +__version__ = "0.3.1" diff --git a/setup.py b/setup.py index afaa49d..ea4be0e 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ with open("README.md", 'r') as fh: ''' https://setuptools.readthedocs.io/en/latest/ ''' setup( name='autogl', - version='0.3.0-pre', + version='0.3.1', author='THUMNLab/aglteam', maintainer='THUMNLab/aglteam', author_email='autogl@tsinghua.edu.cn', From 6063ba2d0cef78ff59ac2cb26b47c3a0aa78de1c Mon Sep 17 00:00:00 2001 From: lihy Date: Thu, 24 Nov 2022 23:59:46 +0800 Subject: [PATCH 03/10] ogb glf example --- examples/glfogb.py | 146 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 examples/glfogb.py diff --git a/examples/glfogb.py b/examples/glfogb.py new file mode 100644 index 0000000..c81f5c8 --- /dev/null +++ b/examples/glfogb.py @@ -0,0 +1,146 @@ +from torch_geometric.data import DataLoader +import torch.optim as optim +from tqdm import tqdm +from ogb.graphproppred import Evaluator +import random +import torch +import numpy as np +from autogl.datasets import build_dataset_from_name +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from ogb_gnn import GNN +from autogl.backend import DependentBackend +from torch_geometric.data import Data + +backend = DependentBackend.get_backend_name() + +cls_criterion = torch.nn.BCEWithLogitsLoss() +reg_criterion = torch.nn.MSELoss() + +def train(model, device, loader, optimizer, task_type): + model.train() + + for step, batch in enumerate(tqdm(loader, desc="Iteration")): + batch = batch.to(device) + + if batch.x.shape[0] == 1 or batch.batch[-1] == 0: + pass + else: + pred = model(batch) + optimizer.zero_grad() + is_labeled = batch.y == batch.y + loss = cls_criterion(pred.to(torch.float32)[is_labeled], batch.y.to(torch.float32)[is_labeled]) + loss.backward() + optimizer.step() + +def eval(model, device, loader, evaluator): + model.eval() + y_true = [] + y_pred = [] + + for step, batch in enumerate(tqdm(loader, desc="Iteration")): + batch = batch.to(device) + + if batch.x.shape[0] == 1: + pass + else: + with torch.no_grad(): + pred = model(batch) + + y_true.append(batch.y.view(pred.shape).detach().cpu()) + y_pred.append(pred.detach().cpu()) + + y_true = torch.cat(y_true, dim = 0).numpy() + y_pred = torch.cat(y_pred, dim = 0).numpy() + + input_dict = {"y_true": y_true, "y_pred": y_pred} + + return evaluator.eval(input_dict) + +def trans(dataset): + ret = [] + for i in range(len(dataset)): + x = dataset[i].nodes.data['x'] + y = dataset[i].data['y'].view(-1, 1) + edge_index = dataset[i].edges.connections + edge_attr = dataset[i].edges.data['edge_feat'] + data = Data(x=x, y=y, edge_index=edge_index, edge_attr=edge_attr) + ret.append(data) + return ret + +if __name__ == "__main__": + parser = ArgumentParser( + "auto graph classification", formatter_class=ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", + default="ogbg-molhiv", + type=str, + help="graph classification dataset", + choices=["mutag", "imdb-b", "imdb-m", "proteins", "collab", "ogbg-molbace"], + ) + parser.add_argument( + "--configs", default="../configs/graphclf_gin_benchmark.yml", help="config files" + ) + parser.add_argument("--device", type=int, default=0, help="device to run on, -1 means cpu") + parser.add_argument("--seed", type=int, default=0, help="random seed") + + args = parser.parse_args() + + if args.device == -1: + args.device = "cpu" + + if torch.cuda.is_available() and args.device != "cpu": + torch.cuda.set_device(args.device) + seed = args.seed + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + dataset = build_dataset_from_name(args.dataset) + model = GNN(num_tasks=1, gnn_type = 'gcn').to(args.device) + evaluator = Evaluator(args.dataset) + + train_dataset = trans(dataset.train_split) + val_dataset = trans(dataset.val_split) + test_dataset = trans(dataset.test_split) + + train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, + num_workers=0) + valid_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, + num_workers=0) + test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, + num_workers=0) + + optimizer = optim.Adam(model.parameters(), lr=0.001) + + valid_curve = [] + test_curve = [] + train_curve = [] + device = torch.device("cuda:0") + for epoch in range(1, 100 + 1): + print("=====Epoch {}".format(epoch)) + print('Training...') + train(model, device, train_loader, optimizer, 'binary classification') + + print('Evaluating...') + train_perf = eval(model, device, train_loader, evaluator) + valid_perf = eval(model, device, valid_loader, evaluator) + test_perf = eval(model, device, test_loader, evaluator) + + print({'Train': train_perf, 'Validation': valid_perf, 'Test': test_perf}) + + train_curve.append(train_perf['rocauc']) + valid_curve.append(valid_perf['rocauc']) + test_curve.append(test_perf['rocauc']) + + best_val_epoch = np.argmax(np.array(valid_curve)) + best_train = max(train_curve) + + print('Finished training!') + print('Best validation score: {}'.format(valid_curve[best_val_epoch])) + print('Test score: {}'.format(test_curve[best_val_epoch])) + From dfcb04a58a5eb3775d29dd87708879f525c88899 Mon Sep 17 00:00:00 2001 From: defineZYP <953726616@qq.com> Date: Wed, 30 Nov 2022 18:47:40 +0800 Subject: [PATCH 04/10] fix bug of ogbn dataset and add ogb node classification example --- autogl/datasets/_ogb.py | 12 ++- examples/nodeclf_ogb.py | 219 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 examples/nodeclf_ogb.py diff --git a/autogl/datasets/_ogb.py b/autogl/datasets/_ogb.py index b8ee15a..7192a0c 100644 --- a/autogl/datasets/_ogb.py +++ b/autogl/datasets/_ogb.py @@ -5,6 +5,8 @@ from ogb.nodeproppred import NodePropPredDataset from ogb.linkproppred import LinkPropPredDataset from ogb.graphproppred import GraphPropPredDataset +from torch_sparse import SparseTensor + from autogl import backend as _backend from autogl.data import InMemoryStaticGraphSet from autogl.data.graph import ( @@ -30,13 +32,21 @@ class _OGBNDatasetUtil(_OGBDatasetUtil): edges_data_key_mapping: _typing.Optional[_typing.Mapping[str, str]] = ..., graph_data_key_mapping: _typing.Optional[_typing.Mapping[str, str]] = ... ) -> GeneralStaticGraph: + # TODO + edge_index = ogbn_data['edge_index'] + num_nodes = ogbn_data['node_feat'].shape[0] + edge_feat = ogbn_data['edge_feat'] + edge_index = SparseTensor(row=torch.tensor(edge_index[0]), col=torch.tensor(edge_index[1]), value=edge_feat, sparse_sizes=(num_nodes, num_nodes)) + edge_index = edge_index.to_symmetric() + row, col, _ = edge_index.coo() + edge_index = [row.cpu().detach().numpy(), col.cpu().detach().numpy()] homogeneous_static_graph: GeneralStaticGraph = ( GeneralStaticGraphGenerator.create_homogeneous_static_graph( dict([ (target_key, torch.from_numpy(ogbn_data[source_key])) for source_key, target_key in nodes_data_key_mapping.items() ]), - torch.from_numpy(ogbn_data['edge_index']), + torch.tensor(edge_index), dict([ (target_key, torch.from_numpy(ogbn_data[source_key])) for source_key, target_key in edges_data_key_mapping.items() diff --git a/examples/nodeclf_ogb.py b/examples/nodeclf_ogb.py new file mode 100644 index 0000000..3fbac6a --- /dev/null +++ b/examples/nodeclf_ogb.py @@ -0,0 +1,219 @@ +import os +import tqdm +import argparse +import numpy as np +import torch +import torch.nn.functional as F + +from torch_geometric.nn import GCNConv, SAGEConv + +from ogb.nodeproppred import Evaluator +from autogl.datasets import build_dataset_from_name +from autogl import backend + +if backend.DependentBackend.is_dgl(): + feat = 'feat' + label = 'label' +else: + feat = 'x' + label = 'y' + +class GCN(torch.nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels, num_layers, + dropout): + super(GCN, self).__init__() + + self.convs = torch.nn.ModuleList() + self.convs.append(GCNConv(in_channels, hidden_channels, cached=True)) + self.bns = torch.nn.ModuleList() + self.bns.append(torch.nn.BatchNorm1d(hidden_channels)) + for _ in range(num_layers - 2): + self.convs.append( + GCNConv(hidden_channels, hidden_channels, cached=True)) + self.bns.append(torch.nn.BatchNorm1d(hidden_channels)) + self.convs.append(GCNConv(hidden_channels, out_channels, cached=True)) + + self.dropout = dropout + + def reset_parameters(self): + for conv in self.convs: + conv.reset_parameters() + for bn in self.bns: + bn.reset_parameters() + + def forward(self, x, adj_t): + for i, conv in enumerate(self.convs[:-1]): + x = conv(x, adj_t) + x = self.bns[i](x) + x = F.relu(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.convs[-1](x, adj_t) + return x.log_softmax(dim=-1) + + +class SAGE(torch.nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels, num_layers, + dropout): + super(SAGE, self).__init__() + + self.convs = torch.nn.ModuleList() + self.convs.append(SAGEConv(in_channels, hidden_channels)) + self.bns = torch.nn.ModuleList() + self.bns.append(torch.nn.BatchNorm1d(hidden_channels)) + for _ in range(num_layers - 2): + self.convs.append(SAGEConv(hidden_channels, hidden_channels)) + self.bns.append(torch.nn.BatchNorm1d(hidden_channels)) + self.convs.append(SAGEConv(hidden_channels, out_channels)) + + self.dropout = dropout + + def reset_parameters(self): + for conv in self.convs: + conv.reset_parameters() + for bn in self.bns: + bn.reset_parameters() + + def forward(self, x, adj_t): + for i, conv in enumerate(self.convs[:-1]): + x = conv(x, adj_t) + x = self.bns[i](x) + x = F.relu(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.convs[-1](x, adj_t) + return x.log_softmax(dim=-1) + + +def train(model, x, y, edge_index, train_idx, optimizer): + model.train() + optimizer.zero_grad() + out = model(x, edge_index)[train_idx] + loss = F.nll_loss(out, y[train_idx]) + loss.backward() + optimizer.step() + return loss.item() + + +@torch.no_grad() +def test(model, x, y, edge_index, split_idx, evaluator): + model.eval() + out = model(x, edge_index) + y_pred = out.argmax(dim=-1, keepdim=True) + + train_acc = evaluator.eval({ + 'y_true': y[split_idx['train']].view(-1, 1), + 'y_pred': y_pred[split_idx['train']], + })['acc'] + valid_acc = evaluator.eval({ + 'y_true': y[split_idx['valid']].view(-1, 1), + 'y_pred': y_pred[split_idx['valid']], + })['acc'] + test_acc = evaluator.eval({ + 'y_true': y[split_idx['test']].view(-1, 1), + 'y_pred': y_pred[split_idx['test']], + })['acc'] + + return train_acc, valid_acc, test_acc + +class Node: + def __init__(self, a, b): + self.a = a + self.b = b + + def __le__(self, other): + return self.a <= other.a + + def __lt__(self, other): + if self.a < other.a: + return True + elif self.a == other.a: + return self.b < other.b + else: + return False + +def main(): + parser = argparse.ArgumentParser(description='OGBN-Arxiv (GNN)') + parser.add_argument('--device', type=int, default=0) + parser.add_argument('--log_steps', type=int, default=1) + parser.add_argument('--use_sage', action='store_true') + parser.add_argument('--num_layers', type=int, default=3) + parser.add_argument('--hidden_channels', type=int, default=256) + parser.add_argument('--dropout', type=float, default=0.5) + parser.add_argument('--lr', type=float, default=0.01) + parser.add_argument('--epochs', type=int, default=500) + parser.add_argument('--runs', type=int, default=10) + args = parser.parse_args() + print(args) + + device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu' + device = torch.device(device) + # print(oedge_index) + + + dataset = build_dataset_from_name('ogbn_arxiv', path='./dataset/') + + data = dataset[0] + x = data.nodes.data[feat].to(device) + y = data.nodes.data[label].to(device) + edge_index = data.edges.connections.to(device) + # edge_index = data_transfer(edge_index, row, col) + print(edge_index) + # print(edge_index.shape) + + train_mask = data.nodes.data['train_mask'] + val_mask = data.nodes.data['val_mask'] + test_mask = data.nodes.data['test_mask'] + split_idx = { + 'train': train_mask, + 'valid': val_mask, + 'test': test_mask + } + + # split_idx = dataset.get_idx_split() + train_idx = split_idx['train'].to(device) + labels = dataset[0].nodes.data[label] + num_classes = len(np.unique(labels.numpy())) + + if args.use_sage: + model = SAGE(data.num_features, args.hidden_channels, + dataset.num_classes, args.num_layers, + args.dropout).to(device) + else: + model = GCN(dataset[0].nodes.data[feat].size(1), args.hidden_channels, + num_classes, args.num_layers, + args.dropout).to(device) + + evaluator = Evaluator(name='ogbn-arxiv') + # logger = Logger(args.runs, args) + best_accs = [] + for run in range(args.runs): + model.reset_parameters() + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) + best_valid = 0.0 + best_test = 0.0 + for epoch in range(1, 1 + args.epochs): + loss = train(model, x, y, edge_index, train_idx, optimizer) + result = test(model, x, y, edge_index, split_idx, evaluator) + # logger.add_result(run, result) + + if epoch % args.log_steps == 0: + train_acc, valid_acc, test_acc = result + print(f'Run: {run + 1:02d}, ' + f'Epoch: {epoch:02d}, ' + f'Loss: {loss:.4f}, ' + f'Train: {100 * train_acc:.2f}%, ' + f'Valid: {100 * valid_acc:.2f}% ' + f'Test: {100 * test_acc:.2f}%') + if valid_acc > best_valid: + best_valid = valid_acc + best_test = test_acc + best_accs.append(best_test) + print(best_accs) + print(np.mean(best_accs)) + print(np.std(best_accs)) + + # logger.print_statistics(run) + # logger.print_statistics() + + +if __name__ == "__main__": + main() \ No newline at end of file From bfa1d9c8c3b19dc6fabbd6517390304ee3be4dc4 Mon Sep 17 00:00:00 2001 From: defineZYP <953726616@qq.com> Date: Wed, 30 Nov 2022 22:48:33 +0800 Subject: [PATCH 05/10] add example config file of ogb node classification --- configs/nodeclf_gcn_benchmark_ogb.yml | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 configs/nodeclf_gcn_benchmark_ogb.yml diff --git a/configs/nodeclf_gcn_benchmark_ogb.yml b/configs/nodeclf_gcn_benchmark_ogb.yml new file mode 100644 index 0000000..c9aaa1a --- /dev/null +++ b/configs/nodeclf_gcn_benchmark_ogb.yml @@ -0,0 +1,59 @@ +ensemble: + name: null +feature: +- name: NormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 256 + - 256 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.505 + minValue: 0.495 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + parameterName: act + type: CATEGORICAL + name: gcn-model +trainer: + hp_space: + - maxValue: 500 + minValue: 500 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 500 + minValue: 500 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.0105 + minValue: 0.0095 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 0.0000001 + minValue: 0.00000001 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE From e936f0fd391b83830145f2ce3a4545eefcdc94fe Mon Sep 17 00:00:00 2001 From: defineZYP <953726616@qq.com> Date: Wed, 30 Nov 2022 23:40:23 +0800 Subject: [PATCH 06/10] fix warning --- autogl/datasets/_ogb.py | 2 +- examples/nodeclf_ogb.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autogl/datasets/_ogb.py b/autogl/datasets/_ogb.py index 7192a0c..0679b30 100644 --- a/autogl/datasets/_ogb.py +++ b/autogl/datasets/_ogb.py @@ -39,7 +39,7 @@ class _OGBNDatasetUtil(_OGBDatasetUtil): edge_index = SparseTensor(row=torch.tensor(edge_index[0]), col=torch.tensor(edge_index[1]), value=edge_feat, sparse_sizes=(num_nodes, num_nodes)) edge_index = edge_index.to_symmetric() row, col, _ = edge_index.coo() - edge_index = [row.cpu().detach().numpy(), col.cpu().detach().numpy()] + edge_index = np.array([row.cpu().detach().numpy(), col.cpu().detach().numpy()]) homogeneous_static_graph: GeneralStaticGraph = ( GeneralStaticGraphGenerator.create_homogeneous_static_graph( dict([ diff --git a/examples/nodeclf_ogb.py b/examples/nodeclf_ogb.py index 3fbac6a..bb8a34e 100644 --- a/examples/nodeclf_ogb.py +++ b/examples/nodeclf_ogb.py @@ -174,8 +174,8 @@ def main(): num_classes = len(np.unique(labels.numpy())) if args.use_sage: - model = SAGE(data.num_features, args.hidden_channels, - dataset.num_classes, args.num_layers, + model = SAGE(dataset[0].nodes.data[feat].size(1), args.hidden_channels, + num_classes, args.num_layers, args.dropout).to(device) else: model = GCN(dataset[0].nodes.data[feat].size(1), args.hidden_channels, From 39594665e729d75069ac87d40d8c97a1c54c6a21 Mon Sep 17 00:00:00 2001 From: defineZYP <953726616@qq.com> Date: Thu, 1 Dec 2022 09:44:50 +0800 Subject: [PATCH 07/10] fix bug --- autogl/datasets/_ogb.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/autogl/datasets/_ogb.py b/autogl/datasets/_ogb.py index 0679b30..52ec574 100644 --- a/autogl/datasets/_ogb.py +++ b/autogl/datasets/_ogb.py @@ -34,8 +34,10 @@ class _OGBNDatasetUtil(_OGBDatasetUtil): ) -> GeneralStaticGraph: # TODO edge_index = ogbn_data['edge_index'] - num_nodes = ogbn_data['node_feat'].shape[0] + num_nodes = ogbn_data['num_nodes'] edge_feat = ogbn_data['edge_feat'] + if edge_feat is not None: + edge_feat = torch.tensor(edge_feat) edge_index = SparseTensor(row=torch.tensor(edge_index[0]), col=torch.tensor(edge_index[1]), value=edge_feat, sparse_sizes=(num_nodes, num_nodes)) edge_index = edge_index.to_symmetric() row, col, _ = edge_index.coo() From 2a20d002e2b2d59de5f07a7097096829d9b7e62c Mon Sep 17 00:00:00 2001 From: defineZYP <953726616@qq.com> Date: Thu, 1 Dec 2022 10:36:55 +0800 Subject: [PATCH 08/10] fix bug --- autogl/datasets/_ogb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autogl/datasets/_ogb.py b/autogl/datasets/_ogb.py index 52ec574..b55bcbf 100644 --- a/autogl/datasets/_ogb.py +++ b/autogl/datasets/_ogb.py @@ -40,8 +40,9 @@ class _OGBNDatasetUtil(_OGBDatasetUtil): edge_feat = torch.tensor(edge_feat) edge_index = SparseTensor(row=torch.tensor(edge_index[0]), col=torch.tensor(edge_index[1]), value=edge_feat, sparse_sizes=(num_nodes, num_nodes)) edge_index = edge_index.to_symmetric() - row, col, _ = edge_index.coo() + row, col, value = edge_index.coo() edge_index = np.array([row.cpu().detach().numpy(), col.cpu().detach().numpy()]) + ogbn_data['edge_feat'] = value.cpu().detach().numpy() homogeneous_static_graph: GeneralStaticGraph = ( GeneralStaticGraphGenerator.create_homogeneous_static_graph( dict([ From 162ec53f4ba7111560795cb9550f528f0a13ba3b Mon Sep 17 00:00:00 2001 From: defineZYP <953726616@qq.com> Date: Thu, 1 Dec 2022 10:42:01 +0800 Subject: [PATCH 09/10] fix bug --- autogl/datasets/_ogb.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/autogl/datasets/_ogb.py b/autogl/datasets/_ogb.py index b55bcbf..9fc39c4 100644 --- a/autogl/datasets/_ogb.py +++ b/autogl/datasets/_ogb.py @@ -39,10 +39,11 @@ class _OGBNDatasetUtil(_OGBDatasetUtil): if edge_feat is not None: edge_feat = torch.tensor(edge_feat) edge_index = SparseTensor(row=torch.tensor(edge_index[0]), col=torch.tensor(edge_index[1]), value=edge_feat, sparse_sizes=(num_nodes, num_nodes)) + _, _, value = edge_index.coo() + ogbn_data['edge_feat'] = value.cpu().detach().numpy() edge_index = edge_index.to_symmetric() - row, col, value = edge_index.coo() + row, col, _ = edge_index.coo() edge_index = np.array([row.cpu().detach().numpy(), col.cpu().detach().numpy()]) - ogbn_data['edge_feat'] = value.cpu().detach().numpy() homogeneous_static_graph: GeneralStaticGraph = ( GeneralStaticGraphGenerator.create_homogeneous_static_graph( dict([ From 6c7cfc46de7403892be96cf423a8416e86bd3107 Mon Sep 17 00:00:00 2001 From: defineZYP <953726616@qq.com> Date: Thu, 1 Dec 2022 11:53:07 +0800 Subject: [PATCH 10/10] add example of ogbn-proteins --- examples/nodeclf_ogb.py | 7 +- examples/nodeclf_ogb_proteins.py | 188 +++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+), 6 deletions(-) create mode 100644 examples/nodeclf_ogb_proteins.py diff --git a/examples/nodeclf_ogb.py b/examples/nodeclf_ogb.py index bb8a34e..183eb1f 100644 --- a/examples/nodeclf_ogb.py +++ b/examples/nodeclf_ogb.py @@ -183,7 +183,7 @@ def main(): args.dropout).to(device) evaluator = Evaluator(name='ogbn-arxiv') - # logger = Logger(args.runs, args) + best_accs = [] for run in range(args.runs): model.reset_parameters() @@ -193,7 +193,6 @@ def main(): for epoch in range(1, 1 + args.epochs): loss = train(model, x, y, edge_index, train_idx, optimizer) result = test(model, x, y, edge_index, split_idx, evaluator) - # logger.add_result(run, result) if epoch % args.log_steps == 0: train_acc, valid_acc, test_acc = result @@ -211,9 +210,5 @@ def main(): print(np.mean(best_accs)) print(np.std(best_accs)) - # logger.print_statistics(run) - # logger.print_statistics() - - if __name__ == "__main__": main() \ No newline at end of file diff --git a/examples/nodeclf_ogb_proteins.py b/examples/nodeclf_ogb_proteins.py new file mode 100644 index 0000000..e1a8646 --- /dev/null +++ b/examples/nodeclf_ogb_proteins.py @@ -0,0 +1,188 @@ +import argparse +import numpy as np +import torch +import torch.nn.functional as F + +import torch_geometric.transforms as T +from torch_sparse import SparseTensor +from torch_geometric.nn import GCNConv, SAGEConv + +from ogb.nodeproppred import PygNodePropPredDataset, Evaluator +from autogl import backend +from autogl.datasets import build_dataset_from_name + +if backend.DependentBackend.is_dgl(): + ylabel = 'label' +else: + ylabel = 'y' + +class GCN(torch.nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels, num_layers, + dropout): + super(GCN, self).__init__() + + self.convs = torch.nn.ModuleList() + self.convs.append( + GCNConv(in_channels, hidden_channels, normalize=False)) + for _ in range(num_layers - 2): + self.convs.append( + GCNConv(hidden_channels, hidden_channels, normalize=False)) + self.convs.append( + GCNConv(hidden_channels, out_channels, normalize=False)) + + self.dropout = dropout + + def reset_parameters(self): + for conv in self.convs: + conv.reset_parameters() + + def forward(self, x, adj_t): + for conv in self.convs[:-1]: + x = conv(x, adj_t) + x = F.relu(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.convs[-1](x, adj_t) + return x + + +class SAGE(torch.nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels, num_layers, + dropout): + super(SAGE, self).__init__() + + self.convs = torch.nn.ModuleList() + self.convs.append(SAGEConv(in_channels, hidden_channels)) + for _ in range(num_layers - 2): + self.convs.append(SAGEConv(hidden_channels, hidden_channels)) + self.convs.append(SAGEConv(hidden_channels, out_channels)) + + self.dropout = dropout + + def reset_parameters(self): + for conv in self.convs: + conv.reset_parameters() + + def forward(self, x, adj_t): + for conv in self.convs[:-1]: + x = conv(x, adj_t) + x = F.relu(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.convs[-1](x, adj_t) + return x + + +def train(model, x, y, edge_index, train_idx, optimizer): + model.train() + criterion = torch.nn.BCEWithLogitsLoss() + + optimizer.zero_grad() + out = model(x, edge_index)[train_idx] + loss = criterion(out, y[train_idx].to(torch.float)) + loss.backward() + optimizer.step() + + return loss.item() + + +@torch.no_grad() +def test(model, x, y, edge_index, split_idx, evaluator): + model.eval() + + y_pred = model(x, edge_index) + + train_rocauc = evaluator.eval({ + 'y_true': y[split_idx['train']], + 'y_pred': y_pred[split_idx['train']], + })['rocauc'] + valid_rocauc = evaluator.eval({ + 'y_true': y[split_idx['valid']], + 'y_pred': y_pred[split_idx['valid']], + })['rocauc'] + test_rocauc = evaluator.eval({ + 'y_true': y[split_idx['test']], + 'y_pred': y_pred[split_idx['test']], + })['rocauc'] + + return train_rocauc, valid_rocauc, test_rocauc + + +def main(): + parser = argparse.ArgumentParser(description='OGBN-Proteins (GNN)') + parser.add_argument('--device', type=int, default=0) + parser.add_argument('--log_steps', type=int, default=1) + parser.add_argument('--use_sage', action='store_true') + parser.add_argument('--num_layers', type=int, default=3) + parser.add_argument('--hidden_channels', type=int, default=256) + parser.add_argument('--dropout', type=float, default=0.0) + parser.add_argument('--lr', type=float, default=0.01) + parser.add_argument('--epochs', type=int, default=1000) + parser.add_argument('--eval_steps', type=int, default=5) + parser.add_argument('--runs', type=int, default=10) + args = parser.parse_args() + print(args) + + device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu' + device = torch.device(device) + + autogl_dataset = build_dataset_from_name('ogbn-proteins') + data = autogl_dataset[0] + y = data.nodes.data[ylabel].to(device) + num_nodes = data.nodes.data['species'].shape[0] + edge_index = data.edges.connections + row = edge_index[0].type(torch.long).to(device) + col = edge_index[1].type(torch.long).to(device) + edge_feat = data.edges.data['edge_feat'].to(device) + edge_index = SparseTensor(row=row, col=col, value=edge_feat, sparse_sizes=(num_nodes, num_nodes)) + x = edge_index.mean(dim=1).to(device) + edge_index.set_value_(None) + + train_mask = data.nodes.data['train_mask'] + val_mask = data.nodes.data['val_mask'] + test_mask = data.nodes.data['test_mask'] + split_idx = { + 'train': train_mask, + 'valid': val_mask, + 'test': test_mask + } + labels = data.nodes.data[ylabel] + num_classes = len(np.unique(labels.numpy())) + train_idx = split_idx['train'] + + if args.use_sage: + model = SAGE(x.size(1), args.hidden_channels, 112, + args.num_layers, args.dropout).to(device) + else: + model = GCN(x.size(1), args.hidden_channels, 112, + args.num_layers, args.dropout).to(device) + + # Pre-compute GCN normalization. + adj_t = edge_index.set_diag() + deg = adj_t.sum(dim=1).to(torch.float) + deg_inv_sqrt = deg.pow(-0.5) + deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0 + adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1) + edge_index = adj_t + + evaluator = Evaluator(name='ogbn-proteins') + + for run in range(args.runs): + model.reset_parameters() + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) + for epoch in range(1, 1 + args.epochs): + loss = train(model, x, y, edge_index, train_idx, optimizer) + + if epoch % args.eval_steps == 0: + result = test(model, x, y, edge_index, split_idx, evaluator) + + if epoch % args.log_steps == 0: + train_rocauc, valid_rocauc, test_rocauc = result + print(f'Run: {run + 1:02d}, ' + f'Epoch: {epoch:02d}, ' + f'Loss: {loss:.4f}, ' + f'Train: {100 * train_rocauc:.2f}%, ' + f'Valid: {100 * valid_rocauc:.2f}% ' + f'Test: {100 * test_rocauc:.2f}%') + + +if __name__ == "__main__": + main() \ No newline at end of file