ogb dataset examples (develop → develop/0.4/pre)
@@ -3,7 +3,7 @@ name: Bug report
about: Create a report to help us improve
title: "[BUG]"
labels: bug
-assignees: Frozenmad
+assignees: general502570

---
@@ -3,7 +3,7 @@ name: Feature request
about: Suggest an idea for this project
title: "[FEATURE]"
labels: enhancement
-assignees: Frozenmad
+assignees: general502570

---
@@ -14,6 +14,7 @@ Feel free to open <a href="https://github.com/THUMNLab/AutoGL/issues">issues</a>
## News!
- 2022.4.19 New version v0.3.1! We have released the Chinese tutorial for the first time!
- 2021.12.31 New version v0.3.0-pre is here!
- AutoGL now supports the [__Deep Graph Library (DGL)__](https://www.dgl.ai/) backend to be interface-friendly for DGL users! Homogeneous node classification, link prediction, and graph classification tasks are all supported under the DGL backend. AutoGL is also compatible with PyG 2.0 now.
- __Heterogeneous__ node classification tasks are now supported! See the [hetero tutorial](http://mn.cs.tsinghua.edu.cn/autogl/documentation/docfile/tutorial/t_hetero_node_clf.html) for more details.
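With two interchangeable backends, the example code in this PR frequently branches on which one AutoGL was configured with. Below is a minimal sketch of that check, using only the DependentBackend helper that the benchmark scripts later in this PR already import; the 'feat'/'x' and 'label'/'y' key names follow those scripts.

from autogl.backend import DependentBackend

# Report which backend AutoGL is running on ("dgl" or "pyg").
print(DependentBackend.get_backend_name())

# Node data keys differ between the two backends, as in the benchmark scripts below.
if DependentBackend.is_dgl():
    feat_key, label_key = 'feat', 'label'
else:
    feat_key, label_key = 'x', 'y'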
@@ -13,6 +13,7 @@
## News!
- 2022.4.19 Version v0.3.1 released! The Chinese tutorial is available for the first time!
- 2021.12.31 Version v0.3.0-pre released!
- AutoGL now supports the [__Deep Graph Library (DGL)__](https://www.dgl.ai/) backend for the convenience of DGL users. Node classification, link prediction, and graph classification on homogeneous graphs are currently supported under the DGL backend. AutoGL is now also compatible with PyG 2.0.
- __Heterogeneous__ node classification is now supported! See the [hetero tutorial](http://mn.cs.tsinghua.edu.cn/autogl/documentation/docfile/tutorial/t_hetero_node_clf.html) for details.
@@ -16,4 +16,4 @@ from .module import (
    train,
)

-__version__ = "0.3.0-pre"
+__version__ = "0.3.1"
@@ -5,6 +5,8 @@ from ogb.nodeproppred import NodePropPredDataset
from ogb.linkproppred import LinkPropPredDataset
from ogb.graphproppred import GraphPropPredDataset
from torch_sparse import SparseTensor
from autogl import backend as _backend
from autogl.data import InMemoryStaticGraphSet
from autogl.data.graph import (
@@ -30,13 +32,25 @@ class _OGBNDatasetUtil(_OGBDatasetUtil):
            edges_data_key_mapping: _typing.Optional[_typing.Mapping[str, str]] = ...,
            graph_data_key_mapping: _typing.Optional[_typing.Mapping[str, str]] = ...
    ) -> GeneralStaticGraph:
        # TODO
        # Convert the OGB edge list into a symmetric (undirected) edge index;
        # edge features are re-extracted in the SparseTensor's sorted order.
        edge_index = ogbn_data['edge_index']
        num_nodes = ogbn_data['num_nodes']
        edge_feat = ogbn_data['edge_feat']
        if edge_feat is not None:
            edge_feat = torch.tensor(edge_feat)
        edge_index = SparseTensor(row=torch.tensor(edge_index[0]), col=torch.tensor(edge_index[1]), value=edge_feat, sparse_sizes=(num_nodes, num_nodes))
        _, _, value = edge_index.coo()
        ogbn_data['edge_feat'] = value.cpu().detach().numpy()
        edge_index = edge_index.to_symmetric()
        row, col, _ = edge_index.coo()
        edge_index = np.array([row.cpu().detach().numpy(), col.cpu().detach().numpy()])
        homogeneous_static_graph: GeneralStaticGraph = (
            GeneralStaticGraphGenerator.create_homogeneous_static_graph(
                dict([
                    (target_key, torch.from_numpy(ogbn_data[source_key]))
                    for source_key, target_key in nodes_data_key_mapping.items()
                ]),
-               torch.from_numpy(ogbn_data['edge_index']),
+               torch.tensor(edge_index),
                dict([
                    (target_key, torch.from_numpy(ogbn_data[source_key]))
                    for source_key, target_key in edges_data_key_mapping.items()
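Once converted this way, an OGB node-classification dataset is consumed through AutoGL's usual dataset interface; the benchmark scripts later in this PR read the resulting GeneralStaticGraph as sketched below. The dataset name, path, and key names are taken from those scripts and assume the PyG backend.

from autogl.datasets import build_dataset_from_name

# Download the OGB data and convert it into AutoGL's GeneralStaticGraph format.
dataset = build_dataset_from_name('ogbn_arxiv', path='./dataset/')
data = dataset[0]

x = data.nodes.data['x']                    # node features ('feat' under the DGL backend)
y = data.nodes.data['y']                    # node labels ('label' under the DGL backend)
edge_index = data.edges.connections         # symmetrized 2 x num_edges index built above
train_mask = data.nodes.data['train_mask']  # split masks exposed as node data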
@@ -0,0 +1,59 @@
ensemble:
  name: null
feature:
  - name: NormalizeFeatures
hpo:
  max_evals: 10
  name: random
models:
  - hp_space:
      - feasiblePoints: 2,3
        parameterName: num_layers
        type: DISCRETE
      - cutFunc: lambda x:x[0] - 1
        cutPara:
          - num_layers
        length: 2
        maxValue:
          - 256
          - 256
        minValue:
          - 256
          - 256
        numericalType: INTEGER
        parameterName: hidden
        scalingType: LOG
        type: NUMERICAL_LIST
      - maxValue: 0.505
        minValue: 0.495
        parameterName: dropout
        scalingType: LINEAR
        type: DOUBLE
      - feasiblePoints:
          - leaky_relu
          - relu
        parameterName: act
        type: CATEGORICAL
    name: gcn-model
trainer:
  hp_space:
    - maxValue: 500
      minValue: 500
      parameterName: max_epoch
      scalingType: LINEAR
      type: INTEGER
    - maxValue: 500
      minValue: 500
      parameterName: early_stopping_round
      scalingType: LINEAR
      type: INTEGER
    - maxValue: 0.0105
      minValue: 0.0095
      parameterName: lr
      scalingType: LOG
      type: DOUBLE
    - maxValue: 0.0000001
      minValue: 0.00000001
      parameterName: weight_decay
      scalingType: LOG
      type: DOUBLE
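Every range in this config is pinned to an essentially fixed value (two 256-unit hidden layers, dropout around 0.5, lr around 0.01, 500 epochs), so the HPO stage effectively reproduces a single GCN benchmark configuration rather than searching. A config like this is normally consumed through an AutoGL solver; the following is a minimal sketch, assuming the standard AutoNodeClassifier.from_config entry point and an illustrative file name for the YAML above.

from autogl.datasets import build_dataset_from_name
from autogl.solver import AutoNodeClassifier

dataset = build_dataset_from_name('ogbn_arxiv')

# Build feature engineering, model search space, HPO and trainer from the YAML above.
solver = AutoNodeClassifier.from_config('ogbn_arxiv_gcn_benchmark.yml')  # illustrative path
solver.fit(dataset, time_limit=3600)
print(solver.evaluate(metric='acc'))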
@@ -0,0 +1,146 @@
from torch_geometric.data import DataLoader
import torch.optim as optim
from tqdm import tqdm
from ogb.graphproppred import Evaluator
import random
import torch
import numpy as np
from autogl.datasets import build_dataset_from_name
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from ogb_gnn import GNN
from autogl.backend import DependentBackend
from torch_geometric.data import Data

backend = DependentBackend.get_backend_name()

cls_criterion = torch.nn.BCEWithLogitsLoss()
reg_criterion = torch.nn.MSELoss()


def train(model, device, loader, optimizer, task_type):
    model.train()
    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)
        if batch.x.shape[0] == 1 or batch.batch[-1] == 0:
            # Skip degenerate batches (a single node or a single graph).
            pass
        else:
            pred = model(batch)
            optimizer.zero_grad()
            # Ignore NaN targets (unlabeled entries) when computing the loss.
            is_labeled = batch.y == batch.y
            loss = cls_criterion(pred.to(torch.float32)[is_labeled], batch.y.to(torch.float32)[is_labeled])
            loss.backward()
            optimizer.step()


def eval(model, device, loader, evaluator):
    model.eval()
    y_true = []
    y_pred = []
    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)
        if batch.x.shape[0] == 1:
            pass
        else:
            with torch.no_grad():
                pred = model(batch)
            y_true.append(batch.y.view(pred.shape).detach().cpu())
            y_pred.append(pred.detach().cpu())
    y_true = torch.cat(y_true, dim=0).numpy()
    y_pred = torch.cat(y_pred, dim=0).numpy()
    input_dict = {"y_true": y_true, "y_pred": y_pred}
    return evaluator.eval(input_dict)


def trans(dataset):
    # Convert AutoGL GeneralStaticGraph instances into PyG Data objects.
    ret = []
    for i in range(len(dataset)):
        x = dataset[i].nodes.data['x']
        y = dataset[i].data['y'].view(-1, 1)
        edge_index = dataset[i].edges.connections
        edge_attr = dataset[i].edges.data['edge_feat']
        data = Data(x=x, y=y, edge_index=edge_index, edge_attr=edge_attr)
        ret.append(data)
    return ret


if __name__ == "__main__":
    parser = ArgumentParser(
        "auto graph classification", formatter_class=ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--dataset",
        default="ogbg-molhiv",
        type=str,
        help="graph classification dataset",
        choices=["mutag", "imdb-b", "imdb-m", "proteins", "collab", "ogbg-molhiv", "ogbg-molbace"],
    )
    parser.add_argument(
        "--configs", default="../configs/graphclf_gin_benchmark.yml", help="config files"
    )
    parser.add_argument("--device", type=int, default=0, help="device to run on, -1 means cpu")
    parser.add_argument("--seed", type=int, default=0, help="random seed")
    args = parser.parse_args()

    if args.device == -1 or not torch.cuda.is_available():
        args.device = "cpu"
    if args.device != "cpu":
        torch.cuda.set_device(args.device)
    device = torch.device(args.device)

    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    dataset = build_dataset_from_name(args.dataset)
    model = GNN(num_tasks=1, gnn_type='gcn').to(device)
    evaluator = Evaluator(args.dataset)

    train_dataset = trans(dataset.train_split)
    val_dataset = trans(dataset.val_split)
    test_dataset = trans(dataset.test_split)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
    valid_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    valid_curve = []
    test_curve = []
    train_curve = []

    for epoch in range(1, 100 + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train(model, device, train_loader, optimizer, 'binary classification')

        print('Evaluating...')
        train_perf = eval(model, device, train_loader, evaluator)
        valid_perf = eval(model, device, valid_loader, evaluator)
        test_perf = eval(model, device, test_loader, evaluator)

        print({'Train': train_perf, 'Validation': valid_perf, 'Test': test_perf})

        train_curve.append(train_perf['rocauc'])
        valid_curve.append(valid_perf['rocauc'])
        test_curve.append(test_perf['rocauc'])

    # Report the test score at the epoch with the best validation score.
    best_val_epoch = np.argmax(np.array(valid_curve))
    best_train = max(train_curve)

    print('Finished training!')
    print('Best validation score: {}'.format(valid_curve[best_val_epoch]))
    print('Test score: {}'.format(test_curve[best_val_epoch]))
@@ -0,0 +1,214 @@
import os
import tqdm
import argparse
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv
from ogb.nodeproppred import Evaluator
from autogl.datasets import build_dataset_from_name
from autogl import backend

# Node data keys differ between the DGL and PyG backends.
if backend.DependentBackend.is_dgl():
    feat = 'feat'
    label = 'label'
else:
    feat = 'x'
    label = 'y'


class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(GCN, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(GCNConv(in_channels, hidden_channels, cached=True))
        self.bns = torch.nn.ModuleList()
        self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(
                GCNConv(hidden_channels, hidden_channels, cached=True))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.convs.append(GCNConv(hidden_channels, out_channels, cached=True))

        self.dropout = dropout

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adj_t):
        for i, conv in enumerate(self.convs[:-1]):
            x = conv(x, adj_t)
            x = self.bns[i](x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return x.log_softmax(dim=-1)


class SAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(SAGE, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        self.bns = torch.nn.ModuleList()
        self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adj_t):
        for i, conv in enumerate(self.convs[:-1]):
            x = conv(x, adj_t)
            x = self.bns[i](x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return x.log_softmax(dim=-1)


def train(model, x, y, edge_index, train_idx, optimizer):
    model.train()
    optimizer.zero_grad()
    out = model(x, edge_index)[train_idx]
    loss = F.nll_loss(out, y[train_idx])
    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, x, y, edge_index, split_idx, evaluator):
    model.eval()
    out = model(x, edge_index)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': y[split_idx['train']].view(-1, 1),
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': y[split_idx['valid']].view(-1, 1),
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': y[split_idx['test']].view(-1, 1),
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc


class Node:
    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __le__(self, other):
        return self.a <= other.a

    def __lt__(self, other):
        if self.a < other.a:
            return True
        elif self.a == other.a:
            return self.b < other.b
        else:
            return False


def main():
    parser = argparse.ArgumentParser(description='OGBN-Arxiv (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = build_dataset_from_name('ogbn_arxiv', path='./dataset/')
    data = dataset[0]
    x = data.nodes.data[feat].to(device)
    y = data.nodes.data[label].to(device)
    edge_index = data.edges.connections.to(device)
    print(edge_index)

    # The OGB splits are exposed as boolean node masks.
    train_mask = data.nodes.data['train_mask'].to(device)
    val_mask = data.nodes.data['val_mask'].to(device)
    test_mask = data.nodes.data['test_mask'].to(device)
    split_idx = {
        'train': train_mask,
        'valid': val_mask,
        'test': test_mask
    }
    train_idx = split_idx['train']

    labels = dataset[0].nodes.data[label]
    num_classes = len(np.unique(labels.numpy()))

    if args.use_sage:
        model = SAGE(dataset[0].nodes.data[feat].size(1), args.hidden_channels,
                     num_classes, args.num_layers,
                     args.dropout).to(device)
    else:
        model = GCN(dataset[0].nodes.data[feat].size(1), args.hidden_channels,
                    num_classes, args.num_layers,
                    args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-arxiv')

    best_accs = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        best_valid = 0.0
        best_test = 0.0
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, x, y, edge_index, train_idx, optimizer)
            result = test(model, x, y, edge_index, split_idx, evaluator)
            train_acc, valid_acc, test_acc = result

            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')

            # Track the test accuracy at the best validation epoch of each run.
            if valid_acc > best_valid:
                best_valid = valid_acc
                best_test = test_acc
        best_accs.append(best_test)

    print(best_accs)
    print(np.mean(best_accs))
    print(np.std(best_accs))


if __name__ == "__main__":
    main()
@@ -0,0 +1,188 @@
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_sparse import SparseTensor
from torch_geometric.nn import GCNConv, SAGEConv
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from autogl import backend
from autogl.datasets import build_dataset_from_name

# The label key differs between the DGL and PyG backends.
if backend.DependentBackend.is_dgl():
    ylabel = 'label'
else:
    ylabel = 'y'


class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(GCN, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(
            GCNConv(in_channels, hidden_channels, normalize=False))
        for _ in range(num_layers - 2):
            self.convs.append(
                GCNConv(hidden_channels, hidden_channels, normalize=False))
        self.convs.append(
            GCNConv(hidden_channels, out_channels, normalize=False))

        self.dropout = dropout

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()

    def forward(self, x, adj_t):
        for conv in self.convs[:-1]:
            x = conv(x, adj_t)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return x


class SAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(SAGE, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()

    def forward(self, x, adj_t):
        for conv in self.convs[:-1]:
            x = conv(x, adj_t)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return x


def train(model, x, y, edge_index, train_idx, optimizer):
    model.train()
    criterion = torch.nn.BCEWithLogitsLoss()

    optimizer.zero_grad()
    out = model(x, edge_index)[train_idx]
    loss = criterion(out, y[train_idx].to(torch.float))
    loss.backward()
    optimizer.step()

    return loss.item()


@torch.no_grad()
def test(model, x, y, edge_index, split_idx, evaluator):
    model.eval()

    y_pred = model(x, edge_index)

    train_rocauc = evaluator.eval({
        'y_true': y[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['rocauc']
    valid_rocauc = evaluator.eval({
        'y_true': y[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['rocauc']
    test_rocauc = evaluator.eval({
        'y_true': y[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['rocauc']

    return train_rocauc, valid_rocauc, test_rocauc


def main():
    parser = argparse.ArgumentParser(description='OGBN-Proteins (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    autogl_dataset = build_dataset_from_name('ogbn-proteins')
    data = autogl_dataset[0]

    y = data.nodes.data[ylabel].to(device)
    num_nodes = data.nodes.data['species'].shape[0]
    edge_index = data.edges.connections
    row = edge_index[0].type(torch.long).to(device)
    col = edge_index[1].type(torch.long).to(device)
    edge_feat = data.edges.data['edge_feat'].to(device)
    # Build a sparse adjacency whose values are the edge features.
    edge_index = SparseTensor(row=row, col=col, value=edge_feat, sparse_sizes=(num_nodes, num_nodes))
    # ogbn-proteins has no raw node features; use the mean of incident edge features instead.
    x = edge_index.mean(dim=1).to(device)
    edge_index.set_value_(None)

    train_mask = data.nodes.data['train_mask'].to(device)
    val_mask = data.nodes.data['val_mask'].to(device)
    test_mask = data.nodes.data['test_mask'].to(device)
    split_idx = {
        'train': train_mask,
        'valid': val_mask,
        'test': test_mask
    }
    labels = data.nodes.data[ylabel]
    num_classes = len(np.unique(labels.numpy()))
    train_idx = split_idx['train']

    # ogbn-proteins is a 112-task binary classification problem.
    if args.use_sage:
        model = SAGE(x.size(1), args.hidden_channels, 112,
                     args.num_layers, args.dropout).to(device)
    else:
        model = GCN(x.size(1), args.hidden_channels, 112,
                    args.num_layers, args.dropout).to(device)

        # Pre-compute GCN normalization.
        adj_t = edge_index.set_diag()
        deg = adj_t.sum(dim=1).to(torch.float)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)
        edge_index = adj_t

    evaluator = Evaluator(name='ogbn-proteins')

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, x, y, edge_index, train_idx, optimizer)

            if epoch % args.eval_steps == 0:
                result = test(model, x, y, edge_index, split_idx, evaluator)
                if epoch % args.log_steps == 0:
                    train_rocauc, valid_rocauc, test_rocauc = result
                    print(f'Run: {run + 1:02d}, '
                          f'Epoch: {epoch:02d}, '
                          f'Loss: {loss:.4f}, '
                          f'Train: {100 * train_rocauc:.2f}%, '
                          f'Valid: {100 * valid_rocauc:.2f}% '
                          f'Test: {100 * test_rocauc:.2f}%')


if __name__ == "__main__":
    main()
@@ -16,7 +16,7 @@ with open("README.md", 'r') as fh:
''' https://setuptools.readthedocs.io/en/latest/ '''

setup(
    name='autogl',
-    version='0.3.0-pre',
+    version='0.3.1',
    author='THUMNLab/aglteam',
    maintainer='THUMNLab/aglteam',
    author_email='autogl@tsinghua.edu.cn',