""" Performance check of AutoGL model + DGL (dataset + trainer) """ import os import pickle os.environ["AUTOGL_BACKEND"] = "dgl" # from dgl.dataloading.pytorch.dataloader import GraphDataLoader from dgl.dataloading import GraphDataLoader import numpy as np from tqdm import tqdm import random import torch import torch.nn as nn import torch.optim as optim from dgl.data import GINDataset import torch import torch.nn as nn from autogl.module.model.dgl.gin import AutoGIN from autogl.module.model.dgl.topkpool import AutoTopkpool from autogl.solver.utils import set_seed import argparse class DatasetAbstraction(): def __init__(self, graphs, labels): for g in graphs: g.ndata['feat'] = g.ndata['attr'] self.graphs, self.labels = [], [] for g, l in zip(graphs, labels): self.graphs.append(g) self.labels.append(l) self.gclasses = max(self.labels).item() + 1 self.graph = self.graphs def __len__(self): return len(self.graphs) def __getitem__(self, idx): if isinstance(idx, int): return self.graphs[idx], self.labels[idx] elif isinstance(idx, torch.BoolTensor): idx = [i for i in range(len(idx)) if idx[i]] elif isinstance(idx, torch.Tensor) and idx.unique()[0].sum().item() == 1: idx = [i for i in range(len(idx)) if idx[i]] return DatasetAbstraction([self.graphs[i] for i in idx], [self.labels[i] for i in idx]) def train(net, trainloader, validloader, optimizer, criterion, epoch, device): best_model = pickle.dumps(net.state_dict()) best_acc = 0. for e in range(epoch): net.train() for graphs, labels in trainloader: labels = labels.to(device) graphs = graphs.to(device) # outputs = net((graphs, labels)) # feat = graphs.ndata.pop('attr') # outputs = net(graphs, feat) outputs = net(graphs) loss = criterion(outputs, labels) # backprop optimizer.zero_grad() loss.backward() optimizer.step() gt = [] pr = [] net.eval() for graphs, labels in validloader: labels = labels.to(device) graphs = graphs.to(device) gt.append(labels) # feat = graphs.ndata.pop('attr') # outputs = net(graphs, feat) # outputs = net((graphs, labels)) outputs = net(graphs) pr.append(outputs.argmax(1)) gt = torch.cat(gt, dim=0) pr = torch.cat(pr, dim=0) acc = (gt == pr).float().mean().item() if acc > best_acc: best_acc = acc best_model = pickle.dumps(net.state_dict()) net.load_state_dict(pickle.loads(best_model)) return net def eval_net(net, dataloader, device): net.eval() total = 0 total_correct = 0 for data in dataloader: graphs, labels = data graphs = graphs.to(device) labels = labels.to(device) # feat = graphs.ndata.pop('attr') total += len(labels) # outputs = net(graphs, feat) # outputs = net((graphs, labels)) outputs = net(graphs) _, predicted = torch.max(outputs.data, 1) total_correct += (predicted == labels.data).sum().item() acc = 1.0 * total_correct / total net.train() return acc def main(args): device = torch.device(args.device) dataset_ = GINDataset(args.dataset, False) dataset = DatasetAbstraction([g[0] for g in dataset_], [g[1] for g in dataset_]) # 1. 
split dataset [fix split] dataids = list(range(len(dataset))) random.seed(args.dataset_seed) random.shuffle(dataids) fold = int(len(dataset) * 0.1) train_dataset = dataset[dataids[:fold * 8]] val_dataset = dataset[dataids[fold * 8: fold * 9]] test_dataset = dataset[dataids[fold * 9: ]] trainloader = GraphDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True) valloader = GraphDataLoader(val_dataset, batch_size=args.batch_size, shuffle=False) testloader = GraphDataLoader(test_dataset, batch_size=args.batch_size, shuffle=False) accs = [] for seed in tqdm(range(args.repeat)): # set up seeds, args.seed supported set_seed(seed) if args.model == 'gin': model = AutoGIN( num_features=dataset_.dim_nfeats, num_classes=dataset_.gclasses, device=device, ).from_hyper_parameter({ "num_layers": 5, "hidden": [64,64,64,64], "dropout": 0.5, "act": "relu", "eps": "False", "mlp_layers": 2, "neighbor_pooling_type": "sum", "graph_pooling_type": "sum" }).model elif args.model == 'topkpool': model = AutoTopkpool( num_features=dataset_.dim_nfeats, num_classes=dataset_.gclasses, device=device, ).from_hyper_parameter({ "num_layers": 5, "hidden": [64,64,64,64], "dropout": 0.5 }).model model = model.to(device) criterion = nn.CrossEntropyLoss() # defaul reduce is true optimizer = optim.Adam(model.parameters(), lr=args.lr) model = train(model, trainloader, valloader, optimizer, criterion, args.epoch, device) acc = eval_net(model, testloader, device) accs.append(acc) print('{:.2f} ~ {:.2f}'.format(np.mean(accs) * 100, np.std(accs) * 100)) if __name__ == '__main__': parser = argparse.ArgumentParser('model parser') parser.add_argument('--device', type=str, default='cuda') parser.add_argument('--dataset', type=str, choices=['MUTAG', 'COLLAB', 'IMDBBINARY', 'IMDBMULTI', 'NCI1', 'PROTEINS', 'PTC', 'REDDITBINARY', 'REDDITMULTI5K'], default='MUTAG') parser.add_argument('--dataset_seed', type=int, default=2021) parser.add_argument('--batch_size', type=int, default=32) parser.add_argument('--repeat', type=int, default=50) parser.add_argument('--model', type=str, choices=['gin', 'topkpool'], default='gin') parser.add_argument('--lr', type=float, default=0.0001) parser.add_argument('--epoch', type=int, default=100) args = parser.parse_args() main(args)
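
# ---------------------------------------------------------------------------
# Example invocations (a sketch; "performance_check.py" is a hypothetical
# file name for this script, not one fixed by the repo):
#
#   python performance_check.py --dataset MUTAG --model gin --device cuda
#   python performance_check.py --dataset PROTEINS --model topkpool --repeat 10 --device cpu
#
# The script prints "test acc: mean ± std" (in percent) over --repeat
# independently seeded training runs.
# ---------------------------------------------------------------------------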