Browse Source

node model

tags/v0.3.1
Beini 4 years ago
parent
commit
97b7b011fa
2 changed files with 218 additions and 2 deletions
  1. +6
    -2
      test/performance/node_classification/dgl/model.py
  2. +212
    -0
      test/performance/node_classification/dgl/test.py

+ 6
- 2
test/performance/node_classification/dgl/model.py View File

@@ -4,7 +4,7 @@ Performance check of AutoGL model + DGL (trainer + dataset)
import os
import numpy as np
from tqdm import tqdm
import dgl
os.environ["AUTOGL_BACKEND"] = "dgl"
import sys
sys.path.append("../../../../")
@@ -53,7 +53,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser('dgl model')
parser.add_argument('--device', type=str, default='cuda')
parser.add_argument('--dataset', type=str, choices=['Cora', 'CiteSeer', 'PubMed'], default='Cora')
parser.add_argument('--repeat', type=int, default=50)
parser.add_argument('--repeat', type=int, default=1)
parser.add_argument('--model', type=str, choices=['gat', 'gcn', 'sage'], default='gat')
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--weight_decay', type=float, default=0.0)
@@ -69,6 +69,10 @@ if __name__ == '__main__':
elif args.dataset == 'PubMed':
dataset = PubmedGraphDataset()
graph = dataset[0].to(args.device)

graph = dgl.remove_self_loop(graph)
graph = dgl.add_self_loop(graph)

label = graph.ndata['label']
train_mask = graph.ndata['train_mask']
val_mask = graph.ndata['val_mask']


+ 212
- 0
test/performance/node_classification/dgl/test.py View File

@@ -0,0 +1,212 @@
import argparse
import numpy as np
import networkx as nx
import time
import torch
import torch.nn.functional as F
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset



import torch
import torch.nn as nn
import dgl.function as fn
from dgl.nn import GATConv


class GAT(nn.Module):
    """Graph attention network assembled from DGL ``GATConv`` layers.

    The stack consists of an input layer, ``num_layers - 1`` hidden layers and
    one output layer.  In ``forward`` the head outputs of the first
    ``num_layers`` layers are flattened together, while the head outputs of
    the last layer are averaged.

    Args:
        g: graph handed to every ``GATConv`` call in ``forward``.
        num_layers: number of layers applied before the output projection.
        in_dim: size of the input features.
        num_hidden: per-head output size of the non-output layers.
        num_classes: per-head output size of the output layer.
        heads: indexable head counts; ``heads[i]`` is used for layer ``i``
            and ``heads[-1]`` for the output layer.
        activation: activation passed to every layer except the output one.
        feat_drop: feature dropout value forwarded to ``GATConv``.
        attn_drop: attention dropout value forwarded to ``GATConv``.
        negative_slope: negative slope value forwarded to ``GATConv``.
        residual: residual flag for every layer except the input one.
    """

    def __init__(self, g, num_layers, in_dim, num_hidden, num_classes, heads,
                 activation, feat_drop, attn_drop, negative_slope, residual):
        super().__init__()
        self.g = g
        self.num_layers = num_layers

        # One (in_size, out_size, head_count, residual_flag, act) tuple per
        # layer, listed in construction order.
        # Input projection: never uses a residual connection.
        layer_specs = [(in_dim, num_hidden, heads[0], False, activation)]
        # Hidden layers: multi-head output of the previous layer is
        # flattened, so the input size is num_hidden * previous head count.
        layer_specs.extend(
            (num_hidden * heads[idx - 1], num_hidden, heads[idx],
             residual, activation)
            for idx in range(1, num_layers)
        )
        # Output projection: no activation.
        layer_specs.append(
            (num_hidden * heads[-2], num_classes, heads[-1], residual, None))

        self.gat_layers = nn.ModuleList([
            GATConv(in_size, out_size, head_count,
                    feat_drop, attn_drop, negative_slope, residual_flag, act)
            for in_size, out_size, head_count, residual_flag, act
            in layer_specs
        ])
        self.activation = activation

    def forward(self, inputs):
        """Return the logits computed from ``inputs`` on the stored graph."""
        hidden = inputs
        for idx in range(self.num_layers):
            layer = self.gat_layers[idx]
            # Merge the per-head outputs into a single feature dimension.
            hidden = layer(self.g, hidden).flatten(1)
        # Output projection: average over the heads.
        output_layer = self.gat_layers[-1]
        return output_layer(self.g, hidden).mean(1)

def accuracy(logits, labels):
    """Return the fraction of rows whose highest-scoring column equals the label.

    Args:
        logits: 2-D score tensor; the maximum is taken along ``dim=1``.
        labels: tensor of target indices compared element-wise with the
            predicted indices.

    Returns:
        A Python float: number of matches divided by ``len(labels)``.
    """
    predicted = torch.max(logits, dim=1).indices
    num_correct = (predicted == labels).sum().item()
    return float(num_correct) / len(labels)


def evaluate(model, features, labels, mask):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    The model is switched to evaluation mode and run without gradient
    tracking; it is not switched back to training mode afterwards.

    Args:
        model: callable module; invoked as ``model(features)``.
        features: input passed to the model.
        labels: target tensor, indexed with ``mask``.
        mask: index/mask applied to both the model output and ``labels``.

    Returns:
        The value returned by ``accuracy`` for the selected entries.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(features)[mask]
        selected_labels = labels[mask]
        return accuracy(selected_logits, selected_labels)


def main(args):
    """Train a GAT on a citation dataset and print training and test metrics.

    Args:
        args: parsed command-line namespace.  Reads ``dataset``, ``gpu``,
            ``num_heads``, ``num_layers``, ``num_out_heads``, ``num_hidden``,
            ``in_drop``, ``attn_drop``, ``negative_slope``, ``residual``,
            ``lr``, ``weight_decay``, ``epochs`` and ``fastmode``.
            ``args.dataset`` is set to ``'pubmed'`` when it is missing or
            ``None``.

    Raises:
        ValueError: if ``args.dataset`` is not ``'cora'``, ``'citeseer'`` or
            ``'pubmed'``.
    """
    # load and preprocess dataset
    # PubMed is only the fallback: a dataset chosen on the command line must
    # not be silently overwritten.
    if getattr(args, 'dataset', None) is None:
        args.dataset = 'pubmed'
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    cuda = args.gpu >= 0
    if cuda:
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    # NOTE(review): `num_labels` / `graph` appear to be deprecated dataset
    # properties in newer DGL releases -- confirm against the pinned version.
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self loop (existing ones are removed first so none is duplicated)
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)

    if cuda:
        # Place the model on the same device as the graph; a bare `.cuda()`
        # would use the current device, which differs when args.gpu != 0.
        model.cuda(args.gpu)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Per-epoch durations; the first three epochs are treated as warm-up and
    # are not recorded.
    # NOTE(review): on GPU the interval is measured without
    # torch.cuda.synchronize(), so it may under-report -- confirm before
    # relying on the timings.
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            # Reuse the training-mode forward pass instead of re-evaluating.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)

        # No duration is recorded during warm-up; report NaN explicitly
        # rather than taking the mean of an empty list (RuntimeWarning).
        mean_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, mean_dur, loss.item(), train_acc,
                     val_acc, n_edges / mean_dur / 1000))

    print()

    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))


if __name__ == '__main__':
    # Script entry point: build the command-line interface, echo the parsed
    # arguments and start training.
    parser = argparse.ArgumentParser(description='GAT')
    register_data_args(parser)

    # (flag, add_argument keyword options), registered in this order.
    option_table = (
        ("--gpu", dict(type=int, default=-1,
                       help="which GPU to use. Set -1 to use CPU.")),
        ("--epochs", dict(type=int, default=200,
                          help="number of training epochs")),
        ("--num-heads", dict(type=int, default=8,
                             help="number of hidden attention heads")),
        ("--num-out-heads", dict(type=int, default=1,
                                 help="number of output attention heads")),
        ("--num-layers", dict(type=int, default=1,
                              help="number of hidden layers")),
        ("--num-hidden", dict(type=int, default=8,
                              help="number of hidden units")),
        ("--residual", dict(action="store_true", default=False,
                            help="use residual connection")),
        ("--in-drop", dict(type=float, default=.6,
                           help="input feature dropout")),
        ("--attn-drop", dict(type=float, default=.6,
                             help="attention dropout")),
        ("--lr", dict(type=float, default=0.01,
                      help="learning rate")),
        ("--weight-decay", dict(type=float, default=0,
                                help="weight decay")),
        ("--negative-slope", dict(type=float, default=0.2,
                                  help="the negative slope of leaky relu")),
        ("--early-stop", dict(action="store_true", default=False,
                              help="indicates whether to use early stop or not")),
        ("--fastmode", dict(action="store_true", default=False,
                            help="skip re-evaluate the validation set")),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    args = parser.parse_args()
    print(args)

    main(args)

Loading…
Cancel
Save