|
|
|
@@ -0,0 +1,212 @@ |
|
|
|
import argparse |
|
|
|
import numpy as np |
|
|
|
import networkx as nx |
|
|
|
import time |
|
|
|
import torch |
|
|
|
import torch.nn.functional as F |
|
|
|
import dgl |
|
|
|
from dgl.data import register_data_args |
|
|
|
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch |
|
|
|
import torch.nn as nn |
|
|
|
import dgl.function as fn |
|
|
|
from dgl.nn import GATConv |
|
|
|
|
|
|
|
|
|
|
|
class GAT(nn.Module):
    """Stack of ``GATConv`` layers applied to one fixed graph.

    The module holds ``num_layers`` hidden attention layers followed by a
    single output layer, i.e. ``num_layers + 1`` ``GATConv`` modules in
    ``self.gat_layers``.
    """

    def __init__(self,
                 g,
                 num_layers,
                 in_dim,
                 num_hidden,
                 num_classes,
                 heads,
                 activation,
                 feat_drop,
                 attn_drop,
                 negative_slope,
                 residual):
        """Build the layer stack.

        Args:
            g: graph passed to every layer on each forward call.
            num_layers: number of hidden layers (the output layer is extra).
            in_dim: size of the input node features.
            num_hidden: per-head output size of every hidden layer.
            num_classes: per-head output size of the output layer.
            heads: sequence of head counts; entry ``i`` is used for hidden
                layer ``i`` and the last entry for the output layer.
            activation: activation handed to the hidden layers (the output
                layer gets ``None``).
            feat_drop: feature dropout rate forwarded to ``GATConv``.
            attn_drop: attention dropout rate forwarded to ``GATConv``.
            negative_slope: LeakyReLU slope forwarded to ``GATConv``.
            residual: whether layers after the first use a residual
                connection.
        """
        super().__init__()
        self.g = g
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation

        # First layer projects the raw input features; it never uses a
        # residual connection.
        input_layer = GATConv(
            in_dim, num_hidden, heads[0],
            feat_drop=feat_drop,
            attn_drop=attn_drop,
            negative_slope=negative_slope,
            residual=False,
            activation=self.activation)
        self.gat_layers.append(input_layer)

        # Remaining hidden layers. Each one consumes the flattened multi-head
        # output of its predecessor, hence num_hidden * (previous head count).
        for layer_idx in range(1, num_layers):
            hidden_layer = GATConv(
                num_hidden * heads[layer_idx - 1], num_hidden, heads[layer_idx],
                feat_drop=feat_drop,
                attn_drop=attn_drop,
                negative_slope=negative_slope,
                residual=residual,
                activation=self.activation)
            self.gat_layers.append(hidden_layer)

        # Output layer: no activation, so it emits raw per-head class scores.
        output_layer = GATConv(
            num_hidden * heads[-2], num_classes, heads[-1],
            feat_drop=feat_drop,
            attn_drop=attn_drop,
            negative_slope=negative_slope,
            residual=residual,
            activation=None)
        self.gat_layers.append(output_layer)

    def forward(self, inputs):
        """Run ``inputs`` through every layer and return the class logits.

        Each hidden layer's result is flattened from dim 1 onward before it
        is fed to the next layer; the output layer's result is averaged over
        dim 1 (the head axis, per the GATConv output layout).
        """
        hidden = inputs
        for layer_idx in range(self.num_layers):
            layer = self.gat_layers[layer_idx]
            hidden = layer(self.g, hidden).flatten(1)
        # Output projection: average instead of concatenating.
        return self.gat_layers[-1](self.g, hidden).mean(1)
|
|
|
|
|
|
|
def accuracy(logits, labels):
    """Return the fraction of rows whose highest-scoring class matches ``labels``.

    Args:
        logits: tensor of class scores with the classes along dim 1; the
            prediction for a row is the index of its maximum along that dim.
        labels: tensor of target class indices, one per row of ``logits``.

    Returns:
        A Python float. When ``labels`` is empty the result is 0.0 rather
        than a ``ZeroDivisionError``.
    """
    total = len(labels)
    if total == 0:
        # An empty selection (e.g. a mask that picks no nodes) has nothing to
        # score; report 0.0 instead of dividing by zero.
        return 0.0
    predictions = logits.max(dim=1).indices
    correct = torch.sum(predictions == labels)
    return correct.item() * 1.0 / total
|
|
|
|
|
|
|
|
|
|
|
def evaluate(model, features, labels, mask):
    """Compute the accuracy of ``model`` on the entries selected by ``mask``.

    The model is switched to evaluation mode (and left there; callers that
    continue training must call ``model.train()`` again) and the forward pass
    runs with gradient tracking disabled.

    Args:
        model: callable module mapping ``features`` to logits.
        features: input passed unchanged to ``model``.
        labels: target tensor indexed by ``mask``.
        mask: index/boolean mask applied to both the logits and ``labels``.

    Returns:
        The value of ``accuracy`` on the masked logits and labels.
    """
    model.eval()
    with torch.no_grad():
        masked_logits = model(features)[mask]
        masked_labels = labels[mask]
        return accuracy(masked_logits, masked_labels)
|
|
|
|
|
|
|
|
|
|
|
def _load_dataset(name):
    """Instantiate the citation dataset called ``name``.

    Raises:
        ValueError: if ``name`` is not 'cora', 'citeseer' or 'pubmed'.
    """
    loaders = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
    }
    if name not in loaders:
        raise ValueError('Unknown dataset: {}'.format(name))
    return loaders[name]()


def main(args):
    """Train a GAT node classifier and print per-epoch and test metrics.

    Args:
        args: parsed command-line namespace. Reads ``dataset``, ``gpu``,
            ``epochs``, ``num_heads``, ``num_out_heads``, ``num_layers``,
            ``num_hidden``, ``residual``, ``in_drop``, ``attn_drop``, ``lr``,
            ``weight_decay``, ``negative_slope`` and ``fastmode``.

    Raises:
        ValueError: if ``args.dataset`` names an unsupported dataset.
    """
    # load and preprocess dataset
    # Respect the dataset chosen on the command line; 'pubmed' is only the
    # fallback when none was given (it used to overwrite every choice).
    if getattr(args, 'dataset', None) is None:
        args.dataset = 'pubmed'
    data = _load_dataset(args.dataset)

    g = data[0]
    cuda = args.gpu >= 0
    if cuda:
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    # Count edges on the graph object itself rather than via ``data.graph``.
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self loop (remove existing ones first so none is duplicated)
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model: one head count per hidden layer plus one for the output
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)

    if cuda:
        # Put the model on the same device index as the graph; a bare
        # ``cuda()`` would pick the current default device instead.
        model.cuda(args.gpu)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Durations of the timed epochs; the first three epochs are excluded
    # (presumably as warm-up).
    dur = []
    for epoch in range(args.epochs):
        model.train()
        timed = epoch >= 3
        if timed:
            t0 = time.time()

        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if timed:
            dur.append(time.time() - t0)

        # Training accuracy reuses the training-mode logits from this step.
        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            # Skip the extra evaluation pass and score the same logits.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)

        # No timing samples exist before the fourth epoch; report nan
        # explicitly instead of letting np.mean([]) emit a RuntimeWarning.
        mean_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, mean_dur, loss.item(), train_acc,
                     val_acc, n_edges / mean_dur / 1000))

    print()

    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: build the parser, parse, echo the settings
    # and hand them to main().
    parser = argparse.ArgumentParser(description='GAT')
    register_data_args(parser)

    # Runtime / schedule
    parser.add_argument(
        "--gpu", type=int, default=-1,
        help="which GPU to use. Set -1 to use CPU.")
    parser.add_argument(
        "--epochs", type=int, default=200,
        help="number of training epochs")

    # Architecture
    parser.add_argument(
        "--num-heads", type=int, default=8,
        help="number of hidden attention heads")
    parser.add_argument(
        "--num-out-heads", type=int, default=1,
        help="number of output attention heads")
    parser.add_argument(
        "--num-layers", type=int, default=1,
        help="number of hidden layers")
    parser.add_argument(
        "--num-hidden", type=int, default=8,
        help="number of hidden units")
    parser.add_argument(
        "--residual", action="store_true",
        help="use residual connection")

    # Regularisation / optimisation
    parser.add_argument(
        "--in-drop", type=float, default=0.6,
        help="input feature dropout")
    parser.add_argument(
        "--attn-drop", type=float, default=0.6,
        help="attention dropout")
    parser.add_argument(
        "--lr", type=float, default=0.01,
        help="learning rate")
    parser.add_argument(
        "--weight-decay", type=float, default=0,
        help="weight decay")
    parser.add_argument(
        "--negative-slope", type=float, default=0.2,
        help="the negative slope of leaky relu")

    # Behaviour switches (store_true flags are False unless given)
    parser.add_argument(
        "--early-stop", action="store_true",
        help="indicates whether to use early stop or not")
    parser.add_argument(
        "--fastmode", action="store_true",
        help="skip re-evaluate the validation set")

    args = parser.parse_args()
    print(args)

    main(args)