Browse Source

gnnguard and tutorial for link-prediction

develop/0.4/predevelop
蔡婕 3 years ago
parent
commit
eca9d0567a
9 changed files with 1123 additions and 604 deletions
  1. +3
    -3
      autogl/module/model/pyg/__init__.py
  2. +0
    -279
      autogl/module/model/pyg/robust/gcn_svd.py
  3. +304
    -0
      autogl/module/model/pyg/robust/gnnguard.py
  4. +196
    -0
      autogl/module/model/pyg/robust/nn/conv/gcn_conv.py
  5. +204
    -0
      docs/docfile/tutorial/t_homo_link_prediction.rst
  6. +212
    -0
      docs/docfile/tutorial_cn/t_homo_link_prediction.rst
  7. +0
    -130
      test/performance/robust/model_gcnsvd.py
  8. +0
    -192
      test/performance/robust/model_gnnguard.py
  9. +204
    -0
      test/performance/robust/model_gnnguard_meta.py

+ 3
- 3
autogl/module/model/pyg/__init__.py View File

@@ -9,8 +9,7 @@ from .gcn import AutoGCN
from .gat import AutoGAT
from .gin import AutoGIN

from .robust.gcn_svd import AutoGCNSVD
from .robust.gnnguard import AutoGNNGuard, GCN4GNNGuard
from .robust.gnnguard import AutoGNNGuard, AutoGNNGuard_attack, GCN4GNNGuard, GCN4GNNGuard_attack

__all__ = [
"ModelUniversalRegistry",
@@ -22,7 +21,8 @@ __all__ = [
"AutoGCN",
"AutoGAT",
"AutoGIN",
"AutoGCNSVD",
"AutoGNNGuard",
"AutoGNNGuard_attack",
"GCN4GNNGuard",
"GCN4GNNGuard_attack",
]

+ 0
- 279
autogl/module/model/pyg/robust/gcn_svd.py View File

@@ -1,279 +0,0 @@
from tkinter import TRUE
import torch
import torch.optim as optim
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn as nn
import torch.nn.functional as F
import typing as _typing
import math
from tqdm import tqdm
import scipy.sparse as sp
import numpy as np
from copy import deepcopy
from numba import njit

from .. import register_model
from . import utils
from ..gcn import GCN
from ..base import BaseAutoModel
from .....utils import get_logger

LOGGER = get_logger("GCNSVDModel")


### ========================== ###

class GCN4Robust(GCN):
    """GCN extended with a robust training loop on top of the existing GCN
    encoder: deepcopy-based model selection on a validation split and
    optional early stopping.
    """

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        """
        Parameters
        ----------
        nfeat : input feature dimension.
        nclass : number of target classes.
        nhid : hidden layer sizes.
        activation : activation function name.
        dropout : dropout rate.
        lr, weight_decay : Adam optimizer settings used by ``fit``.
        """
        # BUG FIX: the ``normalize`` default was ``TRUE`` accidentally imported
        # from tkinter; use the builtin ``True`` instead.
        super(GCN4Robust, self).__init__(
            nfeat, nclass, nhid, activation, dropout=dropout,
            add_self_loops=add_self_loops, normalize=normalize)
        # BUG FIX: lr / weight_decay / with_* were silently discarded, although
        # the training helpers below read ``self.lr`` and ``self.weight_decay``.
        self.lr = lr
        self.weight_decay = weight_decay
        self.with_relu = with_relu
        self.with_bias = with_bias

    def fit(self, features, adj, labels, idx_train, idx_val=None,
            train_iters=200, initialize=True, verbose=False, normalize=True,
            patience=500, **kwargs):
        """Train the model.

        When ``idx_val`` is given, the best weights according to validation
        performance are kept; early stopping is used when
        ``patience < train_iters``.
        """
        self.device = self.gc1.weight.device
        if initialize:
            self.initialize()

        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features, adj, labels, device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        if normalize:
            # Symmetric normalization D^{-1/2} A D^{-1/2}.
            if utils.is_sparse_tensor(adj):
                adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                adj_norm = utils.normalize_adj_tensor(adj)
        else:
            adj_norm = adj

        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            self._train_with_early_stopping(labels, idx_train, idx_val, train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose):
        """Plain training on the train split, no model selection."""
        self.train()
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

        self.eval()
        output = self.forward(self.features, self.adj_norm)
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters, verbose):
        """Train and keep the weights with the best validation loss/accuracy."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        best_loss_val = 100
        best_acc_val = 0
        # Robustness: ensure ``weights`` is defined even if validation never improves.
        weights = deepcopy(self.state_dict())

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())

            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output
                weights = deepcopy(self.state_dict())

        if verbose:
            print('=== picking the best model according to the performance on validation ===')
        self.load_state_dict(weights)

    def _train_with_early_stopping(self, labels, idx_train, idx_val, train_iters, patience, verbose):
        """Train with early stopping driven by the validation loss."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        early_stopping = patience
        best_loss_val = 100
        # Robustness: ensure ``weights`` is defined even if validation never improves.
        weights = deepcopy(self.state_dict())

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping  # reset the budget on improvement
            else:
                patience -= 1
            if i > early_stopping and patience <= 0:
                break

        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(i, best_loss_val))
        self.load_state_dict(weights)

class GCNSVD(GCN4Robust):
    """GCN-SVD defense: trains a GCN on a low-rank (truncated SVD)
    approximation of the adjacency matrix to filter adversarial edges."""

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        super(GCNSVD, self).__init__(nfeat, nclass, nhid, activation, dropout,
                                     lr, weight_decay, with_relu, with_bias,
                                     add_self_loops, normalize)

    def fit(self, features, adj, labels, idx_train, idx_val=None, k=50,
            train_iters=200, initialize=True, verbose=True, **kwargs):
        """Fit the model on the rank-``k`` approximation of ``adj``."""
        # BUG FIX: ``self.device`` is only set inside the parent ``fit``; derive
        # it here before the first ``utils.to_tensor`` call needs it.
        self.device = self.gc1.weight.device
        modified_adj = self.truncatedSVD(adj, k=k)
        self.k = k  # remembered so predict() applies the same rank
        features, modified_adj, labels = utils.to_tensor(
            features, modified_adj, labels, device=self.device)

        self.modified_adj = modified_adj
        self.features = features
        self.labels = labels
        super().fit(features, modified_adj, labels, idx_train, idx_val,
                    train_iters=train_iters, initialize=initialize, verbose=verbose)

    def truncatedSVD(self, data, k=50):
        """Return the rank-``k`` SVD reconstruction of ``data``.

        Accepts either a scipy sparse matrix (uses ``svds``) or a dense
        array (full SVD, then truncated to the top ``k`` components).
        """
        print('=== GCN-SVD: rank={} ==='.format(k))
        if sp.issparse(data):
            data = data.asfptype()
            U, S, V = sp.linalg.svds(data, k=k)
            print("rank_after = {}".format(len(S.nonzero()[0])))
            diag_S = np.diag(S)
        else:
            U, S, V = np.linalg.svd(data)
            U = U[:, :k]
            S = S[:k]
            V = V[:k, :]
            print("rank_before = {}".format(len(S.nonzero()[0])))
            diag_S = np.diag(S)
            print("rank_after = {}".format(len(diag_S.nonzero()[0])))

        return U @ diag_S @ V

    def predict(self, features=None, adj=None):
        """Predict log-probabilities.

        With no arguments, reuses the data cached by ``fit``.  A freshly
        supplied ``adj`` is re-approximated with the same rank ``k`` used
        during fit (so ``fit`` must have been called first).
        """
        self.eval()
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)

        adj = self.truncatedSVD(adj, k=self.k)
        if type(adj) is not torch.Tensor:
            features, adj = utils.to_tensor(features, adj, device=self.device)

        self.features = features
        if utils.is_sparse_tensor(adj):
            self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            self.adj_norm = utils.normalize_adj_tensor(adj)
        return self.forward(self.features, self.adj_norm)

@register_model("gcnsvd-model")
class AutoGCNSVD(BaseAutoModel):
    """Auto model wrapping :class:`GCNSVD` for hyper-parameter optimization."""

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        **kwargs
    ) -> None:
        super().__init__(num_features, num_classes, device, **kwargs)
        self.hyper_parameter_space = [
            {
                "parameterName": "add_self_loops",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "normalize",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [128, 128, 128],
                "scalingType": "LOG",
                "cutPara": ("num_layers",),
                # hidden list has one entry fewer than the number of layers
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
        ]

        self.hyper_parameters = {
            "num_layers": 3,
            "hidden": [128, 64],
            "dropout": 0,
            "act": "relu",
        }

    def _initialize(self):
        """Build the underlying GCNSVD from the current hyper-parameters.

        BUG FIX: arguments are now passed by keyword — previously
        ``add_self_loops`` / ``normalize`` were passed positionally and landed
        on GCNSVD's ``lr`` / ``weight_decay`` parameters.
        """
        self._model = GCNSVD(
            nfeat=self.input_dimension,
            nclass=self.output_dimension,
            nhid=self.hyper_parameters.get("hidden"),
            activation=self.hyper_parameters.get("act"),
            dropout=self.hyper_parameters.get("dropout", None),
            add_self_loops=bool(self.hyper_parameters.get("add_self_loops", True)),
            normalize=bool(self.hyper_parameters.get("normalize", True)),
        ).to(self.device)

+ 304
- 0
autogl/module/model/pyg/robust/gnnguard.py View File

@@ -39,6 +39,12 @@ class GCN4GNNGuard(GCN):
self.nfeat = nfeat
self.hidden_sizes = nhid
self.drop = drop
if not with_relu:
self.weight_decay = 0
else:
self.weight_decay = weight_decay
self.with_relu = with_relu
self.with_bias = with_bias

self.gc1 = GCNConv(nfeat, nhid[0], bias=True,)
self.gc2 = GCNConv(nhid[0], nclass, bias=True, )
@@ -345,6 +351,235 @@ class GCN4GNNGuard(GCN):
self.adj_norm = utils.normalize_adj_tensor(adj)
return self.forward(self.features, self.adj_norm)

class GCN4GNNGuard_attack(GCN):
    """GNNGuard-defended GCN used in the adversarial-attack setting.

    Built on top of the existing GCN: the edge set is never changed, the
    defense only rescales ``edge_weight`` (an edge whose weight equals zero
    is regarded as removed).
    """

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 drop=False, weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        super(GCN4GNNGuard_attack, self).__init__(
            nfeat, nclass, nhid, activation, dropout=dropout,
            add_self_loops=add_self_loops, normalize=normalize)

        self.lr = lr
        self.dropout = dropout
        self.nclass = nclass
        self.nfeat = nfeat
        self.hidden_sizes = nhid
        self.drop = drop
        # No weight decay when ReLU (and thus the nonlinear model) is disabled.
        self.weight_decay = weight_decay if with_relu else 0
        self.with_relu = with_relu
        self.with_bias = with_bias

        self.gc1 = GCNConv(nfeat, nhid[0], bias=True)
        self.gc2 = GCNConv(nhid[0], nclass, bias=True)

    def forward(self, x, adj_lil):
        """Two-layer GCN forward pass.

        We don't change the edge_index, just update the edge_weight; some
        edge weights are regarded as removed if they equal zero.
        """
        x = x.to_dense()
        edge_index = adj_lil.coalesce().indices()
        edge_weight = adj_lil.coalesce().values()

        x = F.relu(self.gc1(x, edge_index, edge_weight=edge_weight))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, edge_index, edge_weight=edge_weight)

        return F.log_softmax(x, dim=1)

    def add_loop_sparse(self, adj, fill_value=1):
        """Return ``adj + I`` as a sparse tensor.

        NOTE(review): ``fill_value`` is currently unused (the identity always
        carries weight 1) — confirm intended.
        """
        # FIX: torch.range is deprecated (and end-inclusive); torch.arange
        # yields the same 0..n-1 indices.
        row = torch.arange(adj.shape[0], dtype=torch.int64)
        i = torch.stack((row, row), dim=0)
        v = torch.ones(adj.shape[0], dtype=torch.float32)
        # FIX: torch.sparse.FloatTensor is deprecated; use sparse_coo_tensor.
        I_n = torch.sparse_coo_tensor(i, v, adj.shape)
        return adj + I_n.to(self.device)

    def initialize(self):
        """Reset the parameters of both convolution layers."""
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def fit(self, features, adj, labels, idx_train, idx_val=None, idx_test=None,
            train_iters=81, att_0=None, attention=False, model_name=None,
            initialize=True, verbose=False, normalize=False, patience=510, ):
        '''
        Train the gcn model; when idx_val is not None, pick the best model
        according to the validation loss.
        '''
        # Derive the device from any registered parameter/buffer.
        sd = self.state_dict()
        for v in sd.values():
            self.device = v.device
            break

        self.sim = None
        self.attention = attention
        if self.attention:
            # NOTE(review): att_coef_1 is not defined in this class — it is
            # expected to be provided by the GNNGuard defense; confirm.
            att_0 = self.att_coef_1(features, adj)
            adj = att_0       # update adj
            self.sim = att_0  # update att_0

        self.idx_test = idx_test
        if initialize:
            self.initialize()

        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features, adj, labels, device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        # We don't need to normalize here: normalization is conducted inside
        # the GCNConv layers (self.gc1/self.gc2).
        normalize = False
        if normalize:
            if utils.is_sparse_tensor(adj):
                adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                adj_norm = utils.normalize_adj_tensor(adj)
        else:
            adj_norm = adj

        """Make the coefficient D^{-1/2}(A+I)D^{-1/2}"""
        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            self._train_with_early_stopping(labels, idx_train, idx_val, train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose):
        """Plain training on the train split, no model selection."""
        self.train()
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            # the (unused) weight would be the per-training-node weight
            loss_train = F.nll_loss(output[idx_train], labels[idx_train], weight=None)
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

        self.eval()
        output = self.forward(self.features, self.adj_norm)
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters, verbose):
        """Train and keep the weights with the best validation loss/accuracy."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        best_loss_val = 100
        best_acc_val = 0
        # Robustness: ensure ``weights`` is defined even if validation never improves.
        weights = deepcopy(self.state_dict())

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])

            if verbose and i % 200 == 0:
                # BUG FIX: ``accuracy`` was referenced without the ``utils.``
                # prefix (NameError); also evaluate test accuracy only when it
                # is actually printed.
                acc_test = utils.accuracy(output[self.idx_test], labels[self.idx_test])
                print('Epoch {}, training loss: {}, test acc: {}'.format(i, loss_train.item(), acc_test))

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())

            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output
                weights = deepcopy(self.state_dict())

        if verbose:
            print('=== picking the best model according to the performance on validation ===')
        self.load_state_dict(weights)

    def _train_with_early_stopping(self, labels, idx_train, idx_val, train_iters, patience, verbose):
        """Train with early stopping driven by the validation loss."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        early_stopping = patience
        best_loss_val = 100
        # Robustness: ensure ``weights`` is defined even if validation never improves.
        weights = deepcopy(self.state_dict())

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            self.eval()
            output = self.forward(self.features, self.adj_norm)

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

            loss_val = F.nll_loss(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping  # reset the budget on improvement
            else:
                patience -= 1
            if i > early_stopping and patience <= 0:
                break

        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(i, best_loss_val))
        self.load_state_dict(weights)

    def test(self, idx_test):
        """Evaluate on ``idx_test`` using the data cached during training."""
        self.eval()
        output = self.predict()  # uses self.features / self.adj_norm from the training stage
        loss_test = F.nll_loss(output[idx_test], self.labels[idx_test])
        acc_test = utils.accuracy(output[idx_test], self.labels[idx_test])
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.4f}".format(acc_test.item()))
        return acc_test, output

    def _set_parameters(self):
        # TODO
        pass

    def predict(self, features=None, adj=None):
        '''By default, inputs are unnormalized data.'''
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)

        if type(adj) is not torch.Tensor:
            features, adj = utils.to_tensor(features, adj, device=self.device)

        self.features = features
        if utils.is_sparse_tensor(adj):
            self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            self.adj_norm = utils.normalize_adj_tensor(adj)
        return self.forward(self.features, self.adj_norm)


@register_model("gnnguard-model")
class AutoGNNGuard(BaseAutoModel):
@@ -414,3 +649,72 @@ class AutoGNNGuard(BaseAutoModel):
add_self_loops = bool(self.hyper_parameters.get("add_self_loops", True)),
normalize = bool(self.hyper_parameters.get("normalize", True)),
).to(self.device)

@register_model("gnnguard-attack-model")
class AutoGNNGuard_attack(BaseAutoModel):
    """Auto model wrapping :class:`GCN4GNNGuard_attack` for hyper-parameter
    optimization under adversarial attack."""

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        **kwargs
    ) -> None:
        super().__init__(num_features, num_classes, device, **kwargs)

        # Search space: graph options, depth, layer widths, regularization
        # and activation.
        space = [
            {
                "parameterName": "add_self_loops",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "normalize",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [128, 128, 128],
                "scalingType": "LOG",
                "cutPara": ("num_layers",),
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
        ]
        self.hyper_parameter_space = space

        # Defaults used until HPO proposes something better.
        self.hyper_parameters = {
            "num_layers": 3,
            "hidden": [128, 64],
            "dropout": 0,
            "act": "relu",
        }

    def _initialize(self):
        """Instantiate GCN4GNNGuard_attack from the current hyper-parameters
        and move it onto the configured device."""
        hp = self.hyper_parameters
        model = GCN4GNNGuard_attack(
            nfeat=self.input_dimension,
            nclass=self.output_dimension,
            nhid=hp.get("hidden"),
            activation=hp.get("act"),
            dropout=hp.get("dropout", None),
            add_self_loops=bool(hp.get("add_self_loops", True)),
            normalize=bool(hp.get("normalize", True)),
        )
        self._model = model.to(self.device)

+ 196
- 0
autogl/module/model/pyg/robust/nn/conv/gcn_conv.py View File

@@ -0,0 +1,196 @@
import torch
from torch.nn import Parameter
from torch_scatter import scatter_add
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import add_remaining_self_loops, to_undirected

from ..inits import glorot, zeros

@torch.jit._overload
def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
add_self_loops=True, dtype=None):
# type: (Tensor, OptTensor, Optional[int], bool, bool, Optional[int]) -> PairTensor # noqa
pass


@torch.jit._overload
def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
add_self_loops=True, dtype=None):
# type: (SparseTensor, OptTensor, Optional[int], bool, bool, Optional[int]) -> SparseTensor # noqa
pass


def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
             add_self_loops=True, dtype=None):
    """Symmetrically normalize the adjacency: D^{-1/2} (A [+ I]) D^{-1/2}.

    Accepts either a dense ``edge_index`` tensor (returns
    ``(edge_index, edge_weight)``) or a ``torch_sparse.SparseTensor``
    (returns the normalized ``SparseTensor``).
    """
    fill_value = 2. if improved else 1.

    # BUG FIX: SparseTensor / fill_diag / mul and the sparse ``sum`` were
    # referenced without being imported, and the builtin ``sum`` has no
    # ``dim`` argument. Import lazily so the dense path works even when
    # torch_sparse is unavailable.
    try:
        from torch_sparse import SparseTensor, fill_diag, mul
        from torch_sparse import sum as sparsesum
    except ImportError:
        SparseTensor = None

    if SparseTensor is not None and isinstance(edge_index, SparseTensor):
        adj_t = edge_index
        if not adj_t.has_value():
            adj_t = adj_t.fill_value(1., dtype=dtype)
        if add_self_loops:
            adj_t = fill_diag(adj_t, fill_value)
        deg = sparsesum(adj_t, dim=1)
        deg_inv_sqrt = deg.pow_(-0.5)
        deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0.)
        adj_t = mul(adj_t, deg_inv_sqrt.view(-1, 1))
        adj_t = mul(adj_t, deg_inv_sqrt.view(1, -1))
        return adj_t

    else:
        # BUG FIX: maybe_num_nodes was never imported; infer the node count
        # from the edge index when it is not given (same semantics).
        if num_nodes is None:
            num_nodes = int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0

        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                     device=edge_index.device)

        if add_self_loops:
            edge_index, tmp_edge_weight = add_remaining_self_loops(
                edge_index, edge_weight, fill_value, num_nodes)
            assert tmp_edge_weight is not None
            edge_weight = tmp_edge_weight

        row, col = edge_index[0], edge_index[1]
        deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow_(-0.5)
        deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0)
        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]



class GCNConv(MessagePassing):
    r"""The graph convolutional operator from the `"Semi-supervised
    Classification with Graph Convolutional Networks"
    <https://arxiv.org/abs/1609.02907>`_ paper

    .. math::
        \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
        \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

    where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
    adjacency matrix with inserted self-loops and
    :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample.
        improved (bool, optional): If set to :obj:`True`, the layer computes
            :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`.
            (default: :obj:`False`)
        cached (bool, optional): If set to :obj:`True`, the layer will cache
            the computation of :math:`\mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
            \mathbf{\hat{D}}^{-1/2}` on first execution, and will use the
            cached version for further executions.
            This parameter should only be set to :obj:`True` in transductive
            learning scenarios. (default: :obj:`False`)
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
        add_self_loops (bool, optional): Currently unused here — self-loops
            are expected to be added by GNNGuard's att_coef beforehand.
        normalize (bool, optional): Whether to apply symmetric normalization.
            (default: :obj:`True`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    def __init__(self, in_channels, out_channels, improved=False, cached=False,
                 bias=True, add_self_loops: bool = True, normalize=True, **kwargs):
        super(GCNConv, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.normalize = normalize

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        if bias:
            # BUG FIX: ``torch.tensor(out_channels, dtype=...)`` creates a
            # 0-dim scalar holding the *value* out_channels; the bias must be
            # a vector of size out_channels (initialized in reset_parameters).
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Glorot-init the weight, zero the bias, and drop any cached norm."""
        glorot(self.weight)
        zeros(self.bias)
        self.cached_result = None
        self.cached_num_edges = None

    # Modified for GNNGuard: self-loops are NOT (re-)added here because they
    # are already inserted by the att_coef defense; the degree is accumulated
    # over ``row`` instead of ``col``.
    @staticmethod
    def norm(edge_index, num_nodes, edge_weight=None, improved=False,
             dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.to(edge_index.device)
        fill_value = 1 if not improved else 2

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight=None):
        """Linear transform, (optionally cached) normalization, then propagate."""
        x = torch.matmul(x, self.weight)

        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}. Please '
                    'disable the caching behavior of this layer by removing '
                    'the `cached=True` argument in its constructor.'.format(
                        self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            if self.normalize:
                edge_index, norm = self.norm(edge_index, x.size(0), edge_weight, self.improved, x.dtype)
            else:
                norm = edge_weight
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        # Scale each neighbor message by its normalized edge weight.
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)

+ 204
- 0
docs/docfile/tutorial/t_homo_link_prediction.rst View File

@@ -0,0 +1,204 @@
==========================
Link Prediction Model
==========================

Building Link Prediction Modules
=====================================

In AutoGL, we support three models for link prediction: ``gcn``, ``gat`` and ``sage``.

AutoLinkPredictor
>>>>>>>>>>>>>>>>>

Used to automatically solve the link prediction problems. For example,


.. code-block:: python

class AutoGCN(BaseAutoModel):
r"""
AutoGCN.
The model used in this automodel is GCN, i.e., the graph convolutional network from the
`"Semi-supervised Classification with Graph Convolutional
Networks" <https://arxiv.org/abs/1609.02907>`_ paper. The layer is

.. math::

\mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
\mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
adjacency matrix with inserted self-loops and
:math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

Parameters
----------
num_features: ``int``
The dimension of features.

num_classes: ``int``
The number of classes.

device: ``torch.device`` or ``str``
The device where model will be running on.

init: `bool`.
If True(False), the model will (not) be initialized.
"""

def __init__(
self,
num_features: int = ...,
num_classes: int = ...,
device: _typing.Union[str, torch.device] = ...,
**kwargs
) -> None:
super().__init__(num_features, num_classes, device, **kwargs)
self.hyper_parameter_space = [
{
"parameterName": "add_self_loops",
"type": "CATEGORICAL",
"feasiblePoints": [1],
},
{
"parameterName": "normalize",
"type": "CATEGORICAL",
"feasiblePoints": [1],
},
{
"parameterName": "num_layers",
"type": "DISCRETE",
"feasiblePoints": "2,3,4",
},
{
"parameterName": "hidden",
"type": "NUMERICAL_LIST",
"numericalType": "INTEGER",
"length": 3,
"minValue": [8, 8, 8],
"maxValue": [128, 128, 128],
"scalingType": "LOG",
"cutPara": ("num_layers",),
"cutFunc": lambda x: x[0] - 1,
},
{
"parameterName": "dropout",
"type": "DOUBLE",
"maxValue": 0.8,
"minValue": 0.2,
"scalingType": "LINEAR",
},
{
"parameterName": "act",
"type": "CATEGORICAL",
"feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
},
]

self.hyper_parameters = {
"num_layers": 3,
"hidden": [128, 64],
"dropout": 0,
"act": "relu",
}

def _initialize(self):
self._model = GCN(
self.input_dimension,
self.output_dimension,
self.hyper_parameters.get("hidden"),
self.hyper_parameters.get("act"),
self.hyper_parameters.get("dropout", None),
bool(self.hyper_parameters.get("add_self_loops", True)),
bool(self.hyper_parameters.get("normalize", True)),
).to(self.device)

You can define your own ``LinkPrediction`` model by using the ``from_hyper_parameter`` function and specifying the hyperparameters.

.. code-block:: python

# pyg version
from autogl.module.model.pyg import AutoLinkPredictor
# from autogl.module.model.dgl import AutoLinkPredictor # dgl version
model = AutoLinkPredictor(
feature_module="NormalizeFeatures",
graph_models=(args.model, ),
hpo_module="random",
ensemble_module=None,
max_evals=1,
trainer_hp_space=fixed(**{
"max_epoch": 100,
"early_stopping_round": 101,
"lr": 1e-2,
"weight_decay": 0.0,
}),
model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}]
).model

Then you can train the model for 100 epochs.

.. code-block:: python

import torch.nn.functional as F

# Define the loss optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training
for epoch in range(100):
model.train()
optimizer.zero_grad()

z = model.lp_encode(splitted[0])
link_logits = model.lp_decode(
z, torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges())
)
link_labels = get_link_labels(
torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges())
)
loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
loss.backward()
optimizer.step()

auc_val = evaluate(model, splitted, "val")

if auc_val > best_auc:
best_auc = auc_val
best_parameters = pickle.dumps(model.state_dict())

Finally, evaluate the trained model.

.. code-block:: python

model.load_state_dict(pickle.loads(best_parameters))
evaluate(model, splitted, "test")


Automatic Search for Link Prediction Tasks
===============================================

In AutoGL, we also provide a high-level API Solver to control the overall pipeline.
We encapsulated the training process in the Building GNN Modules part for link prediction tasks
in the solver ``AutoLinkPredictor`` that supports automatic hyperparametric optimization
as well as feature engineering and ensemble. In this part, we will show you how to use
``AutoLinkPredictor``.

.. code-block:: python

solver = AutoLinkPredictor(
feature_module="NormalizeFeatures",
graph_models=(args.model, ),
hpo_module="random",
ensemble_module=None,
max_evals=1,
trainer_hp_space=fixed(**{
"max_epoch": 100,
"early_stopping_round": 101,
"lr": 1e-2,
"weight_decay": 0.0,
}),
model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}]
)
solver.fit(dataset, train_split=0.85, val_split=0.05, evaluation_method=["auc"], seed=seed)
pre = solver.evaluate(metric="auc")

+ 212
- 0
docs/docfile/tutorial_cn/t_homo_link_prediction.rst View File

@@ -0,0 +1,212 @@
.. _homo_cn:

==========================
链接预测模型
==========================

构建链接预测模块
=====================================
.. In AutoGL, we support three models for link prediction: ``gcn``, ``gat`` and ``sage``.
在AutoGL中,我们支持三种链接预测模型: ``gcn``, ``gat`` and ``sage`` 。

AutoLinkPredictor
>>>>>>>>>>>>>>>>>

.. Used to automatically solve the link prediction problems. For example,
用于自动解决链路预测问题。例如,

.. code-block:: python

class AutoGCN(BaseAutoModel):
r"""
AutoGCN.
The model used in this automodel is GCN, i.e., the graph convolutional network from the
`"Semi-supervised Classification with Graph Convolutional
Networks" <https://arxiv.org/abs/1609.02907>`_ paper. The layer is

.. math::

\mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
\mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
adjacency matrix with inserted self-loops and
:math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

Parameters
----------
num_features: ``int``
The dimension of features.

num_classes: ``int``
The number of classes.

device: ``torch.device`` or ``str``
The device where model will be running on.

init: `bool`.
If True(False), the model will (not) be initialized.
"""

def __init__(
self,
num_features: int = ...,
num_classes: int = ...,
device: _typing.Union[str, torch.device] = ...,
**kwargs
) -> None:
super().__init__(num_features, num_classes, device, **kwargs)
self.hyper_parameter_space = [
{
"parameterName": "add_self_loops",
"type": "CATEGORICAL",
"feasiblePoints": [1],
},
{
"parameterName": "normalize",
"type": "CATEGORICAL",
"feasiblePoints": [1],
},
{
"parameterName": "num_layers",
"type": "DISCRETE",
"feasiblePoints": "2,3,4",
},
{
"parameterName": "hidden",
"type": "NUMERICAL_LIST",
"numericalType": "INTEGER",
"length": 3,
"minValue": [8, 8, 8],
"maxValue": [128, 128, 128],
"scalingType": "LOG",
"cutPara": ("num_layers",),
"cutFunc": lambda x: x[0] - 1,
},
{
"parameterName": "dropout",
"type": "DOUBLE",
"maxValue": 0.8,
"minValue": 0.2,
"scalingType": "LINEAR",
},
{
"parameterName": "act",
"type": "CATEGORICAL",
"feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
},
]

self.hyper_parameters = {
"num_layers": 3,
"hidden": [128, 64],
"dropout": 0,
"act": "relu",
}

def _initialize(self):
self._model = GCN(
self.input_dimension,
self.output_dimension,
self.hyper_parameters.get("hidden"),
self.hyper_parameters.get("act"),
self.hyper_parameters.get("dropout", None),
bool(self.hyper_parameters.get("add_self_loops", True)),
bool(self.hyper_parameters.get("normalize", True)),
).to(self.device)


.. You could define your own ``LinkPrediction`` model by using the ``from_hyper_parameter`` function and specifying the hyperparameters.
你可以通过使用 ``from_hyper_parameter`` 函数定义你自己的 ``LinkPrediction`` 模型,并对其指定超参数。

.. code-block:: python

# pyg version
from autogl.module.model.pyg import AutoLinkPredictor
# from autogl.module.model.dgl import AutoLinkPredictor # dgl version
model = AutoLinkPredictor(
feature_module="NormalizeFeatures",
graph_models=(args.model, ),
hpo_module="random",
ensemble_module=None,
max_evals=1,
trainer_hp_space=fixed(**{
"max_epoch": 100,
"early_stopping_round": 101,
"lr": 1e-2,
"weight_decay": 0.0,
}),
model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}]
).model



.. Then you can train the model for 100 epochs.
然后你可以对模型进行100个epoch的训练:

.. code-block:: python

import torch.nn.functional as F

# Define the loss optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training
for epoch in range(100):
model.train()
optimizer.zero_grad()

z = model.lp_encode(splitted[0])
link_logits = model.lp_decode(
z, torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges())
)
link_labels = get_link_labels(
torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges())
)
loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
loss.backward()
optimizer.step()

auc_val = evaluate(model, splitted, "val")

if auc_val > best_auc:
best_auc = auc_val
best_parameters = pickle.dumps(model.state_dict())


.. Finally, evaluate the trained model.
最后,你可以评估该模型:

.. code-block:: python

model.load_state_dict(pickle.loads(best_parameters))
evaluate(model, splitted, "test")

.. Automatic Search for Link Prediction Tasks
链接预测任务的自动搜索
===============================================

.. In AutoGL, we also provide a high-level API Solver to control the overall pipeline. We encapsulated the training process in the Building GNN Modules part for link prediction tasks in the solver ``AutoLinkPredictor`` that supports automatic hyperparametric optimization as well as feature engineering and ensemble. In this part, we will show you how to use ``AutoLinkPredictor``.
在AutoGL中,我们还提供了一个高级的API求解器来控制整个流水线。我们将构建图神经网络模块部分的训练过程封装在求解器 ``AutoLinkPredictor`` 中以用于链接预测任务,它支持自动超参数优化,特征工程及集成。
在这一部分,我们提供了一个例子来指导如何使用 ``AutoLinkPredictor`` :

.. code-block:: python

solver = AutoLinkPredictor(
feature_module="NormalizeFeatures",
graph_models=(args.model, ),
hpo_module="random",
ensemble_module=None,
max_evals=1,
trainer_hp_space=fixed(**{
"max_epoch": 100,
"early_stopping_round": 101,
"lr": 1e-2,
"weight_decay": 0.0,
}),
model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}]
)
solver.fit(dataset, train_split=0.85, val_split=0.05, evaluation_method=["auc"], seed=seed)
pre = solver.evaluate(metric="auc")

+ 0
- 130
test/performance/robust/model_gcnsvd.py View File

@@ -1,130 +0,0 @@
import os
import pickle
from torchaudio import datasets
from tqdm import tqdm
import torch
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from deeprobust.graph.defense import GCNSVD
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg
import argparse

os.environ["AUTOGL_BACKEND"] = "pyg"


from autogl.module.model.pyg import AutoGCNSVD
from autogl.solver.utils import set_seed

def test(model, data, mask):
model.eval()

if hasattr(model, 'cls_forward'):
out = model.cls_forward(data)[mask]
else:
out = model(data)[mask]
pred = out.max(1)[1]
acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
return acc

def train(model, data, args):
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
print(data)
parameters = model.state_dict()
best_acc = 0.
for epoch in range(args.epoch):
model.train()
optimizer.zero_grad()
if hasattr(model, 'cls_forward'):
output = model.cls_forward(data)
else:
output = model(data)
loss = F.nll_loss(output[data.train_mask], data.y[data.train_mask])
loss.backward()
optimizer.step()

val_acc = test(model, data, data.val_mask)
if val_acc > best_acc:
best_acc = val_acc
parameters = pickle.dumps(model.state_dict())
model.load_state_dict(pickle.loads(parameters))
return model


if __name__ == '__main__':

    # Command-line options for the GCN-SVD robustness experiment.
    # (FIX: the original built a throwaway ``argparse.ArgumentParser()`` that
    # was immediately replaced; the redundant instance is removed.)
    parser = argparse.ArgumentParser('pyg model')
    parser.add_argument('--seed', type=int, default=15, help='Random seed.')
    parser.add_argument('--dataset', type=str, default='cora', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
    parser.add_argument('--ptb_rate', type=float, default=0.05, help='pertubation rate')
    parser.add_argument('--k', type=int, default=15, help='Truncated Components.')
    parser.add_argument('--repeat', type=int, default=1)
    # NOTE(review): default 'cuda:0' fails on CPU-only machines — pass --device cpu there.
    parser.add_argument('--device', type=str, default='cuda:0')
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--epoch', type=int, default=200)

    args = parser.parse_args()
    args.cuda = torch.cuda.is_available()
    print('cuda: %s' % args.cuda)

    # make sure you use the same data splits as you generated attacks
    np.random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # The random seed here controls the train/val/test split; it must match
    # the seed used when the perturbed graph was generated.
    # data = Dataset(root='/tmp/', name=args.dataset, setting='nettack', seed=15)
    # Or we can just use setting='prognn' to get the splits
    data = Dataset(root='/tmp/', name=args.dataset, setting='prognn')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    pyg_data = Dpr2Pyg(data).process().to(args.device)
    pyg_data.num_classes = len(set(labels))

    # load pre-attacked graph
    perturbed_data = PrePtbDataset(root='/tmp/',
                    name=args.dataset,
                    attack_method='meta',
                    ptb_rate=args.ptb_rate)
    perturbed_adj = perturbed_data.adj

    # NOTE(review): the AutoGL run below trains on the *clean* ``pyg_data``
    # while the banner says "perturbed graph" — confirm whether
    # ``perturbed_adj`` should be injected into ``pyg_data`` first.
    print('=== testing GCN-SVD on perturbed graph (AutoGL) ===')
    model_hp = {
        "num_layers": 2,
        "hidden": [16],
        "dropout": 0.5,
        "act": "relu"
    }
    accs = []
    for seed in tqdm(range(args.repeat)):
        set_seed(seed)
        model = AutoGCNSVD(
            num_features=pyg_data.num_node_features,
            num_classes=pyg_data.num_classes,
            device=args.device,
            init=False
        ).from_hyper_parameter(model_hp).model
        model.to(args.device)

        train(model, pyg_data, args)
        acc = test(model, pyg_data, pyg_data.test_mask)
        accs.append(acc)
    print('{:.4f} ~ {:.4f}'.format(np.mean(accs), np.std(accs)))


    # Reference run: deeprobust's own GCN-SVD on the attacked adjacency.
    print('=== testing GCN-SVD on perturbed graph (deeprobust)===')
    model = GCNSVD(nfeat=features.shape[1], nclass=labels.max()+1,
                nhid=16, device=args.device)

    model = model.to(args.device)
    # Test set results: loss= 0.8541 accuracy= 0.7067
    model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=args.k, verbose=True)
    model.eval()
    output = model.test(idx_test)
    print(output)

+ 0
- 192
test/performance/robust/model_gnnguard.py View File

@@ -1,192 +0,0 @@
import os
import torch
# import sys
# sys.path.insert(0, '/n/scratch2/xz204/Dr37/lib/python3.7/site-packages')
from deeprobust.graph.targeted_attack import Nettack
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset
import argparse
# from deeprobust.graph.defense import * # GCN, GAT, GIN, JK, GCN_attack,accuracy_1
from deeprobust.graph.defense import *
from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg
from tqdm import tqdm
import scipy
import numpy as np
from sklearn.preprocessing import normalize
import pickle

os.environ["AUTOGL_BACKEND"] = "pyg"

from autogl.module.model.pyg import AutoGNNGuard
from autogl.solver.utils import set_seed

# Command-line options for the GNNGuard / Nettack targeted-attack experiment.
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=14, help='Random seed.')
# cora and citeseer have binary features; pubmed does not
parser.add_argument('--dataset', type=str, default='citeseer', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
parser.add_argument('--ptb_rate', type=float, default=0.05, help='pertubation rate')
parser.add_argument('--modelname', type=str, default='GCN', choices=['GCN', 'GAT','GIN', 'JK'])
parser.add_argument('--defensemodel', type=str, default='GCNJaccard', choices=['GCNJaccard', 'RGCN', 'GCNSVD'])
parser.add_argument('--DPlabel', type=int, default=9, help='0-10')

args = parser.parse_args()
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
# Run on the first GPU when available, otherwise fall back to CPU.
args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def seed_torch(seed=1029):
    """Seed numpy and torch (CPU and every CUDA device) and disable the
    non-deterministic cuDNN autotuner so repeated runs are reproducible."""
    np.random.seed(seed)
    for seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # torch.use_deterministic_algorithms(True)
seed_torch(args.seed)

# NOTE(review): these two assignments override whatever was passed on the
# command line via --dataset / --modelname — confirm this is intentional.
args.dataset = "cora"
args.modelname = "GCN"

# Load the clean graph from deeprobust's dataset cache.
data = Dataset(root='/tmp/', name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
# The defenses below expect sparse CSR features.
if scipy.sparse.issparse(features)==False:
    features = scipy.sparse.csr_matrix(features)
"""set the number of training/val/testing nodes"""
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
"""add undirected edges, orgn-arxiv is directed graph, we transfer it to undirected closely following
https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-arxiv
"""
adj = adj + adj.T
adj[adj>1] = 1

# PyG-format copy of the same data for the AutoGL models.
pyg_data = Dpr2Pyg(data).process().to(args.device)
pyg_data.num_classes = len(set(labels))


def main(flag):
    """Run a Nettack targeted attack against node 859 and evaluate the
    deeprobust GCN on both the clean and the perturbed graph.

    Parameters
    ----------
    flag : bool
        Forwarded as ``attention`` to :func:`test`; True enables the
        GNNGuard defense.

    NOTE(review): relies on module-level globals ``features``, ``adj``,
    ``labels``, ``idx_train`` and ``args``.
    """

    # Setup Surrogate model (2-layer linear GCN, as required by Nettack)
    surrogate = GCN_attack(nfeat=features.shape[1], nclass=labels.max().item()+1, n_edge=adj.nonzero()[0].shape[0], nhid=16, dropout=0, with_relu=False, with_bias=False, device=args.device, )
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201) # change this train_iters to 201: train_iters=201

    # Setup Attack Model
    target_node = 859

    model = Nettack(surrogate, nnodes=adj.shape[0], attack_structure=True, attack_features=False, device=args.device)
    model = model.to(args.device)

    degrees = adj.sum(0).A1
    # How many perturbations to perform. Default: Degree of the node
    n_perturbations = int(degrees[target_node])

    # direct=True performs a direct attack (not an indirect/influencer attack)
    model.attack(features, adj, labels, target_node, n_perturbations, direct=True)
    modified_adj = model.modified_adj
    modified_features = model.modified_features

    print('=== testing GNN on original(clean) graph ===')
    test(adj, features, target_node, attention=flag)

    print('=== testing GCN on perturbed graph ===')
    test(modified_adj, modified_features, target_node,attention=flag)

def test(adj, features, target_node, attention=False):
    """Train a fresh deeprobust model on (adj, features) and report accuracy.

    The model class is resolved by name from ``args.modelname`` ('GCN',
    'GAT', 'GIN' or 'JK').  Prints the class probabilities of
    ``target_node`` and the test-set accuracy, and returns the accuracy
    as a float.

    FIX: the original body started with a stray ``''`` expression, which
    became the docstring and turned the two descriptive triple-quoted
    strings into dead statements; they are merged into this docstring.

    NOTE(review): relies on module-level globals ``args``, ``labels``,
    ``idx_train``, ``idx_val`` and ``idx_test``.
    """
    # for orgn-arxiv: nhid = 256, layers = 3, epoch = 500
    gcn = globals()[args.modelname](nfeat=features.shape[1], nhid=256, nclass=labels.max().item() + 1, dropout=0.5,
                                    device=args.device)
    gcn = gcn.to(args.device)
    gcn.fit(features, adj, labels, idx_train, idx_val=idx_val,
            idx_test=idx_test,
            attention=attention, verbose=True, train_iters=81)
    gcn.eval()
    _, output = gcn.test(idx_test=idx_test)

    probs = torch.exp(output[[target_node]])[0]
    print('probs: {}'.format(probs.detach().cpu().numpy()))
    acc_test = accuracy(output[idx_test], labels[idx_test])

    print("Test set results:",
          "accuracy= {:.4f}".format(acc_test.item()))
    return acc_test.item()

def main_autogl(flag):
    """AutoGL counterpart of :func:`main`: run a Nettack targeted attack on
    node 859 using an AutoGNNGuard surrogate, then evaluate on the clean and
    perturbed graphs via :func:`test_autogl`.

    Parameters
    ----------
    flag : bool
        Forwarded as ``attention`` to :func:`test_autogl`; True enables the
        GNNGuard defense.

    NOTE(review): relies on module-level globals ``pyg_data``, ``model_hp``,
    ``features``, ``adj``, ``labels``, ``idx_train`` and ``args``.
    """
    # Setup Surrogate model
    surrogate = AutoGNNGuard(
        num_features=pyg_data.num_node_features,
        num_classes=pyg_data.num_classes,
        device=args.device,
        init=False
    ).from_hyper_parameter(model_hp).model
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201) # change this train_iters to 201: train_iters=201

    # Setup Attack Model
    target_node = 859

    model = Nettack(surrogate, nnodes=adj.shape[0], attack_structure=True, attack_features=False, device=args.device)
    model = model.to(args.device)

    degrees = adj.sum(0).A1
    # How many perturbations to perform. Default: Degree of the node
    n_perturbations = int(degrees[target_node])

    # direct=True performs a direct attack (not an indirect/influencer attack)
    model.attack(features, adj, labels, target_node, n_perturbations, direct=True)
    modified_adj = model.modified_adj
    modified_features = model.modified_features

    print('=== testing GNN on original(clean) graph (AutoGL) ===')
    test_autogl(adj, features, target_node, attention=flag)

    print('=== testing GCN on perturbed graph (AutoGL) ===')
    test_autogl(modified_adj, modified_features, target_node,attention=flag)


def test_autogl(adj, features, target_node, attention=False):
    """Train a fresh AutoGL GNNGuard model on (adj, features) and report
    accuracy.

    Prints the class probabilities of ``target_node`` and the test-set
    accuracy, and returns the accuracy as a float.

    FIX: the original body started with a stray ``''`` expression that
    became the docstring and left the two descriptive triple-quoted strings
    as dead statements; they are merged into this docstring.

    NOTE(review): relies on module-level globals ``pyg_data``, ``model_hp``,
    ``args``, ``labels``, ``idx_train``, ``idx_val`` and ``idx_test``.
    """
    # for orgn-arxiv: nhid = 256, layers = 3, epoch = 500
    gcn = AutoGNNGuard(
        num_features=pyg_data.num_node_features,
        num_classes=pyg_data.num_classes,
        device=args.device,
        init=False
    ).from_hyper_parameter(model_hp).model
    gcn = gcn.to(args.device)
    gcn.fit(features, adj, labels, idx_train, idx_val=idx_val,
            idx_test=idx_test,
            attention=attention, verbose=True, train_iters=81)
    gcn.eval()
    _, output = gcn.test(idx_test=idx_test)

    probs = torch.exp(output[[target_node]])[0]
    print('probs: {}'.format(probs.detach().cpu().numpy()))
    acc_test = accuracy(output[idx_test], labels[idx_test])

    print("Test set results:",
          "accuracy= {:.4f}".format(acc_test.item()))
    return acc_test.item()

# Hyper-parameters handed to AutoGNNGuard.from_hyper_parameter above.
model_hp = {
    "num_layers": 2,
    "hidden": [16],
    "dropout": 0.0,
    "act": "relu"
}

if __name__ == '__main__':
    # Check the performance of GCN under directed attack without defense
    main(flag=False)
    # Use GNNGuard for defense
    main(flag=True)
    # Check the performance of GCN under directed attack without defense (AutoGL)
    main_autogl(flag=False)
    # Use GNNGuard for defense (AutoGL)
    main_autogl(flag=True)

+ 204
- 0
test/performance/robust/model_gnnguard_meta.py View File

@@ -0,0 +1,204 @@
import os
import torch
# import sys
# sys.path.insert(0, '/n/scratch2/xz204/Dr37/lib/python3.7/site-packages')
from deeprobust.graph.targeted_attack import Nettack
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset
from deeprobust.graph.global_attack import Metattack
import argparse
# from deeprobust.graph.defense import * # GCN, GAT, GIN, JK, GCN_attack,accuracy_1
from deeprobust.graph.defense import *
from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg
from scipy.sparse import csr_matrix
from tqdm import tqdm
import scipy
import scipy.sparse as sp
import numpy as np
from sklearn.preprocessing import normalize
import pickle

os.environ["AUTOGL_BACKEND"] = "pyg"

from autogl.module.model.pyg import AutoGNNGuard, AutoGNNGuard_attack
from autogl.solver.utils import set_seed

def seed_torch(seed=1029):
    """Seed every RNG used here (numpy, torch CPU, all CUDA devices) and
    force deterministic cuDNN kernels for reproducible experiments."""
    np.random.seed(seed)
    for apply_seed in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        apply_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # torch.use_deterministic_algorithms(True)
seed_torch(2048)

def main(dataset, adj, features, device):
    """Evaluate the deeprobust GCN (with GNNGuard attention enabled) on the
    clean graph and on the pre-computed Metattack-perturbed graph of
    ``dataset``.

    Parameters
    ----------
    dataset : str
        Dataset name used to fetch the pre-attacked adjacency.
    adj, features
        Clean adjacency and feature matrices (scipy CSR).
    device
        Torch device forwarded to :func:`test`.

    NOTE(review): the surrogate is built on the global ``args.device`` while
    ``device`` is only forwarded to :func:`test` — confirm this mismatch.
    Relies on module-level globals ``labels``, ``idx_train`` and ``args``.
    """
    # from deeprobust.graph.data import PrePtbDataset
    # perturbed_data = PrePtbDataset(root='/tmp/', name=dataset, attack_method='meta', ptb_rate=0.2)
    # modified_adj = perturbed_data.adj

    # Setup Surrogate model
    surrogate = GCN_attack(nfeat=features.shape[1], nclass=labels.max().item()+1, n_edge=adj.nonzero()[0].shape[0], nhid=16, dropout=0, with_relu=False, with_bias=False, device=args.device, )

    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201) # change this train_iters to 201: train_iters=201

    # Setup Attack Model
    # model = Metattack(model=surrogate, nnodes=adj.shape[0], feature_shape=features.shape, attack_structure=True, attack_features=False, device=args.device, lambda_=0.5) # lambda_ is used to weight the two objectives in Eq. (10) in the paper.
    # model = model.to(args.device)

    # """save the mettacked adj"""
    # model.attack(features, adj, labels, idx_train, idx_unlabeled, perturbations, ll_constraint=False)
    # modified_adj = sp.csr_matrix(model.modified_adj.cpu())

    # Load the pre-attacked adjacency instead of re-running Metattack.
    perturbed_data = PrePtbDataset(root='/tmp/', name=dataset, attack_method='meta', ptb_rate=0.2)
    modified_adj = perturbed_data.adj

    # Check the performance of GCN under directed attack without defense
    flag = False
    # print('=== testing GNN on original(clean) graph ===')
    # print("acc_test:",test(adj, features, device, attention=flag))
    # print('=== testing GCN on perturbed graph ===')
    # print("acc_test:",test(modified_adj, features, device, attention=flag))

    # Use GNNGuard for defense
    flag = True
    print('=== testing GNN on original(clean) graph + GNNGuard ===')
    print("acc_test:",test(adj, features, device, attention=flag))
    print('=== testing GCN on perturbed graph + GNNGuard ===')
    print("acc_test:",test(modified_adj, features, device, attention=flag))

def test(adj, features, device, attention):
    """Train the deeprobust GCN classifier five times on (adj, features)
    and return ``{"mean": ..., "std": ...}`` of the test accuracies.

    FIX: removed a leftover debug ``print(1)`` from the training loop.

    NOTE(review): the loop variable ``seed`` is never used to reseed the
    RNGs, so the five runs differ only through torch's evolving global
    random state — confirm whether per-run seeding was intended.
    Relies on module-level globals ``labels``, ``idx_train``, ``idx_val``
    and ``idx_test``.
    """
    accs = []
    for seed in tqdm(range(5)):

        classifier = GCN(nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1, dropout=0.5, device=device)

        classifier = classifier.to(device)

        classifier.fit(features, adj, labels, idx_train, train_iters=201,
                       idx_val=idx_val,
                       idx_test=idx_test,
                       verbose=True, attention=attention) # idx_val=idx_val, idx_test=idx_test , model_name=model_name
        classifier.eval()

        # classifier.fit(features, adj, labels, idx_train, idx_val) # train with validation model picking
        acc_test, output = classifier.test(idx_test)
        accs.append(acc_test.item())
    mean = np.mean(accs)
    std = np.std(accs)
    return {"mean": mean, "std": std}

def main_autogl(dataset, model_hp, adj, features, device):
    """AutoGL counterpart of :func:`main`: evaluate the AutoGL GNNGuard model
    on the clean graph and on the pre-computed Metattack-perturbed graph.

    Parameters
    ----------
    dataset : str
        Dataset name used to fetch the pre-attacked adjacency.
    model_hp : dict
        Hyper-parameters for ``from_hyper_parameter``.
    adj, features
        Clean adjacency and feature matrices (scipy CSR).
    device
        Torch device forwarded to :func:`test_autogl`.

    NOTE(review): relies on module-level globals ``pyg_data``, ``labels``,
    ``idx_train`` and ``args``; the surrogate uses ``args.device`` rather
    than the ``device`` parameter — confirm this mismatch.
    """
    # Setup Surrogate model
    surrogate = AutoGNNGuard_attack(
        num_features=pyg_data.num_node_features,
        num_classes=pyg_data.num_classes,
        device=args.device,
        init=False
    ).from_hyper_parameter(model_hp).model
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201) # change this train_iters to 201: train_iters=201

    # Setup Attack Model
    # model = Metattack(model=surrogate, nnodes=adj.shape[0], feature_shape=features.shape, attack_structure=True, attack_features=False, device=args.device, lambda_=0.5) # lambda_ is used to weight the two objectives in Eq. (10) in the paper.
    # model = model.to(args.device)

    # """save the mettacked adj"""
    # model.attack(features, adj, labels, idx_train, idx_unlabeled, perturbations, ll_constraint=False)
    # modified_adj = sp.csr_matrix(model.modified_adj.cpu())

    # Load the pre-attacked adjacency instead of re-running Metattack.
    perturbed_data = PrePtbDataset(root='/tmp/', name=dataset, attack_method='meta', ptb_rate=0.2)
    modified_adj = perturbed_data.adj

    # Check the performance of GCN under directed attack without defense (AutoGL)
    # flag = False
    # print('=== testing GNN on original(clean) graph (AutoGL) ===')
    # print("acc_test:",test_autogl(adj, features, device, attention=flag))
    # print('=== testing GCN on perturbed graph (AutoGL) ===')
    # print("acc_test:",test_autogl(modified_adj, features, device, attention=flag))

    # Use GNNGuard for defense (AutoGL)
    flag = True
    print('=== testing GNN on original(clean) graph (AutoGL) + GNNGuard ===')
    print("acc_test:",test_autogl(adj, features, device, attention=flag))
    print('=== testing GCN on perturbed graph (AutoGL) + GNNGuard ===')
    print("acc_test:",test_autogl(modified_adj, features, device, attention=flag))

def test_autogl(adj, features, device, attention):
    """Train the AutoGL GNNGuard model five times on (adj, features) and
    return ``{"mean": ..., "std": ...}`` of the test accuracies.

    FIX: the original body started with a stray ``''`` expression that
    became the docstring and left the two descriptive triple-quoted strings
    as dead statements; they are merged into this docstring.

    NOTE(review): the loop variable ``seed`` is never used to reseed the
    RNGs — confirm whether per-run seeding was intended.  Relies on
    module-level globals ``pyg_data``, ``model_hp``, ``args``, ``labels``,
    ``idx_train``, ``idx_val`` and ``idx_test``.
    """
    accs = []
    for seed in tqdm(range(5)):
        gcn = AutoGNNGuard(
            num_features=pyg_data.num_node_features,
            num_classes=pyg_data.num_classes,
            device=args.device,
            init=False
        ).from_hyper_parameter(model_hp).model
        gcn = gcn.to(device)
        gcn.fit(features, adj, labels, idx_train, idx_val=idx_val,
                idx_test=idx_test,
                attention=attention, verbose=True, train_iters=81)
        gcn.eval()
        acc_test, output = gcn.test(idx_test=idx_test)
        accs.append(acc_test.item())
    mean = np.mean(accs)
    std = np.std(accs)
    return {"mean": mean, "std": std}

if __name__ == '__main__':

    # Hyper-parameters handed to the AutoGL models above.
    model_hp = {
        "num_layers": 2,
        "hidden": [16],
        "dropout": 0.5,
        "act": "relu"
    }

    # Command-line options for the GNNGuard / Metattack experiment.
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=14, help='Random seed.')
    # cora and citeseer have binary features; pubmed does not
    parser.add_argument('--dataset', type=str, default='citeseer', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
    parser.add_argument('--ptb_rate', type=float, default=0.2, help='pertubation rate')
    parser.add_argument('--modelname', type=str, default='GCN', choices=['GCN', 'GAT','GIN', 'JK'])
    parser.add_argument('--defensemodel', type=str, default='GCNJaccard', choices=['GCNJaccard', 'RGCN', 'GCNSVD'])
    parser.add_argument('--DPlabel', type=int, default=9, help='0-10')

    args = parser.parse_args()
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('cuda: %s' % args.device)

    # NOTE(review): these assignments override the --dataset / --modelname
    # CLI options — confirm this is intentional.
    args.dataset = "pubmed"
    args.modelname = "GCN"

    # Clean graph plus a PyG-format copy for the AutoGL models.
    data = Dataset(root='/tmp/', name=args.dataset)
    pyg_data = Dpr2Pyg(data).process().to(args.device)
    pyg_data.num_classes = len(set(data.labels))

    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)
    if scipy.sparse.issparse(features)==False:
        features = scipy.sparse.csr_matrix(features)

    # Perturbation budget (used only by the commented-out Metattack path).
    perturbations = int(args.ptb_rate * (adj.sum()//2)) ###
    adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False)

    # to CSR sparse
    adj, features = csr_matrix(adj), csr_matrix(features)

    """add undirected edges, orgn-arxiv is directed graph, we transfer it to undirected closely following
    https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-arxiv
    """
    adj = adj + adj.T
    adj[adj>1] = 1

    # main(args.dataset, adj, features, device=args.device)
    main_autogl(args.dataset, model_hp, adj, features, device=args.device)

Loading…
Cancel
Save