Browse Source

gnnguard and tutorial for link-prediction

develop/0.4/predevelop
蔡婕 3 years ago
parent
commit
eca9d0567a
9 changed files with 1123 additions and 604 deletions
  1. +3
    -3
      autogl/module/model/pyg/__init__.py
  2. +0
    -279
      autogl/module/model/pyg/robust/gcn_svd.py
  3. +304
    -0
      autogl/module/model/pyg/robust/gnnguard.py
  4. +196
    -0
      autogl/module/model/pyg/robust/nn/conv/gcn_conv.py
  5. +204
    -0
      docs/docfile/tutorial/t_homo_link_prediction.rst
  6. +212
    -0
      docs/docfile/tutorial_cn/t_homo_link_prediction.rst
  7. +0
    -130
      test/performance/robust/model_gcnsvd.py
  8. +0
    -192
      test/performance/robust/model_gnnguard.py
  9. +204
    -0
      test/performance/robust/model_gnnguard_meta.py

+ 3
- 3
autogl/module/model/pyg/__init__.py View File

@@ -9,8 +9,7 @@ from .gcn import AutoGCN
from .gat import AutoGAT
from .gin import AutoGIN

from .robust.gcn_svd import AutoGCNSVD
from .robust.gnnguard import AutoGNNGuard, GCN4GNNGuard
from .robust.gnnguard import AutoGNNGuard, AutoGNNGuard_attack, GCN4GNNGuard, GCN4GNNGuard_attack

__all__ = [
"ModelUniversalRegistry",
@@ -22,7 +21,8 @@ __all__ = [
"AutoGCN",
"AutoGAT",
"AutoGIN",
"AutoGCNSVD",
"AutoGNNGuard",
"AutoGNNGuard_attack",
"GCN4GNNGuard",
"GCN4GNNGuard_attack",
]

+ 0
- 279
autogl/module/model/pyg/robust/gcn_svd.py View File

@@ -1,279 +0,0 @@
from tkinter import TRUE
import torch
import torch.optim as optim
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn as nn
import torch.nn.functional as F
import typing as _typing
import math
from tqdm import tqdm
import scipy.sparse as sp
import numpy as np
from copy import deepcopy
from numba import njit

from .. import register_model
from . import utils
from ..gcn import GCN
from ..base import BaseAutoModel
from .....utils import get_logger

LOGGER = get_logger("GCNSVDModel")


### ========================== ###

class GCN4Robust(GCN):
    """GCN extended with a robust training loop on top of the existing GCN
    encoder: deepcopy-based model selection on a validation split and
    optional early stopping.
    """

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        """
        Parameters
        ----------
        nfeat : input feature dimension.
        nclass : number of target classes.
        nhid : hidden layer sizes.
        activation : activation function name.
        dropout : dropout rate.
        lr, weight_decay : Adam optimizer settings used by ``fit``.
        """
        # BUG FIX: the ``normalize`` default was ``TRUE`` accidentally imported
        # from tkinter; use the builtin ``True`` instead.
        super(GCN4Robust, self).__init__(
            nfeat, nclass, nhid, activation, dropout=dropout,
            add_self_loops=add_self_loops, normalize=normalize)
        # BUG FIX: lr / weight_decay / with_* were silently discarded, although
        # the training helpers below read ``self.lr`` and ``self.weight_decay``.
        self.lr = lr
        self.weight_decay = weight_decay
        self.with_relu = with_relu
        self.with_bias = with_bias

    def fit(self, features, adj, labels, idx_train, idx_val=None,
            train_iters=200, initialize=True, verbose=False, normalize=True,
            patience=500, **kwargs):
        """Train the model.

        When ``idx_val`` is given, the best weights according to validation
        performance are kept; early stopping is used when
        ``patience < train_iters``.
        """
        self.device = self.gc1.weight.device
        if initialize:
            self.initialize()

        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features, adj, labels, device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        if normalize:
            # Symmetric normalization D^{-1/2} A D^{-1/2}.
            if utils.is_sparse_tensor(adj):
                adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                adj_norm = utils.normalize_adj_tensor(adj)
        else:
            adj_norm = adj

        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            self._train_with_early_stopping(labels, idx_train, idx_val, train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose):
        """Plain training on the train split, no model selection."""
        self.train()
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

        self.eval()
        output = self.forward(self.features, self.adj_norm)
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters, verbose):
        """Train and keep the weights with the best validation loss/accuracy."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        best_loss_val = 100
        best_acc_val = 0
        # Robustness: ensure ``weights`` is defined even if validation never improves.
        weights = deepcopy(self.state_dict())

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())

            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output
                weights = deepcopy(self.state_dict())

        if verbose:
            print('=== picking the best model according to the performance on validation ===')
        self.load_state_dict(weights)

    def _train_with_early_stopping(self, labels, idx_train, idx_val, train_iters, patience, verbose):
        """Train with early stopping driven by the validation loss."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        early_stopping = patience
        best_loss_val = 100
        # Robustness: ensure ``weights`` is defined even if validation never improves.
        weights = deepcopy(self.state_dict())

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping  # reset the budget on improvement
            else:
                patience -= 1
            if i > early_stopping and patience <= 0:
                break

        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(i, best_loss_val))
        self.load_state_dict(weights)

class GCNSVD(GCN4Robust):
    """GCN-SVD defense: trains a GCN on a low-rank (truncated SVD)
    approximation of the adjacency matrix to filter adversarial edges."""

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        super(GCNSVD, self).__init__(nfeat, nclass, nhid, activation, dropout,
                                     lr, weight_decay, with_relu, with_bias,
                                     add_self_loops, normalize)

    def fit(self, features, adj, labels, idx_train, idx_val=None, k=50,
            train_iters=200, initialize=True, verbose=True, **kwargs):
        """Fit the model on the rank-``k`` approximation of ``adj``."""
        # BUG FIX: ``self.device`` is only set inside the parent ``fit``; derive
        # it here before the first ``utils.to_tensor`` call needs it.
        self.device = self.gc1.weight.device
        modified_adj = self.truncatedSVD(adj, k=k)
        self.k = k  # remembered so predict() applies the same rank
        features, modified_adj, labels = utils.to_tensor(
            features, modified_adj, labels, device=self.device)

        self.modified_adj = modified_adj
        self.features = features
        self.labels = labels
        super().fit(features, modified_adj, labels, idx_train, idx_val,
                    train_iters=train_iters, initialize=initialize, verbose=verbose)

    def truncatedSVD(self, data, k=50):
        """Return the rank-``k`` SVD reconstruction of ``data``.

        Accepts either a scipy sparse matrix (uses ``svds``) or a dense
        array (full SVD, then truncated to the top ``k`` components).
        """
        print('=== GCN-SVD: rank={} ==='.format(k))
        if sp.issparse(data):
            data = data.asfptype()
            U, S, V = sp.linalg.svds(data, k=k)
            print("rank_after = {}".format(len(S.nonzero()[0])))
            diag_S = np.diag(S)
        else:
            U, S, V = np.linalg.svd(data)
            U = U[:, :k]
            S = S[:k]
            V = V[:k, :]
            print("rank_before = {}".format(len(S.nonzero()[0])))
            diag_S = np.diag(S)
            print("rank_after = {}".format(len(diag_S.nonzero()[0])))

        return U @ diag_S @ V

    def predict(self, features=None, adj=None):
        """Predict log-probabilities.

        With no arguments, reuses the data cached by ``fit``.  A freshly
        supplied ``adj`` is re-approximated with the same rank ``k`` used
        during fit (so ``fit`` must have been called first).
        """
        self.eval()
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)

        adj = self.truncatedSVD(adj, k=self.k)
        if type(adj) is not torch.Tensor:
            features, adj = utils.to_tensor(features, adj, device=self.device)

        self.features = features
        if utils.is_sparse_tensor(adj):
            self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            self.adj_norm = utils.normalize_adj_tensor(adj)
        return self.forward(self.features, self.adj_norm)

@register_model("gcnsvd-model")
class AutoGCNSVD(BaseAutoModel):
    """Auto model wrapping :class:`GCNSVD` for hyper-parameter optimization."""

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        **kwargs
    ) -> None:
        super().__init__(num_features, num_classes, device, **kwargs)
        self.hyper_parameter_space = [
            {
                "parameterName": "add_self_loops",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "normalize",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [128, 128, 128],
                "scalingType": "LOG",
                "cutPara": ("num_layers",),
                # hidden list has one entry fewer than the number of layers
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
        ]

        self.hyper_parameters = {
            "num_layers": 3,
            "hidden": [128, 64],
            "dropout": 0,
            "act": "relu",
        }

    def _initialize(self):
        """Build the underlying GCNSVD from the current hyper-parameters.

        BUG FIX: arguments are now passed by keyword — previously
        ``add_self_loops`` / ``normalize`` were passed positionally and landed
        on GCNSVD's ``lr`` / ``weight_decay`` parameters.
        """
        self._model = GCNSVD(
            nfeat=self.input_dimension,
            nclass=self.output_dimension,
            nhid=self.hyper_parameters.get("hidden"),
            activation=self.hyper_parameters.get("act"),
            dropout=self.hyper_parameters.get("dropout", None),
            add_self_loops=bool(self.hyper_parameters.get("add_self_loops", True)),
            normalize=bool(self.hyper_parameters.get("normalize", True)),
        ).to(self.device)

+ 304
- 0
autogl/module/model/pyg/robust/gnnguard.py View File

@@ -39,6 +39,12 @@ class GCN4GNNGuard(GCN):
self.nfeat = nfeat
self.hidden_sizes = nhid
self.drop = drop
if not with_relu:
self.weight_decay = 0
else:
self.weight_decay = weight_decay
self.with_relu = with_relu
self.with_bias = with_bias

self.gc1 = GCNConv(nfeat, nhid[0], bias=True,)
self.gc2 = GCNConv(nhid[0], nclass, bias=True, )
@@ -345,6 +351,235 @@ class GCN4GNNGuard(GCN):
self.adj_norm = utils.normalize_adj_tensor(adj)
return self.forward(self.features, self.adj_norm)

class GCN4GNNGuard_attack(GCN):
    """GNNGuard-defended GCN used in the adversarial-attack setting.

    Built on top of the existing GCN: the edge set is never changed, the
    defense only rescales ``edge_weight`` (an edge whose weight equals zero
    is regarded as removed).
    """

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 drop=False, weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        super(GCN4GNNGuard_attack, self).__init__(
            nfeat, nclass, nhid, activation, dropout=dropout,
            add_self_loops=add_self_loops, normalize=normalize)

        self.lr = lr
        self.dropout = dropout
        self.nclass = nclass
        self.nfeat = nfeat
        self.hidden_sizes = nhid
        self.drop = drop
        # No weight decay when ReLU (and thus the nonlinear model) is disabled.
        self.weight_decay = weight_decay if with_relu else 0
        self.with_relu = with_relu
        self.with_bias = with_bias

        self.gc1 = GCNConv(nfeat, nhid[0], bias=True)
        self.gc2 = GCNConv(nhid[0], nclass, bias=True)

    def forward(self, x, adj_lil):
        """Two-layer GCN forward pass.

        We don't change the edge_index, just update the edge_weight; some
        edge weights are regarded as removed if they equal zero.
        """
        x = x.to_dense()
        edge_index = adj_lil.coalesce().indices()
        edge_weight = adj_lil.coalesce().values()

        x = F.relu(self.gc1(x, edge_index, edge_weight=edge_weight))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, edge_index, edge_weight=edge_weight)

        return F.log_softmax(x, dim=1)

    def add_loop_sparse(self, adj, fill_value=1):
        """Return ``adj + I`` as a sparse tensor.

        NOTE(review): ``fill_value`` is currently unused (the identity always
        carries weight 1) — confirm intended.
        """
        # FIX: torch.range is deprecated (and end-inclusive); torch.arange
        # yields the same 0..n-1 indices.
        row = torch.arange(adj.shape[0], dtype=torch.int64)
        i = torch.stack((row, row), dim=0)
        v = torch.ones(adj.shape[0], dtype=torch.float32)
        # FIX: torch.sparse.FloatTensor is deprecated; use sparse_coo_tensor.
        I_n = torch.sparse_coo_tensor(i, v, adj.shape)
        return adj + I_n.to(self.device)

    def initialize(self):
        """Reset the parameters of both convolution layers."""
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def fit(self, features, adj, labels, idx_train, idx_val=None, idx_test=None,
            train_iters=81, att_0=None, attention=False, model_name=None,
            initialize=True, verbose=False, normalize=False, patience=510, ):
        '''
        Train the gcn model; when idx_val is not None, pick the best model
        according to the validation loss.
        '''
        # Derive the device from any registered parameter/buffer.
        sd = self.state_dict()
        for v in sd.values():
            self.device = v.device
            break

        self.sim = None
        self.attention = attention
        if self.attention:
            # NOTE(review): att_coef_1 is not defined in this class — it is
            # expected to be provided by the GNNGuard defense; confirm.
            att_0 = self.att_coef_1(features, adj)
            adj = att_0       # update adj
            self.sim = att_0  # update att_0

        self.idx_test = idx_test
        if initialize:
            self.initialize()

        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features, adj, labels, device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        # We don't need to normalize here: normalization is conducted inside
        # the GCNConv layers (self.gc1/self.gc2).
        normalize = False
        if normalize:
            if utils.is_sparse_tensor(adj):
                adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                adj_norm = utils.normalize_adj_tensor(adj)
        else:
            adj_norm = adj

        """Make the coefficient D^{-1/2}(A+I)D^{-1/2}"""
        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            self._train_with_early_stopping(labels, idx_train, idx_val, train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose):
        """Plain training on the train split, no model selection."""
        self.train()
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            # the (unused) weight would be the per-training-node weight
            loss_train = F.nll_loss(output[idx_train], labels[idx_train], weight=None)
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

        self.eval()
        output = self.forward(self.features, self.adj_norm)
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters, verbose):
        """Train and keep the weights with the best validation loss/accuracy."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        best_loss_val = 100
        best_acc_val = 0
        # Robustness: ensure ``weights`` is defined even if validation never improves.
        weights = deepcopy(self.state_dict())

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])

            if verbose and i % 200 == 0:
                # BUG FIX: ``accuracy`` was referenced without the ``utils.``
                # prefix (NameError); also evaluate test accuracy only when it
                # is actually printed.
                acc_test = utils.accuracy(output[self.idx_test], labels[self.idx_test])
                print('Epoch {}, training loss: {}, test acc: {}'.format(i, loss_train.item(), acc_test))

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())

            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output
                weights = deepcopy(self.state_dict())

        if verbose:
            print('=== picking the best model according to the performance on validation ===')
        self.load_state_dict(weights)

    def _train_with_early_stopping(self, labels, idx_train, idx_val, train_iters, patience, verbose):
        """Train with early stopping driven by the validation loss."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        early_stopping = patience
        best_loss_val = 100
        # Robustness: ensure ``weights`` is defined even if validation never improves.
        weights = deepcopy(self.state_dict())

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            self.eval()
            output = self.forward(self.features, self.adj_norm)

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))

            loss_val = F.nll_loss(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping  # reset the budget on improvement
            else:
                patience -= 1
            if i > early_stopping and patience <= 0:
                break

        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(i, best_loss_val))
        self.load_state_dict(weights)

    def test(self, idx_test):
        """Evaluate on ``idx_test`` using the data cached during training."""
        self.eval()
        output = self.predict()  # uses self.features / self.adj_norm from the training stage
        loss_test = F.nll_loss(output[idx_test], self.labels[idx_test])
        acc_test = utils.accuracy(output[idx_test], self.labels[idx_test])
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.4f}".format(acc_test.item()))
        return acc_test, output

    def _set_parameters(self):
        # TODO
        pass

    def predict(self, features=None, adj=None):
        '''By default, inputs are unnormalized data.'''
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)

        if type(adj) is not torch.Tensor:
            features, adj = utils.to_tensor(features, adj, device=self.device)

        self.features = features
        if utils.is_sparse_tensor(adj):
            self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            self.adj_norm = utils.normalize_adj_tensor(adj)
        return self.forward(self.features, self.adj_norm)


@register_model("gnnguard-model")
class AutoGNNGuard(BaseAutoModel):
@@ -414,3 +649,72 @@ class AutoGNNGuard(BaseAutoModel):
add_self_loops = bool(self.hyper_parameters.get("add_self_loops", True)),
normalize = bool(self.hyper_parameters.get("normalize", True)),
).to(self.device)

@register_model("gnnguard-attack-model")
class AutoGNNGuard_attack(BaseAutoModel):
    """Auto model wrapping :class:`GCN4GNNGuard_attack` for hyper-parameter
    optimization under adversarial attack."""

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        **kwargs
    ) -> None:
        super().__init__(num_features, num_classes, device, **kwargs)

        # Search space: graph options, depth, layer widths, regularization
        # and activation.
        space = [
            {
                "parameterName": "add_self_loops",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "normalize",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [128, 128, 128],
                "scalingType": "LOG",
                "cutPara": ("num_layers",),
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
        ]
        self.hyper_parameter_space = space

        # Defaults used until HPO proposes something better.
        self.hyper_parameters = {
            "num_layers": 3,
            "hidden": [128, 64],
            "dropout": 0,
            "act": "relu",
        }

    def _initialize(self):
        """Instantiate GCN4GNNGuard_attack from the current hyper-parameters
        and move it onto the configured device."""
        hp = self.hyper_parameters
        model = GCN4GNNGuard_attack(
            nfeat=self.input_dimension,
            nclass=self.output_dimension,
            nhid=hp.get("hidden"),
            activation=hp.get("act"),
            dropout=hp.get("dropout", None),
            add_self_loops=bool(hp.get("add_self_loops", True)),
            normalize=bool(hp.get("normalize", True)),
        )
        self._model = model.to(self.device)

+ 196
- 0
autogl/module/model/pyg/robust/nn/conv/gcn_conv.py View File

@@ -0,0 +1,196 @@
import torch
from torch.nn import Parameter
from torch_scatter import scatter_add
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import add_remaining_self_loops, to_undirected

from ..inits import glorot, zeros

@torch.jit._overload
def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
add_self_loops=True, dtype=None):
# type: (Tensor, OptTensor, Optional[int], bool, bool, Optional[int]) -> PairTensor # noqa
pass


@torch.jit._overload
def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
add_self_loops=True, dtype=None):
# type: (SparseTensor, OptTensor, Optional[int], bool, bool, Optional[int]) -> SparseTensor # noqa
pass


def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
             add_self_loops=True, dtype=None):
    """Symmetrically normalize the adjacency: D^{-1/2} (A [+ I]) D^{-1/2}.

    Accepts either a dense ``edge_index`` tensor (returns
    ``(edge_index, edge_weight)``) or a ``torch_sparse.SparseTensor``
    (returns the normalized ``SparseTensor``).
    """
    fill_value = 2. if improved else 1.

    # BUG FIX: SparseTensor / fill_diag / mul and the sparse ``sum`` were
    # referenced without being imported, and the builtin ``sum`` has no
    # ``dim`` argument. Import lazily so the dense path works even when
    # torch_sparse is unavailable.
    try:
        from torch_sparse import SparseTensor, fill_diag, mul
        from torch_sparse import sum as sparsesum
    except ImportError:
        SparseTensor = None

    if SparseTensor is not None and isinstance(edge_index, SparseTensor):
        adj_t = edge_index
        if not adj_t.has_value():
            adj_t = adj_t.fill_value(1., dtype=dtype)
        if add_self_loops:
            adj_t = fill_diag(adj_t, fill_value)
        deg = sparsesum(adj_t, dim=1)
        deg_inv_sqrt = deg.pow_(-0.5)
        deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0.)
        adj_t = mul(adj_t, deg_inv_sqrt.view(-1, 1))
        adj_t = mul(adj_t, deg_inv_sqrt.view(1, -1))
        return adj_t

    else:
        # BUG FIX: maybe_num_nodes was never imported; infer the node count
        # from the edge index when it is not given (same semantics).
        if num_nodes is None:
            num_nodes = int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0

        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                     device=edge_index.device)

        if add_self_loops:
            edge_index, tmp_edge_weight = add_remaining_self_loops(
                edge_index, edge_weight, fill_value, num_nodes)
            assert tmp_edge_weight is not None
            edge_weight = tmp_edge_weight

        row, col = edge_index[0], edge_index[1]
        deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow_(-0.5)
        deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0)
        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]



class GCNConv(MessagePassing):
    r"""The graph convolutional operator from the `"Semi-supervised
    Classification with Graph Convolutional Networks"
    <https://arxiv.org/abs/1609.02907>`_ paper

    .. math::
        \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
        \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

    where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
    adjacency matrix with inserted self-loops and
    :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample.
        improved (bool, optional): If set to :obj:`True`, the layer computes
            :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`.
            (default: :obj:`False`)
        cached (bool, optional): If set to :obj:`True`, the layer will cache
            the computation of :math:`\mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
            \mathbf{\hat{D}}^{-1/2}` on first execution, and will use the
            cached version for further executions.
            This parameter should only be set to :obj:`True` in transductive
            learning scenarios. (default: :obj:`False`)
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
        add_self_loops (bool, optional): Currently unused here — self-loops
            are expected to be added by GNNGuard's att_coef beforehand.
        normalize (bool, optional): Whether to apply symmetric normalization.
            (default: :obj:`True`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    def __init__(self, in_channels, out_channels, improved=False, cached=False,
                 bias=True, add_self_loops: bool = True, normalize=True, **kwargs):
        super(GCNConv, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.normalize = normalize

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        if bias:
            # BUG FIX: ``torch.tensor(out_channels, dtype=...)`` creates a
            # 0-dim scalar holding the *value* out_channels; the bias must be
            # a vector of size out_channels (initialized in reset_parameters).
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Glorot-init the weight, zero the bias, and drop any cached norm."""
        glorot(self.weight)
        zeros(self.bias)
        self.cached_result = None
        self.cached_num_edges = None

    # Modified for GNNGuard: self-loops are NOT (re-)added here because they
    # are already inserted by the att_coef defense; the degree is accumulated
    # over ``row`` instead of ``col``.
    @staticmethod
    def norm(edge_index, num_nodes, edge_weight=None, improved=False,
             dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.to(edge_index.device)
        fill_value = 1 if not improved else 2

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight=None):
        """Linear transform, (optionally cached) normalization, then propagate."""
        x = torch.matmul(x, self.weight)

        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}. Please '
                    'disable the caching behavior of this layer by removing '
                    'the `cached=True` argument in its constructor.'.format(
                        self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            if self.normalize:
                edge_index, norm = self.norm(edge_index, x.size(0), edge_weight, self.improved, x.dtype)
            else:
                norm = edge_weight
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        # Scale each neighbor message by its normalized edge weight.
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)

+ 204
- 0
docs/docfile/tutorial/t_homo_link_prediction.rst View File

@@ -0,0 +1,204 @@
==========================
Link Prediction Model
==========================

Building Link Prediction Modules
=====================================

In AutoGL, we support three models for link prediction: ``gcn``, ``gat`` and ``sage``.

AutoLinkPredictor
>>>>>>>>>>>>>>>>>

Used to automatically solve the link prediction problems. For example,


.. code-block:: python

class AutoGCN(BaseAutoModel):
r"""
AutoGCN.
The model used in this automodel is GCN, i.e., the graph convolutional network from the
`"Semi-supervised Classification with Graph Convolutional
Networks" <https://arxiv.org/abs/1609.02907>`_ paper. The layer is

.. math::

\mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
\mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
adjacency matrix with inserted self-loops and
:math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

Parameters
----------
num_features: ``int``
The dimension of features.

num_classes: ``int``
The number of classes.

device: ``torch.device`` or ``str``
The device where model will be running on.

init: `bool`.
If True(False), the model will (not) be initialized.
"""

def __init__(
self,
num_features: int = ...,
num_classes: int = ...,
device: _typing.Union[str, torch.device] = ...,
**kwargs
) -> None:
super().__init__(num_features, num_classes, device, **kwargs)
self.hyper_parameter_space = [
{
"parameterName": "add_self_loops",
"type": "CATEGORICAL",
"feasiblePoints": [1],
},
{
"parameterName": "normalize",
"type": "CATEGORICAL",
"feasiblePoints": [1],
},
{
"parameterName": "num_layers",
"type": "DISCRETE",
"feasiblePoints": "2,3,4",
},
{
"parameterName": "hidden",
"type": "NUMERICAL_LIST",
"numericalType": "INTEGER",
"length": 3,
"minValue": [8, 8, 8],
"maxValue": [128, 128, 128],
"scalingType": "LOG",
"cutPara": ("num_layers",),
"cutFunc": lambda x: x[0] - 1,
},
{
"parameterName": "dropout",
"type": "DOUBLE",
"maxValue": 0.8,
"minValue": 0.2,
"scalingType": "LINEAR",
},
{
"parameterName": "act",
"type": "CATEGORICAL",
"feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
},
]

self.hyper_parameters = {
"num_layers": 3,
"hidden": [128, 64],
"dropout": 0,
"act": "relu",
}

def _initialize(self):
self._model = GCN(
self.input_dimension,
self.output_dimension,
self.hyper_parameters.get("hidden"),
self.hyper_parameters.get("act"),
self.hyper_parameters.get("dropout", None),
bool(self.hyper_parameters.get("add_self_loops", True)),
bool(self.hyper_parameters.get("normalize", True)),
).to(self.device)

You can define your own ``LinkPrediction`` model by using the ``from_hyper_parameter`` function and specifying the hyperparameters.

.. code-block:: python

# pyg version
from autogl.module.model.pyg import AutoLinkPredictor
# from autogl.module.model.dgl import AutoLinkPredictor # dgl version
model = AutoLinkPredictor(
feature_module="NormalizeFeatures",
graph_models=(args.model, ),
hpo_module="random",
ensemble_module=None,
max_evals=1,
trainer_hp_space=fixed(**{
"max_epoch": 100,
"early_stopping_round": 101,
"lr": 1e-2,
"weight_decay": 0.0,
}),
model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}]
).model

Then you can train the model for 100 epochs.

.. code-block:: python

import torch.nn.functional as F

# Define the loss optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training
for epoch in range(100):
model.train()
optimizer.zero_grad()

z = model.lp_encode(splitted[0])
link_logits = model.lp_decode(
z, torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges())
)
link_labels = get_link_labels(
torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges())
)
loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
loss.backward()
optimizer.step()

auc_val = evaluate(model, splitted, "val")

if auc_val > best_auc:
best_auc = auc_val
best_parameters = pickle.dumps(model.state_dict())

Finally, evaluate the trained model.

.. code-block:: python

model.load_state_dict(pickle.loads(best_parameters))
evaluate(model, splitted, "test")


Automatic Search for Link Prediction Tasks
===============================================

In AutoGL, we also provide a high-level API Solver to control the overall pipeline.
We encapsulated the training process in the Building GNN Modules part for link prediction tasks
in the solver ``AutoLinkPredictor`` that supports automatic hyperparametric optimization
as well as feature engineering and ensemble. In this part, we will show you how to use
``AutoLinkPredictor``.

.. code-block:: python

solver = AutoLinkPredictor(
feature_module="NormalizeFeatures",
graph_models=(args.model, ),
hpo_module="random",
ensemble_module=None,
max_evals=1,
trainer_hp_space=fixed(**{
"max_epoch": 100,
"early_stopping_round": 101,
"lr": 1e-2,
"weight_decay": 0.0,
}),
model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}]
)
solver.fit(dataset, train_split=0.85, val_split=0.05, evaluation_method=["auc"], seed=seed)
pre = solver.evaluate(metric="auc")

+ 212
- 0
docs/docfile/tutorial_cn/t_homo_link_prediction.rst View File

@@ -0,0 +1,212 @@
.. _homo_cn:

==========================
链接预测模型
==========================

构建链接预测模块
=====================================
.. In AutoGL, we support three models for link prediction: ``gcn``, ``gat`` and ``sage``.
在AutoGL中,我们支持三种链接预测模型: ``gcn``, ``gat`` and ``sage`` 。

AutoLinkPredictor
>>>>>>>>>>>>>>>>>

.. Used to automatically solve the link prediction problems. For example,
用于自动解决链路预测问题。例如,

.. code-block:: python

class AutoGCN(BaseAutoModel):
r"""
AutoGCN.
The model used in this automodel is GCN, i.e., the graph convolutional network from the
`"Semi-supervised Classification with Graph Convolutional
Networks" <https://arxiv.org/abs/1609.02907>`_ paper. The layer is

.. math::

\mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
\mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
adjacency matrix with inserted self-loops and
:math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

Parameters
----------
num_features: ``int``
The dimension of features.

num_classes: ``int``
The number of classes.

device: ``torch.device`` or ``str``
The device where model will be running on.

init: `bool`.
If True(False), the model will (not) be initialized.
"""

def __init__(
self,
num_features: int = ...,
num_classes: int = ...,
device: _typing.Union[str, torch.device] = ...,
**kwargs
) -> None:
super().__init__(num_features, num_classes, device, **kwargs)
self.hyper_parameter_space = [
{
"parameterName": "add_self_loops",
"type": "CATEGORICAL",
"feasiblePoints": [1],
},
{
"parameterName": "normalize",
"type": "CATEGORICAL",
"feasiblePoints": [1],
},
{
"parameterName": "num_layers",
"type": "DISCRETE",
"feasiblePoints": "2,3,4",
},
{
"parameterName": "hidden",
"type": "NUMERICAL_LIST",
"numericalType": "INTEGER",
"length": 3,
"minValue": [8, 8, 8],
"maxValue": [128, 128, 128],
"scalingType": "LOG",
"cutPara": ("num_layers",),
"cutFunc": lambda x: x[0] - 1,
},
{
"parameterName": "dropout",
"type": "DOUBLE",
"maxValue": 0.8,
"minValue": 0.2,
"scalingType": "LINEAR",
},
{
"parameterName": "act",
"type": "CATEGORICAL",
"feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
},
]

self.hyper_parameters = {
"num_layers": 3,
"hidden": [128, 64],
"dropout": 0,
"act": "relu",
}

def _initialize(self):
self._model = GCN(
self.input_dimension,
self.output_dimension,
self.hyper_parameters.get("hidden"),
self.hyper_parameters.get("act"),
self.hyper_parameters.get("dropout", None),
bool(self.hyper_parameters.get("add_self_loops", True)),
bool(self.hyper_parameters.get("normalize", True)),
).to(self.device)


.. You could define your own ``LinkPrediction`` model by using the ``from_hyper_parameter`` function and specifying the hyperparameters.
你可以通过使用 ``from_hyper_parameter`` 函数定义你自己的 ``LinkPrediction`` 模型,并对其指定超参数。

.. code-block:: python

# pyg version
from autogl.module.model.pyg import AutoLinkPredictor
# from autogl.module.model.dgl import AutoLinkPredictor # dgl version
model = AutoLinkPredictor(
feature_module="NormalizeFeatures",
graph_models=(args.model, ),
hpo_module="random",
ensemble_module=None,
max_evals=1,
trainer_hp_space=fixed(**{
"max_epoch": 100,
"early_stopping_round": 101,
"lr": 1e-2,
"weight_decay": 0.0,
}),
model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}]
).model



.. Then you can train the model for 100 epochs.
然后你可以对模型进行100个epoch的训练:

.. code-block:: python

import torch.nn.functional as F

# Define the loss optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training
for epoch in range(100):
model.train()
optimizer.zero_grad()

z = model.lp_encode(splitted[0])
link_logits = model.lp_decode(
z, torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges())
)
link_labels = get_link_labels(
torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges())
)
loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
loss.backward()
optimizer.step()

auc_val = evaluate(model, splitted, "val")

if auc_val > best_auc:
best_auc = auc_val
best_parameters = pickle.dumps(model.state_dict())


.. Finally, evaluate the trained model.
最后,你可以评估该模型:

.. code-block:: python

model.load_state_dict(pickle.loads(best_parameters))
evaluate(model, splitted, "test")

.. Automatic Search for Link Prediction Tasks
链接预测任务的自动搜索
===============================================

.. In AutoGL, we also provide a high-level API Solver to control the overall pipeline. We encapsulated the training process in the Building GNN Modules part for link prediction tasks in the solver ``AutoLinkPredictor`` that supports automatic hyperparametric optimization as well as feature engineering and ensemble. In this part, we will show you how to use ``AutoLinkPredictor``.
在AutoGL中,我们还提供了一个高级的API求解器来控制整个流水线。我们将构建图神经网络模块部分的训练过程封装在求解器 ``AutoLinkPredictor`` 中以用于链接预测任务,它支持自动超参数优化,特征工程及集成。
在这一部分,我们提供了一个例子来指导如何使用 ``AutoLinkPredictor`` :

.. code-block:: python

solver = AutoLinkPredictor(
feature_module="NormalizeFeatures",
graph_models=(args.model, ),
hpo_module="random",
ensemble_module=None,
max_evals=1,
trainer_hp_space=fixed(**{
"max_epoch": 100,
"early_stopping_round": 101,
"lr": 1e-2,
"weight_decay": 0.0,
}),
model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}]
)
solver.fit(dataset, train_split=0.85, val_split=0.05, evaluation_method=["auc"], seed=seed)
pre = solver.evaluate(metric="auc")

+ 0
- 130
test/performance/robust/model_gcnsvd.py View File

@@ -1,130 +0,0 @@
import os
import pickle
from torchaudio import datasets
from tqdm import tqdm
import torch
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from deeprobust.graph.defense import GCNSVD
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg
import argparse

os.environ["AUTOGL_BACKEND"] = "pyg"


from autogl.module.model.pyg import AutoGCNSVD
from autogl.solver.utils import set_seed

def test(model, data, mask):
model.eval()

if hasattr(model, 'cls_forward'):
out = model.cls_forward(data)[mask]
else:
out = model(data)[mask]
pred = out.max(1)[1]
acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
return acc

def train(model, data, args):
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
print(data)
parameters = model.state_dict()
best_acc = 0.
for epoch in range(args.epoch):
model.train()
optimizer.zero_grad()
if hasattr(model, 'cls_forward'):
output = model.cls_forward(data)
else:
output = model(data)
loss = F.nll_loss(output[data.train_mask], data.y[data.train_mask])
loss.backward()
optimizer.step()

val_acc = test(model, data, data.val_mask)
if val_acc > best_acc:
best_acc = val_acc
parameters = pickle.dumps(model.state_dict())
model.load_state_dict(pickle.loads(parameters))
return model


if __name__ == '__main__':

    # Command-line options for the GCN-SVD robustness experiment.
    # (FIX: the original built a throwaway ``argparse.ArgumentParser()`` that
    # was immediately replaced; the redundant instance is removed.)
    parser = argparse.ArgumentParser('pyg model')
    parser.add_argument('--seed', type=int, default=15, help='Random seed.')
    parser.add_argument('--dataset', type=str, default='cora', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
    parser.add_argument('--ptb_rate', type=float, default=0.05, help='pertubation rate')
    parser.add_argument('--k', type=int, default=15, help='Truncated Components.')
    parser.add_argument('--repeat', type=int, default=1)
    # NOTE(review): default 'cuda:0' fails on CPU-only machines — pass --device cpu there.
    parser.add_argument('--device', type=str, default='cuda:0')
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--epoch', type=int, default=200)

    args = parser.parse_args()
    args.cuda = torch.cuda.is_available()
    print('cuda: %s' % args.cuda)

    # make sure you use the same data splits as you generated attacks
    np.random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # The random seed here controls the train/val/test split; it must match
    # the seed used when the perturbed graph was generated.
    # data = Dataset(root='/tmp/', name=args.dataset, setting='nettack', seed=15)
    # Or we can just use setting='prognn' to get the splits
    data = Dataset(root='/tmp/', name=args.dataset, setting='prognn')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    pyg_data = Dpr2Pyg(data).process().to(args.device)
    pyg_data.num_classes = len(set(labels))

    # load pre-attacked graph
    perturbed_data = PrePtbDataset(root='/tmp/',
                    name=args.dataset,
                    attack_method='meta',
                    ptb_rate=args.ptb_rate)
    perturbed_adj = perturbed_data.adj

    # NOTE(review): the AutoGL run below trains on the *clean* ``pyg_data``
    # while the banner says "perturbed graph" — confirm whether
    # ``perturbed_adj`` should be injected into ``pyg_data`` first.
    print('=== testing GCN-SVD on perturbed graph (AutoGL) ===')
    model_hp = {
        "num_layers": 2,
        "hidden": [16],
        "dropout": 0.5,
        "act": "relu"
    }
    accs = []
    for seed in tqdm(range(args.repeat)):
        set_seed(seed)
        model = AutoGCNSVD(
            num_features=pyg_data.num_node_features,
            num_classes=pyg_data.num_classes,
            device=args.device,
            init=False
        ).from_hyper_parameter(model_hp).model
        model.to(args.device)

        train(model, pyg_data, args)
        acc = test(model, pyg_data, pyg_data.test_mask)
        accs.append(acc)
    print('{:.4f} ~ {:.4f}'.format(np.mean(accs), np.std(accs)))


    # Reference run: deeprobust's own GCN-SVD on the attacked adjacency.
    print('=== testing GCN-SVD on perturbed graph (deeprobust)===')
    model = GCNSVD(nfeat=features.shape[1], nclass=labels.max()+1,
                nhid=16, device=args.device)

    model = model.to(args.device)
    # Test set results: loss= 0.8541 accuracy= 0.7067
    model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=args.k, verbose=True)
    model.eval()
    output = model.test(idx_test)
    print(output)

+ 0
- 192
test/performance/robust/model_gnnguard.py View File

@@ -1,192 +0,0 @@
import os
import torch
# import sys
# sys.path.insert(0, '/n/scratch2/xz204/Dr37/lib/python3.7/site-packages')
from deeprobust.graph.targeted_attack import Nettack
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset
import argparse
# from deeprobust.graph.defense import * # GCN, GAT, GIN, JK, GCN_attack,accuracy_1
from deeprobust.graph.defense import *
from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg
from tqdm import tqdm
import scipy
import numpy as np
from sklearn.preprocessing import normalize
import pickle

os.environ["AUTOGL_BACKEND"] = "pyg"

from autogl.module.model.pyg import AutoGNNGuard
from autogl.solver.utils import set_seed

# Command-line options for the GNNGuard / Nettack targeted-attack experiment.
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=14, help='Random seed.')
# cora and citeseer have binary features; pubmed does not
parser.add_argument('--dataset', type=str, default='citeseer', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
parser.add_argument('--ptb_rate', type=float, default=0.05, help='pertubation rate')
parser.add_argument('--modelname', type=str, default='GCN', choices=['GCN', 'GAT','GIN', 'JK'])
parser.add_argument('--defensemodel', type=str, default='GCNJaccard', choices=['GCNJaccard', 'RGCN', 'GCNSVD'])
parser.add_argument('--DPlabel', type=int, default=9, help='0-10')

args = parser.parse_args()
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
# Run on the first GPU when available, otherwise fall back to CPU.
args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def seed_torch(seed=1029):
    """Seed numpy and torch (CPU and every CUDA device) and disable the
    non-deterministic cuDNN autotuner so repeated runs are reproducible."""
    np.random.seed(seed)
    for seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # torch.use_deterministic_algorithms(True)
seed_torch(args.seed)

# NOTE(review): these two assignments override whatever was passed on the
# command line via --dataset / --modelname — confirm this is intentional.
args.dataset = "cora"
args.modelname = "GCN"

# Load the clean graph from deeprobust's dataset cache.
data = Dataset(root='/tmp/', name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
# The defenses below expect sparse CSR features.
if scipy.sparse.issparse(features)==False:
    features = scipy.sparse.csr_matrix(features)
"""set the number of training/val/testing nodes"""
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
"""add undirected edges, orgn-arxiv is directed graph, we transfer it to undirected closely following
https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-arxiv
"""
adj = adj + adj.T
adj[adj>1] = 1

# PyG-format copy of the same data for the AutoGL models.
pyg_data = Dpr2Pyg(data).process().to(args.device)
pyg_data.num_classes = len(set(labels))


def main(flag):
    """Run a Nettack targeted attack against node 859 and evaluate the
    deeprobust GCN on both the clean and the perturbed graph.

    Parameters
    ----------
    flag : bool
        Forwarded as ``attention`` to :func:`test`; True enables the
        GNNGuard defense.

    NOTE(review): relies on module-level globals ``features``, ``adj``,
    ``labels``, ``idx_train`` and ``args``.
    """

    # Setup Surrogate model (2-layer linear GCN, as required by Nettack)
    surrogate = GCN_attack(nfeat=features.shape[1], nclass=labels.max().item()+1, n_edge=adj.nonzero()[0].shape[0], nhid=16, dropout=0, with_relu=False, with_bias=False, device=args.device, )
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201) # change this train_iters to 201: train_iters=201

    # Setup Attack Model
    target_node = 859

    model = Nettack(surrogate, nnodes=adj.shape[0], attack_structure=True, attack_features=False, device=args.device)
    model = model.to(args.device)

    degrees = adj.sum(0).A1
    # How many perturbations to perform. Default: Degree of the node
    n_perturbations = int(degrees[target_node])

    # direct=True performs a direct attack (not an indirect/influencer attack)
    model.attack(features, adj, labels, target_node, n_perturbations, direct=True)
    modified_adj = model.modified_adj
    modified_features = model.modified_features

    print('=== testing GNN on original(clean) graph ===')
    test(adj, features, target_node, attention=flag)

    print('=== testing GCN on perturbed graph ===')
    test(modified_adj, modified_features, target_node,attention=flag)

def test(adj, features, target_node, attention=False):
    """Train a fresh deeprobust model on (adj, features) and report accuracy.

    The model class is resolved by name from ``args.modelname`` ('GCN',
    'GAT', 'GIN' or 'JK').  Prints the class probabilities of
    ``target_node`` and the test-set accuracy, and returns the accuracy
    as a float.

    FIX: the original body started with a stray ``''`` expression, which
    became the docstring and turned the two descriptive triple-quoted
    strings into dead statements; they are merged into this docstring.

    NOTE(review): relies on module-level globals ``args``, ``labels``,
    ``idx_train``, ``idx_val`` and ``idx_test``.
    """
    # for orgn-arxiv: nhid = 256, layers = 3, epoch = 500
    gcn = globals()[args.modelname](nfeat=features.shape[1], nhid=256, nclass=labels.max().item() + 1, dropout=0.5,
                                    device=args.device)
    gcn = gcn.to(args.device)
    gcn.fit(features, adj, labels, idx_train, idx_val=idx_val,
            idx_test=idx_test,
            attention=attention, verbose=True, train_iters=81)
    gcn.eval()
    _, output = gcn.test(idx_test=idx_test)

    probs = torch.exp(output[[target_node]])[0]
    print('probs: {}'.format(probs.detach().cpu().numpy()))
    acc_test = accuracy(output[idx_test], labels[idx_test])

    print("Test set results:",
          "accuracy= {:.4f}".format(acc_test.item()))
    return acc_test.item()

def main_autogl(flag):
    """AutoGL counterpart of :func:`main`: run a Nettack targeted attack on
    node 859 using an AutoGNNGuard surrogate, then evaluate on the clean and
    perturbed graphs via :func:`test_autogl`.

    Parameters
    ----------
    flag : bool
        Forwarded as ``attention`` to :func:`test_autogl`; True enables the
        GNNGuard defense.

    NOTE(review): relies on module-level globals ``pyg_data``, ``model_hp``,
    ``features``, ``adj``, ``labels``, ``idx_train`` and ``args``.
    """
    # Setup Surrogate model
    surrogate = AutoGNNGuard(
        num_features=pyg_data.num_node_features,
        num_classes=pyg_data.num_classes,
        device=args.device,
        init=False
    ).from_hyper_parameter(model_hp).model
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201) # change this train_iters to 201: train_iters=201

    # Setup Attack Model
    target_node = 859

    model = Nettack(surrogate, nnodes=adj.shape[0], attack_structure=True, attack_features=False, device=args.device)
    model = model.to(args.device)

    degrees = adj.sum(0).A1
    # How many perturbations to perform. Default: Degree of the node
    n_perturbations = int(degrees[target_node])

    # direct=True performs a direct attack (not an indirect/influencer attack)
    model.attack(features, adj, labels, target_node, n_perturbations, direct=True)
    modified_adj = model.modified_adj
    modified_features = model.modified_features

    print('=== testing GNN on original(clean) graph (AutoGL) ===')
    test_autogl(adj, features, target_node, attention=flag)

    print('=== testing GCN on perturbed graph (AutoGL) ===')
    test_autogl(modified_adj, modified_features, target_node,attention=flag)


def test_autogl(adj, features, target_node, attention=False):
    """Train a fresh AutoGL GNNGuard model on (adj, features) and report
    accuracy.

    Prints the class probabilities of ``target_node`` and the test-set
    accuracy, and returns the accuracy as a float.

    FIX: the original body started with a stray ``''`` expression that
    became the docstring and left the two descriptive triple-quoted strings
    as dead statements; they are merged into this docstring.

    NOTE(review): relies on module-level globals ``pyg_data``, ``model_hp``,
    ``args``, ``labels``, ``idx_train``, ``idx_val`` and ``idx_test``.
    """
    # for orgn-arxiv: nhid = 256, layers = 3, epoch = 500
    gcn = AutoGNNGuard(
        num_features=pyg_data.num_node_features,
        num_classes=pyg_data.num_classes,
        device=args.device,
        init=False
    ).from_hyper_parameter(model_hp).model
    gcn = gcn.to(args.device)
    gcn.fit(features, adj, labels, idx_train, idx_val=idx_val,
            idx_test=idx_test,
            attention=attention, verbose=True, train_iters=81)
    gcn.eval()
    _, output = gcn.test(idx_test=idx_test)

    probs = torch.exp(output[[target_node]])[0]
    print('probs: {}'.format(probs.detach().cpu().numpy()))
    acc_test = accuracy(output[idx_test], labels[idx_test])

    print("Test set results:",
          "accuracy= {:.4f}".format(acc_test.item()))
    return acc_test.item()

# Hyper-parameters handed to AutoGNNGuard.from_hyper_parameter above.
model_hp = {
    "num_layers": 2,
    "hidden": [16],
    "dropout": 0.0,
    "act": "relu"
}

if __name__ == '__main__':
    # Check the performance of GCN under directed attack without defense
    main(flag=False)
    # Use GNNGuard for defense
    main(flag=True)
    # Check the performance of GCN under directed attack without defense (AutoGL)
    main_autogl(flag=False)
    # Use GNNGuard for defense (AutoGL)
    main_autogl(flag=True)

+ 204
- 0
test/performance/robust/model_gnnguard_meta.py View File

@@ -0,0 +1,204 @@
import os
import torch
# import sys
# sys.path.insert(0, '/n/scratch2/xz204/Dr37/lib/python3.7/site-packages')
from deeprobust.graph.targeted_attack import Nettack
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset
from deeprobust.graph.global_attack import Metattack
import argparse
# from deeprobust.graph.defense import * # GCN, GAT, GIN, JK, GCN_attack,accuracy_1
from deeprobust.graph.defense import *
from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg
from scipy.sparse import csr_matrix
from tqdm import tqdm
import scipy
import scipy.sparse as sp
import numpy as np
from sklearn.preprocessing import normalize
import pickle

os.environ["AUTOGL_BACKEND"] = "pyg"

from autogl.module.model.pyg import AutoGNNGuard, AutoGNNGuard_attack
from autogl.solver.utils import set_seed

def seed_torch(seed=1029):
    """Seed every RNG used here (numpy, torch CPU, all CUDA devices) and
    force deterministic cuDNN kernels for reproducible experiments."""
    np.random.seed(seed)
    for apply_seed in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        apply_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # torch.use_deterministic_algorithms(True)
seed_torch(2048)

def main(dataset, adj, features, device):
    """Evaluate the deeprobust GCN (with GNNGuard attention enabled) on the
    clean graph and on the pre-computed Metattack-perturbed graph of
    ``dataset``.

    Parameters
    ----------
    dataset : str
        Dataset name used to fetch the pre-attacked adjacency.
    adj, features
        Clean adjacency and feature matrices (scipy CSR).
    device
        Torch device forwarded to :func:`test`.

    NOTE(review): the surrogate is built on the global ``args.device`` while
    ``device`` is only forwarded to :func:`test` — confirm this mismatch.
    Relies on module-level globals ``labels``, ``idx_train`` and ``args``.
    """
    # from deeprobust.graph.data import PrePtbDataset
    # perturbed_data = PrePtbDataset(root='/tmp/', name=dataset, attack_method='meta', ptb_rate=0.2)
    # modified_adj = perturbed_data.adj

    # Setup Surrogate model
    surrogate = GCN_attack(nfeat=features.shape[1], nclass=labels.max().item()+1, n_edge=adj.nonzero()[0].shape[0], nhid=16, dropout=0, with_relu=False, with_bias=False, device=args.device, )

    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201) # change this train_iters to 201: train_iters=201

    # Setup Attack Model
    # model = Metattack(model=surrogate, nnodes=adj.shape[0], feature_shape=features.shape, attack_structure=True, attack_features=False, device=args.device, lambda_=0.5) # lambda_ is used to weight the two objectives in Eq. (10) in the paper.
    # model = model.to(args.device)

    # """save the mettacked adj"""
    # model.attack(features, adj, labels, idx_train, idx_unlabeled, perturbations, ll_constraint=False)
    # modified_adj = sp.csr_matrix(model.modified_adj.cpu())

    # Load the pre-attacked adjacency instead of re-running Metattack.
    perturbed_data = PrePtbDataset(root='/tmp/', name=dataset, attack_method='meta', ptb_rate=0.2)
    modified_adj = perturbed_data.adj

    # Check the performance of GCN under directed attack without defense
    flag = False
    # print('=== testing GNN on original(clean) graph ===')
    # print("acc_test:",test(adj, features, device, attention=flag))
    # print('=== testing GCN on perturbed graph ===')
    # print("acc_test:",test(modified_adj, features, device, attention=flag))

    # Use GNNGuard for defense
    flag = True
    print('=== testing GNN on original(clean) graph + GNNGuard ===')
    print("acc_test:",test(adj, features, device, attention=flag))
    print('=== testing GCN on perturbed graph + GNNGuard ===')
    print("acc_test:",test(modified_adj, features, device, attention=flag))

def test(adj, features, device, attention):
    """Train the deeprobust GCN classifier five times on (adj, features)
    and return ``{"mean": ..., "std": ...}`` of the test accuracies.

    FIX: removed a leftover debug ``print(1)`` from the training loop.

    NOTE(review): the loop variable ``seed`` is never used to reseed the
    RNGs, so the five runs differ only through torch's evolving global
    random state — confirm whether per-run seeding was intended.
    Relies on module-level globals ``labels``, ``idx_train``, ``idx_val``
    and ``idx_test``.
    """
    accs = []
    for seed in tqdm(range(5)):

        classifier = GCN(nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1, dropout=0.5, device=device)

        classifier = classifier.to(device)

        classifier.fit(features, adj, labels, idx_train, train_iters=201,
                       idx_val=idx_val,
                       idx_test=idx_test,
                       verbose=True, attention=attention) # idx_val=idx_val, idx_test=idx_test , model_name=model_name
        classifier.eval()

        # classifier.fit(features, adj, labels, idx_train, idx_val) # train with validation model picking
        acc_test, output = classifier.test(idx_test)
        accs.append(acc_test.item())
    mean = np.mean(accs)
    std = np.std(accs)
    return {"mean": mean, "std": std}

def main_autogl(dataset, model_hp, adj, features, device):
    """AutoGL counterpart of :func:`main`: evaluate the AutoGL GNNGuard model
    on the clean graph and on the pre-computed Metattack-perturbed graph.

    Parameters
    ----------
    dataset : str
        Dataset name used to fetch the pre-attacked adjacency.
    model_hp : dict
        Hyper-parameters for ``from_hyper_parameter``.
    adj, features
        Clean adjacency and feature matrices (scipy CSR).
    device
        Torch device forwarded to :func:`test_autogl`.

    NOTE(review): relies on module-level globals ``pyg_data``, ``labels``,
    ``idx_train`` and ``args``; the surrogate uses ``args.device`` rather
    than the ``device`` parameter — confirm this mismatch.
    """
    # Setup Surrogate model
    surrogate = AutoGNNGuard_attack(
        num_features=pyg_data.num_node_features,
        num_classes=pyg_data.num_classes,
        device=args.device,
        init=False
    ).from_hyper_parameter(model_hp).model
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201) # change this train_iters to 201: train_iters=201

    # Setup Attack Model
    # model = Metattack(model=surrogate, nnodes=adj.shape[0], feature_shape=features.shape, attack_structure=True, attack_features=False, device=args.device, lambda_=0.5) # lambda_ is used to weight the two objectives in Eq. (10) in the paper.
    # model = model.to(args.device)

    # """save the mettacked adj"""
    # model.attack(features, adj, labels, idx_train, idx_unlabeled, perturbations, ll_constraint=False)
    # modified_adj = sp.csr_matrix(model.modified_adj.cpu())

    # Load the pre-attacked adjacency instead of re-running Metattack.
    perturbed_data = PrePtbDataset(root='/tmp/', name=dataset, attack_method='meta', ptb_rate=0.2)
    modified_adj = perturbed_data.adj

    # Check the performance of GCN under directed attack without defense (AutoGL)
    # flag = False
    # print('=== testing GNN on original(clean) graph (AutoGL) ===')
    # print("acc_test:",test_autogl(adj, features, device, attention=flag))
    # print('=== testing GCN on perturbed graph (AutoGL) ===')
    # print("acc_test:",test_autogl(modified_adj, features, device, attention=flag))

    # Use GNNGuard for defense (AutoGL)
    flag = True
    print('=== testing GNN on original(clean) graph (AutoGL) + GNNGuard ===')
    print("acc_test:",test_autogl(adj, features, device, attention=flag))
    print('=== testing GCN on perturbed graph (AutoGL) + GNNGuard ===')
    print("acc_test:",test_autogl(modified_adj, features, device, attention=flag))

def test_autogl(adj, features, device, attention):
    """Train the AutoGL GNNGuard model five times on (adj, features) and
    return ``{"mean": ..., "std": ...}`` of the test accuracies.

    FIX: the original body started with a stray ``''`` expression that
    became the docstring and left the two descriptive triple-quoted strings
    as dead statements; they are merged into this docstring.

    NOTE(review): the loop variable ``seed`` is never used to reseed the
    RNGs — confirm whether per-run seeding was intended.  Relies on
    module-level globals ``pyg_data``, ``model_hp``, ``args``, ``labels``,
    ``idx_train``, ``idx_val`` and ``idx_test``.
    """
    accs = []
    for seed in tqdm(range(5)):
        gcn = AutoGNNGuard(
            num_features=pyg_data.num_node_features,
            num_classes=pyg_data.num_classes,
            device=args.device,
            init=False
        ).from_hyper_parameter(model_hp).model
        gcn = gcn.to(device)
        gcn.fit(features, adj, labels, idx_train, idx_val=idx_val,
                idx_test=idx_test,
                attention=attention, verbose=True, train_iters=81)
        gcn.eval()
        acc_test, output = gcn.test(idx_test=idx_test)
        accs.append(acc_test.item())
    mean = np.mean(accs)
    std = np.std(accs)
    return {"mean": mean, "std": std}

if __name__ == '__main__':

    # Hyper-parameters handed to the AutoGL models above.
    model_hp = {
        "num_layers": 2,
        "hidden": [16],
        "dropout": 0.5,
        "act": "relu"
    }

    # Command-line options for the GNNGuard / Metattack experiment.
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=14, help='Random seed.')
    # cora and citeseer have binary features; pubmed does not
    parser.add_argument('--dataset', type=str, default='citeseer', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
    parser.add_argument('--ptb_rate', type=float, default=0.2, help='pertubation rate')
    parser.add_argument('--modelname', type=str, default='GCN', choices=['GCN', 'GAT','GIN', 'JK'])
    parser.add_argument('--defensemodel', type=str, default='GCNJaccard', choices=['GCNJaccard', 'RGCN', 'GCNSVD'])
    parser.add_argument('--DPlabel', type=int, default=9, help='0-10')

    args = parser.parse_args()
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('cuda: %s' % args.device)

    # NOTE(review): these assignments override the --dataset / --modelname
    # CLI options — confirm this is intentional.
    args.dataset = "pubmed"
    args.modelname = "GCN"

    # Clean graph plus a PyG-format copy for the AutoGL models.
    data = Dataset(root='/tmp/', name=args.dataset)
    pyg_data = Dpr2Pyg(data).process().to(args.device)
    pyg_data.num_classes = len(set(data.labels))

    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)
    if scipy.sparse.issparse(features)==False:
        features = scipy.sparse.csr_matrix(features)

    # Perturbation budget (used only by the commented-out Metattack path).
    perturbations = int(args.ptb_rate * (adj.sum()//2)) ###
    adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False)

    # to CSR sparse
    adj, features = csr_matrix(adj), csr_matrix(features)

    """add undirected edges, orgn-arxiv is directed graph, we transfer it to undirected closely following
    https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-arxiv
    """
    adj = adj + adj.T
    adj[adj>1] = 1

    # main(args.dataset, adj, features, device=args.device)
    main_autogl(args.dataset, model_hp, adj, features, device=args.device)

Loading…
Cancel
Save