| @@ -9,8 +9,7 @@ from .gcn import AutoGCN | |||
| from .gat import AutoGAT | |||
| from .gin import AutoGIN | |||
| from .robust.gcn_svd import AutoGCNSVD | |||
| from .robust.gnnguard import AutoGNNGuard, GCN4GNNGuard | |||
| from .robust.gnnguard import AutoGNNGuard, AutoGNNGuard_attack, GCN4GNNGuard, GCN4GNNGuard_attack | |||
| __all__ = [ | |||
| "ModelUniversalRegistry", | |||
| @@ -22,7 +21,8 @@ __all__ = [ | |||
| "AutoGCN", | |||
| "AutoGAT", | |||
| "AutoGIN", | |||
| "AutoGCNSVD", | |||
| "AutoGNNGuard", | |||
| "AutoGNNGuard_attack", | |||
| "GCN4GNNGuard", | |||
| "GCN4GNNGuard_attack", | |||
| ] | |||
| @@ -1,279 +0,0 @@ | |||
| from tkinter import TRUE | |||
| import torch | |||
| import torch.optim as optim | |||
| from torch.nn.parameter import Parameter | |||
| from torch.nn.modules.module import Module | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| import typing as _typing | |||
| import math | |||
| from tqdm import tqdm | |||
| import scipy.sparse as sp | |||
| import numpy as np | |||
| from copy import deepcopy | |||
| from numba import njit | |||
| from .. import register_model | |||
| from . import utils | |||
| from ..gcn import GCN | |||
| from ..base import BaseAutoModel | |||
| from .....utils import get_logger | |||
| LOGGER = get_logger("GCNSVDModel") | |||
| ### ========================== ### | |||
class GCN4Robust(GCN):
    """GCN extended with a deeprobust-style ``fit`` training interface.

    Adds the robust training loops (plain / with validation / with early
    stopping) on top of the existing GCN implementation.
    """

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        # Bug fix: ``normalize`` previously defaulted to ``TRUE`` imported
        # from tkinter; the plain builtin ``True`` is what is intended.
        super(GCN4Robust, self).__init__(
            nfeat, nclass, nhid, activation, dropout=dropout,
            add_self_loops=add_self_loops, normalize=normalize)
        # Bug fix: these hyper-parameters were accepted but never stored,
        # while the training helpers below read ``self.lr`` and
        # ``self.weight_decay``.  Following the deeprobust convention, L2
        # regularisation is disabled when ReLU is disabled.
        self.lr = lr
        self.weight_decay = weight_decay if with_relu else 0
        self.with_relu = with_relu
        self.with_bias = with_bias

    def fit(self, features, adj, labels, idx_train, idx_val=None,
            train_iters=200, initialize=True, verbose=False, normalize=True,
            patience=500, **kwargs):
        """Train the model on ``idx_train``.

        When ``idx_val`` is given, the best weights on the validation set are
        restored at the end; if additionally ``patience < train_iters``,
        early stopping is used.
        """
        self.device = self.gc1.weight.device
        if initialize:
            self.initialize()
        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(
                features, adj, labels, device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)
        if normalize:
            # Symmetric normalisation D^{-1/2}(A+I)D^{-1/2}.
            if utils.is_sparse_tensor(adj):
                adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                adj_norm = utils.normalize_adj_tensor(adj)
        else:
            adj_norm = adj
        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels
        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            self._train_with_early_stopping(
                labels, idx_train, idx_val, train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose):
        """Plain training loop without validation-based model selection."""
        self.train()
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))
        self.eval()
        output = self.forward(self.features, self.adj_norm)
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters, verbose):
        """Train and keep the weights that are best on the validation set
        (by either lowest loss or highest accuracy)."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)
        best_loss_val = 100
        best_acc_val = 0
        # Bug fix: initialise ``weights`` so load_state_dict below cannot
        # see an unbound name when no epoch ever improves on the baselines.
        weights = deepcopy(self.state_dict())
        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))
            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output
                weights = deepcopy(self.state_dict())
        if verbose:
            print('=== picking the best model according to the performance on validation ===')
        self.load_state_dict(weights)

    def _train_with_early_stopping(self, labels, idx_train, idx_val,
                                   train_iters, patience, verbose):
        """Train with early stopping: stop once validation loss has not
        improved for ``patience`` consecutive epochs."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)
        early_stopping = patience
        best_loss_val = 100
        # Bug fix: see _train_with_val — guard against an unbound ``weights``.
        weights = deepcopy(self.state_dict())
        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))
            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping
            else:
                patience -= 1
            if i > early_stopping and patience <= 0:
                break
        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(i, best_loss_val))
        self.load_state_dict(weights)
class GCNSVD(GCN4Robust):
    """GCN-SVD: defends against structural perturbations by training on a
    low-rank (rank-``k`` truncated SVD) approximation of the adjacency
    matrix, following the GCN-SVD defense."""

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        super(GCNSVD, self).__init__(nfeat, nclass, nhid, activation, dropout,
                                     lr, weight_decay, with_relu, with_bias,
                                     add_self_loops, normalize)

    def fit(self, features, adj, labels, idx_train, idx_val=None, k=50,
            train_iters=200, initialize=True, verbose=True, **kwargs):
        """Train on the rank-``k`` approximation of ``adj``; ``k`` is kept
        so ``predict`` applies the same truncation to new adjacencies."""
        # Bug fix: ``self.device`` was read before any base-class code had a
        # chance to set it (the parent only sets it inside its own ``fit``,
        # which runs later).  Derive it from the module's own parameters.
        self.device = next(self.parameters()).device
        self.k = k
        modified_adj = self.truncatedSVD(adj, k=k)
        features, modified_adj, labels = utils.to_tensor(
            features, modified_adj, labels, device=self.device)
        self.modified_adj = modified_adj
        self.features = features
        self.labels = labels
        super().fit(features, modified_adj, labels, idx_train, idx_val,
                    train_iters=train_iters, initialize=initialize,
                    verbose=verbose)

    def truncatedSVD(self, data, k=50):
        """Return the dense rank-``k`` SVD reconstruction of ``data``.

        ``data`` may be a scipy sparse matrix (sparse SVD via
        ``scipy.sparse.linalg.svds``) or a dense array (full
        ``numpy.linalg.svd`` truncated to ``k`` components).
        """
        print('=== GCN-SVD: rank={} ==='.format(k))
        if sp.issparse(data):
            data = data.asfptype()  # svds requires a float-typed matrix
            U, S, V = sp.linalg.svds(data, k=k)
            print("rank_after = {}".format(len(S.nonzero()[0])))
            diag_S = np.diag(S)
        else:
            U, S, V = np.linalg.svd(data)
            U = U[:, :k]
            S = S[:k]
            V = V[:k, :]
            print("rank_before = {}".format(len(S.nonzero()[0])))
            diag_S = np.diag(S)
            print("rank_after = {}".format(len(diag_S.nonzero()[0])))
        return U @ diag_S @ V

    def predict(self, features=None, adj=None):
        """Predict log-probabilities.

        With no arguments, reuses the (already truncated and normalised)
        training inputs; otherwise applies the same rank-``self.k``
        truncation and normalisation to the new ``adj`` first.
        """
        self.eval()
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)
        adj = self.truncatedSVD(adj, k=self.k)
        if type(adj) is not torch.Tensor:
            features, adj = utils.to_tensor(features, adj, device=self.device)
        self.features = features
        if utils.is_sparse_tensor(adj):
            self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            self.adj_norm = utils.normalize_adj_tensor(adj)
        return self.forward(self.features, self.adj_norm)
@register_model("gcnsvd-model")
class AutoGCNSVD(BaseAutoModel):
    """AutoModel wrapper around :class:`GCNSVD`.

    Parameters
    ----------
    num_features: ``int``
        The dimension of input features.
    num_classes: ``int``
        The number of classes.
    device: ``torch.device`` or ``str``
        The device the model will run on.
    """

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        **kwargs
    ) -> None:
        super().__init__(num_features, num_classes, device, **kwargs)
        self.hyper_parameter_space = [
            {
                "parameterName": "add_self_loops",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "normalize",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [128, 128, 128],
                "scalingType": "LOG",
                # "hidden" holds one entry per layer except the output layer.
                "cutPara": ("num_layers",),
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
        ]
        self.hyper_parameters = {
            "num_layers": 3,
            "hidden": [128, 64],
            "dropout": 0,
            "act": "relu",
        }

    def _initialize(self):
        # Bug fix: the two trailing booleans were previously passed
        # POSITIONALLY and landed in GCNSVD's ``lr`` and ``weight_decay``
        # slots instead of ``add_self_loops`` / ``normalize``.  Keyword
        # arguments bind them correctly (consistent with
        # AutoGNNGuard_attack._initialize).
        self._model = GCNSVD(
            nfeat=self.input_dimension,
            nclass=self.output_dimension,
            nhid=self.hyper_parameters.get("hidden"),
            activation=self.hyper_parameters.get("act"),
            dropout=self.hyper_parameters.get("dropout", None),
            add_self_loops=bool(self.hyper_parameters.get("add_self_loops", True)),
            normalize=bool(self.hyper_parameters.get("normalize", True)),
        ).to(self.device)
| @@ -39,6 +39,12 @@ class GCN4GNNGuard(GCN): | |||
| self.nfeat = nfeat | |||
| self.hidden_sizes = nhid | |||
| self.drop = drop | |||
| if not with_relu: | |||
| self.weight_decay = 0 | |||
| else: | |||
| self.weight_decay = weight_decay | |||
| self.with_relu = with_relu | |||
| self.with_bias = with_bias | |||
| self.gc1 = GCNConv(nfeat, nhid[0], bias=True,) | |||
| self.gc2 = GCNConv(nhid[0], nclass, bias=True, ) | |||
| @@ -345,6 +351,235 @@ class GCN4GNNGuard(GCN): | |||
| self.adj_norm = utils.normalize_adj_tensor(adj) | |||
| return self.forward(self.features, self.adj_norm) | |||
class GCN4GNNGuard_attack(GCN):
    """GCN variant used for GNNGuard under attack.

    Based on the existing GCN, with the robust (attention-weighted) part
    added on top.  Mirrors the interface of ``GCN4GNNGuard``.
    """

    def __init__(self, nfeat, nclass, nhid, activation, dropout=0.5, lr=0.01,
                 drop=False, weight_decay=5e-4, with_relu=True, with_bias=True,
                 add_self_loops=True, normalize=True):
        super(GCN4GNNGuard_attack, self).__init__(
            nfeat, nclass, nhid, activation, dropout=dropout,
            add_self_loops=add_self_loops, normalize=normalize)
        self.lr = lr
        self.dropout = dropout
        self.nclass = nclass
        self.nfeat = nfeat
        self.hidden_sizes = nhid
        self.drop = drop
        # L2 regularisation is disabled when ReLU is disabled (deeprobust
        # convention); the earlier unconditional assignment was redundant.
        self.weight_decay = weight_decay if with_relu else 0
        self.with_relu = with_relu
        self.with_bias = with_bias
        self.gc1 = GCNConv(nfeat, nhid[0], bias=True)
        self.gc2 = GCNConv(nhid[0], nclass, bias=True)

    def forward(self, x, adj_lil):
        """We don't change the edge_index, just update the edge_weight;
        some edge_weight are regarded as removed if it equals to zero."""
        x = x.to_dense()
        adj = adj_lil.coalesce().indices()
        edge_weight = adj_lil.coalesce().values()
        x = F.relu(self.gc1(x, adj, edge_weight=edge_weight))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, adj, edge_weight=edge_weight)
        return F.log_softmax(x, dim=1)

    def add_loop_sparse(self, adj, fill_value=1):
        """Return ``adj + I`` as a sparse tensor (adds self-loops)."""
        # torch.range is deprecated; arange(n) yields the same 0..n-1 values.
        row = torch.arange(adj.shape[0], dtype=torch.int64)
        i = torch.stack((row, row), dim=0)
        v = torch.ones(adj.shape[0], dtype=torch.float32)
        # torch.sparse.FloatTensor is deprecated in favour of
        # torch.sparse_coo_tensor.
        I_n = torch.sparse_coo_tensor(i, v, adj.shape)
        return adj + I_n.to(self.device)

    def initialize(self):
        """Re-initialise the parameters of both graph convolution layers."""
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def fit(self, features, adj, labels, idx_train, idx_val=None,
            idx_test=None, train_iters=81, att_0=None, attention=False,
            model_name=None, initialize=True, verbose=False, normalize=False,
            patience=510, ):
        '''
        train the gcn model, when idx_val is not None, pick the best model
        according to the validation loss
        '''
        # Infer the device from any registered parameter/buffer.
        sd = self.state_dict()
        for v in sd.values():
            self.device = v.device
            break
        self.sim = None
        self.attention = attention
        if self.attention:
            # NOTE(review): att_coef_1 is not defined in this class — it is
            # presumably inherited/provided elsewhere; confirm before use.
            att_0 = self.att_coef_1(features, adj)
            adj = att_0      # update adj with attention-weighted edges
            self.sim = att_0
        self.idx_test = idx_test
        if initialize:
            self.initialize()
        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(
                features, adj, labels, device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)
        # Normalisation is deliberately disabled here: the norm is conducted
        # inside the GCNConv layers (self.gc1 / self.gc2) themselves.
        normalize = False
        if normalize:
            if utils.is_sparse_tensor(adj):
                adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                adj_norm = utils.normalize_adj_tensor(adj)
        else:
            adj_norm = adj
        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels
        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            self._train_with_early_stopping(
                labels, idx_train, idx_val, train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose):
        """Plain training loop without validation-based model selection."""
        self.train()
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            # weight=None: every training node contributes equally.
            loss_train = F.nll_loss(output[idx_train], labels[idx_train], weight=None)
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))
        self.eval()
        output = self.forward(self.features, self.adj_norm)
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters, verbose):
        """Train and restore the weights that are best on the validation set
        (lowest loss or highest accuracy)."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)
        best_loss_val = 100
        best_acc_val = 0
        # Guard against an unbound ``weights`` if no epoch ever improves.
        weights = deepcopy(self.state_dict())
        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            # Bug fix: ``accuracy`` was called unqualified (NameError);
            # it lives in the ``utils`` module.
            acc_test = utils.accuracy(output[self.idx_test], labels[self.idx_test])
            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])
            if verbose and i % 200 == 0:
                print('Epoch {}, training loss: {}, test acc: {}'.format(
                    i, loss_train.item(), acc_test))
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output
                weights = deepcopy(self.state_dict())
        if verbose:
            print('=== picking the best model according to the performance on validation ===')
        self.load_state_dict(weights)

    def _train_with_early_stopping(self, labels, idx_train, idx_val,
                                   train_iters, patience, verbose):
        """Train with early stopping on validation loss."""
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)
        early_stopping = patience
        best_loss_val = 100
        weights = deepcopy(self.state_dict())
        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            self.eval()
            output = self.forward(self.features, self.adj_norm)
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(i, loss_train.item()))
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping
            else:
                patience -= 1
            if i > early_stopping and patience <= 0:
                break
        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(i, best_loss_val))
        self.load_state_dict(weights)

    def test(self, idx_test):
        """Evaluate on ``idx_test`` using the cached training inputs;
        returns (accuracy, output)."""
        self.eval()
        output = self.predict()  # uses self.features and self.adj_norm from training
        loss_test = F.nll_loss(output[idx_test], self.labels[idx_test])
        acc_test = utils.accuracy(output[idx_test], self.labels[idx_test])
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.4f}".format(acc_test.item()))
        return acc_test, output

    def _set_parameters(self):
        # TODO
        pass

    def predict(self, features=None, adj=None):
        '''By default, inputs are unnormalized data'''
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)
        if type(adj) is not torch.Tensor:
            features, adj = utils.to_tensor(features, adj, device=self.device)
        self.features = features
        if utils.is_sparse_tensor(adj):
            self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            self.adj_norm = utils.normalize_adj_tensor(adj)
        return self.forward(self.features, self.adj_norm)
| @register_model("gnnguard-model") | |||
| class AutoGNNGuard(BaseAutoModel): | |||
| @@ -414,3 +649,72 @@ class AutoGNNGuard(BaseAutoModel): | |||
| add_self_loops = bool(self.hyper_parameters.get("add_self_loops", True)), | |||
| normalize = bool(self.hyper_parameters.get("normalize", True)), | |||
| ).to(self.device) | |||
@register_model("gnnguard-attack-model")
class AutoGNNGuard_attack(BaseAutoModel):
    """AutoModel wrapper around :class:`GCN4GNNGuard_attack`.

    Parameters
    ----------
    num_features: ``int``
        The dimension of input features.
    num_classes: ``int``
        The number of classes.
    device: ``torch.device`` or ``str``
        The device the model will run on.
    """

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        **kwargs
    ) -> None:
        super().__init__(num_features, num_classes, device, **kwargs)
        # Search space: two fixed categorical switches, depth, per-layer
        # widths, dropout rate, and the activation function.
        loops_space = {
            "parameterName": "add_self_loops",
            "type": "CATEGORICAL",
            "feasiblePoints": [1],
        }
        norm_space = {
            "parameterName": "normalize",
            "type": "CATEGORICAL",
            "feasiblePoints": [1],
        }
        depth_space = {
            "parameterName": "num_layers",
            "type": "DISCRETE",
            "feasiblePoints": "2,3,4",
        }
        width_space = {
            "parameterName": "hidden",
            "type": "NUMERICAL_LIST",
            "numericalType": "INTEGER",
            "length": 3,
            "minValue": [8, 8, 8],
            "maxValue": [128, 128, 128],
            "scalingType": "LOG",
            "cutPara": ("num_layers",),
            "cutFunc": lambda x: x[0] - 1,
        }
        dropout_space = {
            "parameterName": "dropout",
            "type": "DOUBLE",
            "maxValue": 0.8,
            "minValue": 0.2,
            "scalingType": "LINEAR",
        }
        act_space = {
            "parameterName": "act",
            "type": "CATEGORICAL",
            "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
        }
        self.hyper_parameter_space = [
            loops_space,
            norm_space,
            depth_space,
            width_space,
            dropout_space,
            act_space,
        ]
        self.hyper_parameters = {
            "num_layers": 3,
            "hidden": [128, 64],
            "dropout": 0,
            "act": "relu",
        }

    def _initialize(self):
        """Build the underlying robust GCN from the current hyper-parameters."""
        hp = self.hyper_parameters
        model = GCN4GNNGuard_attack(
            nfeat=self.input_dimension,
            nclass=self.output_dimension,
            nhid=hp.get("hidden"),
            activation=hp.get("act"),
            dropout=hp.get("dropout", None),
            add_self_loops=bool(hp.get("add_self_loops", True)),
            normalize=bool(hp.get("normalize", True)),
        )
        self._model = model.to(self.device)
| @@ -0,0 +1,196 @@ | |||
| import torch | |||
| from torch.nn import Parameter | |||
| from torch_scatter import scatter_add | |||
| from torch_geometric.nn.conv import MessagePassing | |||
| from torch_geometric.utils import add_remaining_self_loops, to_undirected | |||
| from ..inits import glorot, zeros | |||
# TorchScript overload stub: dense ``edge_index`` (Tensor) signature.
@torch.jit._overload
def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
             add_self_loops=True, dtype=None):
    # type: (Tensor, OptTensor, Optional[int], bool, bool, Optional[int]) -> PairTensor  # noqa
    pass
# TorchScript overload stub: SparseTensor adjacency signature.
@torch.jit._overload
def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
             add_self_loops=True, dtype=None):
    # type: (SparseTensor, OptTensor, Optional[int], bool, bool, Optional[int]) -> SparseTensor  # noqa
    pass
def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
             add_self_loops=True, dtype=None):
    """Apply the symmetric GCN normalisation D^{-1/2}(A+I)D^{-1/2}.

    Accepts either a ``SparseTensor`` adjacency (returns the normalised
    adjacency) or a dense ``edge_index`` (returns ``(edge_index,
    normalised edge weights)``).
    """
    # "improved" GCN uses A + 2I instead of A + I.
    fill_value = 2. if improved else 1.
    if isinstance(edge_index, SparseTensor):
        # NOTE(review): ``SparseTensor``, ``fill_diag``, ``mul`` and a sparse
        # ``sum`` are not imported in the visible import block — this branch
        # appears to require ``torch_sparse`` (as in the upstream
        # torch_geometric implementation); ``sum(adj_t, dim=1)`` would call
        # the builtin ``sum`` otherwise.  Confirm the imports.
        adj_t = edge_index
        if not adj_t.has_value():
            adj_t = adj_t.fill_value(1., dtype=dtype)
        if add_self_loops:
            adj_t = fill_diag(adj_t, fill_value)
        deg = sum(adj_t, dim=1)
        deg_inv_sqrt = deg.pow_(-0.5)
        deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0.)
        adj_t = mul(adj_t, deg_inv_sqrt.view(-1, 1))
        adj_t = mul(adj_t, deg_inv_sqrt.view(1, -1))
        return adj_t
    else:
        # NOTE(review): ``maybe_num_nodes`` is also not visibly imported
        # (upstream: torch_geometric.utils.num_nodes).
        num_nodes = maybe_num_nodes(edge_index, num_nodes)
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                     device=edge_index.device)
        if add_self_loops:
            edge_index, tmp_edge_weight = add_remaining_self_loops(
                edge_index, edge_weight, fill_value, num_nodes)
            assert tmp_edge_weight is not None
            edge_weight = tmp_edge_weight
        row, col = edge_index[0], edge_index[1]
        # Weighted degree per target node, then symmetric normalisation.
        deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow_(-0.5)
        deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0)
        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
class GCNConv(MessagePassing):
    r"""The graph convolutional operator from the `"Semi-supervised
    Classification with Graph Convolutional Networks"
    <https://arxiv.org/abs/1609.02907>`_ paper

    .. math::
        \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
        \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

    where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
    adjacency matrix with inserted self-loops and
    :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample.
        improved (bool, optional): If set to :obj:`True`, the layer computes
            :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`.
            (default: :obj:`False`)
        cached (bool, optional): If set to :obj:`True`, the layer will cache
            the computation of :math:`\mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
            \mathbf{\hat{D}}^{-1/2}` on first execution, and will use the
            cached version for further executions.
            This parameter should only be set to :obj:`True` in transductive
            learning scenarios. (default: :obj:`False`)
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
        normalize (bool, optional): Whether to add self-loops and apply
            symmetric normalization. (default: :obj:`True`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    def __init__(self, in_channels, out_channels, improved=False, cached=False,
                 bias=True, add_self_loops: bool = True, normalize=True, **kwargs):
        super(GCNConv, self).__init__(aggr='add', **kwargs)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.normalize = normalize
        # NOTE(review): ``add_self_loops`` is accepted but unused here —
        # self-loop handling happens in the caller (att_coef) per the
        # comments in ``norm`` below; stored for transparency.
        self.add_self_loops = add_self_loops
        self.weight = Parameter(torch.Tensor(in_channels, out_channels))
        if bias:
            # Bug fix: ``torch.tensor(out_channels, ...)`` created a 0-dim
            # tensor holding the VALUE out_channels, not a length-out_channels
            # parameter.  ``torch.Tensor(out_channels)`` allocates the proper
            # shape; reset_parameters() zero-initialises it.
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Glorot-init the weight, zero the bias, and drop any cached norm."""
        glorot(self.weight)
        zeros(self.bias)
        self.cached_result = None
        self.cached_num_edges = None

    # GNNGuard version of the normalisation: self-loops are NOT re-added
    # here because the att_coef function has already inserted them.
    @staticmethod
    def norm(edge_index, num_nodes, edge_weight=None, improved=False,
             dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.to(edge_index.device)
        fill_value = 1 if not improved else 2
        row, col = edge_index  # for GNNGuard
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight=None):
        """Linear transform, (optionally cached) normalisation, then propagate."""
        x = torch.matmul(x, self.weight)
        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}. Please '
                    'disable the caching behavior of this layer by removing '
                    'the `cached=True` argument in its constructor.'.format(
                        self.cached_num_edges, edge_index.size(1)))
        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            if self.normalize:
                edge_index, norm = self.norm(edge_index, x.size(0),
                                             edge_weight, self.improved,
                                             x.dtype)
            else:
                norm = edge_weight
            self.cached_result = edge_index, norm
        edge_index, norm = self.cached_result
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        # Scale each neighbour message by its normalised edge weight.
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        # Add the learned bias (if any) after aggregation.
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
| @@ -0,0 +1,204 @@ | |||
| ========================== | |||
| Link Prediction Model | |||
| ========================== | |||
| Building Link Prediction Modules | |||
| ===================================== | |||
| In AutoGL, we support three models for link prediction: ``gcn``, ``gat`` and ``sage``. | |||
| AutoLinkPredictor | |||
| >>>>>>> | |||
| Used to automatically solve the link prediction problems. For example, | |||
| .. code-block:: python | |||
| class AutoGCN(BaseAutoModel): | |||
| r""" | |||
| AutoGCN. | |||
| The model used in this automodel is GCN, i.e., the graph convolutional network from the | |||
| `"Semi-supervised Classification with Graph Convolutional | |||
| Networks" <https://arxiv.org/abs/1609.02907>`_ paper. The layer is | |||
| .. math:: | |||
| \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} | |||
| \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta}, | |||
| where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the | |||
| adjacency matrix with inserted self-loops and | |||
| :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix. | |||
| Parameters | |||
| ---------- | |||
| num_features: ``int`` | |||
| The dimension of features. | |||
| num_classes: ``int`` | |||
| The number of classes. | |||
| device: ``torch.device`` or ``str`` | |||
| The device where model will be running on. | |||
| init: `bool`. | |||
| If True(False), the model will (not) be initialized. | |||
| """ | |||
| def __init__( | |||
| self, | |||
| num_features: int = ..., | |||
| num_classes: int = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| **kwargs | |||
| ) -> None: | |||
| super().__init__(num_features, num_classes, device, **kwargs) | |||
| self.hyper_parameter_space = [ | |||
| { | |||
| "parameterName": "add_self_loops", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": [1], | |||
| }, | |||
| { | |||
| "parameterName": "normalize", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": [1], | |||
| }, | |||
| { | |||
| "parameterName": "num_layers", | |||
| "type": "DISCRETE", | |||
| "feasiblePoints": "2,3,4", | |||
| }, | |||
| { | |||
| "parameterName": "hidden", | |||
| "type": "NUMERICAL_LIST", | |||
| "numericalType": "INTEGER", | |||
| "length": 3, | |||
| "minValue": [8, 8, 8], | |||
| "maxValue": [128, 128, 128], | |||
| "scalingType": "LOG", | |||
| "cutPara": ("num_layers",), | |||
| "cutFunc": lambda x: x[0] - 1, | |||
| }, | |||
| { | |||
| "parameterName": "dropout", | |||
| "type": "DOUBLE", | |||
| "maxValue": 0.8, | |||
| "minValue": 0.2, | |||
| "scalingType": "LINEAR", | |||
| }, | |||
| { | |||
| "parameterName": "act", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], | |||
| }, | |||
| ] | |||
| self.hyper_parameters = { | |||
| "num_layers": 3, | |||
| "hidden": [128, 64], | |||
| "dropout": 0, | |||
| "act": "relu", | |||
| } | |||
| def _initialize(self): | |||
| self._model = GCN( | |||
| self.input_dimension, | |||
| self.output_dimension, | |||
| self.hyper_parameters.get("hidden"), | |||
| self.hyper_parameters.get("act"), | |||
| self.hyper_parameters.get("dropout", None), | |||
| bool(self.hyper_parameters.get("add_self_loops", True)), | |||
| bool(self.hyper_parameters.get("normalize", True)), | |||
| ).to(self.device) | |||
You could define your own ``LinkPrediction`` model by using the ``from_hyper_parameter`` function and specifying the hyperparameters.
| .. code-block:: python | |||
| # pyg version | |||
| from autogl.module.model.pyg import AutoLinkPredictor | |||
| # from autogl.module.model.dgl import AutoLinkPredictor # dgl version | |||
| model = AutoLinkPredictor( | |||
| feature_module="NormalizeFeatures", | |||
| graph_models=(args.model, ), | |||
| hpo_module="random", | |||
| ensemble_module=None, | |||
| max_evals=1, | |||
| trainer_hp_space=fixed(**{ | |||
| "max_epoch": 100, | |||
| "early_stopping_round": 101, | |||
| "lr": 1e-2, | |||
| "weight_decay": 0.0, | |||
| }), | |||
| model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}] | |||
| ).model | |||
| Then you can train the model for 100 epochs. | |||
| .. code-block:: python | |||
| import torch.nn.functional as F | |||
| # Define the loss optimizer. | |||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.01) | |||
| # Training | |||
| for epoch in range(100): | |||
| model.train() | |||
| optimizer.zero_grad() | |||
| z = model.lp_encode(splitted[0]) | |||
| link_logits = model.lp_decode( | |||
| z, torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges()) | |||
| ) | |||
| link_labels = get_link_labels( | |||
| torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges()) | |||
| ) | |||
| loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) | |||
| loss.backward() | |||
| optimizer.step() | |||
| auc_val = evaluate(model, splitted, "val") | |||
| if auc_val > best_auc: | |||
| best_auc = auc_val | |||
| best_parameters = pickle.dumps(model.state_dict()) | |||
| Finally, evaluate the trained model. | |||
| .. code-block:: python | |||
| model.load_state_dict(pickle.loads(best_parameters)) | |||
| evaluate(model, splitted, "test") | |||
| Automatic Search for Link Prediction Tasks | |||
| =============================================== | |||
| In AutoGL, we also provide a high-level API Solver to control the overall pipeline. | |||
| We encapsulated the training process in the Building GNN Modules part for link prediction tasks | |||
| in the solver ``AutoLinkPredictor`` that supports automatic hyperparametric optimization | |||
| as well as feature engineering and ensemble. In this part, we will show you how to use | |||
| ``AutoLinkPredictor``. | |||
| .. code-block:: python | |||
| solver = AutoLinkPredictor( | |||
| feature_module="NormalizeFeatures", | |||
| graph_models=(args.model, ), | |||
| hpo_module="random", | |||
| ensemble_module=None, | |||
| max_evals=1, | |||
| trainer_hp_space=fixed(**{ | |||
| "max_epoch": 100, | |||
| "early_stopping_round": 101, | |||
| "lr": 1e-2, | |||
| "weight_decay": 0.0, | |||
| }), | |||
| model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}] | |||
| ) | |||
| solver.fit(dataset, train_split=0.85, val_split=0.05, evaluation_method=["auc"], seed=seed) | |||
| pre = solver.evaluate(metric="auc") | |||
| @@ -0,0 +1,212 @@ | |||
.. _homo_cn:
| ========================== | |||
| 链接预测模型 | |||
| ========================== | |||
| 构建链接预测模块 | |||
| ===================================== | |||
| .. In AutoGL, we support two graph classification models, ``gin`` and ``topk``. | |||
| 在AutoGL中,我们支持三种链接预测模型: ``gcn``, ``gat`` and ``sage`` 。 | |||
| AutoLinkPredictor | |||
| >>>>>>> | |||
| .. Used to automatically solve the link prediction problems. For example, | |||
| 用于自动解决链路预测问题。例如, | |||
| .. code-block:: python | |||
| class AutoGCN(BaseAutoModel): | |||
| r""" | |||
| AutoGCN. | |||
| The model used in this automodel is GCN, i.e., the graph convolutional network from the | |||
| `"Semi-supervised Classification with Graph Convolutional | |||
| Networks" <https://arxiv.org/abs/1609.02907>`_ paper. The layer is | |||
| .. math:: | |||
| \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} | |||
| \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta}, | |||
| where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the | |||
| adjacency matrix with inserted self-loops and | |||
| :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix. | |||
| Parameters | |||
| ---------- | |||
| num_features: ``int`` | |||
| The dimension of features. | |||
| num_classes: ``int`` | |||
| The number of classes. | |||
| device: ``torch.device`` or ``str`` | |||
| The device where model will be running on. | |||
| init: `bool`. | |||
| If True(False), the model will (not) be initialized. | |||
| """ | |||
| def __init__( | |||
| self, | |||
| num_features: int = ..., | |||
| num_classes: int = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| **kwargs | |||
| ) -> None: | |||
| super().__init__(num_features, num_classes, device, **kwargs) | |||
| self.hyper_parameter_space = [ | |||
| { | |||
| "parameterName": "add_self_loops", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": [1], | |||
| }, | |||
| { | |||
| "parameterName": "normalize", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": [1], | |||
| }, | |||
| { | |||
| "parameterName": "num_layers", | |||
| "type": "DISCRETE", | |||
| "feasiblePoints": "2,3,4", | |||
| }, | |||
| { | |||
| "parameterName": "hidden", | |||
| "type": "NUMERICAL_LIST", | |||
| "numericalType": "INTEGER", | |||
| "length": 3, | |||
| "minValue": [8, 8, 8], | |||
| "maxValue": [128, 128, 128], | |||
| "scalingType": "LOG", | |||
| "cutPara": ("num_layers",), | |||
| "cutFunc": lambda x: x[0] - 1, | |||
| }, | |||
| { | |||
| "parameterName": "dropout", | |||
| "type": "DOUBLE", | |||
| "maxValue": 0.8, | |||
| "minValue": 0.2, | |||
| "scalingType": "LINEAR", | |||
| }, | |||
| { | |||
| "parameterName": "act", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], | |||
| }, | |||
| ] | |||
| self.hyper_parameters = { | |||
| "num_layers": 3, | |||
| "hidden": [128, 64], | |||
| "dropout": 0, | |||
| "act": "relu", | |||
| } | |||
| def _initialize(self): | |||
| self._model = GCN( | |||
| self.input_dimension, | |||
| self.output_dimension, | |||
| self.hyper_parameters.get("hidden"), | |||
| self.hyper_parameters.get("act"), | |||
| self.hyper_parameters.get("dropout", None), | |||
| bool(self.hyper_parameters.get("add_self_loops", True)), | |||
| bool(self.hyper_parameters.get("normalize", True)), | |||
| ).to(self.device) | |||
.. You could define your own ``LinkPrediction`` model by using the ``from_hyper_parameter`` function and specifying the hyperparameters.
你可以通过使用 ``from_hyper_parameter`` 函数定义你自己的 ``LinkPrediction`` 模型,并对其指定超参数。
| .. code-block:: python | |||
| # pyg version | |||
| from autogl.module.model.pyg import AutoLinkPredictor | |||
| # from autogl.module.model.dgl import AutoLinkPredictor # dgl version | |||
| model = AutoLinkPredictor( | |||
| feature_module="NormalizeFeatures", | |||
| graph_models=(args.model, ), | |||
| hpo_module="random", | |||
| ensemble_module=None, | |||
| max_evals=1, | |||
| trainer_hp_space=fixed(**{ | |||
| "max_epoch": 100, | |||
| "early_stopping_round": 101, | |||
| "lr": 1e-2, | |||
| "weight_decay": 0.0, | |||
| }), | |||
| model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}] | |||
| ).model | |||
| .. Then you can train the model for 100 epochs. | |||
| 然后你可以对模型进行100次的训练: | |||
| .. code-block:: python | |||
| import torch.nn.functional as F | |||
| # Define the loss optimizer. | |||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.01) | |||
| # Training | |||
| for epoch in range(100): | |||
| model.train() | |||
| optimizer.zero_grad() | |||
| z = model.lp_encode(splitted[0]) | |||
| link_logits = model.lp_decode( | |||
| z, torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges()) | |||
| ) | |||
| link_labels = get_link_labels( | |||
| torch.stack(splitted[1].edges()), torch.stack(splitted[2].edges()) | |||
| ) | |||
| loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) | |||
| loss.backward() | |||
| optimizer.step() | |||
| auc_val = evaluate(model, splitted, "val") | |||
| if auc_val > best_auc: | |||
| best_auc = auc_val | |||
| best_parameters = pickle.dumps(model.state_dict()) | |||
| .. Finally, evaluate the trained model. | |||
| 最后,你可以评估该模型: | |||
| .. code-block:: python | |||
| model.load_state_dict(pickle.loads(best_parameters)) | |||
| evaluate(model, splitted, "test") | |||
| .. Automatic Search for Link Prediction Tasks | |||
| 链接预测任务的自动搜索 | |||
| =============================================== | |||
| .. In AutoGL, we also provide a high-level API Solver to control the overall pipeline. We encapsulated the training process in the Building GNN Modules part for link prediction tasks in the solver ``AutoLinkPredictor`` that supports automatic hyperparametric optimization as well as feature engineering and ensemble. In this part, we will show you how to use ``AutoLinkPredictor``. | |||
在AutoGL中,我们还提供了一个高级的API求解器来控制整个流水线。我们将构建图神经网络模块部分的训练过程封装在求解器 ``AutoLinkPredictor`` 中以用于链接预测任务,它支持自动超参数优化,特征工程及集成。
| 在这一部分,我们提供了一个例子来指导如何使用 ``AutoLinkPredictor`` : | |||
| .. code-block:: python | |||
| solver = AutoLinkPredictor( | |||
| feature_module="NormalizeFeatures", | |||
| graph_models=(args.model, ), | |||
| hpo_module="random", | |||
| ensemble_module=None, | |||
| max_evals=1, | |||
| trainer_hp_space=fixed(**{ | |||
| "max_epoch": 100, | |||
| "early_stopping_round": 101, | |||
| "lr": 1e-2, | |||
| "weight_decay": 0.0, | |||
| }), | |||
| model_hp_spaces=[{"encoder": fixed(**model_hp), "decoder": fixed(**decoder_hp)}] | |||
| ) | |||
| solver.fit(dataset, train_split=0.85, val_split=0.05, evaluation_method=["auc"], seed=seed) | |||
| pre = solver.evaluate(metric="auc") | |||
| @@ -1,130 +0,0 @@ | |||
| import os | |||
| import pickle | |||
| from torchaudio import datasets | |||
| from tqdm import tqdm | |||
| import torch | |||
| import numpy as np | |||
| import torch.nn.functional as F | |||
| import torch.optim as optim | |||
| from deeprobust.graph.defense import GCNSVD | |||
| from deeprobust.graph.utils import * | |||
| from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg | |||
| import argparse | |||
| os.environ["AUTOGL_BACKEND"] = "pyg" | |||
| from autogl.module.model.pyg import AutoGCNSVD | |||
| from autogl.solver.utils import set_seed | |||
| def test(model, data, mask): | |||
| model.eval() | |||
| if hasattr(model, 'cls_forward'): | |||
| out = model.cls_forward(data)[mask] | |||
| else: | |||
| out = model(data)[mask] | |||
| pred = out.max(1)[1] | |||
| acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item() | |||
| return acc | |||
def train(model, data, args):
    """Train *model* for ``args.epoch`` epochs with validation-based model
    selection, then restore and return the best-on-validation weights.

    Parameters
    ----------
    model : classifier exposing ``forward(data)`` (or ``cls_forward``),
        ``state_dict``/``load_state_dict`` and ``parameters``.
    data : graph data object with ``x``, ``y``, ``train_mask``, ``val_mask``.
    args : namespace providing ``lr``, ``weight_decay`` and ``epoch``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    print(data)
    # Fix: snapshot the initial weights as *pickled bytes* so the
    # pickle.loads() below always succeeds, even when no epoch ever improves
    # on best_acc (e.g. args.epoch == 0 or all-zero validation accuracy).
    # Previously this held a raw OrderedDict and pickle.loads() would raise.
    parameters = pickle.dumps(model.state_dict())
    best_acc = 0.
    for epoch in range(args.epoch):
        model.train()
        optimizer.zero_grad()
        if hasattr(model, 'cls_forward'):
            output = model.cls_forward(data)
        else:
            output = model(data)
        loss = F.nll_loss(output[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        # Model selection: keep the weights with the best validation accuracy.
        val_acc = test(model, data, data.val_mask)
        if val_acc > best_acc:
            best_acc = val_acc
            parameters = pickle.dumps(model.state_dict())
    # Restore the best checkpoint before handing the model back.
    model.load_state_dict(pickle.loads(parameters))
    return model
if __name__ == '__main__':
    # Fix: a throw-away ArgumentParser() used to be created here and then
    # immediately replaced — build the parser exactly once.
    parser = argparse.ArgumentParser('pyg model')
    parser.add_argument('--seed', type=int, default=15, help='Random seed.')
    parser.add_argument('--dataset', type=str, default='cora', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
    # Fix: "pertubation" -> "perturbation" in the user-facing help text.
    parser.add_argument('--ptb_rate', type=float, default=0.05, help='perturbation rate')
    parser.add_argument('--k', type=int, default=15, help='Truncated Components.')
    parser.add_argument('--repeat', type=int, default=1)
    parser.add_argument('--device', type=str, default='cuda:0')
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--epoch', type=int, default=200)
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available()
    print('cuda: %s' % args.cuda)
    # make sure you use the same data splits as you generated attacks
    np.random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    # Here the random seed is to split the train/val/test data,
    # we need to set the random seed to be the same as that when you generate the perturbed graph
    # data = Dataset(root='/tmp/', name=args.dataset, setting='nettack', seed=15)
    # Or we can just use setting='prognn' to get the splits
    data = Dataset(root='/tmp/', name=args.dataset, setting='prognn')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    pyg_data = Dpr2Pyg(data).process().to(args.device)
    pyg_data.num_classes = len(set(labels))
    # load pre-attacked graph
    perturbed_data = PrePtbDataset(root='/tmp/',
                                   name=args.dataset,
                                   attack_method='meta',
                                   ptb_rate=args.ptb_rate)
    perturbed_adj = perturbed_data.adj
    print('=== testing GCN-SVD on perturbed graph (AutoGL) ===')
    # NOTE(review): pyg_data below is built from the *clean* graph; the
    # perturbed adjacency loaded above is only fed to the deeprobust
    # baseline — confirm the AutoGL branch is meant to train on clean data.
    model_hp = {
        "num_layers": 2,
        "hidden": [16],
        "dropout": 0.5,
        "act": "relu"
    }
    accs = []
    for seed in tqdm(range(args.repeat)):
        set_seed(seed)
        model = AutoGCNSVD(
            num_features=pyg_data.num_node_features,
            num_classes=pyg_data.num_classes,
            device=args.device,
            init=False
        ).from_hyper_parameter(model_hp).model
        model.to(args.device)
        train(model, pyg_data, args)
        acc = test(model, pyg_data, pyg_data.test_mask)
        accs.append(acc)
    print('{:.4f} ~ {:.4f}'.format(np.mean(accs), np.std(accs)))
    print('=== testing GCN-SVD on perturbed graph (deeprobust)===')
    model = GCNSVD(nfeat=features.shape[1], nclass=labels.max()+1,
                   nhid=16, device=args.device)
    model = model.to(args.device)
    # Test set results: loss= 0.8541 accuracy= 0.7067
    model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=args.k, verbose=True)
    model.eval()
    output = model.test(idx_test)
    print(output)
| @@ -1,192 +0,0 @@ | |||
| import os | |||
| import torch | |||
| # import sys | |||
| # sys.path.insert(0, '/n/scratch2/xz204/Dr37/lib/python3.7/site-packages') | |||
| from deeprobust.graph.targeted_attack import Nettack | |||
| from deeprobust.graph.utils import * | |||
| from deeprobust.graph.data import Dataset | |||
| import argparse | |||
| # from deeprobust.graph.defense import * # GCN, GAT, GIN, JK, GCN_attack,accuracy_1 | |||
| from deeprobust.graph.defense import * | |||
| from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg | |||
| from tqdm import tqdm | |||
| import scipy | |||
| import numpy as np | |||
| from sklearn.preprocessing import normalize | |||
| import pickle | |||
| os.environ["AUTOGL_BACKEND"] = "pyg" | |||
| from autogl.module.model.pyg import AutoGNNGuard | |||
| from autogl.solver.utils import set_seed | |||
# Command-line configuration for the Nettack targeted-attack experiment.
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=14, help='Random seed.')
# cora and citeseer are binary, pubmed has not binary features
parser.add_argument('--dataset', type=str, default='citeseer', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
parser.add_argument('--ptb_rate', type=float, default=0.05, help='pertubation rate')
parser.add_argument('--modelname', type=str, default='GCN', choices=['GCN', 'GAT','GIN', 'JK'])
parser.add_argument('--defensemodel', type=str, default='GCNJaccard', choices=['GCNJaccard', 'RGCN', 'GCNSVD'])
parser.add_argument('--DPlabel', type=int, default=9, help='0-10')
args = parser.parse_args()
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
# Use the first GPU when available, otherwise fall back to CPU.
args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def seed_torch(seed=1029):
    """Seed every RNG source (NumPy, CPU torch, all CUDA devices) and force
    deterministic cuDNN kernels so repeated runs are reproducible."""
    for seeder in (np.random.seed, torch.manual_seed,
                   torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # torch.use_deterministic_algorithms(True)
# Make the whole pipeline reproducible before any data loading or training.
seed_torch(args.seed)
# NOTE(review): the CLI --dataset/--modelname values are overridden here with
# hard-coded "cora"/"GCN" — confirm this is intentional.
args.dataset = "cora"
args.modelname = "GCN"
data = Dataset(root='/tmp/', name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
# Downstream code indexes features as a sparse matrix; densify-proof it.
if scipy.sparse.issparse(features)==False:
    features = scipy.sparse.csr_matrix(features)
"""set the number of training/val/testing nodes"""
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
"""add undirected edges, orgn-arxiv is directed graph, we transfer it to undirected closely following
https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-arxiv
"""
# Symmetrize the adjacency, then clip duplicated edges back to weight 1.
adj = adj + adj.T
adj[adj>1] = 1
# PyG copy of the same dataset for the AutoGL models.
pyg_data = Dpr2Pyg(data).process().to(args.device)
pyg_data.num_classes = len(set(labels))
def main(flag):
    """Attack node 859 with Nettack using a GCN surrogate, then report test
    accuracy on the clean and on the perturbed graph.

    ``flag`` toggles the GNNGuard attention defense inside ``test``.
    """
    # Surrogate model that Nettack queries while choosing perturbations.
    surrogate = GCN_attack(
        nfeat=features.shape[1],
        nclass=labels.max().item() + 1,
        n_edge=adj.nonzero()[0].shape[0],
        nhid=16,
        dropout=0,
        with_relu=False,
        with_bias=False,
        device=args.device,
    ).to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201)
    # Direct (non-influencer) structure attack on a fixed target node.
    target_node = 859
    attacker = Nettack(surrogate, nnodes=adj.shape[0], attack_structure=True,
                       attack_features=False, device=args.device).to(args.device)
    # Perturbation budget: one edge flip per degree of the target node.
    n_perturbations = int(adj.sum(0).A1[target_node])
    attacker.attack(features, adj, labels, target_node, n_perturbations, direct=True)
    print('=== testing GNN on original(clean) graph ===')
    test(adj, features, target_node, attention=flag)
    print('=== testing GCN on perturbed graph ===')
    test(attacker.modified_adj, attacker.modified_features, target_node, attention=flag)
| def test(adj, features, target_node, attention=False): | |||
| '' | |||
| """test on GCN """ | |||
| """model_name could be 'GCN', 'GAT', 'GIN','JK' """ | |||
| # for orgn-arxiv: nhid =256, layers =3, epoch =500 | |||
| gcn = globals()[args.modelname](nfeat=features.shape[1], nhid=256, nclass=labels.max().item() + 1, dropout=0.5, | |||
| device=args.device) | |||
| gcn = gcn.to(args.device) | |||
| gcn.fit(features, adj, labels, idx_train, idx_val=idx_val, | |||
| idx_test=idx_test, | |||
| attention=attention, verbose=True, train_iters=81) | |||
| gcn.eval() | |||
| _, output = gcn.test(idx_test=idx_test) | |||
| probs = torch.exp(output[[target_node]])[0] | |||
| print('probs: {}'.format(probs.detach().cpu().numpy())) | |||
| acc_test = accuracy(output[idx_test], labels[idx_test]) | |||
| print("Test set results:", | |||
| "accuracy= {:.4f}".format(acc_test.item())) | |||
| return acc_test.item() | |||
def main_autogl(flag):
    """AutoGL counterpart of ``main``: run the Nettack attack with an
    AutoGNNGuard-built surrogate and evaluate on clean/perturbed graphs."""
    # Surrogate model built through the AutoGL wrapper.
    surrogate = AutoGNNGuard(
        num_features=pyg_data.num_node_features,
        num_classes=pyg_data.num_classes,
        device=args.device,
        init=False
    ).from_hyper_parameter(model_hp).model
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201)
    # Direct structure attack on the same fixed target node as ``main``.
    target_node = 859
    attacker = Nettack(surrogate, nnodes=adj.shape[0], attack_structure=True,
                       attack_features=False, device=args.device)
    attacker = attacker.to(args.device)
    # Perturbation budget equals the degree of the target node.
    n_perturbations = int(adj.sum(0).A1[target_node])
    attacker.attack(features, adj, labels, target_node, n_perturbations, direct=True)
    print('=== testing GNN on original(clean) graph (AutoGL) ===')
    test_autogl(adj, features, target_node, attention=flag)
    print('=== testing GCN on perturbed graph (AutoGL) ===')
    test_autogl(attacker.modified_adj, attacker.modified_features, target_node, attention=flag)
| def test_autogl(adj, features, target_node, attention=False): | |||
| '' | |||
| """test on GCN """ | |||
| """model_name could be 'GCN', 'GAT', 'GIN','JK' """ | |||
| # for orgn-arxiv: nhid =256, layers =3, epoch =500 | |||
| gcn = AutoGNNGuard( | |||
| num_features=pyg_data.num_node_features, | |||
| num_classes=pyg_data.num_classes, | |||
| device=args.device, | |||
| init=False | |||
| ).from_hyper_parameter(model_hp).model | |||
| gcn = gcn.to(args.device) | |||
| gcn.fit(features, adj, labels, idx_train, idx_val=idx_val, | |||
| idx_test=idx_test, | |||
| attention=attention, verbose=True, train_iters=81) | |||
| gcn.eval() | |||
| _, output = gcn.test(idx_test=idx_test) | |||
| probs = torch.exp(output[[target_node]])[0] | |||
| print('probs: {}'.format(probs.detach().cpu().numpy())) | |||
| acc_test = accuracy(output[idx_test], labels[idx_test]) | |||
| print("Test set results:", | |||
| "accuracy= {:.4f}".format(acc_test.item())) | |||
| return acc_test.item() | |||
# Hyper-parameters handed to AutoGNNGuard.from_hyper_parameter:
# a 2-layer GCN with one 16-unit hidden layer and no dropout.
model_hp = {
    "num_layers": 2,
    "hidden": [16],
    "dropout": 0.0,
    "act": "relu"
}
if __name__ == '__main__':
    # Run every attack/defense configuration once: the deeprobust baseline
    # first, then the AutoGL wrappers, each with GNNGuard off and then on.
    # Check the performance of GCN under directed attack without defense
    main(flag=False)
    # Use GNNGuard for defense
    main(flag=True)
    # Check the performance of GCN under directed attack without defense(AutoGL)
    main_autogl(flag=False)
    # Use GNNGuard for defense(AutoGL)
    main_autogl(flag=True)
| @@ -0,0 +1,204 @@ | |||
| import os | |||
| import torch | |||
| # import sys | |||
| # sys.path.insert(0, '/n/scratch2/xz204/Dr37/lib/python3.7/site-packages') | |||
| from deeprobust.graph.targeted_attack import Nettack | |||
| from deeprobust.graph.utils import * | |||
| from deeprobust.graph.data import Dataset | |||
| from deeprobust.graph.global_attack import Metattack | |||
| import argparse | |||
| # from deeprobust.graph.defense import * # GCN, GAT, GIN, JK, GCN_attack,accuracy_1 | |||
| from deeprobust.graph.defense import * | |||
| from deeprobust.graph.data import Dataset, PrePtbDataset, Dpr2Pyg, AmazonPyg | |||
| from scipy.sparse import csr_matrix | |||
| from tqdm import tqdm | |||
| import scipy | |||
| import scipy.sparse as sp | |||
| import numpy as np | |||
| from sklearn.preprocessing import normalize | |||
| import pickle | |||
| os.environ["AUTOGL_BACKEND"] = "pyg" | |||
| from autogl.module.model.pyg import AutoGNNGuard, AutoGNNGuard_attack | |||
| from autogl.solver.utils import set_seed | |||
def seed_torch(seed=1029):
    """Seed NumPy, CPU torch and every CUDA device, and pin cuDNN to its
    deterministic kernels so repeated runs produce identical results."""
    for seeder in (np.random.seed, torch.manual_seed,
                   torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # torch.use_deterministic_algorithms(True)
# Fix all RNG sources once at import time so the experiment is reproducible.
seed_torch(2048)
def main(dataset, adj, features, device):
    """Evaluate the GNNGuard-defended GCN on the clean graph and on a
    pre-computed metattack-perturbed graph (ptb_rate=0.2).

    A GCN surrogate is trained first for parity with the live-attack
    pipeline, although the stored perturbed adjacency is what gets used.
    """
    surrogate = GCN_attack(nfeat=features.shape[1], nclass=labels.max().item() + 1,
                           n_edge=adj.nonzero()[0].shape[0], nhid=16, dropout=0,
                           with_relu=False, with_bias=False, device=args.device, )
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201)
    # Load the pre-attacked adjacency instead of re-running Metattack.
    perturbed_data = PrePtbDataset(root='/tmp/', name=dataset, attack_method='meta', ptb_rate=0.2)
    modified_adj = perturbed_data.adj
    # Evaluate with the GNNGuard defense enabled.
    flag = True
    print('=== testing GNN on original(clean) graph + GNNGuard ===')
    print("acc_test:", test(adj, features, device, attention=flag))
    print('=== testing GCN on perturbed graph + GNNGuard ===')
    print("acc_test:", test(modified_adj, features, device, attention=flag))
def test(adj, features, device, attention):
    """Train a GNNGuard-capable GCN five times on (adj, features) and return
    the mean/std of the resulting test-set accuracies.

    Parameters
    ----------
    adj, features : graph adjacency and node features (scipy sparse).
    device : device the classifier runs on.
    attention : bool, enables the GNNGuard defense during fitting.

    Returns
    -------
    dict with keys ``"mean"`` and ``"std"``.
    """
    accs = []
    for seed in tqdm(range(5)):
        classifier = GCN(nfeat=features.shape[1], nhid=16,
                         nclass=labels.max().item() + 1, dropout=0.5, device=device)
        classifier = classifier.to(device)
        # Fix: removed a leftover debug "print(1)" that spammed stdout on
        # every repetition.
        classifier.fit(features, adj, labels, idx_train, train_iters=201,
                       idx_val=idx_val,
                       idx_test=idx_test,
                       verbose=True, attention=attention)
        classifier.eval()
        acc_test, output = classifier.test(idx_test)
        accs.append(acc_test.item())
    return {"mean": np.mean(accs), "std": np.std(accs)}
def main_autogl(dataset, model_hp, adj, features, device):
    """AutoGL counterpart of ``main``: identical evaluation, but the
    surrogate and defense models are built through AutoGNNGuard."""
    # Surrogate built via the AutoGL attack-variant wrapper.
    surrogate = AutoGNNGuard_attack(
        num_features=pyg_data.num_node_features,
        num_classes=pyg_data.num_classes,
        device=args.device,
        init=False
    ).from_hyper_parameter(model_hp).model
    surrogate = surrogate.to(args.device)
    surrogate.fit(features, adj, labels, idx_train, train_iters=201)
    # Use the stored metattack-perturbed adjacency (ptb_rate=0.2).
    perturbed_data = PrePtbDataset(root='/tmp/', name=dataset, attack_method='meta', ptb_rate=0.2)
    modified_adj = perturbed_data.adj
    # Evaluate with the GNNGuard defense enabled.
    flag = True
    print('=== testing GNN on original(clean) graph (AutoGL) + GNNGuard ===')
    print("acc_test:", test_autogl(adj, features, device, attention=flag))
    print('=== testing GCN on perturbed graph (AutoGL) + GNNGuard ===')
    print("acc_test:", test_autogl(modified_adj, features, device, attention=flag))
def test_autogl(adj, features, device, attention):
    """Train an AutoGNNGuard-built GCN five times and return the mean/std
    of the test accuracies (cf. ``test`` for the deeprobust baseline)."""
    accs = []
    for seed in tqdm(range(5)):
        gcn = AutoGNNGuard(
            num_features=pyg_data.num_node_features,
            num_classes=pyg_data.num_classes,
            device=args.device,
            init=False
        ).from_hyper_parameter(model_hp).model
        gcn = gcn.to(device)
        gcn.fit(features, adj, labels, idx_train, idx_val=idx_val,
                idx_test=idx_test,
                attention=attention, verbose=True, train_iters=81)
        gcn.eval()
        acc_test, output = gcn.test(idx_test=idx_test)
        accs.append(acc_test.item())
    return {"mean": np.mean(accs), "std": np.std(accs)}
if __name__ == '__main__':
    # Hyper-parameters for the AutoGNNGuard models (2-layer GCN, hidden=16).
    model_hp = {
        "num_layers": 2,
        "hidden": [16],
        "dropout": 0.5,
        "act": "relu"
    }
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=14, help='Random seed.')
    # cora and citeseer are binary, pubmed has not binary features
    parser.add_argument('--dataset', type=str, default='citeseer', choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'], help='dataset')
    parser.add_argument('--ptb_rate', type=float, default=0.2, help='pertubation rate')
    parser.add_argument('--modelname', type=str, default='GCN', choices=['GCN', 'GAT','GIN', 'JK'])
    parser.add_argument('--defensemodel', type=str, default='GCNJaccard', choices=['GCNJaccard', 'RGCN', 'GCNSVD'])
    parser.add_argument('--DPlabel', type=int, default=9, help='0-10')
    args = parser.parse_args()
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('cuda: %s' % args.device)
    # NOTE(review): the CLI values are overridden here with hard-coded
    # "pubmed"/"GCN" — confirm this is intentional.
    args.dataset = "pubmed"
    args.modelname = "GCN"
    data = Dataset(root='/tmp/', name=args.dataset)
    # PyG copy of the dataset for the AutoGL models.
    pyg_data = Dpr2Pyg(data).process().to(args.device)
    pyg_data.num_classes = len(set(data.labels))
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)
    # deeprobust utilities expect sparse CSR features.
    if scipy.sparse.issparse(features)==False:
        features = scipy.sparse.csr_matrix(features)
    # Attack budget for running Metattack live (currently unused: the
    # pre-attacked graph is loaded inside main/main_autogl instead).
    perturbations = int(args.ptb_rate * (adj.sum()//2)) ###
    adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False)
    # to CSR sparse
    adj, features = csr_matrix(adj), csr_matrix(features)
    """add undirected edges, orgn-arxiv is directed graph, we transfer it to undirected closely following
    https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-arxiv
    """
    # Symmetrize the adjacency and clip duplicate edges back to weight 1.
    adj = adj + adj.T
    adj[adj>1] = 1
    # main(args.dataset, adj, features, device=args.device)
    main_autogl(args.dataset, model_hp, adj, features, device=args.device)