| @@ -169,7 +169,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): | |||
| # Get task name, i.e., `LinkPrediction`. | |||
| return "LinkPrediction" | |||
| def train_only(self, data, train_mask=None): | |||
| def train_only_pyg(self, data, train_mask=None): | |||
| """ | |||
| The function of training on the given dataset and mask. | |||
| Parameters | |||
| @@ -241,8 +241,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): | |||
| The function of training on the given dataset and mask. | |||
| Parameters | |||
| ---------- | |||
| pos_data: positive links | |||
| neg_data: negative links | |||
| dataset: a dict of DGL graphs containing the ``train``, ``train_pos`` and ``train_neg`` graphs | |||
| Returns | |||
| ------- | |||
| self: ``autogl.train.LinkPredictionTrainer`` | |||
| @@ -308,7 +307,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): | |||
| self.early_stopping.load_checkpoint(self.model.model) | |||
| def predict_only(self, data, test_mask=None): | |||
| def predict_only_pyg(self, data, test_mask=None): | |||
| """ | |||
| The function of predicting on the given dataset and mask. | |||
| @@ -342,9 +341,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): | |||
| Parameters | |||
| ---------- | |||
| data: The link prediction dataset used to be predicted. | |||
| train_mask: The mask used in training stage. | |||
| dataset: The link prediction dataset to make predictions on. | |||
| Returns | |||
| ------- | |||
| res: The result of predicting on the given dataset. | |||
| @@ -377,11 +374,11 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): | |||
| if self.pyg_dgl == 'pyg': | |||
| data = dataset[0] | |||
| data.edge_index = data.train_pos_edge_index | |||
| self.train_only(data) | |||
| self.train_only_pyg(data) | |||
| if keep_valid_result: | |||
| self.valid_result = self.predict_only(data) | |||
| self.valid_result_prob = self.predict_proba(dataset, "val") | |||
| self.valid_score = self.evaluate(dataset, mask="val", feval=self.feval) | |||
| self.valid_result = self.predict_only_pyg(data) | |||
| self.valid_result_prob = self.predict_proba_pyg(dataset, "val") | |||
| self.valid_score = self.evaluate_pyg(dataset, mask="val", feval=self.feval) | |||
| elif self.pyg_dgl == 'dgl': | |||
| self.train_only_dgl(dataset) | |||
| if keep_valid_result: | |||
| @@ -405,11 +402,17 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): | |||
| The prediction result of ``predict_proba``. | |||
| """ | |||
| if self.pyg_dgl == 'pyg': | |||
| return self.predict_proba(dataset, mask=mask, in_log_format=False) | |||
| return self.predict_proba_pyg(dataset, mask=mask, in_log_format=False) | |||
| elif self.pyg_dgl == 'dgl': | |||
| return self.predict_proba_dgl(dataset, mask=mask, in_log_format=False) | |||
| def predict_proba(self, dataset, mask=None, in_log_format=False): | |||
| if self.pyg_dgl == 'pyg': | |||
| return self.predict_proba_pyg(dataset, mask, in_log_format) | |||
| elif self.pyg_dgl == 'dgl': | |||
| return self.predict_proba_dgl(dataset, mask, in_log_format) | |||
| def predict_proba_pyg(self, dataset, mask=None, in_log_format=False): | |||
| """ | |||
| The function of predicting the probability on the given dataset. | |||
| @@ -443,14 +446,30 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| z = self.predict_only(data) | |||
| z = self.predict_only_pyg(data) | |||
| link_logits = self.model.model.lp_decode(z, pos_edge_index, neg_edge_index) | |||
| link_probs = link_logits.sigmoid() | |||
| return link_probs | |||
| def predict_proba_dgl(self, dataset, mask=None, in_log_format=False): | |||
| """ | |||
| The function of predicting the probability on the given dataset. | |||
| Parameters | |||
| ---------- | |||
| dataset: The link prediction dataset to make predictions on. | |||
| mask: ``train``, ``val``, or ``test``. | |||
| The dataset mask. | |||
| in_log_format: ``bool``. | |||
| If ``True``, the returned probabilities are in log format; otherwise plain probabilities are returned. | |||
| Returns | |||
| ------- | |||
| The prediction result. | |||
| """ | |||
| train_graph = dataset['train'] | |||
| try: | |||
| try: | |||
| @@ -547,43 +566,64 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): | |||
| """ | |||
| if self.pyg_dgl == 'pyg': | |||
| data = dataset[0] | |||
| data = data.to(self.device) | |||
| test_mask = mask | |||
| if feval is None: | |||
| feval = self.feval | |||
| else: | |||
| feval = get_feval(feval) | |||
| return self.evaluate_pyg(dataset, mask, feval) | |||
| elif self.pyg_dgl == 'dgl': | |||
| return self.evaluate_dgl(dataset, mask, feval) | |||
| if mask in ["train", "val", "test"]: | |||
| pos_edge_index = data[f"{mask}_pos_edge_index"] | |||
| neg_edge_index = data[f"{mask}_neg_edge_index"] | |||
| else: | |||
| pos_edge_index = data[f"test_pos_edge_index"] | |||
| neg_edge_index = data[f"test_neg_edge_index"] | |||
| def evaluate_pyg(self, dataset, mask=None, feval=None): | |||
| data = dataset[0] | |||
| data = data.to(self.device) | |||
| test_mask = mask | |||
| if feval is None: | |||
| feval = self.feval | |||
| else: | |||
| feval = get_feval(feval) | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| link_probs = self.predict_proba(dataset, mask) | |||
| link_labels = self.get_link_labels(pos_edge_index, neg_edge_index) | |||
| if mask in ["train", "val", "test"]: | |||
| pos_edge_index = data[f"{mask}_pos_edge_index"] | |||
| neg_edge_index = data[f"{mask}_neg_edge_index"] | |||
| else: | |||
| pos_edge_index = data[f"test_pos_edge_index"] | |||
| neg_edge_index = data[f"test_neg_edge_index"] | |||
| if not isinstance(feval, list): | |||
| feval = [feval] | |||
| return_single = True | |||
| else: | |||
| return_single = False | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| link_probs = self.predict_proba_pyg(dataset, mask) | |||
| link_labels = self.get_link_labels(pos_edge_index, neg_edge_index) | |||
| if not isinstance(feval, list): | |||
| feval = [feval] | |||
| return_single = True | |||
| else: | |||
| return_single = False | |||
| res = [] | |||
| for f in feval: | |||
| res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy())) | |||
| if return_single: | |||
| return res[0] | |||
| return res | |||
| res = [] | |||
| for f in feval: | |||
| res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy())) | |||
| if return_single: | |||
| return res[0] | |||
| return res | |||
| elif self.pyg_dgl == 'dgl': | |||
| return self.evaluate_dgl(dataset, mask, feval) | |||
| def evaluate_dgl(self, dataset, mask=None, feval=None): | |||
| """ | |||
| The function of evaluating on the given dataset and mask. | |||
| Parameters | |||
| ---------- | |||
| dataset: The link prediction dataset to be evaluated. | |||
| mask: ``train``, ``val``, or ``test``. | |||
| The dataset mask. | |||
| feval: ``str``. | |||
| The evaluation method used in this function. | |||
| Returns | |||
| ------- | |||
| res: The evaluation result on the given dataset. | |||
| """ | |||
| if feval is None: | |||
| feval = self.feval | |||
| else: | |||
| @@ -0,0 +1,202 @@ | |||
| import dgl | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| import itertools | |||
| import numpy as np | |||
| import scipy.sparse as sp | |||
| import dgl.function as fn | |||
| import random | |||
| from dgl.data import CoraGraphDataset, PubmedGraphDataset, CiteseerGraphDataset | |||
| # from autogl.module.train.link_prediction_full import LinkPredictionTrainer | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from autogl.module.model.dgl.graphsage import GraphSAGE | |||
| import dgl.data | |||
| from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter | |||
| from tqdm import tqdm | |||
| from dgl.nn import SAGEConv | |||
| from dgl.nn.pytorch.conv import GraphConv | |||
| from dgl.nn import GATConv | |||
| from sklearn.metrics import roc_auc_score | |||
| parser = ArgumentParser( | |||
| "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter | |||
| ) | |||
| parser.add_argument("--dataset", default="Cora", type=str, help="dataset to use", choices=["Cora", "CiteSeer", "PubMed"],) | |||
| parser.add_argument("--model", default="sage", type=str,help="model to use", choices=["gcn","gat","sage"],) | |||
| parser.add_argument("--seed", type=int, default=0, help="random seed") | |||
| parser.add_argument('--repeat', type=int, default=10) | |||
| parser.add_argument("--device", default=0, type=int, help="GPU device") | |||
| args = parser.parse_args() | |||
| args.device = torch.device('cuda:0') | |||
| device = torch.device('cuda:0') | |||
| if args.dataset == 'Cora': | |||
| dataset = CoraGraphDataset() | |||
| elif args.dataset == 'CiteSeer': | |||
| dataset = CiteseerGraphDataset() | |||
| elif args.dataset == 'PubMed': | |||
| dataset = PubmedGraphDataset() | |||
| else: | |||
| assert False | |||
| def setup_seed(seed): | |||
| torch.manual_seed(seed) | |||
| torch.cuda.manual_seed_all(seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| np.random.seed(seed) | |||
| random.seed(seed) | |||
| class GraphSAGE(nn.Module): | |||
| def __init__(self, in_feats, h_feats): | |||
| super(GraphSAGE, self).__init__() | |||
| self.conv1 = SAGEConv(in_feats, h_feats, 'mean') | |||
| self.conv2 = SAGEConv(h_feats, h_feats, 'mean') | |||
| def forward(self, data): | |||
| g = data | |||
| in_feat = data.ndata['feat'] | |||
| h = self.conv1(g, in_feat) | |||
| h = F.relu(h) | |||
| h = self.conv2(g, h) | |||
| return h | |||
| class GCN(nn.Module): | |||
| def __init__(self, in_feats, h_feats): | |||
| super(GCN, self).__init__() | |||
| self.conv1 = GraphConv(in_feats, h_feats) | |||
| self.conv2 = GraphConv(h_feats, h_feats) | |||
| def forward(self, data): | |||
| g = data | |||
| in_feat = data.ndata['feat'] | |||
| h = self.conv1(g, in_feat) | |||
| h = F.relu(h) | |||
| h = self.conv2(g, h) | |||
| return h | |||
| class GAT(nn.Module): | |||
| def __init__(self, in_feats, h_feats): | |||
| super(GAT, self).__init__() | |||
| self.conv1 = GATConv(in_feats, h_feats // 4, 4) | |||
| self.conv2 = GATConv(h_feats, h_feats // 4, 4) | |||
| def forward(self, data): | |||
| g = data | |||
| in_feat = data.ndata['feat'] | |||
| h = self.conv1(g, in_feat).flatten(1) | |||
| h = F.relu(h) | |||
| h = self.conv2(g, h).mean(1) | |||
| return h | |||
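| # Each encoder above is a small two-layer DGL module that maps node features to node embeddings; the dot-product decoder below (lp_decode) turns pairs of embeddings into link logits. | |||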
| def split_train_test(g): | |||
| u, v = g.edges() | |||
| eids = np.arange(g.number_of_edges()) | |||
| eids = np.random.permutation(eids) | |||
| test_size = int(len(eids) * 0.1) | |||
| train_size = g.number_of_edges() - test_size | |||
| test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]] | |||
| train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]] | |||
| # Find all negative edges and split them for training and testing | |||
| adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy()))) | |||
| adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes()) | |||
| neg_u, neg_v = np.where(adj_neg != 0) | |||
| neg_eids = np.random.choice(len(neg_u), g.number_of_edges()) | |||
| test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]] | |||
| train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]] | |||
| train_g = dgl.remove_edges(g, eids[:test_size]) | |||
| train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes()) | |||
| train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes()) | |||
| test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes()) | |||
| test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes()) | |||
| return train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g | |||
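| # split_train_test returns the message-passing graph (train_g, with the 10% test edges removed) plus graphs that only carry the positive/negative supervision edges for train and test. | |||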
| def get_link_labels(pos_edge_index, neg_edge_index): | |||
| E = pos_edge_index.size(1) + neg_edge_index.size(1) | |||
| link_labels = torch.zeros(E, dtype=torch.float, device=device) | |||
| link_labels[: pos_edge_index.size(1)] = 1.0 | |||
| return link_labels | |||
| def lp_decode(z, pos_edge_index, neg_edge_index): | |||
| edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) | |||
| logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) | |||
| return logits | |||
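| # lp_decode scores each candidate edge as the dot product of its endpoint embeddings; get_link_labels builds the matching 0/1 targets with positives first, so both must receive the edges in the same order. | |||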
| res = [] | |||
| for seed in tqdm(range(1234, 1234+args.repeat)): | |||
| setup_seed(seed) | |||
| g = dataset[0].to(device) | |||
| train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g = split_train_test(g.cpu()) | |||
| train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g = train_g.to(device), train_pos_g.to(device), train_neg_g.to(device), test_pos_g.to(device), test_neg_g.to(device) | |||
| if args.model == 'gcn' or args.model == 'gat': | |||
| train_g = dgl.add_self_loop(train_g) | |||
| if args.model == 'gcn': | |||
| model = GCN(train_g.ndata['feat'].shape[1], 16).to(device) | |||
| elif args.model == 'gat': | |||
| model = GAT(train_g.ndata['feat'].shape[1], 16).to(device) | |||
| elif args.model == 'sage': | |||
| model = GraphSAGE(train_g.ndata['feat'].shape[1], 16).to(device) | |||
| else: | |||
| assert False | |||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.01) | |||
| all_logits = [] | |||
| for epoch in range(100): | |||
| model.train() | |||
| optimizer.zero_grad() | |||
| z = model(train_g) | |||
| link_logits = lp_decode( | |||
| z, torch.stack(train_pos_g.edges()), torch.stack(train_neg_g.edges()) | |||
| ) | |||
| link_labels = get_link_labels( | |||
| torch.stack(train_pos_g.edges()), torch.stack(train_neg_g.edges()) | |||
| ) | |||
| loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) | |||
| loss.backward() | |||
| optimizer.step() | |||
| model.eval() | |||
| with torch.no_grad(): | |||
| z = model(train_g) | |||
| link_logits = lp_decode( | |||
| z, torch.stack(test_pos_g.edges()), torch.stack(test_neg_g.edges()) | |||
| ) | |||
| link_probs = link_logits.sigmoid() | |||
| link_labels = get_link_labels( | |||
| torch.stack(test_pos_g.edges()), torch.stack(test_neg_g.edges()) | |||
| ) | |||
| result = roc_auc_score(link_labels.cpu().numpy(), link_probs.cpu().numpy()) | |||
| res.append(result) | |||
| print(np.mean(res), np.std(res)) | |||
| @@ -0,0 +1,181 @@ | |||
| import dgl | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| import itertools | |||
| import numpy as np | |||
| import scipy.sparse as sp | |||
| import dgl.function as fn | |||
| import random | |||
| from dgl.data import CoraGraphDataset, PubmedGraphDataset, CiteseerGraphDataset | |||
| # from autogl.module.train.link_prediction_full import LinkPredictionTrainer | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from autogl.module.model.dgl.graphsage import GraphSAGE | |||
| import dgl.data | |||
| from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter | |||
| from tqdm import tqdm | |||
| from dgl.nn import SAGEConv | |||
| from sklearn.metrics import roc_auc_score | |||
| parser = ArgumentParser( | |||
| "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter | |||
| ) | |||
| parser.add_argument("--dataset", default="Cora", type=str, help="dataset to use", choices=["Cora", "CiteSeer", "PubMed"],) | |||
| parser.add_argument("--model", default="sage", type=str,help="model to use", choices=["gcn","gat","sage"],) | |||
| parser.add_argument("--seed", type=int, default=0, help="random seed") | |||
| parser.add_argument('--repeat', type=int, default=10) | |||
| parser.add_argument("--device", default=0, type=int, help="GPU device") | |||
| args = parser.parse_args() | |||
| args.device = torch.device('cuda:0') | |||
| device = torch.device('cuda:0') | |||
| if args.dataset == 'Cora': | |||
| dataset = CoraGraphDataset() | |||
| elif args.dataset == 'CiteSeer': | |||
| dataset = CiteseerGraphDataset() | |||
| elif args.dataset == 'PubMed': | |||
| dataset = PubmedGraphDataset() | |||
| else: | |||
| assert False | |||
| def setup_seed(seed): | |||
| torch.manual_seed(seed) | |||
| torch.cuda.manual_seed_all(seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| np.random.seed(seed) | |||
| random.seed(seed) | |||
| def split_train_test(g): | |||
| u, v = g.edges() | |||
| eids = np.arange(g.number_of_edges()) | |||
| eids = np.random.permutation(eids) | |||
| test_size = int(len(eids) * 0.1) | |||
| train_size = g.number_of_edges() - test_size | |||
| test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]] | |||
| train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]] | |||
| # Find all negative edges and split them for training and testing | |||
| adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy()))) | |||
| adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes()) | |||
| neg_u, neg_v = np.where(adj_neg != 0) | |||
| neg_eids = np.random.choice(len(neg_u), g.number_of_edges()) | |||
| test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]] | |||
| train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]] | |||
| train_g = dgl.remove_edges(g, eids[:test_size]) | |||
| train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes()) | |||
| train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes()) | |||
| test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes()) | |||
| test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes()) | |||
| return train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g | |||
| class DotPredictor(nn.Module): | |||
| def forward(self, g, h): | |||
| with g.local_scope(): | |||
| g.ndata['h'] = h | |||
| # Compute a new edge feature named 'score' by a dot-product between the | |||
| # source node feature 'h' and destination node feature 'h'. | |||
| g.apply_edges(fn.u_dot_v('h', 'h', 'score')) | |||
| # u_dot_v returns a 1-element vector for each edge so you need to squeeze it. | |||
| return g.edata['score'][:, 0] | |||
| def compute_loss(pos_score, neg_score): | |||
| scores = torch.cat([pos_score, neg_score]) | |||
| labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]) | |||
| return F.binary_cross_entropy_with_logits(scores.cpu(), labels) | |||
| def compute_auc(pos_score, neg_score): | |||
| scores = torch.cat([pos_score, neg_score]).numpy() | |||
| labels = torch.cat( | |||
| [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy() | |||
| return roc_auc_score(labels, scores) | |||
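| # DotPredictor, compute_loss and compute_auc follow the DGL link-prediction tutorial but appear unused below; the training loop scores edges with model.lp_decode and evaluates with roc_auc_score directly. | |||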
| def get_link_labels(pos_edge_index, neg_edge_index): | |||
| E = pos_edge_index.size(1) + neg_edge_index.size(1) | |||
| link_labels = torch.zeros(E, dtype=torch.float, device=device) | |||
| link_labels[: pos_edge_index.size(1)] = 1.0 | |||
| return link_labels | |||
| res = [] | |||
| for seed in tqdm(range(1234, 1234+args.repeat)): | |||
| setup_seed(seed) | |||
| g = dataset[0].to(device) | |||
| train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g = split_train_test(g.cpu()) | |||
| train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g = train_g.to(device), train_pos_g.to(device), train_neg_g.to(device), test_pos_g.to(device), test_neg_g.to(device) | |||
| if args.model == 'gcn': | |||
| pass | |||
| elif args.model == 'gat': | |||
| pass | |||
| elif args.model == 'sage': | |||
| para = { | |||
| 'features_num': train_g.ndata['feat'].shape[1], | |||
| 'num_class': 2, | |||
| 'num_layers': 3, | |||
| 'hidden': [16, 16], | |||
| 'dropout': 0.0, | |||
| 'act': 'relu', | |||
| 'agg': 'mean', | |||
| } | |||
| model = GraphSAGE(para).to(device) | |||
| else: | |||
| assert False | |||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.01) | |||
| all_logits = [] | |||
| for epoch in range(100): | |||
| model.train() | |||
| optimizer.zero_grad() | |||
| z = model.lp_encode(train_g) | |||
| link_logits = model.lp_decode( | |||
| z, torch.stack(train_pos_g.edges()), torch.stack(train_neg_g.edges()) | |||
| ) | |||
| link_labels = get_link_labels( | |||
| torch.stack(train_pos_g.edges()), torch.stack(train_neg_g.edges()) | |||
| ) | |||
| loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) | |||
| loss.backward() | |||
| optimizer.step() | |||
| model.eval() | |||
| with torch.no_grad(): | |||
| z = model.lp_encode(train_g) | |||
| link_logits = model.lp_decode( | |||
| z, torch.stack(test_pos_g.edges()), torch.stack(test_neg_g.edges()) | |||
| ) | |||
| link_probs = link_logits.sigmoid() | |||
| link_labels = get_link_labels( | |||
| torch.stack(test_pos_g.edges()), torch.stack(test_neg_g.edges()) | |||
| ) | |||
| result = roc_auc_score(link_labels.cpu().numpy(), link_probs.cpu().numpy()) | |||
| res.append(result) | |||
| print(np.mean(res), np.std(res)) | |||
| @@ -0,0 +1,261 @@ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from tqdm import tqdm | |||
| # import autogl.module.train | |||
| # import torch_geometric | |||
| # exit(0) | |||
| # | |||
| from autogl.datasets import build_dataset_from_name | |||
| from autogl.solver.classifier.link_predictor import AutoLinkPredictor | |||
| from autogl.module.train.evaluation import Auc | |||
| import yaml | |||
| import random | |||
| import torch | |||
| import numpy as np | |||
| import dgl | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| import itertools | |||
| import numpy as np | |||
| import scipy.sparse as sp | |||
| from autogl.module.model.dgl import AutoSAGE, AutoGAT, AutoGCN | |||
| def construct_negative_graph(graph, k): | |||
| src, dst = graph.edges() | |||
| neg_src = src.repeat_interleave(k) | |||
| neg_dst = torch.randint(0, graph.num_nodes(), (len(src) * k,)) | |||
| # return dgl.graph((neg_src, neg_dst), num_nodes=graph.num_nodes()).edges() | |||
| return neg_src, neg_dst | |||
| def negative_sample(data): | |||
| return construct_negative_graph(data, 5) | |||
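| # The next two lines monkey-patch autogl.datasets.utils.negative_sampling with the sampler above, which draws 5 random negative endpoints per positive edge. | |||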
| import autogl.datasets.utils as tmp_utils | |||
| tmp_utils.negative_sampling = negative_sample | |||
| from dgl.data import CoraGraphDataset, PubmedGraphDataset, CiteseerGraphDataset | |||
| from autogl.module.train.link_prediction_full import LinkPredictionTrainer | |||
| def setup_seed(seed): | |||
| torch.manual_seed(seed) | |||
| torch.cuda.manual_seed_all(seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| np.random.seed(seed) | |||
| random.seed(seed) | |||
| def fixed(**kwargs): | |||
| return [{ | |||
| 'parameterName': k, | |||
| "type": "FIXED", | |||
| "value": v | |||
| } for k, v in kwargs.items()] | |||
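| # fixed() turns keyword arguments into "FIXED" entries of an HPO search space, pinning each hyperparameter to a single value for the random search below. | |||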
| def split_train_test(g): | |||
| u, v = g.edges() | |||
| eids = np.arange(g.number_of_edges()) | |||
| eids = np.random.permutation(eids) | |||
| test_size = int(len(eids) * 0.1) | |||
| train_size = g.number_of_edges() - test_size | |||
| test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]] | |||
| train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]] | |||
| # Find all negative edges and split them for training and testing | |||
| adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy()))) | |||
| adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes()) | |||
| neg_u, neg_v = np.where(adj_neg != 0) | |||
| neg_eids = np.random.choice(len(neg_u), g.number_of_edges()) | |||
| test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]] | |||
| train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]] | |||
| train_g = dgl.remove_edges(g, eids[:test_size]) | |||
| train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes()) | |||
| train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes()) | |||
| test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes()) | |||
| test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes()) | |||
| return train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g | |||
| def split_train_valid_test(g): | |||
| u, v = g.edges() | |||
| eids = np.arange(g.number_of_edges()) | |||
| eids = np.random.permutation(eids) | |||
| valid_size = int(len(eids) * 0.1) | |||
| test_size = int(len(eids) * 0.1) | |||
| train_size = g.number_of_edges() - test_size - valid_size | |||
| test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]] | |||
| valid_pos_u, valid_pos_v = u[eids[test_size:test_size+valid_size]], v[eids[test_size:test_size+valid_size]] | |||
| train_pos_u, train_pos_v = u[eids[test_size+valid_size:]], v[eids[test_size+valid_size:]] | |||
| # Find all negative edges and split them for training and testing | |||
| adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy()))) | |||
| adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes()) | |||
| neg_u, neg_v = np.where(adj_neg != 0) | |||
| neg_eids = np.random.choice(len(neg_u), g.number_of_edges()) | |||
| test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]] | |||
| valid_neg_u, valid_neg_v = neg_u[neg_eids[test_size:test_size+valid_size]], neg_v[neg_eids[test_size:test_size+valid_size]] | |||
| train_neg_u, train_neg_v = neg_u[neg_eids[test_size+valid_size:]], neg_v[neg_eids[test_size+valid_size:]] | |||
| train_g = dgl.remove_edges(g, eids[:test_size+valid_size]) | |||
| train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes()) | |||
| train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes()) | |||
| valid_pos_g = dgl.graph((valid_pos_u, valid_pos_v), num_nodes=g.number_of_nodes()) | |||
| valid_neg_g = dgl.graph((valid_neg_u, valid_neg_v), num_nodes=g.number_of_nodes()) | |||
| test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes()) | |||
| test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes()) | |||
| return train_g, train_pos_g, train_neg_g, valid_pos_g, valid_neg_g, test_pos_g, test_neg_g | |||
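| # split_train_valid_test mirrors split_train_test but additionally holds out 10% of the edges (with matched negative samples) for validation, returning seven graphs. | |||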
| if __name__ == "__main__": | |||
| from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter | |||
| parser = ArgumentParser( | |||
| "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter | |||
| ) | |||
| parser.add_argument( | |||
| "--dataset", | |||
| default="Cora", | |||
| type=str, | |||
| help="dataset to use", | |||
| choices=[ | |||
| "Cora", | |||
| "CiteSeer", | |||
| "PubMed", | |||
| ], | |||
| ) | |||
| parser.add_argument( | |||
| "--model", | |||
| default="sage", | |||
| type=str, | |||
| help="model to use", | |||
| choices=[ | |||
| "gcn", | |||
| "gat", | |||
| "sage", | |||
| ], | |||
| ) | |||
| parser.add_argument("--seed", type=int, default=0, help="random seed") | |||
| parser.add_argument('--repeat', type=int, default=10) | |||
| parser.add_argument("--device", default=0, type=int, help="GPU device") | |||
| args = parser.parse_args() | |||
| args.device = torch.device('cuda:0') | |||
| device = torch.device('cuda:0') | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.set_device(args.device) | |||
| if args.dataset == 'Cora': | |||
| dataset = CoraGraphDataset() | |||
| elif args.dataset == 'CiteSeer': | |||
| dataset = CiteseerGraphDataset() | |||
| elif args.dataset == 'PubMed': | |||
| dataset = PubmedGraphDataset() | |||
| else: | |||
| assert False | |||
| res = [] | |||
| for seed in tqdm(range(1234, 1234+args.repeat)): | |||
| # set random seed | |||
| random.seed(seed) | |||
| np.random.seed(seed) | |||
| torch.manual_seed(seed) | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.manual_seed(seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| torch.backends.cudnn.benchmark = False | |||
| graph = dataset[0].to(args.device) | |||
| num_features = graph.ndata['feat'].size(1) | |||
| if args.model == 'gcn': | |||
| model = AutoGCN | |||
| elif args.model == 'gat': | |||
| model = AutoGAT | |||
| elif args.model == 'sage': | |||
| automodel = AutoSAGE( | |||
| num_features=num_features, | |||
| num_classes=2, | |||
| device=args.device | |||
| ) | |||
| automodel.hyperparams = { | |||
| "num_layers": 3, | |||
| "hidden": [16, 16], | |||
| "dropout": 0.0, | |||
| "act": "relu", | |||
| "agg": "mean", | |||
| } | |||
| model_hp = { | |||
| "num_layers": 3, | |||
| "hidden": [16, 16], | |||
| "dropout": 0.0, | |||
| "act": "relu", | |||
| "agg": "mean", | |||
| } | |||
| else: | |||
| assert False | |||
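| # Note: only the 'sage' branch above actually constructs ``automodel``; selecting gcn/gat would fail at the next line. | |||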
| automodel.initialize() | |||
| autoClassifier = AutoLinkPredictor( | |||
| feature_module=None, | |||
| graph_models='sage', | |||
| ensemble_module=None, | |||
| max_evals=1, | |||
| hpo_module='random', | |||
| trainer_hp_space=fixed(**{ | |||
| "max_epoch": 100, | |||
| "early_stopping_round": 100 + 1, | |||
| "lr":0.01, | |||
| "weight_decay": None, | |||
| }), | |||
| model_hp_spaces=[fixed(**model_hp)] | |||
| ) | |||
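| # fit() below splits edges into 85% train / 5% validation, leaving roughly 10% for the test evaluation further down (inferred from the train_split/val_split arguments). | |||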
| autoClassifier.fit( | |||
| dataset, | |||
| time_limit=3600, | |||
| evaluation_method=[Auc], | |||
| seed=seed, | |||
| train_split=0.85, | |||
| val_split=0.05, | |||
| ) | |||
| autoClassifier.get_leaderboard().show() | |||
| # test | |||
| predict_result = autoClassifier.predict_proba() | |||
| pos_edge_index, neg_edge_index = ( | |||
| dataset[0].test_pos_edge_index, | |||
| dataset[0].test_neg_edge_index, | |||
| ) | |||
| E = pos_edge_index.size(1) + neg_edge_index.size(1) | |||
| link_labels = torch.zeros(E) | |||
| link_labels[: pos_edge_index.size(1)] = 1.0 | |||
| print( | |||
| "test auc: %.4f" | |||
| % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy())) | |||
| ) | |||
| """ | |||
| AUC 0.8151564430268863 | |||
| """ | |||
| @@ -1,6 +1,7 @@ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from tqdm import tqdm | |||
| # import autogl.module.train | |||
| # import torch_geometric | |||
| @@ -20,7 +21,7 @@ import torch.nn.functional as F | |||
| import itertools | |||
| import numpy as np | |||
| import scipy.sparse as sp | |||
| from autogl.module.model.dgl import AutoSAGE | |||
| from autogl.module.model.dgl import AutoSAGE, AutoGAT, AutoGCN | |||
| def construct_negative_graph(graph, k): | |||
| @@ -117,7 +118,6 @@ def split_train_valid_test(g): | |||
| if __name__ == "__main__": | |||
| setup_seed(1234) | |||
| from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter | |||
| @@ -135,21 +135,28 @@ if __name__ == "__main__": | |||
| "PubMed", | |||
| ], | |||
| ) | |||
| parser.add_argument( | |||
| "--model", | |||
| default="sage", | |||
| type=str, | |||
| help="model to use", | |||
| choices=[ | |||
| "gcn", | |||
| "gat", | |||
| "sage", | |||
| ], | |||
| ) | |||
| parser.add_argument("--seed", type=int, default=0, help="random seed") | |||
| parser.add_argument('--repeat', type=int, default=10) | |||
| parser.add_argument("--device", default=0, type=int, help="GPU device") | |||
| args = parser.parse_args() | |||
| args.device = torch.device('cuda:0') | |||
| device = torch.device('cuda:0') | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.set_device(args.device) | |||
| seed = args.seed | |||
| # set random seed | |||
| random.seed(seed) | |||
| np.random.seed(seed) | |||
| torch.manual_seed(seed) | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.manual_seed(seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| torch.backends.cudnn.benchmark = False | |||
| if args.dataset == 'Cora': | |||
| dataset = CoraGraphDataset() | |||
| @@ -157,60 +164,76 @@ if __name__ == "__main__": | |||
| dataset = CiteseerGraphDataset() | |||
| elif args.dataset == 'PubMed': | |||
| dataset = PubmedGraphDataset() | |||
| # configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader) | |||
| # configs["hpo"]["name"] = args.hpo | |||
| # configs["hpo"]["max_evals"] = args.max_eval | |||
| # autoClassifier = AutoLinkPredictor.from_config(configs) | |||
| graph = dataset[0].to(args.device) | |||
| num_features = graph.ndata['feat'].size(1) | |||
| autoSAGE = AutoSAGE( | |||
| num_features=num_features, | |||
| num_classes=2, | |||
| device=args.device | |||
| ) | |||
| autoSAGE.hyperparams = { | |||
| "num_layers": 3, | |||
| "hidden": [16, 16], | |||
| "dropout": 0.0, | |||
| "act": "relu", | |||
| "agg": "mean", | |||
| } | |||
| autoSAGE.initialize() | |||
| trainer = LinkPredictionTrainer( | |||
| model = autoSAGE, | |||
| num_features = num_features, | |||
| optimizer = None, | |||
| lr = 1e-2, | |||
| max_epoch = 100, | |||
| early_stopping_round = 101, | |||
| weight_decay = 0.0, | |||
| device = "auto", | |||
| init = True, | |||
| feval = [Auc], | |||
| loss = "binary_cross_entropy_with_logits", | |||
| ) | |||
| train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g = split_train_test(graph.cpu()) | |||
| dataset = { | |||
| 'train': train_g.to(args.device), | |||
| 'train_pos': train_pos_g.to(args.device), | |||
| 'train_neg': train_neg_g.to(args.device), | |||
| 'test_pos': test_pos_g.to(args.device), | |||
| 'test_neg': test_neg_g.to(args.device), | |||
| } | |||
| trainer.train(dataset, True) | |||
| pre = trainer.evaluate(dataset, mask="test", feval=Auc) | |||
| print(pre.item()) | |||
| res = trainer.predict(dataset) | |||
| print(res) | |||
| exit(0) | |||
| else: | |||
| assert False | |||
| res = [] | |||
| for seed in tqdm(range(1234, 1234+args.repeat)): | |||
| # set random seed | |||
| random.seed(seed) | |||
| np.random.seed(seed) | |||
| torch.manual_seed(seed) | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.manual_seed(seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| torch.backends.cudnn.benchmark = False | |||
| graph = dataset[0].to(args.device) | |||
| num_features = graph.ndata['feat'].size(1) | |||
| if args.model == 'gcn': | |||
| model = AutoGCN | |||
| elif args.model == 'gat': | |||
| model = AutoGAT | |||
| elif args.model == 'sage': | |||
| automodel = AutoSAGE( | |||
| num_features=num_features, | |||
| num_classes=2, | |||
| device=args.device | |||
| ) | |||
| automodel.hyperparams = { | |||
| "num_layers": 3, | |||
| "hidden": [16, 16], | |||
| "dropout": 0.0, | |||
| "act": "relu", | |||
| "agg": "mean", | |||
| } | |||
| else: | |||
| assert False | |||
| automodel.initialize() | |||
| trainer = LinkPredictionTrainer( | |||
| model = automodel, | |||
| num_features = num_features, | |||
| optimizer = None, | |||
| lr = 1e-2, | |||
| max_epoch = 100, | |||
| early_stopping_round = 101, | |||
| weight_decay = 0.0, | |||
| device = "auto", | |||
| init = True, | |||
| feval = [Auc], | |||
| loss = "binary_cross_entropy_with_logits", | |||
| ) | |||
| train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g = split_train_test(graph.cpu()) | |||
| dataset_splitted = { | |||
| 'train': train_g.to(args.device), | |||
| 'train_pos': train_pos_g.to(args.device), | |||
| 'train_neg': train_neg_g.to(args.device), | |||
| 'test_pos': test_pos_g.to(args.device), | |||
| 'test_neg': test_neg_g.to(args.device), | |||
| } | |||
| trainer.train(dataset_splitted, False) | |||
| pre = trainer.evaluate(dataset_splitted, mask="test", feval=Auc) | |||
| result = pre.item() | |||
| res.append(result) | |||
| print(np.mean(res), np.std(res)) | |||
| exit(1) | |||
| # train | |||
| autoClassifier.fit( | |||
| @@ -0,0 +1,271 @@ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from tqdm import tqdm | |||
| # import autogl.module.train | |||
| # import torch_geometric | |||
| # exit(0) | |||
| # | |||
| from autogl.datasets import build_dataset_from_name | |||
| # from autogl.solver.classifier.link_predictor import AutoLinkPredictor | |||
| from autogl.module.train.evaluation import Auc | |||
| import yaml | |||
| import random | |||
| import torch | |||
| import numpy as np | |||
| import dgl | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| import itertools | |||
| import numpy as np | |||
| import scipy.sparse as sp | |||
| from autogl.module.model.dgl import AutoSAGE, AutoGAT, AutoGCN | |||
| from autogl.datasets.utils.conversion import general_static_graphs_to_dgl_dataset | |||
| def construct_negative_graph(graph, k): | |||
| src, dst = graph.edges() | |||
| neg_src = src.repeat_interleave(k) | |||
| neg_dst = torch.randint(0, graph.num_nodes(), (len(src) * k,)) | |||
| # return dgl.graph((neg_src, neg_dst), num_nodes=graph.num_nodes()).edges() | |||
| return neg_src, neg_dst | |||
| def negative_sample(data): | |||
| return construct_negative_graph(data, 5) | |||
| import autogl.datasets.utils as tmp_utils | |||
| tmp_utils.negative_sampling = negative_sample | |||
| from dgl.data import CoraGraphDataset, PubmedGraphDataset, CiteseerGraphDataset | |||
| from autogl.module.train.link_prediction_full import LinkPredictionTrainer | |||
| def setup_seed(seed): | |||
| torch.manual_seed(seed) | |||
| torch.cuda.manual_seed_all(seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| np.random.seed(seed) | |||
| random.seed(seed) | |||
| def split_train_test(g): | |||
| u, v = g.edges() | |||
| eids = np.arange(g.number_of_edges()) | |||
| eids = np.random.permutation(eids) | |||
| test_size = int(len(eids) * 0.1) | |||
| train_size = g.number_of_edges() - test_size | |||
| test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]] | |||
| train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]] | |||
| # Find all negative edges and split them for training and testing | |||
| adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy()))) | |||
| adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes()) | |||
| neg_u, neg_v = np.where(adj_neg != 0) | |||
| neg_eids = np.random.choice(len(neg_u), g.number_of_edges()) | |||
| test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]] | |||
| train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]] | |||
| train_g = dgl.remove_edges(g, eids[:test_size]) | |||
| train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes()) | |||
| train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes()) | |||
| test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes()) | |||
| test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes()) | |||
| return train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g | |||
| def split_train_valid_test(g): | |||
| u, v = g.edges() | |||
| eids = np.arange(g.number_of_edges()) | |||
| eids = np.random.permutation(eids) | |||
| valid_size = int(len(eids) * 0.1) | |||
| test_size = int(len(eids) * 0.1) | |||
| train_size = g.number_of_edges() - test_size - valid_size | |||
| test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]] | |||
| valid_pos_u, valid_pos_v = u[eids[test_size:test_size+valid_size]], v[eids[test_size:test_size+valid_size]] | |||
| train_pos_u, train_pos_v = u[eids[test_size+valid_size:]], v[eids[test_size+valid_size:]] | |||
| # Find all negative edges and split them for training and testing | |||
| adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy()))) | |||
| adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes()) | |||
| neg_u, neg_v = np.where(adj_neg != 0) | |||
| neg_eids = np.random.choice(len(neg_u), g.number_of_edges()) | |||
| test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]] | |||
| valid_neg_u, valid_neg_v = neg_u[neg_eids[test_size:test_size+valid_size]], neg_v[neg_eids[test_size:test_size+valid_size]] | |||
| train_neg_u, train_neg_v = neg_u[neg_eids[test_size+valid_size:]], neg_v[neg_eids[test_size+valid_size:]] | |||
| train_g = dgl.remove_edges(g, eids[:test_size+valid_size]) | |||
| train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes()) | |||
| train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes()) | |||
| valid_pos_g = dgl.graph((valid_pos_u, valid_pos_v), num_nodes=g.number_of_nodes()) | |||
| valid_neg_g = dgl.graph((valid_neg_u, valid_neg_v), num_nodes=g.number_of_nodes()) | |||
| test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes()) | |||
| test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes()) | |||
| return train_g, train_pos_g, train_neg_g, valid_pos_g, valid_neg_g, test_pos_g, test_neg_g | |||
| if __name__ == "__main__": | |||
| from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter | |||
| parser = ArgumentParser( | |||
| "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter | |||
| ) | |||
| parser.add_argument( | |||
| "--dataset", | |||
| default="Cora", | |||
| type=str, | |||
| help="dataset to use", | |||
| choices=[ | |||
| "Cora", | |||
| "CiteSeer", | |||
| "PubMed", | |||
| ], | |||
| ) | |||
| parser.add_argument( | |||
| "--model", | |||
| default="sage", | |||
| type=str, | |||
| help="model to use", | |||
| choices=[ | |||
| "gcn", | |||
| "gat", | |||
| "sage", | |||
| ], | |||
| ) | |||
| parser.add_argument("--seed", type=int, default=0, help="random seed") | |||
| parser.add_argument('--repeat', type=int, default=10) | |||
| parser.add_argument("--device", default=0, type=int, help="GPU device") | |||
| args = parser.parse_args() | |||
| args.device = torch.device('cuda:0') | |||
| device = torch.device('cuda:0') | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.set_device(args.device) | |||
| if args.dataset == 'Cora': | |||
| dataset = CoraGraphDataset() | |||
| elif args.dataset == 'CiteSeer': | |||
| dataset = CiteseerGraphDataset() | |||
| elif args.dataset == 'PubMed': | |||
| dataset = PubmedGraphDataset() | |||
| else: | |||
| assert False | |||
| dataset = build_dataset_from_name(args.dataset.lower()) | |||
| dataset = general_static_graphs_to_dgl_dataset(dataset) | |||
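| # These two lines rebuild ``dataset`` from AutoGL and convert it to DGL, superseding the raw DGL dataset loaded just above. | |||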
| res = [] | |||
| for seed in tqdm(range(1234, 1234+args.repeat)): | |||
| # set random seed | |||
| random.seed(seed) | |||
| np.random.seed(seed) | |||
| torch.manual_seed(seed) | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.manual_seed(seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| torch.backends.cudnn.benchmark = False | |||
| graph = dataset[0].to(args.device) | |||
| num_features = graph.ndata['feat'].size(1) | |||
| if args.model == 'gcn': | |||
| model = AutoGCN | |||
| elif args.model == 'gat': | |||
| model = AutoGAT | |||
| elif args.model == 'sage': | |||
| automodel = AutoSAGE( | |||
| num_features=num_features, | |||
| num_classes=2, | |||
| device=args.device | |||
| ) | |||
| automodel.hyperparams = { | |||
| "num_layers": 3, | |||
| "hidden": [16, 16], | |||
| "dropout": 0.0, | |||
| "act": "relu", | |||
| "agg": "mean", | |||
| } | |||
| else: | |||
| assert False | |||
| automodel.initialize() | |||
| trainer = LinkPredictionTrainer( | |||
| model = automodel, | |||
| num_features = num_features, | |||
| optimizer = None, | |||
| lr = 1e-2, | |||
| max_epoch = 100, | |||
| early_stopping_round = 101, | |||
| weight_decay = 0.0, | |||
| device = "auto", | |||
| init = True, | |||
| feval = [Auc], | |||
| loss = "binary_cross_entropy_with_logits", | |||
| ) | |||
| train_g, train_pos_g, train_neg_g, test_pos_g, test_neg_g = split_train_test(graph.cpu()) | |||
| dataset_splitted = { | |||
| 'train': train_g.to(args.device), | |||
| 'train_pos': train_pos_g.to(args.device), | |||
| 'train_neg': train_neg_g.to(args.device), | |||
| 'test_pos': test_pos_g.to(args.device), | |||
| 'test_neg': test_neg_g.to(args.device), | |||
| } | |||
| trainer.train(dataset_splitted, False) | |||
| pre = trainer.evaluate(dataset_splitted, mask="test", feval=Auc) | |||
| result = pre.item() | |||
| res.append(result) | |||
| print(np.mean(res), np.std(res)) | |||
| exit(1) | |||
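| # Everything below exit(1) is unreachable leftover from the solver-based variant (the AutoLinkPredictor import is commented out above). | |||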
| # train | |||
| autoClassifier.fit( | |||
| dataset, | |||
| time_limit=3600, | |||
| evaluation_method=[Auc], | |||
| seed=seed, | |||
| train_split=0.85, | |||
| val_split=0.05, | |||
| ) | |||
| autoClassifier.get_leaderboard().show() | |||
| # test | |||
| predict_result = autoClassifier.predict_proba() | |||
| pos_edge_index, neg_edge_index = ( | |||
| dataset[0].test_pos_edge_index, | |||
| dataset[0].test_neg_edge_index, | |||
| ) | |||
| E = pos_edge_index.size(1) + neg_edge_index.size(1) | |||
| link_labels = torch.zeros(E) | |||
| link_labels[: pos_edge_index.size(1)] = 1.0 | |||
| print( | |||
| "test auc: %.4f" | |||
| % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy())) | |||
| ) | |||
| """ | |||
| AUC 0.8151564430268863 | |||
| """ | |||