diff --git a/autogl/module/model/pyg/gin.py b/autogl/module/model/pyg/gin.py index a9e816d..6d5c8a9 100644 --- a/autogl/module/model/pyg/gin.py +++ b/autogl/module/model/pyg/gin.py @@ -152,12 +152,13 @@ class AutoGIN(BaseAutoModel): num_classes=None, device=None, init=False, - num_graph_features=None, + num_graph_features=0, **args ): super().__init__(num_features, num_classes, device, num_graph_features=num_graph_features, **args) - + self.num_graph_features = num_graph_features + self.hyper_parameter_space = [ { "parameterName": "num_layers", @@ -209,7 +210,7 @@ class AutoGIN(BaseAutoModel): } def from_hyper_parameter(self, hp, **kwargs): - return super().from_hyper_parameter(hp, num_graph_features=self.num_graph_features **kwargs) + return super().from_hyper_parameter(hp, num_graph_features=self.num_graph_features, **kwargs) def _initialize(self): # """Initialize model.""" diff --git a/autogl/module/train/link_prediction_full.py b/autogl/module/train/link_prediction_full.py index 139b37a..5128e34 100644 --- a/autogl/module/train/link_prediction_full.py +++ b/autogl/module/train/link_prediction_full.py @@ -29,6 +29,7 @@ class _DummyLinkModel(torch.nn.Module): def __init__(self, encoder, decoder): super().__init__() + self.automodelflag = isinstance(encoder, BaseAutoModel) if isinstance(encoder, BaseAutoModel): self.encoder = encoder.model self.decoder = None else: @@ -36,12 +37,14 @@ class _DummyLinkModel(torch.nn.Module): self.decoder = None if decoder is None else decoder.decoder def encode(self, data): - if isinstance(self.encoder, BaseAutoModel): + if self.automodelflag: return self.encoder.lp_encode(data) return self.encoder(data) def decode(self, features, data, pos_edges, neg_edges): - if isinstance(self.encoder, BaseAutoModel) or self.decoder is None: + if self.automodelflag: + return self.encoder.lp_decode(features, pos_edges, neg_edges) + if self.decoder is None: return features return self.decoder(features, data, pos_edges, neg_edges) diff --git 
a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 54e7cbc..b31353e 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -5,6 +5,7 @@ import time import json from copy import deepcopy +from typing import Sequence import torch import numpy as np @@ -592,6 +593,30 @@ class AutoGraphClassifier(BaseClassifier): ) return np.argmax(proba, axis=1) + def evaluate(self, dataset=None, + inplaced=False, + inplace=False, + use_ensemble=True, + use_best=True, + name=None, + mask="test", + label=None, + metric="acc" + ): + predicted = self.predict_proba(dataset, inplaced, inplace, use_ensemble, use_best, name, mask) + if dataset is None: + dataset = self.dataset + if label is None: + if mask == "all": + masked_dataset = dataset + else: + masked_dataset = utils.graph_get_split(dataset, mask, False) + label = np.array([d.data['y' if 'y' in d.data else 'label'].item() for d in masked_dataset]) + evaluator = get_feval(metric) + if isinstance(evaluator, Sequence): + return [evals.evaluate(predicted, label) for evals in evaluator] + return evaluator.evaluate(predicted, label) + @classmethod def from_config(cls, path_or_dict, filetype="auto") -> "AutoGraphClassifier": """ diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py index 1945b96..d6b6c47 100644 --- a/autogl/solver/classifier/link_predictor.py +++ b/autogl/solver/classifier/link_predictor.py @@ -1,10 +1,12 @@ """ Auto Classfier for Node Classification """ +import logging import time import json from copy import deepcopy +from typing import Sequence import torch import numpy as np @@ -24,6 +26,21 @@ from ...backend import DependentBackend LOGGER = get_logger("LinkPredictor") BACKEND = DependentBackend.get_backend_name() +def _negative_sample_dgl(train_graph, pos_graph): + import scipy.sparse as sp + import dgl + u, v = train_graph.edges() + up, vp = pos_graph.edges() + u_all, 
v_all = np.concatenate([u.numpy(), up.numpy()]), np.concatenate([v.numpy(), vp.numpy()]) + adj = sp.coo_matrix((np.ones(len(u_all)), (u_all, v_all))) + adj_neg = 1 - adj.todense() - np.eye(train_graph.number_of_nodes()) + neg_u, neg_v = np.where(adj_neg != 0) + + # sample negative edges + neg_eids = np.random.choice(len(neg_u), len(up)) + return dgl.DGLGraph((neg_u[neg_eids], neg_v[neg_eids]), num_nodes=train_graph.number_of_nodes()) + + class AutoLinkPredictor(BaseClassifier): """ Auto Link Predictor. @@ -642,6 +659,75 @@ class AutoLinkPredictor(BaseClassifier): ) return (proba > threshold).astype("int") + def evaluate(self, dataset=None, + inplaced=False, + inplace=False, + use_ensemble=True, + use_best=True, + name=None, + mask="test", + label=None, + metric="acc" + ): + if dataset is None: + dataset = self.dataset + assert dataset is not None, ( + "Please execute fit() first before" " predicting on remembered dataset" + ) + elif not inplaced and self.feature_module is not None: + if BACKEND == 'pyg': + dataset = self.feature_module.transform(dataset, inplace=inplace) + elif BACKEND == 'dgl': + import dgl + transformed = self.feature_module.transform([d[0] for d in dataset], inplace=inplace) + dataset = [[tran, None, None, None, None, d[1], d[2] if len(d) == 3 else dgl.DGLGraph()] for tran, d in zip(transformed, dataset)] + + graph = dataset[0] + mask2posid_dgl = {"train": 1, "val": 3, "test": 5} + mask2negid_dgl = {"train": 2, "val": 4, "test": 6} + if BACKEND == 'pyg' and not hasattr(graph, f"{mask}_neg_edge_index"): + from torch_geometric.utils import negative_sampling + logging.warning( + "No negative edges passed, will generate random negative edges instead." + " However, results may be inconsistent across different run." 
+ " Fix negative edges before passing the dataset is recommended" + ) + setattr(graph, f"{mask}_neg_edge_index", negative_sampling( + getattr(graph, f"{mask}_pos_edge_index"), graph.num_nodes + )) + elif BACKEND == 'dgl': + neg_graph = graph[{"train": 2, "val": 4, "test": 6}[mask]] + if neg_graph is None or len(neg_graph.edges()[0]) == 0: + logging.warning( + "No negative edges passed, will generate random negative edges instead." + " However, results may be inconsistent across different run." + " Fix negative edges before passing the dataset is recommended" + ) + neg_edges = _negative_sample_dgl(graph[0], graph[{"train": 1, "val": 3, "test": 5}[mask]]) + graph[{"train": 2, "val": 4, "test": 6}[mask]] = neg_edges + + predicted = self.predict_proba(dataset, inplaced, inplace, use_ensemble, use_best, name, mask) + if label is None: + if BACKEND == 'pyg': + pos_edge_index, neg_edge_index = ( + getattr(dataset[0], f"{mask}_pos_edge_index"), + getattr(dataset[0], f"{mask}_neg_edge_index"), + ) + elif BACKEND == 'dgl': + pos_edge_index, neg_edge_index = ( + torch.stack(dataset[0][mask2posid_dgl[mask]].edges()), + torch.stack(dataset[0][mask2negid_dgl[mask]].edges()) + ) + E = pos_edge_index.size(1) + neg_edge_index.size(1) + label = torch.zeros(E, dtype=torch.float) + label[: pos_edge_index.size(1)] = 1.0 + label = label.cpu().numpy() + evaluator = get_feval(metric) + if isinstance(evaluator, Sequence): + return [evals.evaluate(predicted, label) for evals in evaluator] + return evaluator.evaluate(predicted, label) + + @classmethod def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor": """ diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index e251e12..1c3135f 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -670,6 +670,27 @@ class AutoNodeClassifier(BaseClassifier): ) return np.argmax(proba, axis=1) + def evaluate(self, 
dataset=None, + inplaced=False, + inplace=False, + use_ensemble=True, + use_best=True, + name=None, + mask="test", + label=None, + metric="acc" + ): + predicted = self.predict_proba(dataset, inplaced, inplace, use_ensemble, use_best, name, mask) + if dataset is None: + dataset = self.dataset + if label is None: + _node = dataset[0].nodes.data + label = _node['y' if 'y' in _node else 'label'][_node[f'{mask}_mask']].cpu().numpy() + evaluator = get_feval(metric) + if isinstance(evaluator, Sequence): + return [evals.evaluate(predicted, label) for evals in evaluator] + return evaluator.evaluate(predicted, label) + @classmethod def from_config(cls, path_or_dict, filetype="auto") -> "AutoNodeClassifier": """ diff --git a/configs/lp_gcn_benchmark.yml b/configs/lp_gcn_benchmark.yml index 8fb9d8a..8c2f0bb 100644 --- a/configs/lp_gcn_benchmark.yml +++ b/configs/lp_gcn_benchmark.yml @@ -1,7 +1,7 @@ ensemble: name: null feature: -- name: PYGNormalizeFeatures +- name: NormalizeFeatures hpo: max_evals: 10 name: random @@ -36,7 +36,7 @@ models: - tanh parameterName: act type: CATEGORICAL - name: gcn + name: gcn-model trainer: hp_space: - maxValue: 150 diff --git a/examples/graph_classification.py b/examples/graph_classification.py index e7d3a49..bc6bbfd 100644 --- a/examples/graph_classification.py +++ b/examples/graph_classification.py @@ -85,13 +85,5 @@ if __name__ == "__main__": print("best single model:\n", autoClassifier.get_leaderboard().get_best_model(0)) # test - predict_result = autoClassifier.predict_proba() - print( - "test acc %.4f" - % ( - Acc.evaluate( - predict_result, - np.array([d.data["y" if backend == "pyg" else "label"] for d in dataset.test_split]), - ) - ) - ) + acc = autoClassifier.evaluate(metric="acc") + print("test acc {:.4f}".format(acc)) diff --git a/examples/graphnas.py b/examples/graphnas.py index 503f573..2c49545 100644 --- a/examples/graphnas.py +++ b/examples/graphnas.py @@ -1,6 +1,5 @@ from autogl.datasets import build_dataset_from_name from 
autogl.solver import AutoNodeClassifier -from autogl.module.train import Acc from autogl.solver.utils import set_seed import argparse from autogl.backend import DependentBackend @@ -18,5 +17,5 @@ if __name__ == '__main__': solver = AutoNodeClassifier.from_config(args.config) solver.fit(dataset) solver.get_leaderboard().show() - out = solver.predict_proba() - print('acc on dataset', Acc.evaluate(out, label)) + acc = solver.evaluate(metric="acc") + print('acc on dataset', acc) diff --git a/examples/link_prediction.py b/examples/link_prediction.py index abeafcf..7a8ccad 100644 --- a/examples/link_prediction.py +++ b/examples/link_prediction.py @@ -73,19 +73,5 @@ if __name__ == "__main__": ) autoClassifier.get_leaderboard().show() - # todo: move the test logic to solver, make solver handle this - # BUG: fix this under dgl backend - predict_result = autoClassifier.predict_proba() - - pos_edge_index, neg_edge_index = ( - dataset[0].test_pos_edge_index, - dataset[0].test_neg_edge_index, - ) - E = pos_edge_index.size(1) + neg_edge_index.size(1) - link_labels = torch.zeros(E) - link_labels[: pos_edge_index.size(1)] = 1.0 - - print( - "test auc: %.4f" - % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy())) - ) + auc = autoClassifier.evaluate(metric="auc") + print("test auc: {:.4f}".format(auc)) diff --git a/examples/node_classification.py b/examples/node_classification.py index e3738a7..f732550 100644 --- a/examples/node_classification.py +++ b/examples/node_classification.py @@ -79,10 +79,5 @@ if __name__ == "__main__": balanced=False, ) autoClassifier.get_leaderboard().show() - - # test - predict_result = autoClassifier.predict_proba() - print( - "test acc: %.4f" - % (Acc.evaluate(predict_result, label[dataset[0].nodes.data["test_mask"]].cpu().numpy())) - ) + acc = autoClassifier.evaluate(metric="acc") + print("test acc: {:.4f}".format(acc))