@@ -152,12 +152,13 @@ class AutoGIN(BaseAutoModel):
         num_classes=None,
         device=None,
         init=False,
-        num_graph_features=None,
+        num_graph_features=0,
         **args
     ):
         super().__init__(num_features, num_classes, device, num_graph_features=num_graph_features, **args)
+        self.num_graph_features = num_graph_features
         self.hyper_parameter_space = [
             {
                 "parameterName": "num_layers",
@@ -209,7 +210,7 @@ class AutoGIN(BaseAutoModel):
             }
 
     def from_hyper_parameter(self, hp, **kwargs):
-        return super().from_hyper_parameter(hp, num_graph_features=self.num_graph_features **kwargs)
+        return super().from_hyper_parameter(hp, num_graph_features=self.num_graph_features, **kwargs)
 
     def _initialize(self):
         # """Initialize model."""
@@ -29,6 +29,7 @@ class _DummyLinkModel(torch.nn.Module):
     def __init__(self, encoder, decoder):
         super().__init__()
         if isinstance(encoder, BaseAutoModel):
+            self.automodelflag = True
             self.encoder = encoder.model
             self.decoder = None
         else:
@@ -36,12 +37,14 @@ class _DummyLinkModel(torch.nn.Module):
             self.decoder = None if decoder is None else decoder.decoder
 
     def encode(self, data):
-        if isinstance(self.encoder, BaseAutoModel):
+        if self.automodelflag:
             return self.encoder.lp_encode(data)
         return self.encoder(data)
 
     def decode(self, features, data, pos_edges, neg_edges):
-        if isinstance(self.encoder, BaseAutoModel) or self.decoder is None:
+        if self.automodelflag:
             return self.encoder.lp_decode(features, pos_edges, neg_edges)
+        if self.decoder is None:
+            return features
         return self.decoder(features, data, pos_edges, neg_edges)
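The cached automodelflag above matters because __init__ stores encoder.model (a plain torch module) as self.encoder, so the old isinstance(self.encoder, BaseAutoModel) checks in encode/decode could never succeed. A tiny illustration with placeholder classes (not AutoGL code):

class PlainModule:
    pass

class FakeAutoModel:
    def __init__(self):
        self.model = PlainModule()         # stand-in for the wrapped torch module

wrapper = FakeAutoModel()
stored = wrapper.model                     # what _DummyLinkModel keeps as self.encoder
print(isinstance(wrapper, FakeAutoModel))  # True
print(isinstance(stored, FakeAutoModel))   # False -- the old check always took this branch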
@@ -5,6 +5,7 @@ import time
 import json
 from copy import deepcopy
+from typing import Sequence
 
 import torch
 import numpy as np
@@ -592,6 +593,30 @@ class AutoGraphClassifier(BaseClassifier):
         )
         return np.argmax(proba, axis=1)
 
+    def evaluate(self, dataset=None,
+                 inplaced=False,
+                 inplace=False,
+                 use_ensemble=True,
+                 use_best=True,
+                 name=None,
+                 mask="test",
+                 label=None,
+                 metric="acc"
+                 ):
+        predicted = self.predict_proba(dataset, inplaced, inplace, use_ensemble, use_best, name, mask)
+        if dataset is None:
+            dataset = self.dataset
+        if label is None:
+            if mask == "all":
+                masked_dataset = dataset
+            else:
+                masked_dataset = utils.graph_get_split(dataset, mask, False)
+            label = np.array([d.data['y' if 'y' in d.data else 'label'].item() for d in masked_dataset])
+        evaluator = get_feval(metric)
+        if isinstance(evaluator, Sequence):
+            return [evals.evaluate(predicted, label) for evals in evaluator]
+        return evaluator.evaluate(predicted, label)
+
     @classmethod
     def from_config(cls, path_or_dict, filetype="auto") -> "AutoGraphClassifier":
         """
@@ -1,10 +1,12 @@
 """
 Auto Classfier for Node Classification
 """
 import logging
 import time
 import json
 from copy import deepcopy
+from typing import Sequence
 
+import torch
 import numpy as np
@@ -24,6 +26,21 @@ from ...backend import DependentBackend
 LOGGER = get_logger("LinkPredictor")
 BACKEND = DependentBackend.get_backend_name()
 
+
+def _negative_sample_dgl(train_graph, pos_graph):
+    import scipy.sparse as sp
+    import dgl
+    u, v = train_graph.edges()
+    up, vp = pos_graph.edges()
+    u_all, v_all = np.concatenate([u.numpy(), up.numpy()]), np.concatenate([v.numpy(), vp.numpy()])
+    adj = sp.coo_matrix((np.ones(len(u_all)), (u_all, v_all)))
+    adj_neg = 1 - adj.todense() - np.eye(train_graph.number_of_nodes())
+    neg_u, neg_v = np.where(adj_neg != 0)
+    # sample as many negative edges as there are positive edges
+    neg_eids = np.random.choice(len(neg_u), len(up))
+    return dgl.DGLGraph((neg_u[neg_eids], neg_v[neg_eids]), num_nodes=train_graph.number_of_nodes())
+
+
 class AutoLinkPredictor(BaseClassifier):
     """
     Auto Link Predictor.
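_negative_sample_dgl follows the dense complement-adjacency recipe: any node pair that is neither an observed edge nor a self-loop is a candidate, and as many candidates are drawn as there are positive edges. A standalone toy illustration with numpy/scipy only (no DGL; the 4-node graph is made up):

import numpy as np
import scipy.sparse as sp

num_nodes = 4
u = np.array([0, 1, 2])   # observed edges 0->1, 1->2, 2->3
v = np.array([1, 2, 3])

adj = sp.coo_matrix((np.ones(len(u)), (u, v)), shape=(num_nodes, num_nodes))
adj_neg = 1 - np.asarray(adj.todense()) - np.eye(num_nodes)    # 1 where no edge and no self-loop
neg_u, neg_v = np.where(adj_neg != 0)

neg_eids = np.random.choice(len(neg_u), len(u), replace=False)  # one negative per positive edge
print(list(zip(neg_u[neg_eids].tolist(), neg_v[neg_eids].tolist())))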
@@ -642,6 +659,75 @@ class AutoLinkPredictor(BaseClassifier):
         )
         return (proba > threshold).astype("int")
 
+    def evaluate(self, dataset=None,
+                 inplaced=False,
+                 inplace=False,
+                 use_ensemble=True,
+                 use_best=True,
+                 name=None,
+                 mask="test",
+                 label=None,
+                 metric="acc"
+                 ):
+        if dataset is None:
+            dataset = self.dataset
+            assert dataset is not None, (
+                "Please execute fit() first before evaluating on the remembered dataset"
+            )
+        elif not inplaced and self.feature_module is not None:
+            if BACKEND == 'pyg':
+                dataset = self.feature_module.transform(dataset, inplace=inplace)
+            elif BACKEND == 'dgl':
+                import dgl
+                transformed = self.feature_module.transform([d[0] for d in dataset], inplace=inplace)
+                dataset = [[tran, None, None, None, None, d[1], d[2] if len(d) == 3 else dgl.DGLGraph()] for tran, d in zip(transformed, dataset)]
+
+        graph = dataset[0]
+        mask2posid_dgl = {"train": 1, "val": 3, "test": 5}
+        mask2negid_dgl = {"train": 2, "val": 4, "test": 6}
+        if BACKEND == 'pyg' and not hasattr(graph, f"{mask}_neg_edge_index"):
+            from torch_geometric.utils import negative_sampling
+            logging.warn(
+                "No negative edges passed, will generate random negative edges instead."
+                " However, results may be inconsistent across different runs."
+                " Fixing the negative edges before passing the dataset is recommended."
+            )
+            setattr(graph, f"{mask}_neg_edge_index", negative_sampling(
+                getattr(graph, f"{mask}_pos_edge_index"), graph.num_nodes
+            ))
+        elif BACKEND == 'dgl':
+            neg_graph = graph[mask2negid_dgl[mask]]
+            if neg_graph is None or len(neg_graph.edges()[0]) == 0:
+                logging.warn(
+                    "No negative edges passed, will generate random negative edges instead."
+                    " However, results may be inconsistent across different runs."
+                    " Fixing the negative edges before passing the dataset is recommended."
+                )
+                neg_edges = _negative_sample_dgl(graph[0], graph[mask2posid_dgl[mask]])
+                graph[mask2negid_dgl[mask]] = neg_edges
+
+        predicted = self.predict_proba(dataset, inplaced, inplace, use_ensemble, use_best, name, mask)
+        if label is None:
+            if BACKEND == 'pyg':
+                pos_edge_index, neg_edge_index = (
+                    getattr(dataset[0], f"{mask}_pos_edge_index"),
+                    getattr(dataset[0], f"{mask}_neg_edge_index"),
+                )
+            elif BACKEND == 'dgl':
+                pos_edge_index, neg_edge_index = (
+                    torch.stack(dataset[0][mask2posid_dgl[mask]].edges()),
+                    torch.stack(dataset[0][mask2negid_dgl[mask]].edges())
+                )
+            E = pos_edge_index.size(1) + neg_edge_index.size(1)
+            label = torch.zeros(E, dtype=torch.float)
+            label[: pos_edge_index.size(1)] = 1.0
+            label = label.cpu().numpy()
+
+        evaluator = get_feval(metric)
+        if isinstance(evaluator, Sequence):
+            return [evals.evaluate(predicted, label) for evals in evaluator]
+        return evaluator.evaluate(predicted, label)
+
     @classmethod
     def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor":
         """
@@ -670,6 +670,27 @@ class AutoNodeClassifier(BaseClassifier):
         )
         return np.argmax(proba, axis=1)
 
+    def evaluate(self, dataset=None,
+                 inplaced=False,
+                 inplace=False,
+                 use_ensemble=True,
+                 use_best=True,
+                 name=None,
+                 mask="test",
+                 label=None,
+                 metric="acc"
+                 ):
+        predicted = self.predict_proba(dataset, inplaced, inplace, use_ensemble, use_best, name, mask)
+        if dataset is None:
+            dataset = self.dataset
+        if label is None:
+            _node = dataset[0].nodes.data
+            label = _node['y' if 'y' in _node else 'label'][_node[f'{mask}_mask']].cpu().numpy()
+        evaluator = get_feval(metric)
+        if isinstance(evaluator, Sequence):
+            return [evals.evaluate(predicted, label) for evals in evaluator]
+        return evaluator.evaluate(predicted, label)
+
     @classmethod
     def from_config(cls, path_or_dict, filetype="auto") -> "AutoNodeClassifier":
         """
@@ -1,7 +1,7 @@
 ensemble:
   name: null
 feature:
-  - name: PYGNormalizeFeatures
+  - name: NormalizeFeatures
 hpo:
   max_evals: 10
   name: random
@@ -36,7 +36,7 @@ models:
     - tanh
     parameterName: act
     type: CATEGORICAL
-  name: gcn
+  name: gcn-model
 trainer:
   hp_space:
   - maxValue: 150
@@ -85,13 +85,5 @@ if __name__ == "__main__":
     print("best single model:\n", autoClassifier.get_leaderboard().get_best_model(0))
 
     # test
-    predict_result = autoClassifier.predict_proba()
-    print(
-        "test acc %.4f"
-        % (
-            Acc.evaluate(
-                predict_result,
-                np.array([d.data["y" if backend == "pyg" else "label"] for d in dataset.test_split]),
-            )
-        )
-    )
+    acc = autoClassifier.evaluate(metric="acc")
+    print("test acc {:.4f}".format(acc))
@@ -1,6 +1,5 @@
 from autogl.datasets import build_dataset_from_name
 from autogl.solver import AutoNodeClassifier
-from autogl.module.train import Acc
 from autogl.solver.utils import set_seed
 import argparse
 from autogl.backend import DependentBackend
@@ -18,5 +17,5 @@ if __name__ == '__main__':
     solver = AutoNodeClassifier.from_config(args.config)
     solver.fit(dataset)
     solver.get_leaderboard().show()
-    out = solver.predict_proba()
-    print('acc on dataset', Acc.evaluate(out, label))
+    acc = solver.evaluate(metric="acc")
+    print('acc on dataset', acc)
@@ -73,19 +73,5 @@ if __name__ == "__main__":
     )
     autoClassifier.get_leaderboard().show()
 
-    # todo: move the test logic to solver, make solver handle this
-    # BUG: fix this under dgl backend
-    predict_result = autoClassifier.predict_proba()
-    pos_edge_index, neg_edge_index = (
-        dataset[0].test_pos_edge_index,
-        dataset[0].test_neg_edge_index,
-    )
-    E = pos_edge_index.size(1) + neg_edge_index.size(1)
-    link_labels = torch.zeros(E)
-    link_labels[: pos_edge_index.size(1)] = 1.0
-    print(
-        "test auc: %.4f"
-        % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy()))
-    )
+    auc = autoClassifier.evaluate(metric="auc")
+    print("test auc: {:.4f}".format(auc))
@@ -79,10 +79,5 @@ if __name__ == "__main__":
         balanced=False,
     )
     autoClassifier.get_leaderboard().show()
-    # test
-    predict_result = autoClassifier.predict_proba()
-    print(
-        "test acc: %.4f"
-        % (Acc.evaluate(predict_result, label[dataset[0].nodes.data["test_mask"]].cpu().numpy()))
-    )
+    acc = autoClassifier.evaluate(metric="acc")
+    print("test acc: {:.4f}".format(acc))