Browse Source

add evaluate function to solver

tags/v0.3.1
Frozenmad 4 years ago
parent
commit
6a3af98007
10 changed files with 151 additions and 43 deletions
  1. +4
    -3
      autogl/module/model/pyg/gin.py
  2. +5
    -2
      autogl/module/train/link_prediction_full.py
  3. +25
    -0
      autogl/solver/classifier/graph_classifier.py
  4. +86
    -0
      autogl/solver/classifier/link_predictor.py
  5. +21
    -0
      autogl/solver/classifier/node_classifier.py
  6. +2
    -2
      configs/lp_gcn_benchmark.yml
  7. +2
    -10
      examples/graph_classification.py
  8. +2
    -3
      examples/graphnas.py
  9. +2
    -16
      examples/link_prediction.py
  10. +2
    -7
      examples/node_classification.py

+ 4
- 3
autogl/module/model/pyg/gin.py View File

@@ -152,12 +152,13 @@ class AutoGIN(BaseAutoModel):
num_classes=None,
device=None,
init=False,
num_graph_features=None,
num_graph_features=0,
**args
):

super().__init__(num_features, num_classes, device, num_graph_features=num_graph_features, **args)
self.num_graph_features = num_graph_features

self.hyper_parameter_space = [
{
"parameterName": "num_layers",
@@ -209,7 +210,7 @@ class AutoGIN(BaseAutoModel):
}

def from_hyper_parameter(self, hp, **kwargs):
return super().from_hyper_parameter(hp, num_graph_features=self.num_graph_features **kwargs)
return super().from_hyper_parameter(hp, num_graph_features=self.num_graph_features, **kwargs)

def _initialize(self):
# """Initialize model."""


+ 5
- 2
autogl/module/train/link_prediction_full.py View File

@@ -29,6 +29,7 @@ class _DummyLinkModel(torch.nn.Module):
def __init__(self, encoder, decoder):
super().__init__()
if isinstance(encoder, BaseAutoModel):
self.automodelflag = True
self.encoder = encoder.model
self.decoder = None
else:
@@ -36,12 +37,14 @@ class _DummyLinkModel(torch.nn.Module):
self.decoder = None if decoder is None else decoder.decoder
def encode(self, data):
if isinstance(self.encoder, BaseAutoModel):
if self.automodelflag:
return self.encoder.lp_encode(data)
return self.encoder(data)
def decode(self, features, data, pos_edges, neg_edges):
if isinstance(self.encoder, BaseAutoModel) or self.decoder is None:
if self.automodelflag:
return self.encoder.lp_decode(features, pos_edges, neg_edges)
if self.decoder is None:
return features
return self.decoder(features, data, pos_edges, neg_edges)



+ 25
- 0
autogl/solver/classifier/graph_classifier.py View File

@@ -5,6 +5,7 @@ import time
import json

from copy import deepcopy
from typing import Sequence

import torch
import numpy as np
@@ -592,6 +593,30 @@ class AutoGraphClassifier(BaseClassifier):
)
return np.argmax(proba, axis=1)

def evaluate(
    self,
    dataset=None,
    inplaced=False,
    inplace=False,
    use_ensemble=True,
    use_best=True,
    name=None,
    mask="test",
    label=None,
    metric="acc"
):
    """
    Score the solver's graph-level predictions with one or more metrics.

    Parameters
    ----------
    dataset:
        Dataset to evaluate on. It is forwarded unchanged to
        ``predict_proba``; when ``None``, ``self.dataset`` is used to look
        up the ground-truth labels.
    inplaced, inplace, use_ensemble, use_best, name:
        Forwarded positionally to ``predict_proba``.
    mask: str
        Split to evaluate. ``"all"`` takes labels from every graph of the
        dataset; any other value is resolved through
        ``utils.graph_get_split``.
    label:
        Ground-truth labels. When ``None`` they are read from each graph's
        ``data`` mapping, using key ``'y'`` if present and ``'label'``
        otherwise.
    metric:
        Metric specification handed to ``get_feval``.

    Returns
    -------
    The value of ``evaluate(predicted, label)`` of the resolved evaluator,
    or a list of such values when ``get_feval`` returns a sequence.
    """
    proba = self.predict_proba(
        dataset, inplaced, inplace, use_ensemble, use_best, name, mask
    )
    target = self.dataset if dataset is None else dataset

    if label is None:
        if mask == "all":
            graphs = target
        else:
            graphs = utils.graph_get_split(target, mask, False)
        # One scalar label per graph, collected in iteration order.
        collected = []
        for graph in graphs:
            key = 'y' if 'y' in graph.data else 'label'
            collected.append(graph.data[key].item())
        label = np.array(collected)

    feval = get_feval(metric)
    if not isinstance(feval, Sequence):
        return feval.evaluate(proba, label)
    return [single.evaluate(proba, label) for single in feval]

@classmethod
def from_config(cls, path_or_dict, filetype="auto") -> "AutoGraphClassifier":
"""


+ 86
- 0
autogl/solver/classifier/link_predictor.py View File

@@ -1,10 +1,12 @@
"""
Auto Classifier for Link Prediction
"""
import logging
import time
import json

from copy import deepcopy
from typing import Sequence

import torch
import numpy as np
@@ -24,6 +26,21 @@ from ...backend import DependentBackend
LOGGER = get_logger("LinkPredictor")
BACKEND = DependentBackend.get_backend_name()

def _negative_sample_dgl(train_graph, pos_graph):
    """
    Randomly sample negative (absent) edges for a DGL link-prediction split.

    A node pair is a negative candidate when it is not an edge of
    ``train_graph``, not an edge of ``pos_graph`` and not a self-loop.

    Parameters
    ----------
    train_graph:
        Graph whose ``edges()`` are treated as existing edges and whose
        ``number_of_nodes()`` fixes the node count of the result.
    pos_graph:
        Graph holding the positive edges of the split; as many negative
        edges are drawn as this graph has edges.

    Returns
    -------
    A ``dgl.DGLGraph`` over the same number of nodes containing the sampled
    negative edges. Candidates are drawn with replacement
    (``np.random.choice`` default), so the result may repeat an edge.
    """
    import scipy.sparse as sp
    import dgl

    num_nodes = train_graph.number_of_nodes()
    u, v = train_graph.edges()
    up, vp = pos_graph.edges()
    # .cpu() keeps this working when the graphs live on an accelerator.
    u_all = np.concatenate([u.cpu().numpy(), up.cpu().numpy()])
    v_all = np.concatenate([v.cpu().numpy(), vp.cpu().numpy()])

    # Pin the shape explicitly: otherwise it is inferred from the largest
    # node id that has an edge and may be smaller than the node count.
    adj = sp.coo_matrix(
        (np.ones(len(u_all)), (u_all, v_all)), shape=(num_nodes, num_nodes)
    )
    # Duplicate COO entries are summed when densified, so compare with > 0
    # instead of relying on the entries being exactly 1.
    occupied = adj.toarray() > 0
    np.fill_diagonal(occupied, True)  # never propose self-loops
    neg_u, neg_v = np.where(~occupied)

    # sample negative edges: one per positive edge
    neg_eids = np.random.choice(len(neg_u), len(up))
    # Index with the sampled ids; slicing with an array raises TypeError.
    return dgl.DGLGraph((neg_u[neg_eids], neg_v[neg_eids]), num_nodes=num_nodes)


class AutoLinkPredictor(BaseClassifier):
"""
Auto Link Predictor.
@@ -642,6 +659,75 @@ class AutoLinkPredictor(BaseClassifier):
)
return (proba > threshold).astype("int")

def evaluate(
    self,
    dataset=None,
    inplaced=False,
    inplace=False,
    use_ensemble=True,
    use_best=True,
    name=None,
    mask="test",
    label=None,
    metric="acc"
):
    """
    Score the solver's link predictions with one or more metrics.

    Parameters
    ----------
    dataset:
        Dataset to evaluate on. When ``None`` the remembered
        ``self.dataset`` is used.
    inplaced: bool
        Whether the passed dataset has already been processed by the
        feature module. When ``False`` and a feature module exists, the
        passed dataset is transformed here before prediction.
    inplace: bool
        Forwarded to ``feature_module.transform`` and ``predict_proba``.
    use_ensemble, use_best, name:
        Forwarded positionally to ``predict_proba``.
    mask: str
        Split to evaluate. The DGL backend looks the split up in a
        ``train`` / ``val`` / ``test`` table; the PyG backend reads the
        ``{mask}_pos_edge_index`` / ``{mask}_neg_edge_index`` attributes.
    label:
        Ground-truth link labels. When ``None`` they are built as ones for
        every positive edge followed by zeros for every negative edge.
    metric:
        Metric specification handed to ``get_feval``.

    Returns
    -------
    The value of ``evaluate(predicted, label)`` of the resolved evaluator,
    or a list of such values when ``get_feval`` returns a sequence.

    Raises
    ------
    AssertionError
        If no dataset is passed and none has been remembered.
    """
    if dataset is None:
        dataset = self.dataset
        assert dataset is not None, (
            "Please execute fit() first before" " predicting on remembered dataset"
        )
    elif not inplaced and self.feature_module is not None:
        if BACKEND == 'pyg':
            dataset = self.feature_module.transform(dataset, inplace=inplace)
        elif BACKEND == 'dgl':
            import dgl
            transformed = self.feature_module.transform(
                [d[0] for d in dataset], inplace=inplace
            )
            dataset = [
                [tran, None, None, None, None, d[1], d[2] if len(d) == 3 else dgl.DGLGraph()]
                for tran, d in zip(transformed, dataset)
            ]

    graph = dataset[0]
    # Positions of the positive / negative edge graphs of each split inside
    # a DGL sample.
    mask2posid_dgl = {"train": 1, "val": 3, "test": 5}
    mask2negid_dgl = {"train": 2, "val": 4, "test": 6}
    missing_negatives_message = (
        "No negative edges passed, will generate random negative edges instead."
        " However, results may be inconsistent across different run."
        " Fix negative edges before passing the dataset is recommended"
    )

    if BACKEND == 'pyg':
        if not hasattr(graph, f"{mask}_neg_edge_index"):
            from torch_geometric.utils import negative_sampling
            # logging.warn is a deprecated alias (removed in Python 3.13).
            logging.warning(missing_negatives_message)
            setattr(graph, f"{mask}_neg_edge_index", negative_sampling(
                getattr(graph, f"{mask}_pos_edge_index"), graph.num_nodes
            ))
    elif BACKEND == 'dgl':
        neg_graph = graph[mask2negid_dgl[mask]]
        if neg_graph is None or len(neg_graph.edges()[0]) == 0:
            logging.warning(missing_negatives_message)
            graph[mask2negid_dgl[mask]] = _negative_sample_dgl(
                graph[0], graph[mask2posid_dgl[mask]]
            )

    # Feature engineering has been applied above (or was declared done by
    # the caller, or the remembered dataset is in use), so mark the dataset
    # as inplaced to keep the feature module from running a second time.
    # NOTE(review): assumes predict_proba uses `inplaced` only to decide
    # whether to run the feature module - confirm against predict_proba.
    predicted = self.predict_proba(
        dataset, True, inplace, use_ensemble, use_best, name, mask
    )

    if label is None:
        if BACKEND == 'pyg':
            pos_edge_index, neg_edge_index = (
                getattr(graph, f"{mask}_pos_edge_index"),
                getattr(graph, f"{mask}_neg_edge_index"),
            )
        elif BACKEND == 'dgl':
            # Read the edges from the graph that was actually evaluated
            # (including freshly sampled negatives), not from self.dataset.
            pos_edge_index, neg_edge_index = (
                torch.stack(graph[mask2posid_dgl[mask]].edges()),
                torch.stack(graph[mask2negid_dgl[mask]].edges()),
            )
        # Positive edges are labelled 1 and come first, negatives 0.
        num_pos = pos_edge_index.size(1)
        label = torch.zeros(num_pos + neg_edge_index.size(1), dtype=torch.float)
        label[:num_pos] = 1.0
        label = label.cpu().numpy()

    evaluator = get_feval(metric)
    if isinstance(evaluator, Sequence):
        return [evals.evaluate(predicted, label) for evals in evaluator]
    return evaluator.evaluate(predicted, label)


@classmethod
def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor":
"""


+ 21
- 0
autogl/solver/classifier/node_classifier.py View File

@@ -670,6 +670,27 @@ class AutoNodeClassifier(BaseClassifier):
)
return np.argmax(proba, axis=1)

def evaluate(
    self,
    dataset=None,
    inplaced=False,
    inplace=False,
    use_ensemble=True,
    use_best=True,
    name=None,
    mask="test",
    label=None,
    metric="acc"
):
    """
    Score the solver's node-level predictions with one or more metrics.

    Parameters
    ----------
    dataset:
        Dataset to evaluate on. It is forwarded unchanged to
        ``predict_proba``; when ``None``, ``self.dataset`` is used to look
        up the ground-truth labels.
    inplaced, inplace, use_ensemble, use_best, name:
        Forwarded positionally to ``predict_proba``.
    mask: str
        Split to evaluate. Labels are selected with the node data entry
        named ``"{mask}_mask"`` (e.g. ``"test_mask"``).
    label:
        Ground-truth labels. When ``None`` they are read from the node data
        of the first graph, using key ``'y'`` if present and ``'label'``
        otherwise.
    metric:
        Metric specification handed to ``get_feval``.

    Returns
    -------
    The value of ``evaluate(predicted, label)`` of the resolved evaluator,
    or a list of such values when ``get_feval`` returns a sequence.
    """
    predicted = self.predict_proba(
        dataset, inplaced, inplace, use_ensemble, use_best, name, mask
    )
    if dataset is None:
        dataset = self.dataset
    if label is None:
        node_data = dataset[0].nodes.data
        label_key = 'y' if 'y' in node_data else 'label'
        # Use the labels of the split that was predicted on, not always
        # the test split.
        label = node_data[label_key][node_data[f"{mask}_mask"]].cpu().numpy()
    evaluator = get_feval(metric)
    if isinstance(evaluator, Sequence):
        return [evals.evaluate(predicted, label) for evals in evaluator]
    return evaluator.evaluate(predicted, label)

@classmethod
def from_config(cls, path_or_dict, filetype="auto") -> "AutoNodeClassifier":
"""


+ 2
- 2
configs/lp_gcn_benchmark.yml View File

@@ -1,7 +1,7 @@
ensemble:
name: null
feature:
- name: PYGNormalizeFeatures
- name: NormalizeFeatures
hpo:
max_evals: 10
name: random
@@ -36,7 +36,7 @@ models:
- tanh
parameterName: act
type: CATEGORICAL
name: gcn
name: gcn-model
trainer:
hp_space:
- maxValue: 150


+ 2
- 10
examples/graph_classification.py View File

@@ -85,13 +85,5 @@ if __name__ == "__main__":
print("best single model:\n", autoClassifier.get_leaderboard().get_best_model(0))

# test
predict_result = autoClassifier.predict_proba()
print(
"test acc %.4f"
% (
Acc.evaluate(
predict_result,
np.array([d.data["y" if backend == "pyg" else "label"] for d in dataset.test_split]),
)
)
)
acc = autoClassifier.evaluate(metric="acc")
print("test acc {:.4f}".format(acc))

+ 2
- 3
examples/graphnas.py View File

@@ -1,6 +1,5 @@
from autogl.datasets import build_dataset_from_name
from autogl.solver import AutoNodeClassifier
from autogl.module.train import Acc
from autogl.solver.utils import set_seed
import argparse
from autogl.backend import DependentBackend
@@ -18,5 +17,5 @@ if __name__ == '__main__':
solver = AutoNodeClassifier.from_config(args.config)
solver.fit(dataset)
solver.get_leaderboard().show()
out = solver.predict_proba()
print('acc on dataset', Acc.evaluate(out, label))
acc = solver.evaluate(metric="acc")
print('acc on dataset', acc)

+ 2
- 16
examples/link_prediction.py View File

@@ -73,19 +73,5 @@ if __name__ == "__main__":
)
autoClassifier.get_leaderboard().show()

# todo: move the test logic to solver, make solver handle this
# BUG: fix this under dgl backend
predict_result = autoClassifier.predict_proba()

pos_edge_index, neg_edge_index = (
dataset[0].test_pos_edge_index,
dataset[0].test_neg_edge_index,
)
E = pos_edge_index.size(1) + neg_edge_index.size(1)
link_labels = torch.zeros(E)
link_labels[: pos_edge_index.size(1)] = 1.0

print(
"test auc: %.4f"
% (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy()))
)
auc = autoClassifier.evaluate(metric="auc")
print("test auc: {:.4f}".format(auc))

+ 2
- 7
examples/node_classification.py View File

@@ -79,10 +79,5 @@ if __name__ == "__main__":
balanced=False,
)
autoClassifier.get_leaderboard().show()

# test
predict_result = autoClassifier.predict_proba()
print(
"test acc: %.4f"
% (Acc.evaluate(predict_result, label[dataset[0].nodes.data["test_mask"]].cpu().numpy()))
)
acc = autoClassifier.evaluate(metric="acc")
print("test acc: {:.4f}".format(acc))

Loading…
Cancel
Save