Browse Source

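Fix bugs: GAT head defaults, None decoder handling, device placement in link prediction, and backend-specific benchmark configs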

tags/v0.3.1
Frozenmad 4 years ago
parent
commit
3f09dca926
14 changed files with 254 additions and 147 deletions
  1. +2
    -2
      autogl/module/model/encoders/_pyg/_gat.py
  2. +3
    -0
      autogl/module/model/pyg/gcn.py
  3. +5
    -12
      autogl/module/nas/space/base.py
  4. +3
    -1
      autogl/module/train/base.py
  5. +17
    -16
      autogl/module/train/link_prediction_full.py
  6. +7
    -5
      autogl/solver/base.py
  7. +5
    -4
      autogl/solver/classifier/link_predictor.py
  8. +104
    -0
      configs/dgl/lp_benchmark.yml
  9. +0
    -92
      configs/lp_benchmark.yml
  10. +2
    -2
      configs/nodeclf_gcn_benchmark_small.yml
  11. +94
    -0
      configs/pyg/lp_benchmark.yml
  12. +3
    -3
      examples/graphnas.py
  13. +2
    -4
      examples/link_prediction.py
  14. +7
    -6
      examples/node_classification.py

+ 2
- 2
autogl/module/model/encoders/_pyg/_gat.py View File

@@ -81,10 +81,10 @@ class GATEncoderMaintainer(base_encoder.AutoHomogeneousEncoderMaintainer):
dimensions.append(self.final_dimension)
self._encoder = _GAT(
self.input_dimension, self.hyper_parameters['hidden'],
self.hyper_parameters.get('num_hidden_heads', self.hyper_parameters['heads']),
self.hyper_parameters.get('num_hidden_heads', self.hyper_parameters.get('heads', 1)),
self.hyper_parameters.get(
'num_output_heads',
self.hyper_parameters.get('num_hidden_heads', self.hyper_parameters['heads'])
self.hyper_parameters.get('num_hidden_heads', self.hyper_parameters.get('heads', 1))
),
self.hyper_parameters['dropout'],
self.hyper_parameters['act']


+ 3
- 0
autogl/module/model/pyg/gcn.py View File

@@ -226,6 +226,9 @@ class GCN(ClassificationSupportedSequentialModel):
for __edge_index in getattr(data, "edge_indexes")
]

def forward(self, data):
return self.cls_decode(self.cls_encode(data))

def cls_encode(self, data) -> torch.Tensor:
edge_indexes_and_weights: _typing.Union[
_typing.Sequence[


+ 5
- 12
autogl/module/nas/space/base.py View File

@@ -99,7 +99,7 @@ class BoxModel(BaseAutoModel):
_logger = get_logger("space model")

def __init__(self, space_model, device=torch.device("cuda")):
super().__init__(init=True)
super().__init__(None, None, device)
self.init = True
self.space = []
self.hyperparams = {}
@@ -110,6 +110,9 @@ class BoxModel(BaseAutoModel):
self.device = device
self.selection = None

def _initialize(self):
return True

def fix(self, selection):
"""
To fix self._model with a selection
@@ -124,11 +127,6 @@ class BoxModel(BaseAutoModel):
apply_fixed_architecture(self._model, selection, verbose=False)
return self

def to(self, device):
if isinstance(device, (str, torch.device)):
self.device = device
return super().to(device)

def forward(self, *args, **kwargs):
return self._model(*args, **kwargs)

@@ -141,17 +139,12 @@ class BoxModel(BaseAutoModel):
ret_self._model.instantiate()
if ret_self.selection:
apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False)
ret_self.to(self.device)
ret_self.to_device(self.device)
return ret_self

def __repr__(self) -> str:
return str({'parameter': get_hardware_aware_metric(self.model, 'parameter')})

@property
def model(self):
return self._model


class BaseSpace(nn.Module):
"""
Base space class of NAS module. Defining space containing all models.


+ 3
- 1
autogl/module/train/base.py View File

@@ -623,8 +623,10 @@ class BaseLinkPredictionTrainer(_BaseClassificationTrainer):
device=self.device,
init=self.initialized
)
elif isinstance(dec, (BaseDecoderMaintainer, None)):
elif isinstance(dec, BaseDecoderMaintainer):
self._decoder = dec
elif dec is None:
self._decoder = None
else:
raise ValueError("Invalid decoder setting")
self.num_features = self.num_features


+ 17
- 16
autogl/module/train/link_prediction_full.py View File

@@ -174,7 +174,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
self.initialize()

def _compose_model(self):
return _DummyLinkModel(self.encoder, self.decoder)
return _DummyLinkModel(self.encoder, self.decoder).to(self.device)

@classmethod
def get_task_name(cls):
@@ -198,8 +198,8 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
edge_index=data.train_pos_edge_index,
num_nodes=data.num_nodes,
num_neg_samples=data.train_pos_edge_index.size(1),
)
).to(data.train_pos_edge_index.device)
optimizer.zero_grad()
link_logits = model.encode(data)
link_logits = model.decode(link_logits, data, data.train_pos_edge_index, neg_edge_index)
@@ -300,7 +300,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
return res

def _predict_only_dgl(self, dataset):
pos_data = dataset['train']
pos_data = dataset['train'].to(self.device)
model = self._compose_model()
model.eval()
with torch.no_grad():
@@ -394,17 +394,17 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):

def _predict_proba_dgl(self, dataset, mask=None, in_log_format=False):
dataset = dataset[0]
train_graph = dataset['train']
train_graph = dataset['train'].to(self.device)
try:
try:
pos_graph = dataset[f'{mask}_pos']
neg_graph = dataset[f'{mask}_neg']
pos_graph = dataset[f'{mask}_pos'].to(self.device)
neg_graph = dataset[f'{mask}_neg'].to(self.device)
except:
pos_graph = dataset[f'test_pos']
neg_graph = dataset[f'test_neg']
pos_graph = dataset[f'test_pos'].to(self.device)
neg_graph = dataset[f'test_neg'].to(self.device)
except:
import dgl
pos_graph = dataset[mask]
pos_graph = dataset[mask].to(self.device)
neg_graph = dgl.graph([], num_nodes=0).to(self.device)

model = self._compose_model()
@@ -530,13 +530,13 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
else:
feval = get_feval(feval)

train_graph = dataset['train']
train_graph = dataset['train'].to(self.device)
try:
pos_graph = dataset[f'{mask}_pos']
neg_graph = dataset[f'{mask}_neg']
pos_graph = dataset[f'{mask}_pos'].to(self.device)
neg_graph = dataset[f'{mask}_neg'].to(self.device)
except:
pos_graph = dataset[f'test_pos']
neg_graph = dataset[f'test_neg']
pos_graph = dataset[f'test_pos'].to(self.device)
neg_graph = dataset[f'test_neg'].to(self.device)

model = self._compose_model()
model.eval()
@@ -606,7 +606,8 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
trainer_hp = origin_hp
encoder = encoder.from_hyper_parameter(encoder_hp)
decoder = decoder.from_hyper_parameter_and_encoder(decoder_hp, encoder)
if decoder is not None:
decoder = decoder.from_hyper_parameter_and_encoder(decoder_hp, encoder)
ret = self.__class__(
model=(encoder, decoder),


+ 7
- 5
autogl/solver/base.py View File

@@ -15,7 +15,7 @@ from ..module.model import EncoderUniversalRegistry, DecoderUniversalRegistry, M
from ..module.nas.algorithm import NAS_ALGO_DICT
from ..module.nas.estimator import NAS_ESTIMATOR_DICT
from ..module.nas.space import NAS_SPACE_DICT
from ..module import BaseFeature, BaseHPOptimizer, BaseTrainer
from ..module import BaseFeatureEngineer, BaseHPOptimizer, BaseTrainer
from .utils import LeaderBoard
from ..utils import get_logger

@@ -32,11 +32,13 @@ def _initialize_single_model(model):
# initialize decoder
name = model["decoder"].pop("name")
decoder = DecoderUniversalRegistry.get_decoder(name)(**model["decoder"])
return (encoder, decoder)

if "name" in model:
# whole model
name = model.pop("name")
encoder = ModelUniversalRegistry.get_model(name)(**model)
return (encoder, decoder)
return encoder

def _parse_hp_space(spaces):
if spaces is None:
@@ -182,7 +184,7 @@ class BaseSolver:
# load feature engineer module

def get_feature(feature_engineer):
if isinstance(feature_engineer, BaseFeature):
if isinstance(feature_engineer, BaseFeatureEngineer):
return feature_engineer
if isinstance(feature_engineer, str):
if feature_engineer in FEATURE_DICT:
@@ -192,12 +194,12 @@ class BaseSolver:
)
raise TypeError(
f"Cannot parse feature argument {str(feature_engineer)} of"
" type {str(type(feature_engineer))}"
f" type {str(type(feature_engineer))}"
)

if feature_module is None:
self.feature_module = None
elif isinstance(feature_module, (BaseFeature, str)):
elif isinstance(feature_module, (BaseFeatureEngineer, str)):
self.feature_module = get_feature(feature_module)
elif isinstance(feature_module, Iterable):
self.feature_module = get_feature(feature_module[0])


+ 5
- 4
autogl/solver/classifier/link_predictor.py View File

@@ -16,7 +16,7 @@ from ..base import _parse_hp_space, _initialize_single_model, _parse_model_hp
from ...module.feature import FEATURE_DICT
from ...module.train import TRAINER_DICT, BaseLinkPredictionTrainer
from ...module.train import get_feval
from ..utils import LeaderBoard, get_graph_from_dataset, get_graph_node_features, convert_dataset, set_seed
from ..utils import LeaderBoard, get_graph_node_features, convert_dataset, set_seed
from ...datasets import utils
from ..utils import get_logger
from ...backend import DependentBackend
@@ -260,7 +260,8 @@ class AutoLinkPredictor(BaseClassifier):

# set up the dataset
if train_split is not None and val_split is not None:
utils.split_edges(dataset, train_split, val_split)
dataset = utils.split_edges(dataset, train_split, val_split)
graph_data = dataset[0]
else:
if BACKEND == 'pyg':
assert all(
@@ -292,8 +293,8 @@ class AutoLinkPredictor(BaseClassifier):
if BACKEND == 'pyg':
dataset = self.feature_module.fit_transform(dataset, inplace=inplace)
else:
dataset = self.feature_module.fit_transform([graph_data[0]], inplace=inplace)
dataset += graph_data[1:]
_dataset = self.feature_module.fit_transform([g[0] for g in dataset], inplace=inplace)
dataset = [[_d, *d[1:]] for _d, d in zip(_dataset, dataset)]

self.dataset = dataset



+ 104
- 0
configs/dgl/lp_benchmark.yml View File

@@ -0,0 +1,104 @@
ensemble:
name: voting
feature:
- name: NormalizeFeatures
hpo:
max_evals: 10
name: random
models:
- encoder:
hp_space:
- feasiblePoints: 2,3
parameterName: num_layers
type: DISCRETE
- cutFunc: lambda x:x[0] - 1
cutPara:
- num_layers
length: 2
maxValue:
- 256
- 256
minValue:
- 64
- 64
numericalType: INTEGER
parameterName: hidden
scalingType: LOG
type: NUMERICAL_LIST
- maxValue: 0.2
minValue: 0.0
parameterName: dropout
scalingType: LINEAR
type: DOUBLE
- feasiblePoints:
- leaky_relu
- relu
- elu
- tanh
parameterName: act
type: CATEGORICAL
name: gcn
- encoder:
name: gat
hp_space:
- feasiblePoints: 2,3
parameterName: num_layers
type: DISCRETE
- cutFunc: lambda x:x[0] - 1
cutPara:
- num_layers
length: 2
maxValue:
- 256
- 256
minValue:
- 64
- 64
numericalType: INTEGER
parameterName: hidden
scalingType: LOG
type: NUMERICAL_LIST
- maxValue: 0.2
minValue: 0.0
parameterName: dropout
scalingType: LINEAR
type: DOUBLE
- parameterName: num_hidden_heads
type: INTEGER
scalingType: LINEAR
minValue: 1
maxValue: 8
- parameterName: num_output_heads
type: INTEGER
scalingType: LINEAR
minValue: 1
maxValue: 8
- feasiblePoints:
- leaky_relu
- relu
- elu
- tanh
parameterName: act
type: CATEGORICAL
trainer:
hp_space:
- maxValue: 150
minValue: 50
parameterName: max_epoch
scalingType: LINEAR
type: INTEGER
- maxValue: 40
minValue: 25
parameterName: early_stopping_round
scalingType: LINEAR
type: INTEGER
- maxValue: 0.05
minValue: 0.005
parameterName: lr
scalingType: LOG
type: DOUBLE
- maxValue: 1.0E-7
minValue: 1.0E-10
parameterName: weight_decay
scalingType: LOG
type: DOUBLE

+ 0
- 92
configs/lp_benchmark.yml View File

@@ -1,92 +0,0 @@
ensemble:
name: voting
feature:
- name: PYGNormalizeFeatures
hpo:
max_evals: 10
name: random
models:
- hp_space:
- feasiblePoints: 2,3
parameterName: num_layers
type: DISCRETE
- cutFunc: lambda x:x[0] - 1
cutPara:
- num_layers
length: 2
maxValue:
- 256
- 256
minValue:
- 64
- 64
numericalType: INTEGER
parameterName: hidden
scalingType: LOG
type: NUMERICAL_LIST
- maxValue: 0.2
minValue: 0.0
parameterName: dropout
scalingType: LINEAR
type: DOUBLE
- feasiblePoints:
- leaky_relu
- relu
- elu
- tanh
parameterName: act
type: CATEGORICAL
name: gcn
- name: gat
hp_space:
- feasiblePoints: 2,3
parameterName: num_layers
type: DISCRETE
- cutFunc: lambda x:x[0] - 1
cutPara:
- num_layers
length: 2
maxValue:
- 256
- 256
minValue:
- 64
- 64
numericalType: INTEGER
parameterName: hidden
scalingType: LOG
type: NUMERICAL_LIST
- maxValue: 0.2
minValue: 0.0
parameterName: dropout
scalingType: LINEAR
type: DOUBLE
- feasiblePoints:
- leaky_relu
- relu
- elu
- tanh
parameterName: act
type: CATEGORICAL
trainer:
hp_space:
- maxValue: 150
minValue: 50
parameterName: max_epoch
scalingType: LINEAR
type: INTEGER
- maxValue: 40
minValue: 25
parameterName: early_stopping_round
scalingType: LINEAR
type: INTEGER
- maxValue: 0.05
minValue: 0.005
parameterName: lr
scalingType: LOG
type: DOUBLE
- maxValue: 1.0E-7
minValue: 1.0E-10
parameterName: weight_decay
scalingType: LOG
type: DOUBLE

+ 2
- 2
configs/nodeclf_gcn_benchmark_small.yml View File

@@ -1,7 +1,7 @@
ensemble:
name: null
feature:
- name: PYGNormalizeFeatures
- name: NormalizeFeatures
hpo:
max_evals: 10
name: random
@@ -34,7 +34,7 @@ models:
- tanh
parameterName: act
type: CATEGORICAL
name: gcn
name: gcn-model
trainer:
hp_space:
- maxValue: 300


+ 94
- 0
configs/pyg/lp_benchmark.yml View File

@@ -0,0 +1,94 @@
ensemble:
name: voting
feature:
- name: NormalizeFeatures
hpo:
max_evals: 10
name: random
models:
- encoder:
hp_space:
- feasiblePoints: 2,3
parameterName: num_layers
type: DISCRETE
- cutFunc: lambda x:x[0] - 1
cutPara:
- num_layers
length: 2
maxValue:
- 256
- 256
minValue:
- 64
- 64
numericalType: INTEGER
parameterName: hidden
scalingType: LOG
type: NUMERICAL_LIST
- maxValue: 0.2
minValue: 0.0
parameterName: dropout
scalingType: LINEAR
type: DOUBLE
- feasiblePoints:
- leaky_relu
- relu
- elu
- tanh
parameterName: act
type: CATEGORICAL
name: gcn
- encoder:
name: gat
hp_space:
- feasiblePoints: 2,3
parameterName: num_layers
type: DISCRETE
- cutFunc: lambda x:x[0] - 1
cutPara:
- num_layers
length: 2
maxValue:
- 256
- 256
minValue:
- 64
- 64
numericalType: INTEGER
parameterName: hidden
scalingType: LOG
type: NUMERICAL_LIST
- maxValue: 0.2
minValue: 0.0
parameterName: dropout
scalingType: LINEAR
type: DOUBLE
- feasiblePoints:
- leaky_relu
- relu
- elu
- tanh
parameterName: act
type: CATEGORICAL
trainer:
hp_space:
- maxValue: 150
minValue: 50
parameterName: max_epoch
scalingType: LINEAR
type: INTEGER
- maxValue: 40
minValue: 25
parameterName: early_stopping_round
scalingType: LINEAR
type: INTEGER
- maxValue: 0.05
minValue: 0.005
parameterName: lr
scalingType: LOG
type: DOUBLE
- maxValue: 1.0E-7
minValue: 1.0E-10
parameterName: weight_decay
scalingType: LOG
type: DOUBLE

+ 3
- 3
examples/graphnas.py View File

@@ -1,10 +1,9 @@
import sys
sys.path.append('../')
from autogl.datasets import build_dataset_from_name
from autogl.solver import AutoNodeClassifier
from autogl.module.train import Acc
from autogl.solver.utils import set_seed
import argparse
from autogl.backend import DependentBackend

if __name__ == '__main__':
set_seed(202106)
@@ -15,8 +14,9 @@ if __name__ == '__main__':
args = parser.parse_args()

dataset = build_dataset_from_name('cora')
label = dataset[0].nodes.data["y" if DependentBackend.is_pyg() else "label"][dataset[0].nodes.data["test_mask"]].cpu().numpy()
solver = AutoNodeClassifier.from_config(args.config)
solver.fit(dataset)
solver.get_leaderboard().show()
out = solver.predict_proba()
print('acc on dataset', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy()))
print('acc on dataset', Acc.evaluate(out, label))

+ 2
- 4
examples/link_prediction.py View File

@@ -1,6 +1,3 @@
import sys

sys.path.append("../")
from autogl.datasets import build_dataset_from_name
from autogl.solver.classifier.link_predictor import AutoLinkPredictor
from autogl.module.train.evaluation import Auc
@@ -76,7 +73,8 @@ if __name__ == "__main__":
)
autoClassifier.get_leaderboard().show()

# test
# TODO: move the test logic below into the solver so that the solver handles it.
# BUG: the test logic below still needs to be fixed for the DGL backend.
predict_result = autoClassifier.predict_proba()

pos_edge_index, neg_edge_index = (


+ 7
- 6
examples/node_classification.py View File

@@ -1,12 +1,11 @@
import sys
import yaml
import random
import torch.backends.cudnn
import numpy as np
sys.path.append("../")
from autogl.datasets import build_dataset_from_name
from autogl.solver import AutoNodeClassifier
from autogl.module import Acc
from autogl.backend import DependentBackend

if __name__ == "__main__":

@@ -37,7 +36,7 @@ if __name__ == "__main__":
help="config to use",
)
# following arguments will override parameters in the config file
parser.add_argument("--hpo", type=str, default="autone", help="hpo methods")
parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods")
parser.add_argument(
"--max_eval", type=int, default=50, help="max hpo evaluation times"
)
@@ -58,6 +57,8 @@ if __name__ == "__main__":
torch.backends.cudnn.benchmark = False

dataset = build_dataset_from_name(args.dataset)
label = dataset[0].nodes.data["y" if DependentBackend.is_pyg() else "label"]
num_classes = len(np.unique(label.numpy()))

configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader)
configs["hpo"]["name"] = args.hpo
@@ -73,8 +74,8 @@ if __name__ == "__main__":
time_limit=3600,
evaluation_method=[Acc],
seed=seed,
train_split=20 * dataset.num_classes,
val_split=30 * dataset.num_classes,
train_split=20 * num_classes,
val_split=30 * num_classes,
balanced=False,
)
autoClassifier.get_leaderboard().show()
@@ -83,5 +84,5 @@ if __name__ == "__main__":
predict_result = autoClassifier.predict_proba()
print(
"test acc: %.4f"
% (Acc.evaluate(predict_result, dataset.data.y[dataset.data.test_mask].numpy()))
% (Acc.evaluate(predict_result, label[dataset[0].nodes.data["test_mask"]].cpu().numpy()))
)

Loading…
Cancel
Save