Browse Source

Fix bugs in HPO, graph classification, and dataset utils

tags/v0.3.1
Frozenmad 4 years ago
parent
commit
9d02402fc9
6 changed files with 74 additions and 29 deletions
  1. +2
    -1
      autogl/datasets/utils/__init__.py
  2. +1
    -1
      autogl/datasets/utils/_general.py
  3. +4
    -1
      autogl/module/hpo/base.py
  4. +25
    -14
      examples/graph_classification.py
  5. +22
    -12
      examples/graph_cv.py
  6. +20
    -0
      test/dataset/utils.py

+ 2
- 1
autogl/datasets/utils/__init__.py View File

@@ -5,5 +5,6 @@ from ._general import (
random_splits_mask_class,
graph_cross_validation,
graph_random_splits,
graph_get_split
graph_get_split,
set_fold,
)

+ 1
- 1
autogl/datasets/utils/_general.py View File

@@ -216,7 +216,7 @@ def graph_cross_validation(
kf = KFold(
n_splits=n_splits, shuffle=shuffle, random_state=_random_seed
)
dataset_y = [g.data['y'].item() for g in dataset]
dataset_y = [g.data['y' if 'y' in g.data else 'label'].item() for g in dataset]
idx_list = [
(train_index.tolist(), test_index.tolist())
for train_index, test_index


+ 4
- 1
autogl/module/hpo/base.py View File

@@ -35,7 +35,10 @@ class BaseHPOptimizer:
newpara = para.copy()
newpara["parameterName"] = key + ":" + para["parameterName"]
if "cutPara" in para.keys():
newpara["cutPara"] = key + ":" + para["cutPara"]
if isinstance(newpara["cutPara"], str):
newpara["cutPara"] = key + ":" + para["cutPara"]
else:
newpara["cutPara"] = [key + ":" + cutname for cutname in para["cutPara"]]
list_config.append(newpara)
return list_config



+ 25
- 14
examples/graph_classification.py View File

@@ -12,6 +12,10 @@ from autogl.solver import AutoGraphClassifier
from autogl.module import Acc
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from autogl.backend import DependentBackend
if DependentBackend.is_pyg():
from autogl.datasets.utils.conversion import to_pyg_dataset as convert_dataset
else:
from autogl.datasets.utils.conversion import to_dgl_dataset as convert_dataset

backend = DependentBackend.get_backend_name()

@@ -29,12 +33,16 @@ if __name__ == "__main__":
parser.add_argument(
"--configs", default="../configs/graphclf_gin_benchmark.yml", help="config files"
)
parser.add_argument("--device", type=str, default="cpu", help="device to run on")
parser.add_argument("--device", type=int, default=-1, help="device to run on, -1 means cpu")
parser.add_argument("--seed", type=int, default=0, help="random seed")

args = parser.parse_args()
if torch.cuda.is_available():
torch.cuda.set_device(torch.device(args.device))

if args.device == -1:
args.device = "cpu"

if torch.cuda.is_available() and args.device != "cpu":
torch.cuda.set_device(args.device)
seed = args.seed
# set random seed
random.seed(seed)
@@ -46,23 +54,26 @@ if __name__ == "__main__":
torch.backends.cudnn.benchmark = False

dataset = build_dataset_from_name(args.dataset)
_converted_dataset = convert_dataset(dataset)
if args.dataset.startswith("imdb"):
from autogl.module.feature import OneHotDegreeGenerator

# get max degree
from autogl.module.feature._generators._pyg_impl import degree

max_degree = 0
for data in dataset:
deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item())
max_degree = max(max_degree, deg_max)
if DependentBackend.is_pyg():
from torch_geometric.utils import degree
max_degree = 0
for data in _converted_dataset:
deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item())
max_degree = max(max_degree, deg_max)
else:
max_degree = 0
for data, _ in _converted_dataset:
deg_max = data.in_degrees().max().item()
max_degree = max(max_degree, deg_max)
dataset = OneHotDegreeGenerator(max_degree).fit_transform(dataset, inplace=False)
elif args.dataset == "collab":
# FIXME: no onlyconst feature engineer ??
# FIXME: no auto feature engineer support !!
from autogl.module._feature.auto_feature import Onlyconst
from autogl.module.feature._auto_feature import OnlyConstFeature

dataset = Onlyconst().fit_transform(dataset, inplace=False)
dataset = OnlyConstFeature().fit_transform(dataset, inplace=False)
utils.graph_random_splits(dataset, train_ratio=0.8, val_ratio=0.1, seed=args.seed)

autoClassifier = AutoGraphClassifier.from_config(args.configs)


+ 22
- 12
examples/graph_cv.py View File

@@ -14,6 +14,11 @@ sys.path.append("../")
from autogl.datasets import build_dataset_from_name, utils
from autogl.solver import AutoGraphClassifier
from autogl.module import Acc
from autogl.backend import DependentBackend
if DependentBackend.is_pyg():
from autogl.datasets.utils.conversion import to_pyg_dataset as convert_dataset
else:
from autogl.datasets.utils.conversion import to_dgl_dataset as convert_dataset

if __name__ == "__main__":
parser = ArgumentParser(
@@ -48,27 +53,32 @@ if __name__ == "__main__":

print("begin processing dataset", args.dataset, "into", args.folds, "folds.")
dataset = build_dataset_from_name(args.dataset)
_converted_dataset = convert_dataset(dataset)
if args.dataset.startswith("imdb"):
from autogl.module.feature.generators import PYGOneHotDegree
from autogl.module.feature import OneHotDegreeGenerator

# get max degree
from torch_geometric.utils import degree

max_degree = 0
for data in dataset:
deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item())
max_degree = max(max_degree, deg_max)
dataset = PYGOneHotDegree(max_degree).fit_transform(dataset, inplace=False)
if DependentBackend.is_pyg():
from torch_geometric.utils import degree
max_degree = 0
for data in _converted_dataset:
deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item())
max_degree = max(max_degree, deg_max)
else:
max_degree = 0
for data, _ in _converted_dataset:
deg_max = data.in_degrees().max().item()
max_degree = max(max_degree, deg_max)
dataset = OneHotDegreeGenerator(max_degree).fit_transform(dataset, inplace=False)
elif args.dataset == "collab":
from autogl.module.feature.auto_feature import Onlyconst
from autogl.module.feature._auto_feature import OnlyConstFeature

dataset = Onlyconst().fit_transform(dataset, inplace=False)
dataset = OnlyConstFeature().fit_transform(dataset, inplace=False)
utils.graph_cross_validation(dataset, args.folds, random_seed=args.seed)

accs = []
for fold in range(args.folds):
print("evaluating on fold number:", fold)
utils.graph_set_fold_id(dataset, fold)
utils.set_fold(dataset, fold)
train_dataset = utils.graph_get_split(dataset, "train", False)
autoClassifier = AutoGraphClassifier.from_config(args.configs)



+ 20
- 0
test/dataset/utils.py View File

@@ -0,0 +1,20 @@
# test the utils function
#
# Smoke test for the dataset split utilities in ``autogl.datasets.utils``:
# it exercises the full cross-validation workflow (create folds, select a
# fold, fetch a split, then sub-split) and passes if no exception is raised.
# NOTE(review): there are no assertions — this only verifies the calls run;
# it also downloads the 'imdb-b' dataset on first use, so it needs network.

from autogl.datasets import utils, build_dataset_from_name

def test_graph_cross_validation():
    """Run the graph cross-validation utilities end to end on imdb-b."""
    dataset = build_dataset_from_name('imdb-b')
    # first level, 10 folds
    utils.graph_cross_validation(dataset, 10)

    # set to fold id
    # (``set_fold`` is the new name exported by this commit, replacing
    # ``graph_set_fold_id`` used previously in examples/graph_cv.py)
    utils.set_fold(dataset, 1)

    # get train split
    train_dataset = utils.graph_get_split(dataset, "train", False)

    # further split train to train / val
    utils.graph_random_splits(train_dataset, 0.8, 0.2)

# NOTE(review): called at import time rather than guarded by
# ``if __name__ == "__main__":`` — pytest would also re-run it via discovery.
test_graph_cross_validation()


Loading…
Cancel
Save