diff --git a/autogl/datasets/utils/__init__.py b/autogl/datasets/utils/__init__.py index a7e80d0..ea6633e 100644 --- a/autogl/datasets/utils/__init__.py +++ b/autogl/datasets/utils/__init__.py @@ -5,5 +5,6 @@ from ._general import ( random_splits_mask_class, graph_cross_validation, graph_random_splits, - graph_get_split + graph_get_split, + set_fold, ) diff --git a/autogl/datasets/utils/_general.py b/autogl/datasets/utils/_general.py index a74a1ba..1bf58ea 100644 --- a/autogl/datasets/utils/_general.py +++ b/autogl/datasets/utils/_general.py @@ -216,7 +216,7 @@ def graph_cross_validation( kf = KFold( n_splits=n_splits, shuffle=shuffle, random_state=_random_seed ) - dataset_y = [g.data['y'].item() for g in dataset] + dataset_y = [g.data['y' if 'y' in g.data else 'label'].item() for g in dataset] idx_list = [ (train_index.tolist(), test_index.tolist()) for train_index, test_index diff --git a/autogl/module/hpo/base.py b/autogl/module/hpo/base.py index a35d0e7..badd876 100644 --- a/autogl/module/hpo/base.py +++ b/autogl/module/hpo/base.py @@ -35,7 +35,10 @@ class BaseHPOptimizer: newpara = para.copy() newpara["parameterName"] = key + ":" + para["parameterName"] if "cutPara" in para.keys(): - newpara["cutPara"] = key + ":" + para["cutPara"] + if isinstance(newpara["cutPara"], str): + newpara["cutPara"] = key + ":" + para["cutPara"] + else: + newpara["cutPara"] = [key + ":" + cutname for cutname in para["cutPara"]] list_config.append(newpara) return list_config diff --git a/examples/graph_classification.py b/examples/graph_classification.py index fb1a19a..e7d3a49 100644 --- a/examples/graph_classification.py +++ b/examples/graph_classification.py @@ -12,6 +12,10 @@ from autogl.solver import AutoGraphClassifier from autogl.module import Acc from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from autogl.backend import DependentBackend +if DependentBackend.is_pyg(): + from autogl.datasets.utils.conversion import to_pyg_dataset as convert_dataset +else: + from autogl.datasets.utils.conversion import to_dgl_dataset as convert_dataset backend = DependentBackend.get_backend_name() @@ -29,12 +33,16 @@ if __name__ == "__main__": parser.add_argument( "--configs", default="../configs/graphclf_gin_benchmark.yml", help="config files" ) - parser.add_argument("--device", type=str, default="cpu", help="device to run on") + parser.add_argument("--device", type=int, default=-1, help="device to run on, -1 means cpu") parser.add_argument("--seed", type=int, default=0, help="random seed") args = parser.parse_args() - if torch.cuda.is_available(): - torch.cuda.set_device(torch.device(args.device)) + + if args.device == -1: + args.device = "cpu" + + if torch.cuda.is_available() and args.device != "cpu": + torch.cuda.set_device(args.device) seed = args.seed # set random seed random.seed(seed) @@ -46,23 +54,26 @@ if __name__ == "__main__": torch.backends.cudnn.benchmark = False dataset = build_dataset_from_name(args.dataset) + _converted_dataset = convert_dataset(dataset) if args.dataset.startswith("imdb"): from autogl.module.feature import OneHotDegreeGenerator - # get max degree - from autogl.module.feature._generators._pyg_impl import degree - - max_degree = 0 - for data in dataset: - deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item()) - max_degree = max(max_degree, deg_max) + if DependentBackend.is_pyg(): + from torch_geometric.utils import degree + max_degree = 0 + for data in _converted_dataset: + deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item()) + max_degree = max(max_degree, deg_max) + else: + max_degree = 0 + for data, _ in _converted_dataset: + deg_max = data.in_degrees().max().item() + max_degree = max(max_degree, deg_max) dataset = OneHotDegreeGenerator(max_degree).fit_transform(dataset, inplace=False) elif args.dataset == "collab": - # FIXME: no onlyconst feature engineer ?? - # FIXME: no auto feature engineer support !! - from autogl.module._feature.auto_feature import Onlyconst + from autogl.module.feature._auto_feature import OnlyConstFeature - dataset = Onlyconst().fit_transform(dataset, inplace=False) + dataset = OnlyConstFeature().fit_transform(dataset, inplace=False) utils.graph_random_splits(dataset, train_ratio=0.8, val_ratio=0.1, seed=args.seed) autoClassifier = AutoGraphClassifier.from_config(args.configs) diff --git a/examples/graph_cv.py b/examples/graph_cv.py index 2dd938c..8fc1049 100644 --- a/examples/graph_cv.py +++ b/examples/graph_cv.py @@ -14,6 +14,11 @@ sys.path.append("../") from autogl.datasets import build_dataset_from_name, utils from autogl.solver import AutoGraphClassifier from autogl.module import Acc +from autogl.backend import DependentBackend +if DependentBackend.is_pyg(): + from autogl.datasets.utils.conversion import to_pyg_dataset as convert_dataset +else: + from autogl.datasets.utils.conversion import to_dgl_dataset as convert_dataset if __name__ == "__main__": parser = ArgumentParser( @@ -48,27 +53,32 @@ if __name__ == "__main__": print("begin processing dataset", args.dataset, "into", args.folds, "folds.") dataset = build_dataset_from_name(args.dataset) + _converted_dataset = convert_dataset(dataset) if args.dataset.startswith("imdb"): - from autogl.module.feature.generators import PYGOneHotDegree + from autogl.module.feature import OneHotDegreeGenerator - # get max degree - from torch_geometric.utils import degree - - max_degree = 0 - for data in dataset: - deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item()) - max_degree = max(max_degree, deg_max) - dataset = PYGOneHotDegree(max_degree).fit_transform(dataset, inplace=False) + if DependentBackend.is_pyg(): + from torch_geometric.utils import degree + max_degree = 0 + for data in _converted_dataset: + deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item()) + max_degree = max(max_degree, deg_max) + else: + max_degree = 0 + for data, _ in _converted_dataset: + deg_max = data.in_degrees().max().item() + max_degree = max(max_degree, deg_max) + dataset = OneHotDegreeGenerator(max_degree).fit_transform(dataset, inplace=False) elif args.dataset == "collab": - from autogl.module.feature.auto_feature import Onlyconst + from autogl.module.feature._auto_feature import OnlyConstFeature - dataset = Onlyconst().fit_transform(dataset, inplace=False) + dataset = OnlyConstFeature().fit_transform(dataset, inplace=False) utils.graph_cross_validation(dataset, args.folds, random_seed=args.seed) accs = [] for fold in range(args.folds): print("evaluating on fold number:", fold) - utils.graph_set_fold_id(dataset, fold) + utils.set_fold(dataset, fold) train_dataset = utils.graph_get_split(dataset, "train", False) autoClassifier = AutoGraphClassifier.from_config(args.configs) diff --git a/test/dataset/utils.py b/test/dataset/utils.py new file mode 100644 index 0000000..2c2c0c6 --- /dev/null +++ b/test/dataset/utils.py @@ -0,0 +1,20 @@ +# test the utils function + +from autogl.datasets import utils, build_dataset_from_name + +def test_graph_cross_validation(): + dataset = build_dataset_from_name('imdb-b') + # first level, 10 folds + utils.graph_cross_validation(dataset, 10) + + # set to fold id + utils.set_fold(dataset, 1) + + # get train split + train_dataset = utils.graph_get_split(dataset, "train", False) + + # further split train to train / val + utils.graph_random_splits(train_dataset, 0.8, 0.2) + +test_graph_cross_validation() +