| @@ -5,5 +5,6 @@ from ._general import ( | |||
| random_splits_mask_class, | |||
| graph_cross_validation, | |||
| graph_random_splits, | |||
| graph_get_split | |||
| graph_get_split, | |||
| set_fold, | |||
| ) | |||
| @@ -216,7 +216,7 @@ def graph_cross_validation( | |||
| kf = KFold( | |||
| n_splits=n_splits, shuffle=shuffle, random_state=_random_seed | |||
| ) | |||
| dataset_y = [g.data['y'].item() for g in dataset] | |||
| dataset_y = [g.data['y' if 'y' in g.data else 'label'].item() for g in dataset] | |||
| idx_list = [ | |||
| (train_index.tolist(), test_index.tolist()) | |||
| for train_index, test_index | |||
| @@ -35,7 +35,10 @@ class BaseHPOptimizer: | |||
| newpara = para.copy() | |||
| newpara["parameterName"] = key + ":" + para["parameterName"] | |||
| if "cutPara" in para.keys(): | |||
| newpara["cutPara"] = key + ":" + para["cutPara"] | |||
| if isinstance(newpara["cutPara"], str): | |||
| newpara["cutPara"] = key + ":" + para["cutPara"] | |||
| else: | |||
| newpara["cutPara"] = [key + ":" + cutname for cutname in para["cutPara"]] | |||
| list_config.append(newpara) | |||
| return list_config | |||
| @@ -12,6 +12,10 @@ from autogl.solver import AutoGraphClassifier | |||
| from autogl.module import Acc | |||
| from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter | |||
| from autogl.backend import DependentBackend | |||
| if DependentBackend.is_pyg(): | |||
| from autogl.datasets.utils.conversion import to_pyg_dataset as convert_dataset | |||
| else: | |||
| from autogl.datasets.utils.conversion import to_dgl_dataset as convert_dataset | |||
| backend = DependentBackend.get_backend_name() | |||
| @@ -29,12 +33,16 @@ if __name__ == "__main__": | |||
| parser.add_argument( | |||
| "--configs", default="../configs/graphclf_gin_benchmark.yml", help="config files" | |||
| ) | |||
| parser.add_argument("--device", type=str, default="cpu", help="device to run on") | |||
| parser.add_argument("--device", type=int, default=-1, help="device to run on, -1 means cpu") | |||
| parser.add_argument("--seed", type=int, default=0, help="random seed") | |||
| args = parser.parse_args() | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.set_device(torch.device(args.device)) | |||
| if args.device == -1: | |||
| args.device = "cpu" | |||
| if torch.cuda.is_available() and args.device != "cpu": | |||
| torch.cuda.set_device(args.device) | |||
| seed = args.seed | |||
| # set random seed | |||
| random.seed(seed) | |||
| @@ -46,23 +54,26 @@ if __name__ == "__main__": | |||
| torch.backends.cudnn.benchmark = False | |||
| dataset = build_dataset_from_name(args.dataset) | |||
| _converted_dataset = convert_dataset(dataset) | |||
| if args.dataset.startswith("imdb"): | |||
| from autogl.module.feature import OneHotDegreeGenerator | |||
| # get max degree | |||
| from autogl.module.feature._generators._pyg_impl import degree | |||
| max_degree = 0 | |||
| for data in dataset: | |||
| deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item()) | |||
| max_degree = max(max_degree, deg_max) | |||
| if DependentBackend.is_pyg(): | |||
| from torch_geometric.utils import degree | |||
| max_degree = 0 | |||
| for data in _converted_dataset: | |||
| deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item()) | |||
| max_degree = max(max_degree, deg_max) | |||
| else: | |||
| max_degree = 0 | |||
| for data, _ in _converted_dataset: | |||
| deg_max = data.in_degrees().max().item() | |||
| max_degree = max(max_degree, deg_max) | |||
| dataset = OneHotDegreeGenerator(max_degree).fit_transform(dataset, inplace=False) | |||
| elif args.dataset == "collab": | |||
| # FIXME: no onlyconst feature engineer ?? | |||
| # FIXME: no auto feature engineer support !! | |||
| from autogl.module._feature.auto_feature import Onlyconst | |||
| from autogl.module.feature._auto_feature import OnlyConstFeature | |||
| dataset = Onlyconst().fit_transform(dataset, inplace=False) | |||
| dataset = OnlyConstFeature().fit_transform(dataset, inplace=False) | |||
| utils.graph_random_splits(dataset, train_ratio=0.8, val_ratio=0.1, seed=args.seed) | |||
| autoClassifier = AutoGraphClassifier.from_config(args.configs) | |||
| @@ -14,6 +14,11 @@ sys.path.append("../") | |||
| from autogl.datasets import build_dataset_from_name, utils | |||
| from autogl.solver import AutoGraphClassifier | |||
| from autogl.module import Acc | |||
| from autogl.backend import DependentBackend | |||
| if DependentBackend.is_pyg(): | |||
| from autogl.datasets.utils.conversion import to_pyg_dataset as convert_dataset | |||
| else: | |||
| from autogl.datasets.utils.conversion import to_dgl_dataset as convert_dataset | |||
| if __name__ == "__main__": | |||
| parser = ArgumentParser( | |||
| @@ -48,27 +53,32 @@ if __name__ == "__main__": | |||
| print("begin processing dataset", args.dataset, "into", args.folds, "folds.") | |||
| dataset = build_dataset_from_name(args.dataset) | |||
| _converted_dataset = convert_dataset(dataset) | |||
| if args.dataset.startswith("imdb"): | |||
| from autogl.module.feature.generators import PYGOneHotDegree | |||
| from autogl.module.feature import OneHotDegreeGenerator | |||
| # get max degree | |||
| from torch_geometric.utils import degree | |||
| max_degree = 0 | |||
| for data in dataset: | |||
| deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item()) | |||
| max_degree = max(max_degree, deg_max) | |||
| dataset = PYGOneHotDegree(max_degree).fit_transform(dataset, inplace=False) | |||
| if DependentBackend.is_pyg(): | |||
| from torch_geometric.utils import degree | |||
| max_degree = 0 | |||
| for data in _converted_dataset: | |||
| deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item()) | |||
| max_degree = max(max_degree, deg_max) | |||
| else: | |||
| max_degree = 0 | |||
| for data, _ in _converted_dataset: | |||
| deg_max = data.in_degrees().max().item() | |||
| max_degree = max(max_degree, deg_max) | |||
| dataset = OneHotDegreeGenerator(max_degree).fit_transform(dataset, inplace=False) | |||
| elif args.dataset == "collab": | |||
| from autogl.module.feature.auto_feature import Onlyconst | |||
| from autogl.module.feature._auto_feature import OnlyConstFeature | |||
| dataset = Onlyconst().fit_transform(dataset, inplace=False) | |||
| dataset = OnlyConstFeature().fit_transform(dataset, inplace=False) | |||
| utils.graph_cross_validation(dataset, args.folds, random_seed=args.seed) | |||
| accs = [] | |||
| for fold in range(args.folds): | |||
| print("evaluating on fold number:", fold) | |||
| utils.graph_set_fold_id(dataset, fold) | |||
| utils.set_fold(dataset, fold) | |||
| train_dataset = utils.graph_get_split(dataset, "train", False) | |||
| autoClassifier = AutoGraphClassifier.from_config(args.configs) | |||
| @@ -0,0 +1,20 @@ | |||
| # test the utils function | |||
| from autogl.datasets import utils, build_dataset_from_name | |||
| def test_graph_cross_validation(): | |||
| dataset = build_dataset_from_name('imdb-b') | |||
| # first level, 10 folds | |||
| utils.graph_cross_validation(dataset, 10) | |||
| # set to fold id | |||
| utils.set_fold(dataset, 1) | |||
| # get train split | |||
| train_dataset = utils.graph_get_split(dataset, "train", False) | |||
| # further split train to train / val | |||
| utils.graph_random_splits(train_dataset, 0.8, 0.2) | |||
| test_graph_cross_validation() | |||