| @@ -8,6 +8,11 @@ Feel free to open <a href="https://github.com/THUMNLab/AutoGL/issues">issues</a> | |||
| [](https://github.com/psf/black) | |||
| [](https://autogl.readthedocs.io/en/latest/?badge=latest) | |||
| ## News! | |||
| - 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://arxiv.org/abs/2104.04987) are accepted in _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper following methods [here](#Cite). | |||
| ## Introduction | |||
| AutoGL is developed for researchers and developers to quickly conduct autoML on the graph datasets & tasks. See our documentation for detailed information! | |||
| @@ -104,6 +109,20 @@ make clean && make html | |||
| The documentation will be automatically generated under `docs/_build/html` | |||
| ## Cite | |||
| You can cite [our paper](https://arxiv.org/abs/2104.04987) as follows if you use this code in your own work: | |||
| ``` | |||
| @inproceedings{ | |||
| guan2021autogl, | |||
| title={Auto{GL}: A Library for Automated Graph Learning}, | |||
| author={Chaoyu Guan and Ziwei Zhang and Haoyang Li and Heng Chang and Zeyang Zhang and Yijian Qin and Jiyan Jiang and Xin Wang and Wenwu Zhu}, | |||
| booktitle={ICLR 2021 Workshop on Geometrical and Topological Representation Learning}, | |||
| year={2021}, | |||
| url={https://openreview.net/forum?id=0yHwpLeInDn} | |||
| } | |||
| ``` | |||
| ## License | |||
| We follow [MIT license](LICENSE) across the entire codebase. | |||
| @@ -2,7 +2,17 @@ from pdb import set_trace | |||
| import torch | |||
| import numpy as np | |||
| from torch_geometric.data import DataLoader | |||
| from sklearn.model_selection import StratifiedKFold | |||
| from torch_geometric.utils import train_test_split_edges | |||
| from sklearn.model_selection import StratifiedKFold, KFold | |||
| def split_edges(dataset, train_ratio, val_ratio): | |||
| datas = [data for data in dataset] | |||
| for i in range(len(datas)): | |||
| datas[i] = train_test_split_edges( | |||
| datas[i], val_ratio, 1 - train_ratio - val_ratio | |||
| ) | |||
| dataset.data, dataset.slices = dataset.collate(datas) | |||
| def get_label_number(dataset): | |||
| @@ -179,7 +189,9 @@ def random_splits_mask_class( | |||
| return dataset | |||
| def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42): | |||
| def graph_cross_validation( | |||
| dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False | |||
| ): | |||
| r"""Cross validation for graph classification data, returning one fold with specific idx in autogl.datasets or pyg.Dataloader(default) | |||
| Parameters | |||
| @@ -196,7 +208,12 @@ def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42): | |||
| random_seed : int | |||
| random_state for sklearn.model_selection.StratifiedKFold | |||
| """ | |||
| skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) | |||
| if stratify: | |||
| skf = StratifiedKFold( | |||
| n_splits=n_splits, shuffle=shuffle, random_state=random_seed | |||
| ) | |||
| else: | |||
| skf = KFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) | |||
| idx_list = [] | |||
| # BUG: from pytorch_geometric, not sure whether it is a bug. The dataset.data will return | |||
| @@ -5,6 +5,7 @@ HPO Module for tuning hyper parameters | |||
| import time | |||
| import json | |||
| import math | |||
| from tqdm import trange | |||
| from .suggestion.models import Study | |||
| from .base import BaseHPOptimizer, TimeTooLimitedError | |||
| from .suggestion.algorithm.random_search import RandomSearchAlgorithm | |||
| @@ -150,7 +151,8 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): | |||
| best_id = None | |||
| best_trainer = None | |||
| for i in range(self.max_evals): | |||
| print("HPO Search Phase:\n") | |||
| for i in trange(self.max_evals): | |||
| if time.time() - start_time > time_limit: | |||
| self.logger.info("Time out of limit, Epoch: {}".format(str(i))) | |||
| break | |||
| @@ -6,6 +6,7 @@ import time | |||
| import json | |||
| import math | |||
| import numpy as np | |||
| from tqdm import trange | |||
| from . import register_hpo | |||
| from .suggestion.models import Study | |||
| from .base import BaseHPOptimizer, TimeTooLimitedError | |||
| @@ -115,7 +116,8 @@ class AutoNE(BaseHPOptimizer): | |||
| K = utils.K(len(params.type_)) | |||
| gp = utils.GaussianProcessRegressor(K) | |||
| sample_graphs = sample_subgraph(dataset) | |||
| for t in range(sampled_number): | |||
| print("Sample Phase:\n") | |||
| for t in trange(sampled_number): | |||
| b_t = time.time() | |||
| i = t | |||
| subgraph = sample_graphs[t] | |||
| @@ -133,7 +135,8 @@ class AutoNE(BaseHPOptimizer): | |||
| best_trainer = None | |||
| best_para = None | |||
| wne = get_wne(dataset) | |||
| for t in range(s): | |||
| print("HPO Search Phase:\n") | |||
| for t in trange(s): | |||
| if time.time() - start_time > time_limit: | |||
| self.logger.info("Time out of limit, Epoch: {}".format(str(i))) | |||
| break | |||
| @@ -1,7 +1,9 @@ | |||
| from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model | |||
| from .base import BaseModel | |||
| from .topkpool import AutoTopkpool | |||
| from .graph_sage import AutoSAGE | |||
| # from .graph_sage import AutoSAGE | |||
| from .graphsage import AutoSAGE | |||
| from .gcn import AutoGCN | |||
| from .gat import AutoGAT | |||
| from .gin import AutoGIN | |||
| @@ -3,11 +3,15 @@ auto graph model | |||
| a list of models with their hyper parameters | |||
| NOTE: neural architecture search (NAS) maybe included here | |||
| """ | |||
| import copy | |||
| import logging | |||
| import typing as _typing | |||
| import torch | |||
| import torch.nn.functional as F | |||
| from copy import deepcopy | |||
| base_approach_logger: logging.Logger = logging.getLogger("BaseModel") | |||
| def activate_func(x, func): | |||
| if func == "tanh": | |||
| @@ -22,7 +26,7 @@ def activate_func(x, func): | |||
| return x | |||
| class BaseModel(torch.nn.Module): | |||
| class BaseModel: | |||
| def __init__(self, init=False, *args, **kwargs): | |||
| super(BaseModel, self).__init__() | |||
| @@ -46,7 +50,13 @@ class BaseModel(torch.nn.Module): | |||
| def to(self, device): | |||
| if isinstance(device, (str, torch.device)): | |||
| self.device = device | |||
| return super().to(device) | |||
| if ( | |||
| hasattr(self, "model") | |||
| and self.model is not None | |||
| and isinstance(self.model, torch.nn.Module) | |||
| ): | |||
| self.model.to(self.device) | |||
| return self | |||
| def from_hyper_parameter(self, hp): | |||
| ret_self = self.__class__( | |||
| @@ -80,3 +90,284 @@ class BaseModel(torch.nn.Module): | |||
| ), "Cannot set graph features for tasks other than graph classification" | |||
| self.num_graph_features = num_graph_features | |||
| self.params["num_graph_features"] = num_graph_features | |||
| class _BaseBaseModel: | |||
| # todo: after renaming the experimental base class _BaseModel to BaseModel, | |||
| # rename this class to _BaseModel | |||
| """ | |||
| The base class for class BaseModel, | |||
| designed to implement some basic functionality of BaseModel. | |||
| -- Designed by ZiXin Sun | |||
| """ | |||
| @classmethod | |||
| def __formulate_device( | |||
| cls, device: _typing.Union[str, torch.device] = ... | |||
| ) -> torch.device: | |||
| if type(device) == torch.device or ( | |||
| type(device) == str and device.strip().lower() != "auto" | |||
| ): | |||
| return torch.device(device) | |||
| elif torch.cuda.is_available() and torch.cuda.device_count() > 0: | |||
| return torch.device("cuda") | |||
| else: | |||
| return torch.device("cpu") | |||
| @property | |||
| def device(self) -> torch.device: | |||
| return self.__device | |||
| @device.setter | |||
| def device(self, __device: _typing.Union[str, torch.device, None]): | |||
| self.__device: torch.device = self.__formulate_device(__device) | |||
| @property | |||
| def model(self) -> _typing.Optional[torch.nn.Module]: | |||
| if self._model is None: | |||
| base_approach_logger.debug( | |||
| "property of model NOT initialized before accessing" | |||
| ) | |||
| return self._model | |||
| @model.setter | |||
| def model(self, _model: torch.nn.Module) -> None: | |||
| if not isinstance(_model, torch.nn.Module): | |||
| raise TypeError( | |||
| "the property of model MUST be an instance of " "torch.nn.Module" | |||
| ) | |||
| self._model = _model | |||
| def _initialize(self): | |||
| raise NotImplementedError | |||
| def initialize(self) -> bool: | |||
| """ | |||
| Initialize the model in case that the model has NOT been initialized | |||
| :return: whether self._initialize() method called | |||
| """ | |||
| if not self.__is_initialized: | |||
| self._initialize() | |||
| self.__is_initialized = True | |||
| return True | |||
| return False | |||
| # def to(self, *args, **kwargs): | |||
| # """ | |||
| # Due to the signature of to() method in class BaseApproach | |||
| # is inconsistent with the signature of the method | |||
| # in the base class torch.nn.Module, | |||
| # this intermediate overridden method is necessary to | |||
| # walk around (bypass) the inspection for | |||
| # signature of overriding method. | |||
| # :param args: positional arguments list | |||
| # :param kwargs: keyword arguments dict | |||
| # :return: self | |||
| # """ | |||
| # return super(_BaseBaseModel, self).to(*args, **kwargs) | |||
| def forward(self, *args, **kwargs): | |||
| if self.model is not None and isinstance(self.model, torch.nn.Module): | |||
| return self.model(*args, **kwargs) | |||
| else: | |||
| raise NotImplementedError | |||
| def __init__( | |||
| self, | |||
| model: _typing.Optional[torch.nn.Module] = None, | |||
| initialize: bool = False, | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| ): | |||
| if type(initialize) != bool: | |||
| raise TypeError | |||
| super(_BaseBaseModel, self).__init__() | |||
| self.__device: torch.device = self.__formulate_device(device) | |||
| self._model: _typing.Optional[torch.nn.Module] = model | |||
| self.__is_initialized: bool = False | |||
| if initialize: | |||
| self.initialize() | |||
| class _BaseModel(_BaseBaseModel, BaseModel): | |||
| """ | |||
| The upcoming root base class for Model, i.e. BaseModel | |||
| -- Designed by ZiXin Sun | |||
| """ | |||
| # todo: Deprecate and remove the legacy class "BaseModel", | |||
| # then rename this class to "BaseModel", | |||
| # correspondingly, this class will no longer extend | |||
| # the legacy class "BaseModel" after the removal. | |||
| def _initialize(self): | |||
| raise NotImplementedError | |||
| def to(self, device: torch.device): | |||
| self.device = device | |||
| if self.model is not None and isinstance(self.model, torch.nn.Module): | |||
| self.model.to(self.device) | |||
| return super().to(device) | |||
| @property | |||
| def space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: | |||
| # todo: deprecate and remove in future major version | |||
| return self.__hyper_parameter_space | |||
| @property | |||
| def hyper_parameter_space(self): | |||
| return self.__hyper_parameter_space | |||
| @hyper_parameter_space.setter | |||
| def hyper_parameter_space( | |||
| self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]] | |||
| ): | |||
| self.__hyper_parameter_space = space | |||
| @property | |||
| def hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: | |||
| return self.__hyper_parameter | |||
| @hyper_parameter.setter | |||
| def hyper_parameter(self, _hyper_parameter: _typing.Dict[str, _typing.Any]): | |||
| if not isinstance(_hyper_parameter, dict): | |||
| raise TypeError | |||
| self.__hyper_parameter = _hyper_parameter | |||
| def get_hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: | |||
| """ | |||
| todo: consider deprecating this trivial getter method in the future | |||
| :return: copied hyper parameter | |||
| """ | |||
| return copy.deepcopy(self.__hyper_parameter) | |||
| def __init__( | |||
| self, | |||
| model: _typing.Optional[torch.nn.Module] = None, | |||
| initialize: bool = False, | |||
| hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., | |||
| hyper_parameter: _typing.Dict[str, _typing.Any] = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| ): | |||
| if type(initialize) != bool: | |||
| raise TypeError | |||
| super(_BaseModel, self).__init__(model, initialize, device) | |||
| if hyper_parameter_space != Ellipsis and isinstance( | |||
| hyper_parameter_space, _typing.Sequence | |||
| ): | |||
| self.__hyper_parameter_space: _typing.Sequence[ | |||
| _typing.Dict[str, _typing.Any] | |||
| ] = hyper_parameter_space | |||
| else: | |||
| self.__hyper_parameter_space: _typing.Sequence[ | |||
| _typing.Dict[str, _typing.Any] | |||
| ] = [] | |||
| if hyper_parameter != Ellipsis and isinstance(hyper_parameter, dict): | |||
| self.__hyper_parameter: _typing.Dict[str, _typing.Any] = hyper_parameter | |||
| else: | |||
| self.__hyper_parameter: _typing.Dict[str, _typing.Any] = {} | |||
| def from_hyper_parameter(self, hyper_parameter: _typing.Dict[str, _typing.Any]): | |||
| raise NotImplementedError | |||
| class ClassificationModel(_BaseModel): | |||
| def _initialize(self): | |||
| raise NotImplementedError | |||
| def from_hyper_parameter( | |||
| self, hyper_parameter: _typing.Dict[str, _typing.Any] | |||
| ) -> "ClassificationModel": | |||
| new_model: ClassificationModel = self.__class__( | |||
| num_features=self.num_features, | |||
| num_classes=self.num_classes, | |||
| device=self.device, | |||
| init=False, | |||
| ) | |||
| _hyper_parameter = self.hyper_parameter | |||
| _hyper_parameter.update(hyper_parameter) | |||
| new_model.hyper_parameter = _hyper_parameter | |||
| new_model.initialize() | |||
| return new_model | |||
| def __init__( | |||
| self, | |||
| num_features: int = ..., | |||
| num_classes: int = ..., | |||
| num_graph_features: int = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| init: bool = False, | |||
| **kwargs | |||
| ): | |||
| if "initialize" in kwargs: | |||
| del kwargs["initialize"] | |||
| super(ClassificationModel, self).__init__( | |||
| initialize=init, device=device, **kwargs | |||
| ) | |||
| if num_classes != Ellipsis and type(num_classes) == int: | |||
| self.__num_classes: int = num_classes if num_classes > 0 else 0 | |||
| else: | |||
| self.__num_classes: int = 0 | |||
| if num_features != Ellipsis and type(num_features) == int: | |||
| self.__num_features: int = num_features if num_features > 0 else 0 | |||
| else: | |||
| self.__num_features: int = 0 | |||
| if num_graph_features != Ellipsis and type(num_graph_features) == int: | |||
| if num_graph_features > 0: | |||
| self.__num_graph_features: int = num_graph_features | |||
| else: | |||
| self.__num_graph_features: int = 0 | |||
| else: | |||
| self.__num_graph_features: int = 0 | |||
| @property | |||
| def num_classes(self) -> int: | |||
| return self.__num_classes | |||
| @num_classes.setter | |||
| def num_classes(self, __num_classes: int): | |||
| if type(__num_classes) != int: | |||
| raise TypeError | |||
| if not __num_classes > 0: | |||
| raise ValueError | |||
| self.__num_classes = __num_classes if __num_classes > 0 else 0 | |||
| @property | |||
| def num_features(self) -> int: | |||
| return self.__num_features | |||
| @num_features.setter | |||
| def num_features(self, __num_features: int): | |||
| if type(__num_features) != int: | |||
| raise TypeError | |||
| if not __num_features > 0: | |||
| raise ValueError | |||
| self.__num_features = __num_features if __num_features > 0 else 0 | |||
| def get_num_classes(self) -> int: | |||
| # todo: consider replacing with property with getter and setter | |||
| return self.__num_classes | |||
| def set_num_classes(self, num_classes: int) -> None: | |||
| # todo: consider replacing with property with getter and setter | |||
| if type(num_classes) != int: | |||
| raise TypeError | |||
| self.__num_classes = num_classes if num_classes > 0 else 0 | |||
| def get_num_features(self) -> int: | |||
| # todo: consider replacing with property with getter and setter | |||
| return self.__num_features | |||
| def set_num_features(self, num_features: int): | |||
| # todo: consider replacing with property with getter and setter | |||
| if type(num_features) != int: | |||
| raise TypeError | |||
| self.__num_features = num_features if num_features > 0 else 0 | |||
| def set_num_graph_features(self, num_graph_features: int): | |||
| # todo: consider replacing with property with getter and setter | |||
| if type(num_graph_features) != int: | |||
| raise TypeError | |||
| else: | |||
| if num_graph_features > 0: | |||
| self.__num_graph_features = num_graph_features | |||
| else: | |||
| self.__num_graph_features = 0 | |||
| @@ -95,6 +95,24 @@ class GAT(torch.nn.Module): | |||
| return F.log_softmax(x, dim=1) | |||
| def encode(self, data): | |||
| x = data.x | |||
| for i in range(self.num_layer - 1): | |||
| x = self.convs[i](x, data.train_pos_edge_index) | |||
| if i != self.num_layer - 2: | |||
| x = activate_func(x, self.args["act"]) | |||
| # x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
| return x | |||
| def decode(self, z, pos_edge_index, neg_edge_index): | |||
| edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) | |||
| logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) | |||
| return logits | |||
| def decode_all(self, z): | |||
| prob_adj = z @ z.t() | |||
| return (prob_adj > 0).nonzero(as_tuple=False).t() | |||
| @register_model("gat") | |||
| class AutoGAT(BaseModel): | |||
| @@ -1,71 +1,110 @@ | |||
| import torch | |||
| import torch.nn.functional as F | |||
| import torch.nn.functional | |||
| import torch_geometric | |||
| from torch_geometric.nn import GCNConv | |||
| import typing as _typing | |||
| from . import register_model | |||
| from .base import BaseModel, activate_func | |||
| from .base import BaseModel, activate_func, ClassificationModel | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger("GCNModel") | |||
| def set_default(args, d): | |||
| for k, v in d.items(): | |||
| if k not in args: | |||
| args[k] = v | |||
| return args | |||
| class GCN(torch.nn.Module): | |||
| def __init__(self, args): | |||
| super(GCN, self).__init__() | |||
| self.args = args | |||
| self.num_layer = int(self.args["num_layers"]) | |||
| missing_keys = list( | |||
| set(["features_num", "num_class", "num_layers", "hidden", "dropout", "act"]) | |||
| - set(self.args.keys()) | |||
| ) | |||
| if len(missing_keys) > 0: | |||
| raise Exception("Missing keys: %s." % ",".join(missing_keys)) | |||
| if not self.num_layer == len(self.args["hidden"]) + 1: | |||
| LOGGER.warn("Warning: layer size does not match the length of hidden units") | |||
| self.convs = torch.nn.ModuleList() | |||
| self.convs.append(GCNConv(self.args["features_num"], self.args["hidden"][0])) | |||
| for i in range(self.num_layer - 2): | |||
| self.convs.append( | |||
| GCNConv(self.args["hidden"][i], self.args["hidden"][i + 1]) | |||
| def __init__( | |||
| self, | |||
| num_features: int, | |||
| num_classes: int, | |||
| hidden_features: _typing.Sequence[int], | |||
| dropout: float, | |||
| activation_name: str, | |||
| ): | |||
| super().__init__() | |||
| self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() | |||
| num_layers: int = len(hidden_features) + 1 | |||
| if num_layers == 1: | |||
| self.__convolution_layers.append(GCNConv(num_features, num_classes)) | |||
| else: | |||
| self.__convolution_layers.append(GCNConv(num_features, hidden_features[0])) | |||
| for i in range(len(hidden_features)): | |||
| self.__convolution_layers.append( | |||
| GCNConv(hidden_features[i], hidden_features[i + 1]) | |||
| if i + 1 < len(hidden_features) | |||
| else GCNConv(hidden_features[i], num_classes) | |||
| ) | |||
| self.__dropout: float = dropout | |||
| self.__activation_name: str = activation_name | |||
| def __layer_wise_forward(self, data): | |||
| # todo: Implement this forward method | |||
| # in case that data.edge_indexes property is provided | |||
| # for Layer-wise and Node-wise sampled training | |||
| raise NotImplementedError | |||
| def __basic_forward( | |||
| self, | |||
| x: torch.Tensor, | |||
| edge_index: torch.Tensor, | |||
| edge_weight: _typing.Optional[torch.Tensor] = None, | |||
| ) -> torch.Tensor: | |||
| for layer_index in range(len(self.__convolution_layers)): | |||
| x: torch.Tensor = self.__convolution_layers[layer_index]( | |||
| x, edge_index, edge_weight | |||
| ) | |||
| self.convs.append( | |||
| GCNConv(self.args["hidden"][self.num_layer - 2], self.args["num_class"]) | |||
| ) | |||
| def forward(self, data): | |||
| try: | |||
| x = data.x | |||
| except: | |||
| print("no x") | |||
| pass | |||
| try: | |||
| edge_index = data.edge_index | |||
| except: | |||
| print("no index") | |||
| pass | |||
| try: | |||
| edge_weight = data.edge_weight | |||
| except: | |||
| edge_weight = None | |||
| pass | |||
| for i in range(self.num_layer): | |||
| x = self.convs[i](x, edge_index, edge_weight) | |||
| if i != self.num_layer - 1: | |||
| x = activate_func(x, self.args["act"]) | |||
| x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
| return F.log_softmax(x, dim=1) | |||
| if layer_index + 1 < len(self.__convolution_layers): | |||
| x = activate_func(x, self.__activation_name) | |||
| x = torch.nn.functional.dropout( | |||
| x, p=self.__dropout, training=self.training | |||
| ) | |||
| return torch.nn.functional.log_softmax(x, dim=1) | |||
| def forward(self, data) -> torch.Tensor: | |||
| if hasattr(data, "edge_indexes") and getattr(data, "edge_indexes") is not None: | |||
| return self.__layer_wise_forward(data) | |||
| else: | |||
| if not (hasattr(data, "x") and hasattr(data, "edge_index")): | |||
| raise AttributeError | |||
| if not ( | |||
| type(getattr(data, "x")) == torch.Tensor | |||
| and type(getattr(data, "edge_index")) == torch.Tensor | |||
| ): | |||
| raise TypeError | |||
| x: torch.Tensor = getattr(data, "x") | |||
| edge_index: torch.LongTensor = getattr(data, "edge_index") | |||
| if ( | |||
| hasattr(data, "edge_weight") | |||
| and type(getattr(data, "edge_weight")) == torch.Tensor | |||
| and getattr(data, "edge_weight").size() == (edge_index.size(1),) | |||
| ): | |||
| edge_weight: _typing.Optional[torch.Tensor] = getattr( | |||
| data, "edge_weight" | |||
| ) | |||
| else: | |||
| edge_weight: _typing.Optional[torch.Tensor] = None | |||
| return self.__basic_forward(x, edge_index, edge_weight) | |||
| def encode(self, data): | |||
| x = data.x | |||
| num_layers = len(self.__convolution_layers) | |||
| for i in range(num_layers - 1): | |||
| x = self.__convolution_layers[i](x, data.train_pos_edge_index) | |||
| if i != num_layers - 2: | |||
| x = activate_func(x, self.__activation_name) | |||
| # x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
| return x | |||
| def decode(self, z, pos_edge_index, neg_edge_index): | |||
| edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) | |||
| logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) | |||
| return logits | |||
| def decode_all(self, z): | |||
| prob_adj = z @ z.t() | |||
| return (prob_adj > 0).nonzero(as_tuple=False).t() | |||
| # @register_model("gcn") | |||
| # class AutoGCN(ClassificationModel): | |||
| @register_model("gcn") | |||
| class AutoGCN(BaseModel): | |||
| r""" | |||
| @@ -99,15 +138,17 @@ class AutoGCN(BaseModel): | |||
| """ | |||
| def __init__( | |||
| self, num_features=None, num_classes=None, device=None, init=False, **args | |||
| ): | |||
| super(AutoGCN, self).__init__() | |||
| self.num_features = num_features if num_features is not None else 0 | |||
| self.num_classes = int(num_classes) if num_classes is not None else 0 | |||
| self.device = device if device is not None else "cpu" | |||
| self.init = True | |||
| self, | |||
| num_features: int = ..., | |||
| num_classes: int = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| init: bool = False, | |||
| **kwargs | |||
| ) -> None: | |||
| super().__init__() | |||
| self.num_features = num_features | |||
| self.num_classes = num_classes | |||
| self.device = device | |||
| self.params = { | |||
| "features_num": self.num_features, | |||
| @@ -145,11 +186,18 @@ class AutoGCN(BaseModel): | |||
| ] | |||
| # initial point of hp search | |||
| # self.hyperparams = { | |||
| # "num_layers": 2, | |||
| # "hidden": [16], | |||
| # "dropout": 0.2, | |||
| # "act": "leaky_relu", | |||
| # } | |||
| self.hyperparams = { | |||
| "num_layers": 2, | |||
| "hidden": [16], | |||
| "dropout": 0.2, | |||
| "act": "leaky_relu", | |||
| "num_layers": 3, | |||
| "hidden": [128, 64], | |||
| "dropout": 0, | |||
| "act": "relu", | |||
| } | |||
| self.initialized = False | |||
| @@ -157,8 +205,10 @@ class AutoGCN(BaseModel): | |||
| self.initialize() | |||
| def initialize(self): | |||
| # """Initialize model.""" | |||
| if self.initialized: | |||
| return | |||
| self.initialized = True | |||
| self.model = GCN({**self.params, **self.hyperparams}).to(self.device) | |||
| self.model = GCN( | |||
| self.num_features, | |||
| self.num_classes, | |||
| self.hyperparams.get("hidden"), | |||
| self.hyperparams.get("dropout"), | |||
| self.hyperparams.get("act"), | |||
| ).to(self.device) | |||
| @@ -82,7 +82,7 @@ class GraphSAGE(torch.nn.Module): | |||
| return self.__full_forward(data) | |||
| @register_model("sage") | |||
| # @register_model("sage") | |||
| class AutoSAGE(BaseModel): | |||
| def __init__( | |||
| self, | |||
| @@ -132,5 +132,5 @@ class AutoSAGE(BaseModel): | |||
| hidden_features=self.hyperparams["hidden"], | |||
| activation_name=self.hyperparams["act"], | |||
| **self.hyperparams | |||
| ) | |||
| ).to(self.__device) | |||
| self._initialized = True | |||
| @@ -171,8 +171,26 @@ class GraphSAGE(torch.nn.Module): | |||
| return F.log_softmax(x, dim=1) | |||
| def encode(self, data): | |||
| x = data.x | |||
| for i in range(self.num_layer - 1): | |||
| x = self.convs[i](x, data.train_pos_edge_index) | |||
| if i != self.num_layer - 2: | |||
| x = activate_func(x, self.args["act"]) | |||
| # x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
| return x | |||
| def decode(self, z, pos_edge_index, neg_edge_index): | |||
| edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) | |||
| logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) | |||
| return logits | |||
| def decode_all(self, z): | |||
| prob_adj = z @ z.t() | |||
| return (prob_adj > 0).nonzero(as_tuple=False).t() | |||
| # @register_model("sage") | |||
| @register_model("sage") | |||
| class AutoSAGE(BaseModel): | |||
| r""" | |||
| AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on | |||
| @@ -13,6 +13,7 @@ from nni.nas.pytorch.fixed import apply_fixed_architecture | |||
| from tqdm import tqdm | |||
| from datetime import datetime | |||
| import numpy as np | |||
| _logger = logging.getLogger(__name__) | |||
| def _get_mask(sampled, total): | |||
| multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)] | |||
| @@ -295,7 +296,6 @@ class RL(BaseNAS): | |||
| self.n_warmup=n_warmup | |||
| self.model_lr = model_lr | |||
| self.model_wd = model_wd | |||
| self.log=open('../tmp/log.txt','w') | |||
| def search(self, space: BaseSpace, dset, estimator): | |||
| self.model = space | |||
| self.dataset = dset#.to(self.device) | |||
| @@ -337,8 +337,7 @@ class RL(BaseNAS): | |||
| self._resample() | |||
| metric,loss=self._infer(mask='val') | |||
| bar.set_postfix(acc=metric,loss=loss.item()) | |||
| self.log.write(f'{self.arch}\n{self.selection}\n{metric},{loss}\n') | |||
| self.log.flush() | |||
| _logger.debug(f'{self.arch}\n{self.selection}\n{metric},{loss}') | |||
| reward =metric | |||
| rewards.append(reward) | |||
| if self.entropy_weight: | |||
| @@ -440,8 +439,6 @@ class GraphNasRL(BaseNAS): | |||
| self.n_warmup=n_warmup | |||
| self.model_lr = model_lr | |||
| self.model_wd = model_wd | |||
| timestamp=datetime.now().strftime('%m%d-%H-%M-%S') | |||
| self.log=open(f'../tmp/log-{timestamp}.txt','w') | |||
| self.hist=[] | |||
| self.topk=topk | |||
| def search(self, space: BaseSpace, dset, estimator): | |||
| @@ -507,8 +504,7 @@ class GraphNasRL(BaseNAS): | |||
| metric,loss=self._infer(mask='val') | |||
| # bar.set_postfix(acc=metric,loss=loss.item()) | |||
| self.log.write(f'{self.arch}\n{self.selection}\n{metric},{loss}\n') | |||
| self.log.flush() | |||
| _logger.debug(f'{self.arch}\n{self.selection}\n{metric},{loss}') | |||
| # diff: not do reward shaping as in graphnas code | |||
| reward =metric | |||
| self.hist.append([-metric,self.selection]) | |||
| @@ -4,8 +4,8 @@ from .base import ( | |||
| Evaluation, | |||
| BaseNodeClassificationTrainer, | |||
| BaseGraphClassificationTrainer, | |||
| BaseLinkPredictionTrainer, | |||
| ) | |||
| from .evaluation import get_feval | |||
| def register_trainer(name): | |||
| @@ -24,5 +24,22 @@ def register_trainer(name): | |||
| from .graph_classification_full import GraphClassificationFullTrainer | |||
| from .node_classification_full import NodeClassificationFullTrainer | |||
| from .link_prediction import LinkPredictionTrainer | |||
| from .node_classification_trainer import * | |||
| from .evaluation import get_feval, Acc, Auc, Logloss | |||
| from .evaluation import get_feval, Acc, Auc, Logloss, Mrr | |||
| __all__ = [ | |||
| "BaseTrainer", | |||
| "Evaluation", | |||
| "BaseGraphClassificationTrainer", | |||
| "BaseNodeClassificationTrainer", | |||
| "BaseLinkPredictionTrainer", | |||
| "GraphClassificationFullTrainer", | |||
| "NodeClassificationFullTrainer", | |||
| "LinkPredictionTrainer", | |||
| "Acc", | |||
| "Auc", | |||
| "Logloss", | |||
| "Mrr", | |||
| "get_feval", | |||
| ] | |||
| @@ -216,7 +216,7 @@ class BaseTrainer: | |||
| pass | |||
| def duplicate_from_hyper_parameter( | |||
| self, hp, model: _typing.Union[BaseModel, str, None] = None | |||
| self, hp, model: _typing.Optional[BaseModel] = ... | |||
| ) -> "BaseTrainer": | |||
| """Create a new trainer with the given hyper parameter.""" | |||
| raise NotImplementedError() | |||
| @@ -404,3 +404,20 @@ class BaseGraphClassificationTrainer(_BaseClassificationTrainer): | |||
| super(BaseGraphClassificationTrainer, self).__init__( | |||
| model, num_features, num_classes, device, init, feval, loss | |||
| ) | |||
| class BaseLinkPredictionTrainer(_BaseClassificationTrainer): | |||
| def __init__( | |||
| self, | |||
| model: _typing.Union[BaseModel, str], | |||
| num_features: int, | |||
| device: _typing.Union[torch.device, str, None] = None, | |||
| init: bool = True, | |||
| feval: _typing.Union[ | |||
| _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] | |||
| ] = (Acc,), | |||
| loss: str = "nll_loss", | |||
| ): | |||
| super(BaseLinkPredictionTrainer, self).__init__( | |||
| model, num_features, 2, device, init, feval, loss | |||
| ) | |||
| @@ -117,7 +117,13 @@ class Auc(Evaluation): | |||
| """ | |||
| Should return: the evaluation result (float) | |||
| """ | |||
| pos_predict = predict[:, 1] | |||
| if len(predict.shape) == 1: | |||
| pos_predict = predict | |||
| else: | |||
| assert ( | |||
| predict.shape[1] == 2 | |||
| ), "Cannot use auc on given data with %d classes!" % (predict.shape[1]) | |||
| pos_predict = predict[:, 1] | |||
| return roc_auc_score(label, pos_predict) | |||
| @@ -139,7 +145,11 @@ class Acc(Evaluation): | |||
| """ | |||
| Should return: the evaluation result (float) | |||
| """ | |||
| return accuracy_score(label, np.argmax(predict, axis=1)) | |||
| if len(predict.shape) == 2: | |||
| predict = np.argmax(predict, axis=1) | |||
| else: | |||
| predict = [1 if p > 0.5 else 0 for p in predict] | |||
| return accuracy_score(label, predict) | |||
| @register_evaluate("mrr") | |||
| @@ -160,5 +170,11 @@ class Mrr(Evaluation): | |||
| """ | |||
| Should return: the evaluation result (float) | |||
| """ | |||
| pos_predict = predict[:, 1] | |||
| if len(predict.shape) == 2: | |||
| assert ( | |||
| predict.shape[1] == 2 | |||
| ), "Cannot use mrr on given data with %d classes!" % (predict.shape[1]) | |||
| pos_predict = predict[:, 1] | |||
| else: | |||
| pos_predict = predict | |||
| return label_ranking_average_precision_score(label, pos_predict) | |||
| @@ -98,7 +98,7 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): | |||
| self.lr = lr if lr is not None else 1e-4 | |||
| self.max_epoch = max_epoch if max_epoch is not None else 100 | |||
| self.batch_size = batch_size if batch_size is not None else 64 | |||
| self.num_workers = num_workers if num_workers is not None else 4 | |||
| self.num_workers = num_workers if num_workers is not None else 0 | |||
| if self.num_workers > 0: | |||
| mp.set_start_method("fork", force=True) | |||
| self.early_stopping_round = ( | |||
| @@ -211,7 +211,7 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): | |||
| """ | |||
| optimizer = self.optimizer( | |||
| self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| ) | |||
| # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| @@ -0,0 +1,518 @@ | |||
| from . import register_trainer, Evaluation | |||
| import torch | |||
| from torch.optim.lr_scheduler import StepLR | |||
| import torch.nn.functional as F | |||
| from ..model import MODEL_DICT, BaseModel | |||
| from .evaluation import Auc, EVALUATE_DICT | |||
| from .base import EarlyStopping, BaseLinkPredictionTrainer | |||
| from typing import Union | |||
| from copy import deepcopy | |||
| from torch_geometric.utils import negative_sampling | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger("link prediction trainer") | |||
def get_feval(feval):
    """Resolve an evaluation specifier into ``Evaluation`` class(es).

    Parameters
    ----------
    feval: ``str``, ``Evaluation`` subclass, or a sequence of either.
        The evaluation metric specifier(s) to resolve.

    Returns
    -------
    The matching ``Evaluation`` subclass, or a list of them when a sequence
    is given.

    Raises
    ------
    ValueError
        If ``feval`` is of an unsupported type.
    """
    if isinstance(feval, str):
        return EVALUATE_DICT[feval]
    if isinstance(feval, type) and issubclass(feval, Evaluation):
        return feval
    # Accept tuples as well as lists: default feval values elsewhere in this
    # codebase are declared as tuples, e.g. ``(Acc,)`` and ``(Logloss,)``,
    # which the original list-only check rejected.
    if isinstance(feval, (list, tuple)):
        return [get_feval(f) for f in feval]
    raise ValueError("feval argument of type", type(feval), "is not supported!")
@register_trainer("LinkPredictionFull")
class LinkPredictionTrainer(BaseLinkPredictionTrainer):
    """
    The link prediction trainer.

    Used to automatically train the link prediction problem.

    Parameters
    ----------
    model: ``BaseModel`` or ``str``
        The (name of) model used to train and predict.
    optimizer: ``Optimizer`` or ``str``
        The (name of) optimizer used to train and predict.
    lr: ``float``
        The learning rate of link prediction task.
    max_epoch: ``int``
        The max number of epochs in training.
    early_stopping_round: ``int``
        The round of early stop.
    device: ``torch.device`` or ``str``
        The device where model will be running on.
    init: ``bool``
        If True(False), the model will (not) be initialized.
    """

    # Class-level copy of the hyper-parameter search space; kept in sync with
    # the instance-level ``self.space`` for HPO modules that read it here.
    space = None

    def __init__(
        self,
        model: Union[BaseModel, str, None] = None,
        num_features=None,
        optimizer=None,
        lr=1e-4,
        max_epoch=100,
        early_stopping_round=101,
        weight_decay=1e-4,
        device="auto",
        init=True,
        feval=[Auc],  # NOTE(review): mutable default; never mutated here, kept for interface stability
        loss="binary_cross_entropy_with_logits",
        *args,
        **kwargs,
    ):
        super().__init__(model, num_features, device, init, feval, loss)
        # Resolve the optimizer by name; anything unrecognized (including
        # None or "adam") falls back to Adam, matching the original behavior.
        if isinstance(optimizer, str) and optimizer.lower() == "sgd":
            self.optimizer = torch.optim.SGD
        else:
            self.optimizer = torch.optim.Adam
        self.lr = lr
        self.max_epoch = max_epoch
        self.early_stopping_round = early_stopping_round
        # The original assigned ``self.device = device`` twice; once suffices.
        self.device = device
        self.args = args
        self.kwargs = kwargs
        self.weight_decay = weight_decay
        self.early_stopping = EarlyStopping(
            patience=early_stopping_round, verbose=False
        )
        # Validation-time caches populated by ``train``.
        self.valid_result = None
        self.valid_result_prob = None
        self.valid_score = None
        self.initialized = False
        # Hyper-parameter search space used by HPO.
        self.space = [
            {
                "parameterName": "max_epoch",
                "type": "INTEGER",
                "maxValue": 500,
                "minValue": 10,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "early_stopping_round",
                "type": "INTEGER",
                "maxValue": 30,
                "minValue": 10,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "lr",
                "type": "DOUBLE",
                "maxValue": 1e-1,
                "minValue": 1e-4,
                "scalingType": "LOG",
            },
            {
                "parameterName": "weight_decay",
                "type": "DOUBLE",
                "maxValue": 1e-2,
                "minValue": 1e-4,
                "scalingType": "LOG",
            },
        ]
        LinkPredictionTrainer.space = self.space
        self.hyperparams = {
            "max_epoch": self.max_epoch,
            "early_stopping_round": self.early_stopping_round,
            "lr": self.lr,
            "weight_decay": self.weight_decay,
        }
        if init is True:
            self.initialize()
| def initialize(self): | |||
| # Initialize the auto model in trainer. | |||
| if self.initialized is True: | |||
| return | |||
| self.initialized = True | |||
| self.model.set_num_classes(self.num_classes) | |||
| self.model.set_num_features(self.num_features) | |||
| self.model.initialize() | |||
| def get_model(self): | |||
| # Get auto model used in trainer. | |||
| return self.model | |||
| @classmethod | |||
| def get_task_name(cls): | |||
| # Get task name, i.e., `LinkPrediction`. | |||
| return "LinkPrediction" | |||
| def train_only(self, data, train_mask=None): | |||
| """ | |||
| The function of training on the given dataset and mask. | |||
| Parameters | |||
| ---------- | |||
| data: The link prediction dataset used to be trained. It should consist of masks, including train_mask, and etc. | |||
| train_mask: The mask used in training stage. | |||
| Returns | |||
| ------- | |||
| self: ``autogl.train.LinkPredictionTrainer`` | |||
| A reference of current trainer. | |||
| """ | |||
| # data.train_mask = data.val_mask = data.test_mask = data.y = None | |||
| # data = train_test_split_edges(data) | |||
| data = data.to(self.device) | |||
| # mask = data.train_mask if train_mask is None else train_mask | |||
| optimizer = self.optimizer( | |||
| self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| ) | |||
| scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| for epoch in range(1, self.max_epoch): | |||
| self.model.model.train() | |||
| neg_edge_index = negative_sampling( | |||
| edge_index=data.train_pos_edge_index, | |||
| num_nodes=data.num_nodes, | |||
| num_neg_samples=data.train_pos_edge_index.size(1), | |||
| ) | |||
| optimizer.zero_grad() | |||
| # res = self.model.model.forward(data) | |||
| z = self.model.model.encode(data) | |||
| link_logits = self.model.model.decode( | |||
| z, data.train_pos_edge_index, neg_edge_index | |||
| ) | |||
| link_labels = self.get_link_labels( | |||
| data.train_pos_edge_index, neg_edge_index | |||
| ) | |||
| # loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) | |||
| if hasattr(F, self.loss): | |||
| loss = getattr(F, self.loss)(link_logits, link_labels) | |||
| else: | |||
| raise TypeError( | |||
| "PyTorch does not support loss type {}".format(self.loss) | |||
| ) | |||
| loss.backward() | |||
| optimizer.step() | |||
| scheduler.step() | |||
| if type(self.feval) is list: | |||
| feval = self.feval[0] | |||
| else: | |||
| feval = self.feval | |||
| val_loss = self.evaluate([data], mask="val", feval=feval) | |||
| if feval.is_higher_better() is True: | |||
| val_loss = -val_loss | |||
| self.early_stopping(val_loss, self.model.model) | |||
| if self.early_stopping.early_stop: | |||
| LOGGER.debug("Early stopping at %d", epoch) | |||
| break | |||
| self.early_stopping.load_checkpoint(self.model.model) | |||
| def predict_only(self, data, test_mask=None): | |||
| """ | |||
| The function of predicting on the given dataset and mask. | |||
| Parameters | |||
| ---------- | |||
| data: The link prediction dataset used to be predicted. | |||
| train_mask: The mask used in training stage. | |||
| Returns | |||
| ------- | |||
| res: The result of predicting on the given dataset. | |||
| """ | |||
| data = data.to(self.device) | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| z = self.model.model.encode(data) | |||
| return z | |||
| def train(self, dataset, keep_valid_result=True): | |||
| """ | |||
| The function of training on the given dataset and keeping valid result. | |||
| Parameters | |||
| ---------- | |||
| dataset: The link prediction dataset used to be trained. | |||
| keep_valid_result: ``bool`` | |||
| If True(False), save the validation result after training. | |||
| Returns | |||
| ------- | |||
| self: ``autogl.train.LinkPredictionTrainer`` | |||
| A reference of current trainer. | |||
| """ | |||
| data = dataset[0] | |||
| self.train_only(data) | |||
| if keep_valid_result: | |||
| self.valid_result = self.predict_only(data) | |||
| self.valid_result_prob = self.predict_proba(dataset, "val") | |||
| self.valid_score = self.evaluate(dataset, mask="val", feval=self.feval) | |||
| def predict(self, dataset, mask=None): | |||
| """ | |||
| The function of predicting on the given dataset. | |||
| Parameters | |||
| ---------- | |||
| dataset: The link prediction dataset used to be predicted. | |||
| mask: ``train``, ``val``, or ``test``. | |||
| The dataset mask. | |||
| Returns | |||
| ------- | |||
| The prediction result of ``predict_proba``. | |||
| """ | |||
| return self.predict_proba(dataset, mask=mask, in_log_format=False) | |||
| def predict_proba(self, dataset, mask=None, in_log_format=False): | |||
| """ | |||
| The function of predicting the probability on the given dataset. | |||
| Parameters | |||
| ---------- | |||
| dataset: The link prediction dataset used to be predicted. | |||
| mask: ``train``, ``val``, or ``test``. | |||
| The dataset mask. | |||
| in_log_format: ``bool``. | |||
| If True(False), the probability will (not) be log format. | |||
| Returns | |||
| ------- | |||
| The prediction result. | |||
| """ | |||
| data = dataset[0] | |||
| data = data.to(self.device) | |||
| if mask in ["train", "val", "test"]: | |||
| pos_edge_index = data[f"{mask}_pos_edge_index"] | |||
| neg_edge_index = data[f"{mask}_neg_edge_index"] | |||
| else: | |||
| pos_edge_index = data[f"test_pos_edge_index"] | |||
| neg_edge_index = data[f"test_neg_edge_index"] | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| z = self.predict_only(data) | |||
| link_logits = self.model.model.decode(z, pos_edge_index, neg_edge_index) | |||
| link_probs = link_logits.sigmoid() | |||
| return link_probs | |||
| def get_valid_predict(self): | |||
| # """Get the valid result.""" | |||
| return self.valid_result | |||
| def get_valid_predict_proba(self): | |||
| # """Get the valid result (prediction probability).""" | |||
| return self.valid_result_prob | |||
| def get_valid_score(self, return_major=True): | |||
| """ | |||
| The function of getting the valid score. | |||
| Parameters | |||
| ---------- | |||
| return_major: ``bool``. | |||
| If True, the return only consists of the major result. | |||
| If False, the return consists of the all results. | |||
| Returns | |||
| ------- | |||
| result: The valid score in training stage. | |||
| """ | |||
| if isinstance(self.feval, list): | |||
| if return_major: | |||
| return self.valid_score[0], self.feval[0].is_higher_better() | |||
| else: | |||
| return self.valid_score, [f.is_higher_better() for f in self.feval] | |||
| else: | |||
| return self.valid_score, self.feval.is_higher_better() | |||
| def get_name_with_hp(self): | |||
| # """Get the name of hyperparameter.""" | |||
| name = "-".join( | |||
| [ | |||
| str(self.optimizer), | |||
| str(self.lr), | |||
| str(self.max_epoch), | |||
| str(self.early_stopping_round), | |||
| str(self.model), | |||
| str(self.device), | |||
| ] | |||
| ) | |||
| name = ( | |||
| name | |||
| + "|" | |||
| + "-".join( | |||
| [ | |||
| str(x[0]) + "-" + str(x[1]) | |||
| for x in self.model.get_hyper_parameter().items() | |||
| ] | |||
| ) | |||
| ) | |||
| return name | |||
| def evaluate(self, dataset, mask=None, feval=None): | |||
| """ | |||
| The function of training on the given dataset and keeping valid result. | |||
| Parameters | |||
| ---------- | |||
| dataset: The link prediction dataset used to be evaluated. | |||
| mask: ``train``, ``val``, or ``test``. | |||
| The dataset mask. | |||
| feval: ``str``. | |||
| The evaluation method used in this function. | |||
| Returns | |||
| ------- | |||
| res: The evaluation result on the given dataset. | |||
| """ | |||
| data = dataset[0] | |||
| data = data.to(self.device) | |||
| test_mask = mask | |||
| if feval is None: | |||
| feval = self.feval | |||
| else: | |||
| feval = get_feval(feval) | |||
| if mask in ["train", "val", "test"]: | |||
| pos_edge_index = data[f"{mask}_pos_edge_index"] | |||
| neg_edge_index = data[f"{mask}_neg_edge_index"] | |||
| else: | |||
| pos_edge_index = data[f"test_pos_edge_index"] | |||
| neg_edge_index = data[f"test_neg_edge_index"] | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| link_probs = self.predict_proba(dataset, mask) | |||
| link_labels = self.get_link_labels(pos_edge_index, neg_edge_index) | |||
| if not isinstance(feval, list): | |||
| feval = [feval] | |||
| return_signle = True | |||
| else: | |||
| return_signle = False | |||
| res = [] | |||
| for f in feval: | |||
| res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy())) | |||
| if return_signle: | |||
| return res[0] | |||
| return res | |||
| def to(self, new_device): | |||
| assert isinstance(new_device, torch.device) | |||
| self.device = new_device | |||
| if self.model is not None: | |||
| self.model.to(self.device) | |||
| def duplicate_from_hyper_parameter(self, hp: dict, model=None, restricted=True): | |||
| """ | |||
| The function of duplicating a new instance from the given hyperparameter. | |||
| Parameters | |||
| ---------- | |||
| hp: ``dict``. | |||
| The hyperparameter used in the new instance. | |||
| model: The model used in the new instance of trainer. | |||
| restricted: ``bool``. | |||
| If False(True), the hyperparameter should (not) be updated from origin hyperparameter. | |||
| Returns | |||
| ------- | |||
| self: ``autogl.train.LinkPredictionTrainer`` | |||
| A new instance of trainer. | |||
| """ | |||
| if not restricted: | |||
| origin_hp = deepcopy(self.hyperparams) | |||
| origin_hp.update(hp) | |||
| hp = origin_hp | |||
| if model is None: | |||
| model = self.model | |||
| model.set_num_classes(self.num_classes) | |||
| model.set_num_features(self.num_features) | |||
| model = model.from_hyper_parameter( | |||
| dict( | |||
| [ | |||
| x | |||
| for x in hp.items() | |||
| if x[0] in [y["parameterName"] for y in model.space] | |||
| ] | |||
| ) | |||
| ) | |||
| ret = self.__class__( | |||
| model=model, | |||
| num_features=self.num_features, | |||
| optimizer=self.optimizer, | |||
| lr=hp["lr"], | |||
| max_epoch=hp["max_epoch"], | |||
| early_stopping_round=hp["early_stopping_round"], | |||
| device=self.device, | |||
| weight_decay=hp["weight_decay"], | |||
| feval=self.feval, | |||
| init=True, | |||
| *self.args, | |||
| **self.kwargs, | |||
| ) | |||
| return ret | |||
| def set_feval(self, feval): | |||
| # """Set the evaluation metrics.""" | |||
| self.feval = get_feval(feval) | |||
| @property | |||
| def hyper_parameter_space(self): | |||
| # """Get the space of hyperparameter.""" | |||
| return self.space | |||
| @hyper_parameter_space.setter | |||
| def hyper_parameter_space(self, space): | |||
| # """Set the space of hyperparameter.""" | |||
| self.space = space | |||
| LinkPredictionTrainer.space = space | |||
| def get_hyper_parameter(self): | |||
| # """Get the hyperparameter in this trainer.""" | |||
| return self.hyperparams | |||
| def get_link_labels(self, pos_edge_index, neg_edge_index): | |||
| E = pos_edge_index.size(1) + neg_edge_index.size(1) | |||
| link_labels = torch.zeros(E, dtype=torch.float, device=self.device) | |||
| link_labels[: pos_edge_index.size(1)] = 1.0 | |||
| return link_labels | |||
| @@ -189,7 +189,7 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): | |||
| data = data.to(self.device) | |||
| mask = data.train_mask if train_mask is None else train_mask | |||
| optimizer = self.optimizer( | |||
| self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| ) | |||
| # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| lr_scheduler_type = self.lr_scheduler_type | |||
| @@ -1,12 +1,14 @@ | |||
| import torch | |||
| import logging | |||
| import typing as _typing | |||
| from torch.nn import functional as F | |||
| import torch.nn.functional | |||
| import torch.utils.data | |||
| from .. import register_trainer | |||
| from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation | |||
| from ..evaluation import get_feval, Logloss | |||
| from ..sampling.sampler.neighbor_sampler import NeighborSampler | |||
| from ..sampling.sampler.graphsaint_sampler import * | |||
| from ...model import BaseModel | |||
| LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") | |||
| @@ -37,7 +39,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| ] = (Logloss,), | |||
| loss: str = "nll_loss", | |||
| lr_scheduler_type: _typing.Optional[str] = None, | |||
| **kwargs | |||
| **kwargs, | |||
| ) -> None: | |||
| if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): | |||
| self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer | |||
| @@ -82,45 +84,11 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| self._valid_result: torch.Tensor = torch.zeros(0) | |||
| self._valid_result_prob: torch.Tensor = torch.zeros(0) | |||
| self._valid_score = None | |||
| self._hyper_parameter_space: _typing.List[_typing.Dict[str, _typing.Any]] = [ | |||
| { | |||
| "parameterName": "max_epoch", | |||
| "type": "INTEGER", | |||
| "maxValue": 500, | |||
| "minValue": 10, | |||
| "scalingType": "LINEAR", | |||
| }, | |||
| { | |||
| "parameterName": "early_stopping_round", | |||
| "type": "INTEGER", | |||
| "maxValue": 30, | |||
| "minValue": 10, | |||
| "scalingType": "LINEAR", | |||
| }, | |||
| { | |||
| "parameterName": "lr", | |||
| "type": "DOUBLE", | |||
| "maxValue": 1e-1, | |||
| "minValue": 1e-4, | |||
| "scalingType": "LOG", | |||
| }, | |||
| { | |||
| "parameterName": "weight_decay", | |||
| "type": "DOUBLE", | |||
| "maxValue": 1e-2, | |||
| "minValue": 1e-4, | |||
| "scalingType": "LOG", | |||
| }, | |||
| ] | |||
| self._hyper_parameter: _typing.Dict[str, _typing.Any] = { | |||
| "max_epoch": self._max_epoch, | |||
| "early_stopping_round": self._early_stopping.patience, | |||
| "lr": self._learning_rate, | |||
| "weight_decay": self._weight_decay, | |||
| } | |||
| self._valid_score: _typing.Sequence[float] = [] | |||
| self._hyper_parameter_space: _typing.Sequence[ | |||
| _typing.Dict[str, _typing.Any] | |||
| ] = [] | |||
| self.__initialized: bool = False | |||
| if init: | |||
| @@ -129,12 +97,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": | |||
| if self.__initialized: | |||
| return self | |||
| self._model.initialize() | |||
| self.model.initialize() | |||
| self.__initialized = True | |||
| return self | |||
| def get_model(self) -> BaseModel: | |||
| return self._model | |||
| return self.model | |||
| def __train_only(self, data) -> "NodeClassificationNeighborSamplingTrainer": | |||
| """ | |||
| @@ -144,7 +112,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| """ | |||
| data = data.to(self.device) | |||
| optimizer: torch.optim.Optimizer = self._optimizer_class( | |||
| self._model.parameters(), | |||
| self.model.model.parameters(), | |||
| lr=self._learning_rate, | |||
| weight_decay=self._weight_decay, | |||
| ) | |||
| @@ -181,17 +149,17 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| ) | |||
| for current_epoch in range(self._max_epoch): | |||
| self._model.model.train() | |||
| self.model.model.train() | |||
| """ epoch start """ | |||
| for target_node_indexes, edge_indexes in train_sampler: | |||
| optimizer.zero_grad() | |||
| data.edge_indexes = edge_indexes | |||
| prediction = self._model.model(data) | |||
| if not hasattr(F, self.loss): | |||
| prediction = self.model.model(data) | |||
| if not hasattr(torch.nn.functional, self.loss): | |||
| raise TypeError( | |||
| "PyTorch does not support loss type {}".format(self.loss) | |||
| ) | |||
| loss_function = getattr(F, self.loss) | |||
| loss_function = getattr(torch.nn.functional, self.loss) | |||
| loss: torch.Tensor = loss_function( | |||
| prediction[target_node_indexes], data.y[target_node_indexes] | |||
| ) | |||
| @@ -211,12 +179,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| validation_loss: float = -validation_results[0] | |||
| else: | |||
| validation_loss: float = validation_results[0] | |||
| self._early_stopping(validation_loss, self._model.model) | |||
| self._early_stopping(validation_loss, self.model.model) | |||
| if self._early_stopping.early_stop: | |||
| LOGGER.debug("Early stopping at %d", current_epoch) | |||
| break | |||
| if hasattr(data, "val_mask") and data.val_mask is not None: | |||
| self._early_stopping.load_checkpoint(self._model.model) | |||
| self._early_stopping.load_checkpoint(self.model.model) | |||
| return self | |||
| def __predict_only(self, data): | |||
| @@ -226,9 +194,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| :return: the result of prediction on the given dataset | |||
| """ | |||
| data = data.to(self.device) | |||
| self._model.model.eval() | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| prediction = self._model.model(data) | |||
| prediction = self.model.model(data) | |||
| return prediction | |||
| def train(self, dataset, keep_valid_result: bool = True): | |||
| @@ -286,7 +254,6 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| return (self._valid_score, [f.is_higher_better() for f in self.feval]) | |||
| def get_name_with_hp(self) -> str: | |||
| # """Get the name of hyperparameter.""" | |||
| name = "-".join( | |||
| [ | |||
| str(self._optimizer_class), | |||
| @@ -349,8 +316,8 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| def to(self, device: torch.device): | |||
| self.device = device | |||
| if self._model is not None: | |||
| self._model.to(self.device) | |||
| if self.model is not None: | |||
| self.model.to(self.device) | |||
| def duplicate_from_hyper_parameter( | |||
| self, | |||
| @@ -359,7 +326,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| ) -> "NodeClassificationNeighborSamplingTrainer": | |||
| if model is None or not isinstance(model, BaseModel): | |||
| model = self._model | |||
| model = self.model | |||
| model = model.from_hyper_parameter( | |||
| dict( | |||
| [ | |||
| @@ -380,7 +347,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| feval=self.feval, | |||
| loss=self.loss, | |||
| lr_scheduler_type=self._lr_scheduler_type, | |||
| **hp | |||
| **hp, | |||
| ) | |||
| @property | |||
| @@ -390,3 +357,405 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): | |||
| @hyper_parameter_space.setter | |||
| def hyper_parameter_space(self, hp_space): | |||
| self._hyper_parameter_space = hp_space | |||
| @register_trainer("NodeClassificationGraphSAINTTrainer") | |||
| class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): | |||
| def __init__( | |||
| self, | |||
| model: _typing.Union[BaseModel], | |||
| num_features: int, | |||
| num_classes: int, | |||
| optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None], | |||
| lr: float = 1e-4, | |||
| max_epoch: int = 100, | |||
| early_stopping_round: int = 100, | |||
| weight_decay: float = 1e-4, | |||
| device: _typing.Optional[torch.device] = None, | |||
| init: bool = True, | |||
| feval: _typing.Union[ | |||
| _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] | |||
| ] = (Logloss,), | |||
| loss: str = "nll_loss", | |||
| lr_scheduler_type: _typing.Optional[str] = None, | |||
| **kwargs, | |||
| ) -> None: | |||
| if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): | |||
| self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer | |||
| elif type(optimizer) == str: | |||
| if optimizer.lower() == "adam": | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.Adam | |||
| elif optimizer.lower() == "adam" + "w": | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.AdamW | |||
| elif optimizer.lower() == "sgd": | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.SGD | |||
| else: | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.Adam | |||
| else: | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.Adam | |||
| self._learning_rate: float = lr if lr > 0 else 1e-4 | |||
| self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type | |||
| self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 | |||
| self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 | |||
| early_stopping_round: int = ( | |||
| early_stopping_round if early_stopping_round > 0 else 1e2 | |||
| ) | |||
| self._early_stopping = EarlyStopping( | |||
| patience=early_stopping_round, verbose=False | |||
| ) | |||
| # Assign an empty initial hyper parameter space | |||
| self._hyper_parameter_space: _typing.Sequence[ | |||
| _typing.Dict[str, _typing.Any] | |||
| ] = [] | |||
| self._valid_result: torch.Tensor = torch.zeros(0) | |||
| self._valid_result_prob: torch.Tensor = torch.zeros(0) | |||
| self._valid_score: _typing.Sequence[float] = () | |||
| super(NodeClassificationGraphSAINTTrainer, self).__init__( | |||
| model, num_features, num_classes, device, init, feval, loss | |||
| ) | |||
| """ Set hyper parameters """ | |||
| if "num_subgraphs" not in kwargs: | |||
| raise KeyError | |||
| elif type(kwargs.get("num_subgraphs")) != int: | |||
| raise TypeError | |||
| elif not kwargs.get("num_subgraphs") > 0: | |||
| raise ValueError | |||
| else: | |||
| self.__num_subgraphs: int = kwargs.get("num_subgraphs") | |||
| if "sampling_budget" not in kwargs: | |||
| raise KeyError | |||
| elif type(kwargs.get("sampling_budget")) != int: | |||
| raise TypeError | |||
| elif not kwargs.get("sampling_budget") > 0: | |||
| raise ValueError | |||
| else: | |||
| self.__sampling_budget: int = kwargs.get("sampling_budget") | |||
| if "sampling_method" not in kwargs: | |||
| self.__sampling_method_identifier: str = "node" | |||
| elif type(kwargs.get("sampling_method")) != str: | |||
| self.__sampling_method_identifier: str = "node" | |||
| else: | |||
| self.__sampling_method_identifier: str = kwargs.get("sampling_method") | |||
| if self.__sampling_method_identifier.lower() not in ("node", "edge"): | |||
| self.__sampling_method_identifier: str = "node" | |||
| self.__is_initialized: bool = False | |||
| if init: | |||
| self.initialize() | |||
| def initialize(self): | |||
| if self.__is_initialized: | |||
| return self | |||
| self.model.initialize() | |||
| self.__is_initialized = True | |||
| return self | |||
| def to(self, device: torch.device): | |||
| self.device = device | |||
| if self.model is not None: | |||
| self.model.to(self.device) | |||
| def get_model(self): | |||
| return self.model | |||
    def __train_only(self, data):
        """
        Train the model on the given graph using GraphSAINT sub-graph
        sampling with importance-weighted (alpha/lambda normalized) loss.

        :param data: data of a specific graph
        :return: self
        """
        data = data.to(self.device)
        # NOTE(review): other trainers in this file optimize
        # self.model.model.parameters(); confirm self.model.parameters()
        # is intended here.
        optimizer: torch.optim.Optimizer = self._optimizer_class(
            self.model.parameters(),
            lr=self._learning_rate,
            weight_decay=self._weight_decay,
        )
        # Select an LR scheduler by name; unknown or absent names fall back
        # to a constant-factor LambdaLR (i.e. effectively no scheduling).
        if type(self._lr_scheduler_type) == str:
            if self._lr_scheduler_type.lower() == "step" + "lr":
                lr_scheduler: torch.optim.lr_scheduler.StepLR = (
                    torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
                )
            elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr":
                lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = (
                    torch.optim.lr_scheduler.MultiStepLR(
                        optimizer, milestones=[30, 80], gamma=0.1
                    )
                )
            elif self._lr_scheduler_type.lower() == "exponential" + "lr":
                lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = (
                    torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
                )
            elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower():
                # NOTE(review): ReduceLROnPlateau.step() expects a metric
                # argument, but step() below is called without one — verify.
                lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = (
                    torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min")
                )
            else:
                lr_scheduler: torch.optim.lr_scheduler.LambdaLR = (
                    torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0)
                )
        else:
            lr_scheduler: torch.optim.lr_scheduler.LambdaLR = (
                torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0)
            )
        # Choose the sub-graph sampler according to the configured method.
        if self.__sampling_method_identifier.lower() == "edge":
            sub_graph_sampler = GraphSAINTRandomEdgeSampler(
                self.__sampling_budget, self.__num_subgraphs
            )
        else:
            sub_graph_sampler = GraphSAINTRandomNodeSampler(
                self.__sampling_budget, self.__num_subgraphs
            )
        for current_epoch in range(self._max_epoch):
            self.model.model.train()
            """ epoch start """
            """ Sample sub-graphs """
            # A fresh set of sub-graphs is sampled every epoch.
            sub_graph_set = sub_graph_sampler.sample(data)
            sub_graphs_loader: torch.utils.data.DataLoader = (
                torch.utils.data.DataLoader(sub_graph_set)
            )
            # Edge (alpha) and node (lambda) normalization terms computed by
            # the sampler over all sampled sub-graphs. getattr is used for
            # "lambda" since it is a Python keyword.
            integral_alpha: torch.Tensor = getattr(sub_graph_set, "alpha")
            integral_lambda: torch.Tensor = getattr(sub_graph_set, "lambda")
            """ iterate sub-graphs """
            for sub_graph_data in sub_graphs_loader:
                optimizer.zero_grad()
                sampled_edge_indexes: torch.Tensor = sub_graph_data.sampled_edge_indexes
                sampled_node_indexes: torch.Tensor = sub_graph_data.sampled_node_indexes
                sampled_train_mask: torch.Tensor = sub_graph_data.train_mask
                # Edge weights are the inverse of the edge sampling factor.
                sampled_alpha = integral_alpha[sampled_edge_indexes]
                sub_graph_data.edge_weight = 1 / sampled_alpha
                prediction: torch.Tensor = self.model.model(sub_graph_data)
                if not hasattr(torch.nn.functional, self.loss):
                    raise TypeError(f"PyTorch does not support loss type {self.loss}")
                loss_func = getattr(torch.nn.functional, self.loss)
                # Per-node loss (reduction="none") so each term can be
                # reweighted by its node's lambda before summation.
                unreduced_loss: torch.Tensor = loss_func(
                    prediction[sampled_train_mask],
                    data.y[sampled_train_mask],
                    reduction="none",
                )
                sampled_lambda: torch.Tensor = integral_lambda[sampled_node_indexes]
                sampled_train_lambda: torch.Tensor = sampled_lambda[sampled_train_mask]
                assert unreduced_loss.size() == sampled_train_lambda.size()
                loss_weighted_sum: torch.Tensor = torch.sum(
                    unreduced_loss / sampled_train_lambda
                )
                loss_weighted_sum.backward()
                optimizer.step()
            if lr_scheduler is not None:
                lr_scheduler.step()
            """ Validate performance """
            # Early stopping on the primary metric, only when a validation
            # mask tensor is available on the data object.
            if (
                hasattr(data, "val_mask")
                and type(getattr(data, "val_mask")) == torch.Tensor
            ):
                validation_results: _typing.Sequence[float] = self.evaluate(
                    (data,), "val", [self.feval[0]]
                )
                # EarlyStopping minimizes; negate higher-is-better metrics.
                if self.feval[0].is_higher_better():
                    validation_loss: float = -validation_results[0]
                else:
                    validation_loss: float = validation_results[0]
                self._early_stopping(validation_loss, self.model.model)
                if self._early_stopping.early_stop:
                    LOGGER.debug("Early stopping at %d", current_epoch)
                    break
        if hasattr(data, "val_mask") and data.val_mask is not None:
            # Restore the best checkpoint recorded by early stopping.
            self._early_stopping.load_checkpoint(self.model.model)
        return self
def __predict_only(self, data):
    """Run a single gradient-free forward pass over the given graph.

    :param data: data of a specific graph
    :return: the raw model output for the whole graph
    """
    graph = data.to(self.device)
    network = self.model.model
    network.eval()
    with torch.no_grad():
        output: torch.Tensor = network(graph)
    return output
def predict_proba(
    self, dataset, mask: _typing.Optional[str] = None, in_log_format=False
):
    """Predict class probabilities for the nodes selected by ``mask``.

    :param dataset: the node classification dataset used to be predicted
    :param mask: one of ``"train"``/``"val"``/``"test"``; any other value
        (or ``None``) falls back to the test mask
    :param in_log_format: when True, return log-probabilities instead
    :return: (log-)probabilities restricted to the masked nodes
    """
    data = dataset[0].to(self.device)
    split = "test"
    if mask is not None and type(mask) == str and mask.lower() in ("train", "val"):
        split = mask.lower()
    _mask: torch.Tensor = getattr(data, f"{split}_mask")
    log_probability = self.__predict_only(data)[_mask]
    return log_probability if in_log_format else torch.exp(log_probability)
def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor:
    """Return the predicted class index for each node selected by ``mask``."""
    log_probabilities = self.predict_proba(dataset, mask, in_log_format=True)
    return log_probabilities.max(1)[1]
def evaluate(
    self,
    dataset,
    mask: _typing.Optional[str] = None,
    feval: "_typing.Union[None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]]" = None,
) -> _typing.Sequence[float]:
    """Evaluate the model on the selected split of the dataset.

    :param dataset: dataset whose first element is the graph to evaluate on
    :param mask: one of ``"train"``/``"val"``/``"test"``; any other value
        (or ``None``) falls back to the test mask
    :param feval: evaluation method(s); ``None`` uses ``self.feval``
    :return: one score per evaluation method
    """
    # NOTE: the ``feval`` annotation is quoted so that ``Evaluation`` is not
    # evaluated eagerly at function-definition time.
    data = dataset[0]
    data = data.to(self.device)
    if feval is None:
        _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval
    else:
        _feval = get_feval(list(feval))
    # Resolve the boolean mask for the requested split; default to test.
    split = "test"
    if mask is not None and type(mask) == str and mask.lower() in ("train", "val"):
        split = mask.lower()
    _mask: torch.Tensor = getattr(data, f"{split}_mask")
    prediction_probability: torch.Tensor = self.predict_proba(dataset, mask)
    y_ground_truth: torch.Tensor = data.y[_mask]
    eval_results = []
    for f in _feval:
        try:
            eval_results.append(f.evaluate(prediction_probability, y_ground_truth))
        except Exception:
            # BUGFIX: was a bare ``except:`` that also swallowed
            # KeyboardInterrupt/SystemExit. Some evaluators only accept
            # numpy arrays, so retry with numpy inputs.
            eval_results.append(
                f.evaluate(
                    prediction_probability.cpu().numpy(),
                    y_ground_truth.cpu().numpy(),
                )
            )
    return eval_results
def train(self, dataset, keep_valid_result: bool = True):
    """Train on the given dataset, optionally caching validation results.

    :param dataset: dataset whose first element is the graph to train on
    :param keep_valid_result: whether to save the validation result after training
    """
    graph = dataset[0]
    self.__train_only(graph)
    if not keep_valid_result:
        return
    prediction: torch.Tensor = self.__predict_only(graph)
    valid_prediction = prediction[graph.val_mask]
    self._valid_result = valid_prediction.max(1)[1]
    self._valid_result_prob = valid_prediction
    self._valid_score = self.evaluate(dataset, "val")
def get_valid_predict(self) -> torch.Tensor:
    """Return the cached validation predictions (class indices) stored by ``train``."""
    return self._valid_result
def get_valid_predict_proba(self) -> torch.Tensor:
    """Return the cached raw validation outputs stored by ``train``."""
    return self._valid_result_prob
def get_valid_score(
    self, return_major: bool = True
) -> _typing.Tuple[
    _typing.Union[float, _typing.Sequence[float]],
    _typing.Union[bool, _typing.Sequence[bool]],
]:
    """Return cached validation score(s) with their optimization direction(s).

    :param return_major: if True, only the primary metric is reported
    :return: ``(score, higher_is_better)`` for the primary metric, or the
        full sequences when ``return_major`` is False
    """
    if not return_major:
        directions = [metric.is_higher_better() for metric in self.feval]
        return self._valid_score, directions
    return self._valid_score[0], self.feval[0].is_higher_better()
@property
def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]:
    """The trainer's hyper-parameter search space (sequence of dict)."""
    return self._hyper_parameter_space

@hyper_parameter_space.setter
def hyper_parameter_space(
    self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]]
) -> None:
    if not isinstance(hp_space, _typing.Sequence):
        # BUGFIX: was a bare ``raise TypeError`` with no message.
        raise TypeError(
            "hyper_parameter_space must be a sequence of dict, got %s"
            % type(hp_space).__name__
        )
    self._hyper_parameter_space = hp_space
def get_name_with_hp(self) -> str:
    """Compose a unique identifier from trainer settings and model hyper-parameters."""
    base_parts = (
        self._optimizer_class,
        self._learning_rate,
        self._max_epoch,
        self._early_stopping.patience,
        self.model,
        self.device,
    )
    prefix = "-".join(str(part) for part in base_parts)
    hp_suffix = "-".join(
        str(key) + "-" + str(value)
        for key, value in self.model.get_hyper_parameter().items()
    )
    return prefix + "|" + hp_suffix
def duplicate_from_hyper_parameter(
    self,
    hp: _typing.Dict[str, _typing.Any],
    model: "_typing.Optional[BaseModel]" = None,
) -> "NodeClassificationGraphSAINTTrainer":
    """Create a fresh trainer configured by the given hyper-parameters.

    Parameters
    ----------
    hp: dict
        Mixed trainer/model hyper-parameters. Entries whose names appear in
        the model's hyper-parameter space are forwarded to
        ``model.from_hyper_parameter``; all entries are passed to the new
        trainer's constructor.
    model: BaseModel (Optional)
        The backbone model to duplicate from; defaults to ``self.model``.

    Returns
    -------
    NodeClassificationGraphSAINTTrainer
        A newly-initialized trainer carrying the given hyper-parameters.
    """
    if model is None or not isinstance(model, BaseModel):
        model = self.model
    # PERF: hoist the model's parameter names out of the filter
    # (the original rebuilt the list once per hp item) and use a set
    # for O(1) membership tests.
    model_hp_names = {
        entry["parameterName"] for entry in model.hyper_parameter_space
    }
    model = model.from_hyper_parameter(
        {key: value for key, value in hp.items() if key in model_hp_names}
    )
    return NodeClassificationGraphSAINTTrainer(
        model,
        self.num_features,
        self.num_classes,
        self._optimizer_class,
        device=self.device,
        init=True,
        feval=self.feval,
        loss=self.loss,
        lr_scheduler_type=self._lr_scheduler_type,
        **hp,
    )
| @@ -0,0 +1,121 @@ | |||
| import copy | |||
| import typing as _typing | |||
| import torch.utils.data | |||
| import torch_geometric | |||
class _SubGraphSet(torch.utils.data.Dataset):
    """An in-memory dataset wrapping a fixed sequence of sampled sub-graphs.

    Extra keyword arguments (e.g. the GraphSAINT normalization tensors) are
    attached to the instance as attributes so callers can retrieve them
    later via ``getattr``.
    """

    def __init__(self, datalist: _typing.Sequence[_typing.Any], *args, **kwargs):
        self.__graphs: _typing.Sequence[_typing.Any] = datalist
        self.__remaining_args: _typing.Sequence[_typing.Any] = args
        for name, attribute in kwargs.items():
            setattr(self, name, attribute)

    def __len__(self) -> int:
        return len(self.__graphs)

    def __getitem__(self, index: int) -> _typing.Any:
        # Mirror sequence semantics but reject negative/out-of-range indices.
        if 0 <= index < len(self.__graphs):
            return self.__graphs[index]
        raise IndexError
class _GraphSAINTSubGraphSampler:
    """Wraps a ``torch_geometric`` GraphSAINT sampler and produces a
    :class:`_SubGraphSet` together with the GraphSAINT normalization
    statistics (``alpha`` per edge, ``lambda`` per node).
    """

    def __init__(
        self,
        sampler_class: "_typing.Type[torch_geometric.data.GraphSAINTSampler]",
        budget: int,
        num_graphs: int = 1,
        walk_length: int = 1,
        num_workers: int = 0,
    ):
        """
        :param sampler_class: class of torch_geometric.data.GraphSAINTSampler
        :param budget: general budget
        :param num_graphs: number of sub-graphs to sample, i.e. N in the paper
        :param walk_length: walk length for RandomWalk Sampler
        :param num_workers: how many sub-processes to use for data loading.
            0 means that the data will be loaded in the main process.
        """
        self.__sampler_class = sampler_class
        self.__budget: int = budget
        self.__num_graphs: int = num_graphs
        self.__walk_length: int = walk_length
        self.__num_workers: int = max(num_workers, 0)

    def sample(self, _integral_data) -> "_SubGraphSet":
        """
        :param _integral_data: conventional data for an integral graph
        :return: instance of _SubGraphSet
        """
        data = copy.copy(_integral_data)
        data.sampled_node_indexes = torch.arange(data.num_nodes, dtype=torch.int64)
        data.sampled_edge_indexes = torch.arange(data.num_edges, dtype=torch.int64)
        # BUGFIX: the original tested ``type(self.__sampler_class) == ...``,
        # which compares the *metaclass* with the sampler class and can never
        # match, so the random-walk sampler was always constructed through the
        # generic branch with ``num_graphs`` silently consumed as walk_length.
        if issubclass(
            self.__sampler_class, torch_geometric.data.GraphSAINTRandomWalkSampler
        ):
            _sampler = torch_geometric.data.GraphSAINTRandomWalkSampler(
                data,
                self.__budget,
                self.__walk_length,
                self.__num_graphs,
                num_workers=self.__num_workers,
            )
        else:
            _sampler = self.__sampler_class(
                data, self.__budget, self.__num_graphs, num_workers=self.__num_workers
            )
        """ Sample sub-graphs """
        datalist: list = [d for d in _sampler]
        """ Compute the normalization statistics """
        concatenated_sampled_nodes: torch.Tensor = torch.cat(
            [sub_graph.sampled_node_indexes for sub_graph in datalist]
        )
        concatenated_sampled_edges: torch.Tensor = torch.cat(
            [sub_graph.sampled_edge_indexes for sub_graph in datalist]
        )
        # PERF: count how often each node/edge was sampled in one O(n) pass
        # with torch.bincount, replacing the original O(unique * n)
        # unique()+where() loops.
        node_sampled_count = torch.bincount(
            concatenated_sampled_nodes, minlength=data.num_nodes
        )
        edge_sampled_count = torch.bincount(
            concatenated_sampled_edges, minlength=data.num_edges
        )
        _alpha: torch.Tensor = (
            edge_sampled_count / node_sampled_count[data.edge_index[1]]
        )
        # Nodes never sampled yield nan/inf ratios; zero them out.
        _alpha[torch.isnan(_alpha) | torch.isinf(_alpha)] = 0
        _lambda: torch.Tensor = node_sampled_count / self.__num_graphs
        return _SubGraphSet(datalist, **{"alpha": _alpha, "lambda": _lambda})
class GraphSAINTRandomNodeSampler(_GraphSAINTSubGraphSampler):
    """GraphSAINT sampler drawing sub-graphs by random node sampling."""

    def __init__(self, node_budget: int, num_graphs: int = 1):
        super().__init__(
            torch_geometric.data.GraphSAINTNodeSampler, node_budget, num_graphs
        )
class GraphSAINTRandomEdgeSampler(_GraphSAINTSubGraphSampler):
    """GraphSAINT sampler drawing sub-graphs by random edge sampling."""

    def __init__(self, edge_budget: int, num_graphs: int = 1):
        # BUGFIX: the original passed GraphSAINTNodeSampler here, so the
        # "edge" strategy silently fell back to node sampling and the budget
        # was interpreted as a node budget.
        super(GraphSAINTRandomEdgeSampler, self).__init__(
            torch_geometric.data.GraphSAINTEdgeSampler, edge_budget, num_graphs
        )
class GraphSAINTRandomWalkSampler(_GraphSAINTSubGraphSampler):
    """GraphSAINT sampler drawing sub-graphs via fixed-length random walks."""

    def __init__(self, edge_budget: int, num_graphs: int = 1, walk_length: int = 4):
        super().__init__(
            torch_geometric.data.GraphSAINTRandomWalkSampler,
            edge_budget,
            num_graphs,
            walk_length,
        )
| @@ -2,7 +2,12 @@ | |||
| Auto solver for various graph tasks | |||
| """ | |||
| from .classifier import AutoGraphClassifier, AutoNodeClassifier | |||
| from .classifier import AutoGraphClassifier, AutoNodeClassifier, AutoLinkPredictor | |||
| from .utils import Leaderboard | |||
| __all__ = ["AutoNodeClassifier", "AutoGraphClassifier", "Leaderboard"] | |||
| __all__ = [ | |||
| "AutoNodeClassifier", | |||
| "AutoGraphClassifier", | |||
| "AutoLinkPredictor", | |||
| "Leaderboard", | |||
| ] | |||
| @@ -5,5 +5,11 @@ Auto classifier for classification problems. | |||
| from .base import BaseClassifier | |||
| from .graph_classifier import AutoGraphClassifier | |||
| from .node_classifier import AutoNodeClassifier | |||
| from .link_predictor import AutoLinkPredictor | |||
| __all__ = ["BaseClassifier", "AutoGraphClassifier", "AutoNodeClassifier"] | |||
| __all__ = [ | |||
| "BaseClassifier", | |||
| "AutoGraphClassifier", | |||
| "AutoNodeClassifier", | |||
| "AutoLinkPredictor", | |||
| ] | |||
| @@ -117,7 +117,7 @@ class AutoGraphClassifier(BaseClassifier): | |||
| ) -> "AutoGraphClassifier": | |||
| # load graph network module | |||
| self.graph_model_list = [] | |||
| if isinstance(graph_models, list): | |||
| if isinstance(graph_models, (list, tuple)): | |||
| for model in graph_models: | |||
| if isinstance(model, str): | |||
| if model in MODEL_DICT: | |||
| @@ -0,0 +1,747 @@ | |||
| """ | |||
Auto Classifier for Link Prediction
| """ | |||
| import time | |||
| import json | |||
| from copy import deepcopy | |||
| import torch | |||
| import numpy as np | |||
| import yaml | |||
| from .base import BaseClassifier | |||
| from ..base import _parse_hp_space, _initialize_single_model | |||
| from ...module.feature import FEATURE_DICT | |||
| from ...module.model import MODEL_DICT, BaseModel | |||
| from ...module.train import TRAINER_DICT, BaseLinkPredictionTrainer | |||
| from ...module.train import get_feval | |||
| from ..utils import Leaderboard, set_seed | |||
| from ...datasets import utils | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger("LinkPredictor") | |||
| class AutoLinkPredictor(BaseClassifier): | |||
| """ | |||
| Auto Link Predictor. | |||
| Used to automatically solve the link prediction problems. | |||
| Parameters | |||
| ---------- | |||
| feature_module: autogl.module.feature.BaseFeatureEngineer or str or None | |||
| The (name of) auto feature engineer used to process the given dataset. Default ``deepgl``. | |||
| Disable feature engineer by setting it to ``None``. | |||
| graph_models: list of autogl.module.model.BaseModel or list of str | |||
| The (name of) models to be optimized as backbone. Default ``['gat', 'gcn']``. | |||
| hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None | |||
| The (name of) hpo module used to search for best hyper parameters. Default ``anneal``. | |||
| Disable hpo by setting it to ``None``. | |||
| ensemble_module: autogl.module.ensemble.BaseEnsembler or str or None | |||
| The (name of) ensemble module used to ensemble the multi-models found. Default ``voting``. | |||
| Disable ensemble by setting it to ``None``. | |||
| max_evals: int (Optional) | |||
| If given, will set the number eval times the hpo module will use. | |||
| Only be effective when hpo_module is ``str``. Default ``None``. | |||
| trainer_hp_space: list of dict (Optional) | |||
| trainer hp space or list of trainer hp spaces configuration. | |||
| If a single trainer hp is given, will specify the hp space of trainer for every model. | |||
If a list of trainer hp is given, will specify every model with corresponding
| trainer hp space. | |||
| Default ``None``. | |||
| model_hp_spaces: list of list of dict (Optional) | |||
| model hp space configuration. | |||
| If given, will specify every hp space of every passed model. Default ``None``. | |||
| size: int (Optional) | |||
| The max models ensemble module will use. Default ``None``. | |||
| device: torch.device or str | |||
| The device where model will be running on. If set to ``auto``, will use gpu when available. | |||
| You can also specify the device by directly giving ``gpu`` or ``cuda:0``, etc. | |||
| Default ``auto``. | |||
| """ | |||
def __init__(
    self,
    feature_module=None,
    graph_models=("gat", "gcn"),
    hpo_module="anneal",
    ensemble_module="voting",
    max_evals=50,
    default_trainer=None,
    trainer_hp_space=None,
    model_hp_spaces=None,
    size=4,
    device="auto",
):
    """Initialize the solver components; see the class docstring for parameters."""
    trainer = default_trainer or "LinkPredictionFull"
    super().__init__(
        feature_module=feature_module,
        graph_models=graph_models,
        hpo_module=hpo_module,
        ensemble_module=ensemble_module,
        max_evals=max_evals,
        default_trainer=trainer,
        trainer_hp_space=trainer_hp_space,
        model_hp_spaces=model_hp_spaces,
        size=size,
        device=device,
    )
    # data to be kept when fit; reused by later predict() calls
    self.dataset = None
def _init_graph_module(
    self, graph_models, num_features, feval, device, loss
) -> "AutoLinkPredictor":
    """Instantiate backbone models and wrap each one in a link-prediction trainer.

    Each entry of ``graph_models`` may be a registered model name (``str``),
    a ``BaseModel`` subclass, a ``BaseModel`` instance, or an already-built
    ``BaseLinkPredictionTrainer``; every entry ends up as a trainer stored in
    ``self.graph_model_list``. Link prediction is handled as a binary task,
    hence ``num_classes=1`` throughout.
    """
    # load graph network module
    self.graph_model_list = []
    if isinstance(graph_models, (list, tuple)):
        for model in graph_models:
            if isinstance(model, str):
                # model registered by name in the global MODEL_DICT
                if model in MODEL_DICT:
                    self.graph_model_list.append(
                        MODEL_DICT[model](
                            num_classes=1,
                            num_features=num_features,
                            device=device,
                            init=False,
                        )
                    )
                else:
                    raise KeyError("cannot find model %s" % (model))
            elif isinstance(model, type) and issubclass(model, BaseModel):
                # an uninstantiated model class: construct it lazily (init=False)
                self.graph_model_list.append(
                    model(
                        num_classes=1,
                        num_features=num_features,
                        device=device,
                        init=False,
                    )
                )
            elif isinstance(model, BaseModel):
                # setup the hp of num_classes and num_features
                model.set_num_classes(1)
                model.set_num_features(num_features)
                self.graph_model_list.append(model.to(device))
            elif isinstance(model, BaseLinkPredictionTrainer):
                # receive a trainer list, put trainer to list
                assert (
                    model.get_model() is not None
                ), "Passed trainer should contain a model"
                model.model.set_num_classes(1)
                model.model.set_num_features(num_features)
                model.update_parameters(
                    num_classes=1,
                    num_features=num_features,
                    loss=loss,
                    feval=feval,
                    device=device,
                )
                self.graph_model_list.append(model)
            else:
                raise KeyError("cannot find graph network %s." % (model))
    else:
        raise ValueError(
            "need graph network to be (list of) str or a BaseModel class/instance, get",
            graph_models,
            "instead.",
        )
    # wrap all model_cls with specified trainer
    for i, model in enumerate(self.graph_model_list):
        # set model hp space
        if self._model_hp_spaces is not None:
            if self._model_hp_spaces[i] is not None:
                if isinstance(model, BaseLinkPredictionTrainer):
                    model.model.hyper_parameter_space = self._model_hp_spaces[i]
                else:
                    model.hyper_parameter_space = self._model_hp_spaces[i]
        # initialize trainer if needed
        if isinstance(model, BaseModel):
            # self._default_trainer is either one name for all models
            # or a per-model list of names
            name = (
                self._default_trainer
                if isinstance(self._default_trainer, str)
                else self._default_trainer[i]
            )
            model = TRAINER_DICT[name](
                model=model,
                num_features=num_features,
                loss=loss,
                feval=feval,
                device=device,
                init=False,
            )
        # set trainer hp space
        if self._trainer_hp_space is not None:
            if isinstance(self._trainer_hp_space[0], list):
                # a list of lists means one hp space per model
                current_hp_for_trainer = self._trainer_hp_space[i]
            else:
                current_hp_for_trainer = self._trainer_hp_space
            model.hyper_parameter_space = current_hp_for_trainer
        self.graph_model_list[i] = model
    return self
def _to_prob(self, sig_prob: np.ndarray):
    """Expand sigmoid scores ``p`` into a two-column array ``[1 - p, p]``."""
    positive = np.asarray(sig_prob, dtype=float)
    return np.column_stack((1.0 - positive, positive))
# pylint: disable=arguments-differ
def fit(
    self,
    dataset,
    time_limit=-1,
    inplace=False,
    train_split=None,
    val_split=None,
    evaluation_method="infer",
    seed=None,
) -> "AutoLinkPredictor":
    """
    Fit current solver on given dataset.

    Parameters
    ----------
    dataset: torch_geometric.data.dataset.Dataset
        The dataset needed to fit on. This dataset must have only one graph.
    time_limit: int
        The time limit of the whole fit process (in seconds). If set below 0,
        will ignore time limit. Default ``-1``.
    inplace: bool
        Whether we process the given dataset in inplace manner. Default ``False``.
        Set it to True if you want to save memory by modifying the given dataset directly.
    train_split: float or int (Optional)
        The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to
        use default train/val/test split in dataset, please set this to ``None``.
        Default ``None``.
    val_split: float or int (Optional)
        The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want
        to use default train/val/test split in dataset, please set this to ``None``.
        Default ``None``.
    evaluation_method: (list of) str or autogl.module.train.evaluation
        A (list of) evaluation method for current solver. If ``infer``, will automatically
        determine. Default ``infer``.
    seed: int (Optional)
        The random seed. If set to ``None``, will run everything at random.
        Default ``None``.

    Returns
    -------
    self: autogl.solver.AutoLinkPredictor
        A reference of current solver.
    """
    set_seed(seed)
    if time_limit < 0:
        time_limit = 3600 * 24
    time_begin = time.time()
    # initialize leaderboard
    if evaluation_method == "infer":
        if hasattr(dataset, "metric"):
            evaluation_method = [dataset.metric]
        else:
            num_of_label = dataset.num_classes
            if num_of_label == 2:
                evaluation_method = ["auc"]
            else:
                evaluation_method = ["acc"]
    assert isinstance(evaluation_method, list)
    evaluator_list = get_feval(evaluation_method)
    self.leaderboard = Leaderboard(
        [e.get_eval_name() for e in evaluator_list],
        {e.get_eval_name(): e.is_higher_better() for e in evaluator_list},
    )
    # set up the dataset: either split edges by the requested ratios or
    # rely on a pre-existing train/val/test edge split on the dataset
    if train_split is not None and val_split is not None:
        utils.split_edges(dataset, train_split, val_split)
    else:
        assert all(
            [
                hasattr(dataset.data, f"{name}")
                for name in [
                    "train_pos_edge_index",
                    "train_neg_adj_mask",
                    "val_pos_edge_index",
                    "val_neg_edge_index",
                    "test_pos_edge_index",
                    "test_neg_edge_index",
                ]
            ]
        ), (
            "The dataset has no default train/val split! Please manually pass "
            "train and val ratio."
        )
        LOGGER.info("Use the default train/val/test ratio in given dataset")
    # feature engineering
    if self.feature_module is not None:
        dataset = self.feature_module.fit_transform(dataset, inplace=inplace)
    self.dataset = dataset
    assert self.dataset[0].x is not None, (
        "Does not support fit on non node-feature dataset!"
        " Please add node features to dataset or specify feature engineers that generate"
        " node features."
    )
    # initialize graph networks (binary task, sigmoid logits by default)
    self._init_graph_module(
        self.gml,
        num_features=self.dataset[0].x.shape[1],
        feval=evaluator_list,
        device=self.runtime_device,
        loss="binary_cross_entropy_with_logits"
        if not hasattr(dataset, "loss")
        else dataset.loss,
    )
    # train the models and tune hpo
    result_valid = []
    names = []
    for idx, model in enumerate(self.graph_model_list):
        # share the remaining time budget evenly over the models left
        time_for_each_model = (time_limit - time.time() + time_begin) / (
            len(self.graph_model_list) - idx
        )
        if self.hpo_module is None:
            model.initialize()
            model.train(self.dataset, True)
            optimized = model
        else:
            optimized, _ = self.hpo_module.optimize(
                trainer=model, dataset=self.dataset, time_limit=time_for_each_model
            )
        # to save memory, all the trainer derived will be mapped to cpu
        optimized.to(torch.device("cpu"))
        name = optimized.get_name_with_hp() + "_idx%d" % (idx)
        names.append(name)
        performance_on_valid, _ = optimized.get_valid_score(return_major=False)
        result_valid.append(
            self._to_prob(optimized.get_valid_predict_proba().cpu().numpy())
        )
        self.leaderboard.insert_model_performance(
            name,
            dict(
                zip(
                    [e.get_eval_name() for e in evaluator_list],
                    performance_on_valid,
                )
            ),
        )
        self.trained_models[name] = optimized
    # fit the ensemble model on the validation-edge labels
    # (positive edges first, then negative edges)
    if self.ensemble_module is not None:
        pos_edge_index, neg_edge_index = (
            self.dataset[0].val_pos_edge_index,
            self.dataset[0].val_neg_edge_index,
        )
        E = pos_edge_index.size(1) + neg_edge_index.size(1)
        link_labels = torch.zeros(E, dtype=torch.float)
        link_labels[: pos_edge_index.size(1)] = 1.0
        performance = self.ensemble_module.fit(
            result_valid,
            link_labels.detach().cpu().numpy(),
            names,
            evaluator_list,
            n_classes=dataset.num_classes,
        )
        self.leaderboard.insert_model_performance(
            "ensemble",
            dict(zip([e.get_eval_name() for e in evaluator_list], performance)),
        )
    return self
def fit_predict(
    self,
    dataset,
    time_limit=-1,
    inplace=False,
    train_split=None,
    val_split=None,
    evaluation_method="infer",
    use_ensemble=True,
    use_best=True,
    name=None,
) -> np.ndarray:
    """
    Fit current solver on given dataset and return the predicted value.

    Parameters
    ----------
    dataset: torch_geometric.data.dataset.Dataset
        The dataset needed to fit on. This dataset must have only one graph.
    time_limit: int
        The time limit of the whole fit process (in seconds).
        If set below 0, will ignore time limit. Default ``-1``.
    inplace: bool
        Whether we process the given dataset in inplace manner. Default ``False``.
        Set it to True if you want to save memory by modifying the given dataset directly.
    train_split: float or int (Optional)
        The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to
        use default train/val/test split in dataset, please set this to ``None``.
        Default ``None``.
    val_split: float or int (Optional)
        The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want
        to use default train/val/test split in dataset, please set this to ``None``.
        Default ``None``.
    evaluation_method: (list of) str or autogl.module.train.evaluation
        A (list of) evaluation method for current solver. If ``infer``, will automatically
        determine. Default ``infer``.
    use_ensemble: bool
        Whether to use ensemble to do the predict. Default ``True``.
    use_best: bool
        Whether to use the best single model to do the predict. Will only be effective when
        ``use_ensemble`` is ``False``. Default ``True``.
    name: str or None
        The name of model used to predict. Will only be effective when ``use_ensemble`` and
        ``use_best`` both are ``False``. Default ``None``.

    Returns
    -------
    result: np.ndarray
        An array of shape ``(N,)``, where ``N`` is the number of test nodes. The prediction
        on given dataset.
    """
    fit_arguments = dict(
        dataset=dataset,
        time_limit=time_limit,
        inplace=inplace,
        train_split=train_split,
        val_split=val_split,
        evaluation_method=evaluation_method,
    )
    self.fit(**fit_arguments)
    return self.predict(
        dataset=dataset,
        inplaced=inplace,
        inplace=inplace,
        use_ensemble=use_ensemble,
        use_best=use_best,
        name=name,
    )
def predict_proba(
    self,
    dataset=None,
    inplaced=False,
    inplace=False,
    use_ensemble=True,
    use_best=True,
    name=None,
    mask="test",
) -> np.ndarray:
    """
    Predict the link probability.

    Parameters
    ----------
    dataset: torch_geometric.data.dataset.Dataset or None
        The dataset needed to predict. If ``None``, will use the processed dataset passed
        to ``fit()`` instead. Default ``None``.
    inplaced: bool
        Whether the given dataset is already processed. Only effective when ``dataset``
        is not ``None``. Set ``True`` if the dataset was passed to ``fit()`` with
        ``inplace=True``. Default ``False``.
    inplace: bool
        Whether we process the given dataset in inplace manner. Default ``False``.
    use_ensemble: bool
        Whether to use ensemble to do the predict. Default ``True``.
    use_best: bool
        Whether to use the best single model to do the predict. Only effective when
        ``use_ensemble`` is ``False``. Default ``True``.
    name: str or None
        The name of model used to predict. Only effective when ``use_ensemble`` and
        ``use_best`` both are ``False``. Default ``None``.
    mask: str
        The data split to give prediction on. Default ``test``.

    Returns
    -------
    result: np.ndarray
        The predicted probabilities on the given dataset.
    """
    if dataset is None:
        dataset = self.dataset
        assert dataset is not None, (
            "Please execute fit() first before predicting on remembered dataset"
        )
    elif not inplaced and self.feature_module is not None:
        dataset = self.feature_module.transform(dataset, inplace=inplace)
    if use_ensemble:
        LOGGER.info("Ensemble argument on, will try using ensemble model.")
    if not use_ensemble and use_best:
        LOGGER.info(
            "Ensemble argument off and best argument on, will try using best model."
        )
    ensemble_requested = use_ensemble and self.ensemble_module is not None
    if ensemble_requested or (not use_best and name == "ensemble"):
        # gather the prediction of every trained model for the ensembler
        per_model_probs = []
        model_names = []
        for trained_name in self.trained_models:
            per_model_probs.append(
                self._to_prob(
                    self._predict_proba_by_name(dataset, trained_name, mask)
                )
            )
            model_names.append(trained_name)
        return self.ensemble_module.ensemble(per_model_probs, model_names)[:, 1]
    if use_ensemble and self.ensemble_module is None:
        LOGGER.warning(
            "Cannot use ensemble because no ensebmle module is given. "
            "Will use best model instead."
        )
    if use_best or (use_ensemble and self.ensemble_module is None):
        # just return the best model we have found
        best_name = self.leaderboard.get_best_model()
        return self._predict_proba_by_name(dataset, best_name, mask)
    if name is not None:
        # return model performance by name
        return self._predict_proba_by_name(dataset, name, mask)
    LOGGER.error(
        "No model name is given while ensemble and best arguments are off."
    )
    raise ValueError(
        "You need to specify a model name if you do not want use ensemble and best model."
    )
| def _predict_proba_by_name(self, dataset, name, mask="test"): | |||
| self.trained_models[name].to(self.runtime_device) | |||
| predicted = ( | |||
| self.trained_models[name].predict_proba(dataset, mask=mask).cpu().numpy() | |||
| ) | |||
| self.trained_models[name].to(torch.device("cpu")) | |||
| return predicted | |||
| def predict( | |||
| self, | |||
| dataset=None, | |||
| inplaced=False, | |||
| inplace=False, | |||
| use_ensemble=True, | |||
| use_best=True, | |||
| name=None, | |||
| mask="test", | |||
| threshold=0.5, | |||
| ) -> np.ndarray: | |||
| """ | |||
| Predict the node class number. | |||
| Parameters | |||
| ---------- | |||
| dataset: torch_geometric.data.dataset.Dataset or None | |||
| The dataset needed to predict. If ``None``, will use the processed dataset passed | |||
| to ``fit()`` instead. Default ``None``. | |||
| inplaced: bool | |||
| Whether the given dataset is processed. Only be effective when ``dataset`` | |||
| is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``, | |||
| and you pass the dataset again to this method, you should set this argument | |||
| to ``True``. Otherwise ``False``. Default ``False``. | |||
| inplace: bool | |||
| Whether we process the given dataset in inplace manner. Default ``False``. | |||
| Set it to True if you want to save memory by modifying the given dataset directly. | |||
| use_ensemble: bool | |||
| Whether to use ensemble to do the predict. Default ``True``. | |||
| use_best: bool | |||
| Whether to use the best single model to do the predict. Will only be effective | |||
| when ``use_ensemble`` is ``False``. Default ``True``. | |||
| name: str or None | |||
| The name of model used to predict. Will only be effective when ``use_ensemble`` | |||
| and ``use_best`` both are ``False``. Default ``None``. | |||
| mask: str | |||
| The data split to give prediction on. Default ``test``. | |||
| threshold: float | |||
| The threshold to judge whether the edges are positive or not. | |||
| Returns | |||
| ------- | |||
| result: np.ndarray | |||
| An array of shape ``(N,)``, where ``N`` is the number of test nodes. | |||
| The prediction on given dataset. | |||
| """ | |||
| proba = self.predict_proba( | |||
| dataset, inplaced, inplace, use_ensemble, use_best, name, mask | |||
| ) | |||
| return (proba > threshold).astype("int") | |||
| @classmethod | |||
| def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor": | |||
| """ | |||
| Load solver from config file. | |||
| You can use this function to directly load a solver from predefined config dict | |||
| or config file path. Currently, only support file type of ``json`` or ``yaml``, | |||
| if you pass a path. | |||
| Parameters | |||
| ---------- | |||
| path_or_dict: str or dict | |||
| The path to the config file or the config dictionary object | |||
| filetype: str | |||
| The filetype the given file if the path is specified. Currently only support | |||
| ``json`` or ``yaml``. You can set to ``auto`` to automatically detect the file | |||
| type (from file name). Default ``auto``. | |||
| Returns | |||
| ------- | |||
| solver: autogl.solver.AutoGraphClassifier | |||
| The solver that is created from given file or dictionary. | |||
| """ | |||
| assert filetype in ["auto", "yaml", "json"], ( | |||
| "currently only support yaml file or json file type, but get type " | |||
| + filetype | |||
| ) | |||
| if isinstance(path_or_dict, str): | |||
| if filetype == "auto": | |||
| if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): | |||
| filetype = "yaml" | |||
| elif path_or_dict.endswith(".json"): | |||
| filetype = "json" | |||
| else: | |||
| LOGGER.error( | |||
| "cannot parse the type of the given file name, " | |||
| "please manually set the file type" | |||
| ) | |||
| raise ValueError( | |||
| "cannot parse the type of the given file name, " | |||
| "please manually set the file type" | |||
| ) | |||
| if filetype == "yaml": | |||
| path_or_dict = yaml.load( | |||
| open(path_or_dict, "r").read(), Loader=yaml.FullLoader | |||
| ) | |||
| else: | |||
| path_or_dict = json.load(open(path_or_dict, "r")) | |||
| path_or_dict = deepcopy(path_or_dict) | |||
| solver = cls(None, [], None, None) | |||
| fe_list = path_or_dict.pop("feature", None) | |||
| if fe_list is not None: | |||
| fe_list_ele = [] | |||
| for feature_engineer in fe_list: | |||
| name = feature_engineer.pop("name") | |||
| if name is not None: | |||
| fe_list_ele.append(FEATURE_DICT[name](**feature_engineer)) | |||
| if fe_list_ele != []: | |||
| solver.set_feature_module(fe_list_ele) | |||
| models = path_or_dict.pop("models", [{"name": "gcn"}, {"name": "gat"}]) | |||
| model_hp_space = [ | |||
| _parse_hp_space(model.pop("hp_space", None)) for model in models | |||
| ] | |||
| model_list = [ | |||
| _initialize_single_model(model.pop("name"), model) for model in models | |||
| ] | |||
| trainer = path_or_dict.pop("trainer", None) | |||
| default_trainer = "LinkPredictionFull" | |||
| trainer_space = None | |||
| if isinstance(trainer, dict): | |||
| # global default | |||
| default_trainer = trainer.pop("name", "LinkPredictionFull") | |||
| trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) | |||
| default_kwargs = {"num_features": None} | |||
| default_kwargs.update(trainer) | |||
| default_kwargs["init"] = False | |||
| for i in range(len(model_list)): | |||
| model = model_list[i] | |||
| trainer_wrap = TRAINER_DICT[default_trainer]( | |||
| model=model, **default_kwargs | |||
| ) | |||
| model_list[i] = trainer_wrap | |||
| elif isinstance(trainer, list): | |||
| # sequential trainer definition | |||
| assert len(trainer) == len( | |||
| model_list | |||
| ), "The number of trainer and model does not match" | |||
| trainer_space = [] | |||
| for i in range(len(model_list)): | |||
| train, model = trainer[i], model_list[i] | |||
| default_trainer = train.pop("name", "LinkPredictionFull") | |||
| trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) | |||
| default_kwargs = {"num_features": None} | |||
| default_kwargs.update(train) | |||
| default_kwargs["init"] = False | |||
| trainer_wrap = TRAINER_DICT[default_trainer]( | |||
| model=model, **default_kwargs | |||
| ) | |||
| model_list[i] = trainer_wrap | |||
| solver.set_graph_models( | |||
| model_list, default_trainer, trainer_space, model_hp_space | |||
| ) | |||
| hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) | |||
| if hpo_dict is not None: | |||
| name = hpo_dict.pop("name") | |||
| solver.set_hpo_module(name, **hpo_dict) | |||
| ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"}) | |||
| if ensemble_dict is not None: | |||
| name = ensemble_dict.pop("name") | |||
| solver.set_ensemble_module(name, **ensemble_dict) | |||
| return solver | |||
| @@ -72,12 +72,10 @@ class AutoNodeClassifier(BaseClassifier): | |||
| Default ``auto``. | |||
| """ | |||
| # pylint: disable=W0102 | |||
| def __init__( | |||
| self, | |||
| feature_module=None, | |||
| graph_models=["gat", "gcn"], | |||
| graph_models=("gat", "gcn"), | |||
| nas_algorithms=None, | |||
| nas_spaces=None, | |||
| nas_estimators=None, | |||
| @@ -115,7 +113,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| ) -> "AutoNodeClassifier": | |||
| # load graph network module | |||
| self.graph_model_list = [] | |||
| if isinstance(graph_models, list): | |||
| if isinstance(graph_models, (list, tuple)): | |||
| for model in graph_models: | |||
| if isinstance(model, str): | |||
| if model in MODEL_DICT: | |||
| @@ -0,0 +1,92 @@ | |||
# AutoGL solver configuration: two candidate models (GCN + GAT), random
# hyper-parameter search (10 trials) and a voting ensemble over the trained
# models. Consumed by the solver's from_config().
ensemble:
  name: voting
feature:
- name: PYGNormalizeFeatures
hpo:
  max_evals: 10
  name: random
# candidate models and their hyper-parameter search spaces
models:
- hp_space:
  - feasiblePoints: 2,3
    parameterName: num_layers
    type: DISCRETE
  # hidden sizes per layer; cutFunc trims the list to num_layers - 1 entries
  - cutFunc: lambda x:x[0] - 1
    cutPara:
    - num_layers
    length: 2
    maxValue:
    - 256
    - 256
    minValue:
    - 64
    - 64
    numericalType: INTEGER
    parameterName: hidden
    scalingType: LOG
    type: NUMERICAL_LIST
  - maxValue: 0.2
    minValue: 0.0
    parameterName: dropout
    scalingType: LINEAR
    type: DOUBLE
  - feasiblePoints:
    - leaky_relu
    - relu
    - elu
    - tanh
    parameterName: act
    type: CATEGORICAL
  name: gcn
- name: gat
  hp_space:
  - feasiblePoints: 2,3
    parameterName: num_layers
    type: DISCRETE
  - cutFunc: lambda x:x[0] - 1
    cutPara:
    - num_layers
    length: 2
    maxValue:
    - 256
    - 256
    minValue:
    - 64
    - 64
    numericalType: INTEGER
    parameterName: hidden
    scalingType: LOG
    type: NUMERICAL_LIST
  - maxValue: 0.2
    minValue: 0.0
    parameterName: dropout
    scalingType: LINEAR
    type: DOUBLE
  - feasiblePoints:
    - leaky_relu
    - relu
    - elu
    - tanh
    parameterName: act
    type: CATEGORICAL
# shared trainer hyper-parameter search space
trainer:
  hp_space:
  - maxValue: 150
    minValue: 50
    parameterName: max_epoch
    scalingType: LINEAR
    type: INTEGER
  - maxValue: 40
    minValue: 25
    parameterName: early_stopping_round
    scalingType: LINEAR
    type: INTEGER
  - maxValue: 0.05
    minValue: 0.005
    parameterName: lr
    scalingType: LOG
    type: DOUBLE
  - maxValue: 1.0E-7
    minValue: 1.0E-10
    parameterName: weight_decay
    scalingType: LOG
    type: DOUBLE
| @@ -0,0 +1,61 @@ | |||
# AutoGL solver configuration: single GAT model benchmark, random
# hyper-parameter search (10 trials), no ensembling.
ensemble:
  name: null
feature:
- name: PYGNormalizeFeatures
hpo:
  max_evals: 10
  name: random
models:
- name: gat
  hp_space:
  - feasiblePoints: 2,3
    parameterName: num_layers
    type: DISCRETE
  # hidden sizes per layer; cutFunc trims the list to num_layers - 1 entries
  - cutFunc: lambda x:x[0] - 1
    cutPara:
    - num_layers
    length: 2
    maxValue:
    - 256
    - 256
    minValue:
    - 64
    - 64
    numericalType: INTEGER
    parameterName: hidden
    scalingType: LOG
    type: NUMERICAL_LIST
  - maxValue: 0.2
    minValue: 0.0
    parameterName: dropout
    scalingType: LINEAR
    type: DOUBLE
  - feasiblePoints:
    - leaky_relu
    - relu
    - elu
    - tanh
    parameterName: act
    type: CATEGORICAL
# trainer hyper-parameter search space
trainer:
  hp_space:
  - maxValue: 150
    minValue: 50
    parameterName: max_epoch
    scalingType: LINEAR
    type: INTEGER
  - maxValue: 40
    minValue: 25
    parameterName: early_stopping_round
    scalingType: LINEAR
    type: INTEGER
  - maxValue: 0.05
    minValue: 0.005
    parameterName: lr
    scalingType: LOG
    type: DOUBLE
  - maxValue: 1.0E-7
    minValue: 1.0E-10
    parameterName: weight_decay
    scalingType: LOG
    type: DOUBLE
| @@ -0,0 +1,61 @@ | |||
# AutoGL solver configuration: single GCN model benchmark, random
# hyper-parameter search (10 trials), no ensembling.
ensemble:
  name: null
feature:
- name: PYGNormalizeFeatures
hpo:
  max_evals: 10
  name: random
models:
- hp_space:
  - feasiblePoints: 2,3
    parameterName: num_layers
    type: DISCRETE
  # hidden sizes per layer; cutFunc trims the list to num_layers - 1 entries
  - cutFunc: lambda x:x[0] - 1
    cutPara:
    - num_layers
    length: 2
    maxValue:
    - 256
    - 256
    minValue:
    - 64
    - 64
    numericalType: INTEGER
    parameterName: hidden
    scalingType: LOG
    type: NUMERICAL_LIST
  - maxValue: 0.2
    minValue: 0.0
    parameterName: dropout
    scalingType: LINEAR
    type: DOUBLE
  - feasiblePoints:
    - leaky_relu
    - relu
    - elu
    - tanh
    parameterName: act
    type: CATEGORICAL
  name: gcn
# trainer hyper-parameter search space
trainer:
  hp_space:
  - maxValue: 150
    minValue: 50
    parameterName: max_epoch
    scalingType: LINEAR
    type: INTEGER
  - maxValue: 40
    minValue: 25
    parameterName: early_stopping_round
    scalingType: LINEAR
    type: INTEGER
  - maxValue: 0.05
    minValue: 0.005
    parameterName: lr
    scalingType: LOG
    type: DOUBLE
  - maxValue: 1.0E-7
    minValue: 1.0E-10
    parameterName: weight_decay
    scalingType: LOG
    type: DOUBLE
| @@ -0,0 +1,69 @@ | |||
# AutoGL solver configuration: single GraphSAGE model benchmark, random
# hyper-parameter search (10 trials), no ensembling.
ensemble:
  name: null
feature:
- name: PYGNormalizeFeatures
hpo:
  max_evals: 10
  name: random
models:
- name: sage
  hp_space:
  - parameterName: num_layers
    type: DISCRETE
    feasiblePoints: 2,3
  # hidden sizes per layer; cutFunc trims the list to num_layers - 1 entries
  - parameterName: hidden
    type: NUMERICAL_LIST
    scalingType: LOG
    numericalType: INTEGER
    cutFunc: lambda x:x[0] - 1
    cutPara:
    - num_layers
    length: 2
    maxValue:
    - 256
    - 256
    minValue:
    - 64
    - 64
  - parameterName: dropout
    type: DOUBLE
    scalingType: LINEAR
    maxValue: 0.2
    minValue: 0.0
  - parameterName: act
    type: CATEGORICAL
    feasiblePoints:
    - leaky_relu
    - relu
    - elu
    - tanh
  # SAGE-specific neighborhood aggregation operator
  - parameterName: agg
    type: CATEGORICAL
    feasiblePoints: ["mean", "add", "max"]
# trainer hyper-parameter search space
trainer:
  hp_space:
  - maxValue: 150
    minValue: 50
    parameterName: max_epoch
    scalingType: LINEAR
    type: INTEGER
  - maxValue: 40
    minValue: 25
    parameterName: early_stopping_round
    scalingType: LINEAR
    type: INTEGER
  - maxValue: 0.05
    minValue: 0.005
    parameterName: lr
    scalingType: LOG
    type: DOUBLE
  - maxValue: 1.0E-7
    minValue: 1.0E-10
    parameterName: weight_decay
    scalingType: LOG
    type: DOUBLE
| @@ -39,7 +39,7 @@ models: | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gcn | |||
| name: gat | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 400 | |||
| @@ -27,7 +27,7 @@ if __name__ == "__main__": | |||
| choices=["mutag", "imdb-b", "imdb-m", "proteins", "collab"], | |||
| ) | |||
| parser.add_argument( | |||
| "--configs", default="../configs/graph_classification.yaml", help="config files" | |||
| "--configs", default="../configs/graphclf_full.yml", help="config files" | |||
| ) | |||
| parser.add_argument("--device", type=int, default=0, help="device to run on") | |||
| parser.add_argument("--seed", type=int, default=0, help="random seed") | |||
| @@ -0,0 +1,93 @@ | |||
"""Benchmark script: automated link prediction with AutoGL.

Builds a dataset by name, creates an ``AutoLinkPredictor`` from a YAML config
(with HPO options overridable from the command line), fits it under a one-hour
budget and reports the test AUC.
"""
import sys

sys.path.append("../")  # make the in-repo autogl package importable

from autogl.datasets import build_dataset_from_name
from autogl.solver.classifier.link_predictor import AutoLinkPredictor
from autogl.module.train.evaluation import Auc
import yaml
import random
import torch
import numpy as np

if __name__ == "__main__":
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

    parser = ArgumentParser(
        "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--dataset",
        default="cora",
        type=str,
        help="dataset to use",
        choices=[
            "cora",
            "pubmed",
            "citeseer",
            "coauthor_cs",
            "coauthor_physics",
            "amazon_computers",
            "amazon_photo",
        ],
    )
    parser.add_argument(
        "--configs",
        type=str,
        default="../configs/lp_gcn_benchmark.yml",
        help="config to use",
    )
    # following arguments will override parameters in the config file
    parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods")
    parser.add_argument(
        "--max_eval", type=int, default=50, help="max hpo evaluation times"
    )
    parser.add_argument("--seed", type=int, default=0, help="random seed")
    parser.add_argument("--device", default=0, type=int, help="GPU device")

    args = parser.parse_args()
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
    seed = args.seed

    # set random seed everywhere for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    dataset = build_dataset_from_name(args.dataset)

    # read the config through a context manager so the file handle is closed
    with open(args.configs, "r") as config_file:
        configs = yaml.load(config_file.read(), Loader=yaml.FullLoader)
    # command-line HPO options override the config file
    configs["hpo"]["name"] = args.hpo
    configs["hpo"]["max_evals"] = args.max_eval
    autoClassifier = AutoLinkPredictor.from_config(configs)

    # train (fit() performs the train/val edge split internally)
    autoClassifier.fit(
        dataset,
        time_limit=3600,
        evaluation_method=[Auc],
        seed=seed,
        train_split=0.85,
        val_split=0.05,
    )
    autoClassifier.get_leaderboard().show()

    # test: rebuild 0/1 labels from the positive/negative test edge splits
    # (positives first, matching the order predict_proba scores them)
    predict_result = autoClassifier.predict_proba()
    pos_edge_index, neg_edge_index = (
        dataset[0].test_pos_edge_index,
        dataset[0].test_neg_edge_index,
    )
    E = pos_edge_index.size(1) + neg_edge_index.size(1)
    link_labels = torch.zeros(E)
    link_labels[: pos_edge_index.size(1)] = 1.0
    print(
        "test auc: %.4f"
        % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy()))
    )