From ad4ef59294d2b9891da061b77b5af177c8574ea1 Mon Sep 17 00:00:00 2001 From: cluster32 Date: Thu, 8 Apr 2021 01:03:17 +0800 Subject: [PATCH 01/19] add progress bar --- autogl/module/hpo/advisorbase.py | 4 +++- autogl/module/hpo/autone.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/autogl/module/hpo/advisorbase.py b/autogl/module/hpo/advisorbase.py index 6d9395a..2a63579 100644 --- a/autogl/module/hpo/advisorbase.py +++ b/autogl/module/hpo/advisorbase.py @@ -5,6 +5,7 @@ HPO Module for tuning hyper parameters import time import json import math +from tqdm import trange from .suggestion.models import Study from .base import BaseHPOptimizer, TimeTooLimitedError from .suggestion.algorithm.random_search import RandomSearchAlgorithm @@ -150,7 +151,8 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): best_id = None best_trainer = None - for i in range(self.max_evals): + print("HPO Search Phase:\n") + for i in trange(self.max_evals): if time.time() - start_time > time_limit: self.logger.info("Time out of limit, Epoch: {}".format(str(i))) break diff --git a/autogl/module/hpo/autone.py b/autogl/module/hpo/autone.py index 30da0b9..f499eed 100644 --- a/autogl/module/hpo/autone.py +++ b/autogl/module/hpo/autone.py @@ -6,6 +6,7 @@ import time import json import math import numpy as np +from tqdm import trange from . 
import register_hpo from .suggestion.models import Study from .base import BaseHPOptimizer, TimeTooLimitedError @@ -115,7 +116,8 @@ class AutoNE(BaseHPOptimizer): K = utils.K(len(params.type_)) gp = utils.GaussianProcessRegressor(K) sample_graphs = sample_subgraph(dataset) - for t in range(sampled_number): + print("Sample Phase:\n") + for t in trange(sampled_number): b_t = time.time() i = t subgraph = sample_graphs[t] @@ -133,7 +135,8 @@ class AutoNE(BaseHPOptimizer): best_trainer = None best_para = None wne = get_wne(dataset) - for t in range(s): + print("HPO Search Phase:\n") + for t in trange(s): if time.time() - start_time > time_limit: self.logger.info("Time out of limit, Epoch: {}".format(str(i))) break From afddf27745859fd707afa4fc78657e2f70079f8a Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sat, 10 Apr 2021 10:48:27 +0000 Subject: [PATCH 02/19] update paper --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index 03bb4db..3485b42 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,11 @@ Feel free to open issues [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Documentation Status](https://readthedocs.org/projects/autogl/badge/?version=latest)](https://autogl.readthedocs.io/en/latest/?badge=latest) + +## News! + +- 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://openreview.net/forum?id=0yHwpLeInDn) was accepted at the _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper using the methods described [here](#Cite). + ## Introduction AutoGL is developed for researchers and developers to quickly conduct autoML on the graph datasets & tasks. See our documentation for detailed information!
@@ -104,6 +109,20 @@ make clean && make html The documentation will be automatically generated under `docs/_build/html` +## Cite + +You can cite [our paper](https://openreview.net/forum?id=0yHwpLeInDn) as follows if you use this code in your own work: +``` +@inproceedings{ +guan2021autogl, +title={Auto{GL}: A Library for Automated Graph Learning}, +author={Chaoyu Guan and Ziwei Zhang and Haoyang Li and Heng Chang and Zeyang Zhang and Yijian Qin and Jiyan Jiang and Xin Wang and Wenwu Zhu}, +booktitle={ICLR 2021 Workshop on Geometrical and Topological Representation Learning}, +year={2021}, +url={https://openreview.net/forum?id=0yHwpLeInDn} +} +``` + ## License We follow [MIT license](LICENSE) across the entire codebase. From 3d8e9d1847039ec2c5889b6a12f9fb49d44684af Mon Sep 17 00:00:00 2001 From: null Date: Tue, 13 Apr 2021 20:12:30 +0800 Subject: [PATCH 03/19] Implement partial GraphSAINT Methodology and refactor BaseModel Reproduce partial implementation of GraphSAINT, a representative Subgraph-wise sampling method. Add experimental BaseModel for future major version.
TODO: Migrate the base class for all the concrete models to ClassificationModel (ClassificationApproach) --- autogl/module/model/base.py | 288 ++++++++++- autogl/module/model/gcn.py | 215 ++++---- autogl/module/model/graph_sage.py | 2 +- autogl/module/train/base.py | 2 +- .../module/train/graph_classification_full.py | 2 +- .../module/train/node_classification_full.py | 2 +- .../node_classification_sampled_trainer.py | 474 +++++++++++++++--- .../sampling/sampler/graphsaint_sampler.py | 105 ++++ 8 files changed, 900 insertions(+), 190 deletions(-) create mode 100644 autogl/module/train/sampling/sampler/graphsaint_sampler.py diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index 6e6d8c7..33cc5a9 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -3,10 +3,13 @@ auto graph model a list of models with their hyper parameters NOTE: neural architecture search (NAS) maybe included here """ - +import copy +import logging +import typing as _typing import torch import torch.nn.functional as F from copy import deepcopy +base_approach_logger: logging.Logger = logging.getLogger("BaseModel") def activate_func(x, func): @@ -22,7 +25,7 @@ def activate_func(x, func): return x -class BaseModel(torch.nn.Module): +class BaseModel: def __init__(self, init=False, *args, **kwargs): super(BaseModel, self).__init__() @@ -46,7 +49,9 @@ class BaseModel(torch.nn.Module): def to(self, device): if isinstance(device, (str, torch.device)): self.device = device - return super().to(device) + if hasattr(self, "model") and self.model is not None and isinstance(self.model, torch.nn.Module): + self.model.to(self.device) + return self def from_hyper_parameter(self, hp): ret_self = self.__class__( @@ -80,3 +85,280 @@ class BaseModel(torch.nn.Module): ), "Cannot set graph features for tasks other than graph classification" self.num_graph_features = num_graph_features self.params["num_graph_features"] = num_graph_features + + +class _BaseBaseModel: + # todo: 
after renaming the experimental base class _BaseModel to BaseModel, + # rename this class to _BaseModel + """ + The base class for class BaseModel, + designed to implement some basic functionality of BaseModel. + -- Designed by ZiXin Sun + """ + @classmethod + def __formulate_device( + cls, device: _typing.Union[str, torch.device] = ... + ) -> torch.device: + if ( + type(device) == torch.device or + (type(device) == str and device.strip().lower() != "auto") + ): + return torch.device(device) + elif torch.cuda.is_available() and torch.cuda.device_count() > 0: + return torch.device("cuda") + else: + return torch.device("cpu") + + @property + def device(self) -> torch.device: + return self.__device + + @device.setter + def device(self, __device: _typing.Union[str, torch.device, None]): + self.__device: torch.device = self.__formulate_device(__device) + + @property + def model(self) -> _typing.Optional[torch.nn.Module]: + if self._model is None: + base_approach_logger.debug( + "property of model NOT initialized before accessing" + ) + return self._model + + @model.setter + def model(self, _model: torch.nn.Module) -> None: + if not isinstance(_model, torch.nn.Module): + raise TypeError( + "the property of model MUST be an instance of " + "torch.nn.Module" + ) + self._model = _model + + def _initialize(self): + raise NotImplementedError + + def initialize(self) -> bool: + """ + Initialize the model in case that the model has NOT been initialized + :return: whether self._initialize() method called + """ + if not self.__is_initialized: + self._initialize() + self.__is_initialized = True + return True + return False + + # def to(self, *args, **kwargs): + # """ + # Due to the signature of to() method in class BaseApproach + # is inconsistent with the signature of the method + # in the base class torch.nn.Module, + # this intermediate overridden method is necessary to + # walk around (bypass) the inspection for + # signature of overriding method. 
+ # :param args: positional arguments list + # :param kwargs: keyword arguments dict + # :return: self + # """ + # return super(_BaseBaseModel, self).to(*args, **kwargs) + + def forward(self, *args, **kwargs): + if self.model is not None and isinstance(self.model, torch.nn.Module): + return self.model(*args, **kwargs) + else: + raise NotImplementedError + + def __init__( + self, model: _typing.Optional[torch.nn.Module] = None, + initialize: bool = False, + device: _typing.Union[str, torch.device] = ... + ): + if type(initialize) != bool: + raise TypeError + super(_BaseBaseModel, self).__init__() + self.__device: torch.device = self.__formulate_device(device) + self._model: _typing.Optional[torch.nn.Module] = model + self.__is_initialized: bool = False + if initialize: + self.initialize() + + +class _BaseModel(_BaseBaseModel, BaseModel): + """ + The upcoming root base class for Model, i.e. BaseModel + -- Designed by ZiXin Sun + """ + # todo: Deprecate and remove the legacy class "BaseModel", + # then rename this class to "BaseModel", + # correspondingly, this class will no longer extend + # the legacy class "BaseModel" after the removal. 
+ def _initialize(self): + raise NotImplementedError + + def to(self, device: torch.device): + self.device = device + if self.model is not None and isinstance(self.model, torch.nn.Module): + self.model.to(self.device) + return super().to(device) + + @property + def space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: + # todo: deprecate and remove in future major version + return self.__hyper_parameter_space + + @property + def hyper_parameter_space(self): + return self.__hyper_parameter_space + + @hyper_parameter_space.setter + def hyper_parameter_space( + self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + ): + self.__hyper_parameter_space = space + + @property + def hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: + return self.__hyper_parameter + + @hyper_parameter.setter + def hyper_parameter(self, _hyper_parameter: _typing.Dict[str, _typing.Any]): + if not isinstance(_hyper_parameter, dict): + raise TypeError + self.__hyper_parameter = _hyper_parameter + + def get_hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: + """ + todo: consider deprecating this trivial getter method in the future + :return: copied hyper parameter + """ + return copy.deepcopy(self.__hyper_parameter) + + def __init__( + self, model: _typing.Optional[torch.nn.Module] = None, + initialize: bool = False, + hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., + hyper_parameter: _typing.Dict[str, _typing.Any] = ..., + device: _typing.Union[str, torch.device] = ... 
+ ): + if type(initialize) != bool: + raise TypeError + super(_BaseModel, self).__init__(model, initialize, device) + if ( + hyper_parameter_space != Ellipsis and + isinstance(hyper_parameter_space, _typing.Sequence) + ): + self.__hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = hyper_parameter_space + else: + self.__hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] + if hyper_parameter != Ellipsis and isinstance(hyper_parameter, dict): + self.__hyper_parameter: _typing.Dict[str, _typing.Any] = hyper_parameter + else: + self.__hyper_parameter: _typing.Dict[str, _typing.Any] = {} + + def from_hyper_parameter(self, hyper_parameter: _typing.Dict[str, _typing.Any]): + raise NotImplementedError + + +class ClassificationModel(_BaseModel): + def _initialize(self): + raise NotImplementedError + + def from_hyper_parameter( + self, hyper_parameter: _typing.Dict[str, _typing.Any] + ) -> "ClassificationModel": + new_model: ClassificationModel = self.__class__( + num_features=self.num_features, + num_classes=self.num_classes, + device=self.device, + init=False + ) + _hyper_parameter = self.hyper_parameter + _hyper_parameter.update(hyper_parameter) + new_model.hyper_parameter = _hyper_parameter + new_model.initialize() + return new_model + + def __init__( + self, num_features: int = ..., num_classes: int = ..., + num_graph_features: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, **kwargs + ): + if "initialize" in kwargs: + del kwargs["initialize"] + super(ClassificationModel, self).__init__( + initialize=init, device=device, **kwargs + ) + if num_classes != Ellipsis and type(num_classes) == int: + self.__num_classes: int = num_classes if num_classes > 0 else 0 + else: + self.__num_classes: int = 0 + if num_features != Ellipsis and type(num_features) == int: + self.__num_features: int = num_features if num_features > 0 else 0 + else: + self.__num_features: int = 0 + if 
num_graph_features != Ellipsis and type(num_graph_features) == int: + if num_graph_features > 0: + self.__num_graph_features: int = num_graph_features + else: + self.__num_graph_features: int = 0 + else: + self.__num_graph_features: int = 0 + + @property + def num_classes(self) -> int: + return self.__num_classes + + @num_classes.setter + def num_classes(self, __num_classes: int): + if type(__num_classes) != int: + raise TypeError + if not __num_classes > 0: + raise ValueError + self.__num_classes = __num_classes if __num_classes > 0 else 0 + + @property + def num_features(self) -> int: + return self.__num_features + + @num_features.setter + def num_features(self, __num_features: int): + if type(__num_features) != int: + raise TypeError + if not __num_features > 0: + raise ValueError + self.__num_features = __num_features if __num_features > 0 else 0 + + def get_num_classes(self) -> int: + # todo: consider replacing with property with getter and setter + return self.__num_classes + + def set_num_classes(self, num_classes: int) -> None: + # todo: consider replacing with property with getter and setter + if type(num_classes) != int: + raise TypeError + self.__num_classes = num_classes if num_classes > 0 else 0 + + def get_num_features(self) -> int: + # todo: consider replacing with property with getter and setter + return self.__num_features + + def set_num_features(self, num_features: int): + # todo: consider replacing with property with getter and setter + if type(num_features) != int: + raise TypeError + self.__num_features = num_features if num_features > 0 else 0 + + def set_num_graph_features(self, num_graph_features: int): + # todo: consider replacing with property with getter and setter + if type(num_graph_features) != int: + raise TypeError + else: + if num_graph_features > 0: + self.__num_graph_features = num_graph_features + else: + self.__num_graph_features = 0 diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 3e6208f..4fa3594 
100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -1,73 +1,94 @@ import torch -import torch.nn.functional as F -from torch_geometric.nn import GCNConv +import torch.nn.functional +import torch_geometric +import typing as _typing from . import register_model -from .base import BaseModel, activate_func +from .base import activate_func, ClassificationModel from ...utils import get_logger LOGGER = get_logger("GCNModel") -def set_default(args, d): - for k, v in d.items(): - if k not in args: - args[k] = v - return args - - class GCN(torch.nn.Module): - def __init__(self, args): - super(GCN, self).__init__() - self.args = args - self.num_layer = int(self.args["num_layers"]) - - missing_keys = list( - set(["features_num", "num_class", "num_layers", "hidden", "dropout", "act"]) - - set(self.args.keys()) - ) - if len(missing_keys) > 0: - raise Exception("Missing keys: %s." % ",".join(missing_keys)) - - if not self.num_layer == len(self.args["hidden"]) + 1: - LOGGER.warn("Warning: layer size does not match the length of hidden units") - - self.convs = torch.nn.ModuleList() - self.convs.append(GCNConv(self.args["features_num"], self.args["hidden"][0])) - for i in range(self.num_layer - 2): - self.convs.append( - GCNConv(self.args["hidden"][i], self.args["hidden"][i + 1]) + def __init__( + self, num_features: int, num_classes: int, + hidden_features: _typing.Sequence[int], + dropout: float, activation_name: str + ): + super().__init__() + self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() + num_layers: int = len(hidden_features) + 1 + if num_layers == 1: + self.__convolution_layers.append( + torch_geometric.nn.GCNConv( + num_features, num_classes, add_self_loops=False + ) ) - self.convs.append( - GCNConv(self.args["hidden"][self.num_layer - 2], self.args["num_class"]) - ) - - def forward(self, data): - try: - x = data.x - except: - print("no x") - pass - try: - edge_index = data.edge_index - except: - print("no index") - pass - 
try: - edge_weight = data.edge_weight - except: - edge_weight = None - pass - - for i in range(self.num_layer): - x = self.convs[i](x, edge_index, edge_weight) - if i != self.num_layer - 1: - x = activate_func(x, self.args["act"]) - x = F.dropout(x, p=self.args["dropout"], training=self.training) - return F.log_softmax(x, dim=1) + else: + self.__convolution_layers.append(torch_geometric.nn.GCNConv( + num_features, hidden_features[0], add_self_loops=False + )) + for i in range(len(hidden_features)): + self.__convolution_layers.append( + torch_geometric.nn.GCNConv( + hidden_features[i], hidden_features[i + 1] + ) if i + 1 < len(hidden_features) + else torch_geometric.nn.GCNConv( + hidden_features[i], num_classes + ) + ) + self.__dropout: float = dropout + self.__activation_name: str = activation_name + + def __layer_wise_forward(self, data): + # todo: Implement this forward method + # in case that data.edge_indexes property is provided + # for Layer-wise and Node-wise sampled training + raise NotImplementedError + + def __basic_forward( + self, x: torch.Tensor, + edge_index: torch.Tensor, + edge_weight: _typing.Optional[torch.Tensor] = None + ) -> torch.Tensor: + for layer_index in range(len(self.__convolution_layers)): + x: torch.Tensor = self.__convolution_layers[layer_index]( + x, edge_index, edge_weight + ) + if layer_index + 1 < len(self.__convolution_layers): + x = activate_func(x, self.__activation_name) + x = torch.nn.functional.dropout(x, p=self.__dropout, training=self.training) + return torch.nn.functional.log_softmax(x, dim=1) + + def forward(self, data) -> torch.Tensor: + if ( + hasattr(data, "edge_indexes") and + getattr(data, "edge_indexes") is not None + ): + return self.__layer_wise_forward(data) + else: + if not (hasattr(data, "x") and hasattr(data, "edge_index")): + raise AttributeError + if not ( + type(getattr(data, "x")) == torch.Tensor and + type(getattr(data, "edge_index")) == torch.Tensor + ): + raise TypeError + x: torch.Tensor = 
getattr(data, "x") + edge_index: torch.LongTensor = getattr(data, "edge_index") + if ( + hasattr(data, "edge_weight") and + type(getattr(data, "edge_weight")) == torch.Tensor and + getattr(data, "edge_weight").size() == (edge_index.size(1),) + ): + edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight") + else: + edge_weight: _typing.Optional[torch.Tensor] = None + return self.__basic_forward(x, edge_index, edge_weight) @register_model("gcn") -class AutoGCN(BaseModel): +class AutoGCN(ClassificationModel): r""" AutoGCN. The model used in this automodel is GCN, i.e., the graph convolutional network from the @@ -99,66 +120,18 @@ class AutoGCN(BaseModel): """ def __init__( - self, num_features=None, num_classes=None, device=None, init=False, **args - ): - - super(AutoGCN, self).__init__() - - self.num_features = num_features if num_features is not None else 0 - self.num_classes = int(num_classes) if num_classes is not None else 0 - self.device = device if device is not None else "cpu" - self.init = True - - self.params = { - "features_num": self.num_features, - "num_class": self.num_classes, - } - self.space = [ - { - "parameterName": "num_layers", - "type": "DISCRETE", - "feasiblePoints": "2,3,4", - }, - { - "parameterName": "hidden", - "type": "NUMERICAL_LIST", - "numericalType": "INTEGER", - "length": 3, - "minValue": [8, 8, 8], - "maxValue": [128, 128, 128], - "scalingType": "LOG", - "cutPara": ("num_layers",), - "cutFunc": lambda x: x[0] - 1, - }, - { - "parameterName": "dropout", - "type": "DOUBLE", - "maxValue": 0.8, - "minValue": 0.2, - "scalingType": "LINEAR", - }, - { - "parameterName": "act", - "type": "CATEGORICAL", - "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], - }, - ] - - # initial point of hp search - self.hyperparams = { - "num_layers": 2, - "hidden": [16], - "dropout": 0.2, - "act": "leaky_relu", - } - - self.initialized = False - if init is True: - self.initialize() - - def initialize(self): - # """Initialize model.""" 
- if self.initialized: - return - self.initialized = True - self.model = GCN({**self.params, **self.hyperparams}).to(self.device) + self, num_features: int = ..., num_classes: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, **kwargs + ) -> None: + super(AutoGCN, self).__init__( + num_features, num_classes, device=device, init=init, **kwargs + ) + + def _initialize(self): + self.model = GCN( + self.num_features, self.num_classes, + self.hyper_parameter.get("hidden"), + self.hyper_parameter.get("dropout"), + self.hyper_parameter.get("act") + ).to(self.device) diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 90ee515..fbe8f6e 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -120,5 +120,5 @@ class AutoSAGE(BaseModel): hidden_features=self.hyperparams["hidden"], activation_name=self.hyperparams["act"], **self.hyperparams - ) + ).to(self.__device) self._initialized = True diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index e022631..7ec47f1 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -212,7 +212,7 @@ class BaseTrainer: pass def duplicate_from_hyper_parameter( - self, hp, model: _typing.Union[BaseModel, str, None] = None + self, hp, model: _typing.Optional[BaseModel] = ... 
) -> "BaseTrainer": """Create a new trainer with the given hyper parameter.""" raise NotImplementedError() diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index eed5feb..a04bf18 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -211,7 +211,7 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): """ optimizer = self.optimizer( - self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay ) # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) diff --git a/autogl/module/train/node_classification_full.py b/autogl/module/train/node_classification_full.py index 73a7e06..e5f4937 100644 --- a/autogl/module/train/node_classification_full.py +++ b/autogl/module/train/node_classification_full.py @@ -196,7 +196,7 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) mask = data.train_mask if train_mask is None else train_mask optimizer = self.optimizer( - self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay ) # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) lr_scheduler_type = self.lr_scheduler_type diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 0c3e671..542b03a 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -1,12 +1,14 @@ import torch import logging import typing as _typing -from torch.nn import functional as F +import torch.nn.functional +import torch.utils.data from .. 
import register_trainer from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation from ..evaluation import get_feval, Logloss from ..sampling.sampler.neighbor_sampler import NeighborSampler +from ..sampling.sampler.graphsaint_sampler import * from ...model import BaseModel LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") @@ -71,45 +73,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) - self._valid_score = None + self._valid_score: _typing.Sequence[float] = [] - self._hyper_parameter_space: _typing.List[_typing.Dict[str, _typing.Any]] = [ - { - "parameterName": "max_epoch", - "type": "INTEGER", - "maxValue": 500, - "minValue": 10, - "scalingType": "LINEAR", - }, - { - "parameterName": "early_stopping_round", - "type": "INTEGER", - "maxValue": 30, - "minValue": 10, - "scalingType": "LINEAR", - }, - { - "parameterName": "lr", - "type": "DOUBLE", - "maxValue": 1e-1, - "minValue": 1e-4, - "scalingType": "LOG", - }, - { - "parameterName": "weight_decay", - "type": "DOUBLE", - "maxValue": 1e-2, - "minValue": 1e-4, - "scalingType": "LOG", - } - ] - - self._hyper_parameter: _typing.Dict[str, _typing.Any] = { - "max_epoch": self._max_epoch, - "early_stopping_round": self._early_stopping.patience, - "lr": self._learning_rate, - "weight_decay": self._weight_decay - } + self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] self.__initialized: bool = False if init: @@ -118,12 +84,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": if self.__initialized: return self - self._model.initialize() + self.model.initialize() self.__initialized = True return self def get_model(self) -> BaseModel: - return self._model + return self.model def __train_only( self, data @@ 
-135,7 +101,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): """ data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( - self._model.parameters(), + self.model.model.parameters(), lr=self._learning_rate, weight_decay=self._weight_decay ) if type(self._lr_scheduler_type) == str: @@ -169,17 +135,17 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) for current_epoch in range(self._max_epoch): - self._model.model.train() + self.model.model.train() """ epoch start """ for target_node_indexes, edge_indexes in train_sampler: optimizer.zero_grad() data.edge_indexes = edge_indexes - prediction = self._model.model(data) - if not hasattr(F, self.loss): + prediction = self.model.model(data) + if not hasattr(torch.nn.functional, self.loss): raise TypeError( "PyTorch does not support loss type {}".format(self.loss) ) - loss_function = getattr(F, self.loss) + loss_function = getattr(torch.nn.functional, self.loss) loss: torch.Tensor = loss_function( prediction[target_node_indexes], data.y[target_node_indexes] @@ -199,12 +165,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): validation_loss: float = -validation_results[0] else: validation_loss: float = validation_results[0] - self._early_stopping(validation_loss, self._model.model) + self._early_stopping(validation_loss, self.model.model) if self._early_stopping.early_stop: LOGGER.debug("Early stopping at %d", current_epoch) break if hasattr(data, "val_mask") and data.val_mask is not None: - self._early_stopping.load_checkpoint(self._model.model) + self._early_stopping.load_checkpoint(self.model.model) return self def __predict_only(self, data): @@ -214,9 +180,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): :return: the result of prediction on the given dataset """ data = data.to(self.device) - self._model.model.eval() + self.model.model.eval() with 
torch.no_grad(): - prediction = self._model.model(data) + prediction = self.model.model(data) return prediction def train(self, dataset, keep_valid_result: bool = True): @@ -282,7 +248,6 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) def get_name_with_hp(self) -> str: - # """Get the name of hyperparameter.""" name = "-".join( [ str(self._optimizer_class), @@ -294,14 +259,14 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ] ) name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) ) return name @@ -345,8 +310,8 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): def to(self, device: torch.device): self.device = device - if self._model is not None: - self._model.to(self.device) + if self.model is not None: + self.model.to(self.device) def duplicate_from_hyper_parameter( self, hp: _typing.Dict[str, _typing.Any], @@ -354,7 +319,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) -> "NodeClassificationNeighborSamplingTrainer": if model is None or not isinstance(model, BaseModel): - model = self._model + model = self.model model = model.from_hyper_parameter( dict( [ @@ -380,3 +345,388 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): @hyper_parameter_space.setter def hyper_parameter_space(self, hp_space): self._hyper_parameter_space = hp_space + + +@register_trainer("NodeClassificationGraphSAINTTrainer") +class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): + def __init__( + self, + model: _typing.Union[BaseModel], + num_features: int, + num_classes: int, + optimizer: _typing.Union[ + _typing.Type[torch.optim.Optimizer], str, None + ], + lr: float = 1e-4, + max_epoch: int = 100, + 
early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs + ) -> None: + if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer + elif type(optimizer) == str: + if optimizer.lower() == "adam": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + elif optimizer.lower() == "adam" + "w": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.AdamW + elif optimizer.lower() == "sgd": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.SGD + else: + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + else: + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._learning_rate: float = lr if lr > 0 else 1e-4 + self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type + self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 + self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 + early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 + self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) + + # Assign an empty initial hyper parameter space + self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] + + self._valid_result: torch.Tensor = torch.zeros(0) + self._valid_result_prob: torch.Tensor = torch.zeros(0) + self._valid_score: _typing.Sequence[float] = () + + super(NodeClassificationGraphSAINTTrainer, self).__init__( + model, num_features, num_classes, device, init, feval, loss + ) + + """ Set hyper parameters """ + if "num_subgraphs" not in kwargs: + raise 
KeyError + elif type(kwargs.get("num_subgraphs")) != int: + raise TypeError + elif not kwargs.get("num_subgraphs") > 0: + raise ValueError + else: + self.__num_subgraphs: int = kwargs.get("num_subgraphs") + if "sampling_budget" not in kwargs: + raise KeyError + elif type(kwargs.get("sampling_budget")) != int: + raise TypeError + elif not kwargs.get("sampling_budget") > 0: + raise ValueError + else: + self.__sampling_budget: int = kwargs.get("sampling_budget") + if "sampling_method" not in kwargs: + self.__sampling_method_identifier: str = "node" + elif type(kwargs.get("sampling_method")) != str: + self.__sampling_method_identifier: str = "node" + else: + self.__sampling_method_identifier: str = kwargs.get("sampling_method") + if self.__sampling_method_identifier.lower() not in ("node", "edge"): + self.__sampling_method_identifier: str = "node" + + self.__is_initialized: bool = False + if init: + self.initialize() + + def initialize(self): + if self.__is_initialized: + return self + self.model.initialize() + self.__is_initialized = True + return self + + def to(self, device: torch.device): + self.device = device + if self.model is not None: + self.model.to(self.device) + + def get_model(self): + return self.model + + def __train_only(self, data): + """ + The function of training on the given dataset and mask. 
+ :param data: data of a specific graph + :return: self + """ + data = data.to(self.device) + optimizer: torch.optim.Optimizer = self._optimizer_class( + self.model.parameters(), + lr=self._learning_rate, weight_decay=self._weight_decay + ) + if type(self._lr_scheduler_type) == str: + if self._lr_scheduler_type.lower() == "step" + "lr": + lr_scheduler: torch.optim.lr_scheduler.StepLR = \ + torch.optim.lr_scheduler.StepLR( + optimizer, step_size=100, gamma=0.1 + ) + elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = \ + torch.optim.lr_scheduler.MultiStepLR( + optimizer, milestones=[30, 80], gamma=0.1 + ) + elif self._lr_scheduler_type.lower() == "exponential" + "lr": + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = \ + torch.optim.lr_scheduler.ExponentialLR( + optimizer, gamma=0.1 + ) + elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = \ + torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + else: + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + else: + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + + if self.__sampling_method_identifier.lower() == "edge": + sub_graph_sampler = GraphSAINTRandomEdgeSampler( + self.__sampling_budget, self.__num_subgraphs + ) + else: + sub_graph_sampler = GraphSAINTRandomNodeSampler( + self.__sampling_budget, self.__num_subgraphs + ) + + for current_epoch in range(self._max_epoch): + self.model.model.train() + """ epoch start """ + """ Sample sub-graphs """ + sub_graph_set = sub_graph_sampler.sample(data) + sub_graphs_loader: torch.utils.data.DataLoader = \ + torch.utils.data.DataLoader(sub_graph_set) + integral_alpha: torch.Tensor = getattr(sub_graph_set, "alpha") + integral_lambda: torch.Tensor = getattr(sub_graph_set, "lambda") + """ 
iterate sub-graphs """ + for sub_graph_data in sub_graphs_loader: + optimizer.zero_grad() + sampled_edge_indexes: torch.Tensor = \ + sub_graph_data.sampled_edge_indexes + sampled_node_indexes: torch.Tensor = \ + sub_graph_data.sampled_node_indexes + sampled_train_mask: torch.Tensor = \ + sub_graph_data.train_mask + + sampled_alpha = integral_alpha[sampled_edge_indexes] + sub_graph_data.edge_weight = 1 / sampled_alpha + + prediction: torch.Tensor = self.model.model(sub_graph_data) + + if not hasattr(torch.nn.functional, self.loss): + raise TypeError( + f"PyTorch does not support loss type {self.loss}" + ) + loss_func = getattr(torch.nn.functional, self.loss) + unreduced_loss: torch.Tensor = loss_func( + prediction[sampled_train_mask], + data.y[sampled_train_mask], + reduction="none" + ) + + sampled_lambda: torch.Tensor = integral_lambda[sampled_node_indexes] + sampled_train_lambda: torch.Tensor = sampled_lambda[sampled_train_mask] + assert unreduced_loss.size() == sampled_train_lambda.size() + loss_weighted_sum: torch.Tensor = \ + torch.sum(unreduced_loss / sampled_train_lambda) + loss_weighted_sum.backward() + optimizer.step() + + if lr_scheduler is not None: + lr_scheduler.step() + + """ Validate performance """ + if ( + hasattr(data, "val_mask") and + type(getattr(data, "val_mask")) == torch.Tensor + ): + validation_results: _typing.Sequence[float] = \ + self.evaluate((data,), "val", [self.feval[0]]) + if self.feval[0].is_higher_better(): + validation_loss: float = -validation_results[0] + else: + validation_loss: float = validation_results[0] + self._early_stopping(validation_loss, self.model.model) + if self._early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", current_epoch) + break + if hasattr(data, "val_mask") and data.val_mask is not None: + self._early_stopping.load_checkpoint(self.model.model) + return self + + def __predict_only(self, data): + """ + The function of predicting on the given data. 
+ :param data: data of a specific graph + :return: the result of prediction on the given dataset + """ + data = data.to(self.device) + self.model.model.eval() + with torch.no_grad(): + predicted_x: torch.Tensor = self.model.model(data) + return predicted_x + + def predict_proba( + self, dataset, mask: _typing.Optional[str] = None, + in_log_format=False + ): + """ + The function of predicting the probability on the given dataset. + :param dataset: The node classification dataset used to be predicted. + :param mask: + :param in_log_format: + :return: + """ + data = dataset[0].to(self.device) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + result = self.__predict_only(data)[_mask] + return result if in_log_format else torch.exp(result) + + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: + return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] + + def evaluate( + self, dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = None + ) -> _typing.Sequence[float]: + data = dataset[0] + data = data.to(self.device) + if feval is None: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval + else: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + 
prediction_probability: torch.Tensor = \ + self.predict_proba(dataset, mask) + y_ground_truth: torch.Tensor = data.y[_mask] + + eval_results = [] + for f in _feval: + try: + eval_results.append( + f.evaluate(prediction_probability, y_ground_truth) + ) + except: + eval_results.append( + f.evaluate( + prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy() + ) + ) + return eval_results + + def train(self, dataset, keep_valid_result: bool = True): + """ + The function of training on the given dataset and keeping valid result. + :param dataset: + :param keep_valid_result: Whether to save the validation result after training + """ + data = dataset[0] + self.__train_only(data) + if keep_valid_result: + prediction: torch.Tensor = self.__predict_only(data) + self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] + self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") + + def get_valid_predict(self) -> torch.Tensor: + return self._valid_result + + def get_valid_predict_proba(self) -> torch.Tensor: + return self._valid_result_prob + + def get_valid_score(self, return_major: bool = True) -> _typing.Tuple[ + _typing.Union[float, _typing.Sequence[float]], + _typing.Union[bool, _typing.Sequence[bool]] + ]: + if return_major: + return self._valid_score[0], self.feval[0].is_higher_better() + else: + return ( + self._valid_score, [f.is_higher_better() for f in self.feval] + ) + + @property + def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: + return self._hyper_parameter_space + + @hyper_parameter_space.setter + def hyper_parameter_space( + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + ) -> None: + if not isinstance(hp_space, _typing.Sequence): + raise TypeError + self._hyper_parameter_space = hp_space + + def get_name_with_hp(self) -> str: + name = "-".join( + [ + str(self._optimizer_class), + 
str(self._learning_rate), + str(self._max_epoch), + str(self._early_stopping.patience), + str(self.model), + str(self.device), + ] + ) + name = ( + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) + ) + return name + + def duplicate_from_hyper_parameter( + self, hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None + ) -> "NodeClassificationGraphSAINTTrainer": + if model is None or not isinstance(model, BaseModel): + model: BaseModel = self.model + model = model.from_hyper_parameter( + dict( + [ + x for x in hp.items() + if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] + ] + ) + ) + return NodeClassificationGraphSAINTTrainer( + model, self.num_features, self.num_classes, + self._optimizer_class, + device=self.device, init=True, + feval=self.feval, loss=self.loss, + lr_scheduler_type=self._lr_scheduler_type, + **hp + ) diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py new file mode 100644 index 0000000..64972fc --- /dev/null +++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py @@ -0,0 +1,105 @@ +import copy +import typing as _typing +import torch.utils.data +import torch_geometric + + +class _SubGraphSet(torch.utils.data.Dataset[_typing.Any]): + def __init__(self, datalist: _typing.Sequence[_typing.Any], *args, **kwargs): + self.__graphs: _typing.Sequence[_typing.Any] = datalist + self.__remaining_args: _typing.Sequence[_typing.Any] = args + for key, value in kwargs.items(): + setattr(self, key, value) + + def __len__(self) -> int: + return len(self.__graphs) + + def __getitem__(self, index: int) -> _typing.Any: + if not 0 <= index < len(self.__graphs): + raise IndexError + return self.__graphs[index] + + +class _GraphSAINTSubGraphSampler: + def __init__( + self, sampler_class: _typing.Type[torch_geometric.data.GraphSAINTSampler], + budget: int, num_graphs: 
int = 1, walk_length: int = 1, num_workers: int = 0 + ): + """ + :param sampler_class: class of torch_geometric.data.GraphSAINTSampler + :param budget: general budget + :param num_graphs: number of sub-graphs to sample, i.e. N in the paper + :param walk_length: walk length for RandomWalk Sampler + :param num_workers: how many sub-processes to use for data loading. + 0 means that the data will be loaded in the main process. + """ + self.__sampler_class: _typing.Type[ + torch_geometric.data.GraphSAINTSampler + ] = sampler_class + self.__budget: int = budget + self.__num_graphs: int = num_graphs + self.__walk_length: int = walk_length + self.__num_workers: int = num_workers if num_workers > 0 else 0 + + def sample(self, _integral_data) -> _SubGraphSet: + """ + :param _integral_data: conventional data for an integral graph + :return: instance of _SubGraphSet + """ + data = copy.copy(_integral_data) + data.sampled_node_indexes = torch.arange(data.num_nodes, dtype=torch.int64) + data.sampled_edge_indexes = torch.arange(data.num_edges, dtype=torch.int64) + if type(self.__sampler_class) == torch_geometric.data.GraphSAINTRandomWalkSampler: + _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = \ + torch_geometric.data.GraphSAINTRandomWalkSampler( + data, self.__budget, self.__walk_length, self.__num_graphs, + num_workers=self.__num_workers + ) + else: + _sampler: torch_geometric.data.GraphSAINTSampler = \ + self.__sampler_class( + data, self.__budget, self.__num_graphs, + num_workers=self.__num_workers + ) + """ Sample sub-graphs """ + datalist: list = [d for d in _sampler] + """ Compute the normalization """ + node_sampled_count = torch.zeros(data.num_nodes, dtype=torch.int64) + edge_sampled_count = torch.zeros(data.num_edges, dtype=torch.int64) + concatenated_sampled_nodes: torch.Tensor = torch.cat( + [sub_graph.sampled_node_indexes for sub_graph in datalist] + ) + concatenated_sampled_edges: torch.Tensor = torch.cat( + [sub_graph.sampled_edge_indexes for 
sub_graph in datalist] + ) + for current_sampled_node_index in concatenated_sampled_nodes.unique(): + node_sampled_count[current_sampled_node_index] = \ + torch.where(concatenated_sampled_nodes == current_sampled_node_index)[0].size(0) + for current_sampled_edge_index in concatenated_sampled_edges.unique(): + edge_sampled_count[current_sampled_edge_index] = \ + torch.where(concatenated_sampled_edges == current_sampled_edge_index)[0].size(0) + _alpha: torch.Tensor = edge_sampled_count / node_sampled_count[data.edge_index[1]] + _alpha[torch.isnan(_alpha) | torch.isinf(_alpha)] = 0 + _lambda: torch.Tensor = node_sampled_count / self.__num_graphs + return _SubGraphSet(datalist, **{"alpha": _alpha, "lambda": _lambda}) + + +class GraphSAINTRandomNodeSampler(_GraphSAINTSubGraphSampler): + def __init__(self, node_budget: int, num_graphs: int = 1): + super(GraphSAINTRandomNodeSampler, self).__init__( + torch_geometric.data.GraphSAINTNodeSampler, node_budget, num_graphs + ) + + +class GraphSAINTRandomEdgeSampler(_GraphSAINTSubGraphSampler): + def __init__(self, edge_budget: int, num_graphs: int = 1): + super(GraphSAINTRandomEdgeSampler, self).__init__( + torch_geometric.data.GraphSAINTNodeSampler, edge_budget, num_graphs + ) + + +class GraphSAINTRandomWalkSampler(_GraphSAINTSubGraphSampler): + def __init__(self, edge_budget: int, num_graphs: int = 1, walk_length: int = 4): + super(GraphSAINTRandomWalkSampler, self).__init__( + torch_geometric.data.GraphSAINTRandomWalkSampler, edge_budget, num_graphs, walk_length + ) From 7106622ec1fc361cae62466eb177da73ad3c732c Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Fri, 16 Apr 2021 09:37:47 +0000 Subject: [PATCH 04/19] change url of paper --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3485b42..dc0fe10 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Feel free to open issues ## News! 
-- 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://openreview.net/forum?id=0yHwpLeInDn) are accepted in _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper following methods [here](#Cite). +- 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://arxiv.org/abs/2104.04987) are accepted in _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper following methods [here](#Cite). ## Introduction @@ -111,7 +111,7 @@ The documentation will be automatically generated under `docs/_build/html` ## Cite -You can cite [our paper](https://openreview.net/forum?id=0yHwpLeInDn) as follows if you use this code in your own work: +You can cite [our paper](https://arxiv.org/abs/2104.04987) as follows if you use this code in your own work: ``` @inproceedings{ guan2021autogl, From 13106d41d18dd301adc27c1b8b11681cacb035c5 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 21 Apr 2021 02:08:47 +0000 Subject: [PATCH 05/19] add stratify support, fix bugs --- autogl/datasets/utils.py | 9 ++++++--- .../module/train/sampling/sampler/graphsaint_sampler.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index 4afe00a..adc3923 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -2,7 +2,7 @@ from pdb import set_trace import torch import numpy as np from torch_geometric.data import DataLoader -from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import StratifiedKFold, KFold def get_label_number(dataset): @@ -179,7 +179,7 @@ def random_splits_mask_class( return dataset -def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42): +def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False): r"""Cross validation for graph classification data, returning one fold with specific 
idx in autogl.datasets or pyg.Dataloader(default) Parameters @@ -196,7 +196,10 @@ def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42): random_seed : int random_state for sklearn.model_selection.StratifiedKFold """ - skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) + if stratify: + skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) + else: + skf = KFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) idx_list = [] # BUG: from pytorch_geometric, not sure whether it is a bug. The dataset.data will return diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py index 64972fc..9c5c978 100644 --- a/autogl/module/train/sampling/sampler/graphsaint_sampler.py +++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py @@ -4,7 +4,7 @@ import torch.utils.data import torch_geometric -class _SubGraphSet(torch.utils.data.Dataset[_typing.Any]): +class _SubGraphSet(torch.utils.data.Dataset): def __init__(self, datalist: _typing.Sequence[_typing.Any], *args, **kwargs): self.__graphs: _typing.Sequence[_typing.Any] = datalist self.__remaining_args: _typing.Sequence[_typing.Any] = args From 5b4c9189750c144e1a39618e7efaae89ffac7f82 Mon Sep 17 00:00:00 2001 From: lihy96 Date: Thu, 22 Apr 2021 15:36:23 +0800 Subject: [PATCH 06/19] link prediction --- autogl/module/model/gcn.py | 34 +- autogl/module/train/__init__.py | 2 + autogl/module/train/evaluate.py | 5 +- autogl/module/train/link_prediction.py | 534 +++++++++++++++++++++++++ 4 files changed, 570 insertions(+), 5 deletions(-) create mode 100644 autogl/module/train/link_prediction.py diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 63e1bc4..44528a0 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -62,6 +62,25 @@ class GCN(torch.nn.Module): x = F.dropout(x, p=self.args["dropout"], 
training=self.training) return F.log_softmax(x, dim=1) + def encode(self, data): + x = data.x + for i in range(self.num_layer - 1): + x = self.convs[i](x, data.train_pos_edge_index) + if i != self.num_layer - 2: + x = activate_func(x, self.args["act"]) + # x = F.dropout(x, p=self.args["dropout"], training=self.training) + return x + + def decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + return logits + + def decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + + @register_model("gcn") class AutoGCN(BaseModel): @@ -142,11 +161,18 @@ class AutoGCN(BaseModel): ] # initial point of hp search + # self.hyperparams = { + # "num_layers": 2, + # "hidden": [16], + # "dropout": 0.2, + # "act": "leaky_relu", + # } + self.hyperparams = { - "num_layers": 2, - "hidden": [16], - "dropout": 0.2, - "act": "leaky_relu", + "num_layers": 3, + "hidden": [128, 64], + "dropout": 0, + "act": "relu", } self.initialized = False diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 36fd434..8c090c2 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -48,12 +48,14 @@ def get_feval(feval): from .graph_classification import GraphClassificationTrainer from .node_classification import NodeClassificationTrainer +from .link_prediction import LinkPredictionTrainer from .evaluate import Acc, Auc, Logloss __all__ = [ "BaseTrainer", "GraphClassificationTrainer", "NodeClassificationTrainer", + "LinkPredictionTrainer", "Evaluation", "Acc", "Auc", diff --git a/autogl/module/train/evaluate.py b/autogl/module/train/evaluate.py index 290989e..ecd2413 100644 --- a/autogl/module/train/evaluate.py +++ b/autogl/module/train/evaluate.py @@ -47,7 +47,10 @@ class Auc(Evaluation): """ Should return: the evaluation result (float) """ - pos_predict = predict[:, 1] + if 
len(predict.shape) == 1: + pos_predict = predict + else: + pos_predict = predict[:, 1] return roc_auc_score(label, pos_predict) diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py new file mode 100644 index 0000000..fdc7844 --- /dev/null +++ b/autogl/module/train/link_prediction.py @@ -0,0 +1,534 @@ +from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping +import torch +from torch.optim.lr_scheduler import StepLR +import torch.nn.functional as F +from ..model import MODEL_DICT, BaseModel +from .evaluate import Logloss, Acc, Auc +from typing import Union +from copy import deepcopy +from torch_geometric.utils import negative_sampling +from torch_geometric.utils import train_test_split_edges + +from ...utils import get_logger + +LOGGER = get_logger("link prediction trainer") + +def get_feval(feval): + if isinstance(feval, str): + return EVALUATE_DICT[feval] + if isinstance(feval, type) and issubclass(feval, Evaluation): + return feval + if isinstance(feval, list): + return [get_feval(f) for f in feval] + raise ValueError("feval argument of type", type(feval), "is not supported!") + + +@register_trainer("LinkPrediction") +class LinkPredictionTrainer(BaseTrainer): + """ + The link prediction trainer. + + Used to automatically train the link prediction problem. + + Parameters + ---------- + model: ``BaseModel`` or ``str`` + The (name of) model used to train and predict. + + optimizer: ``Optimizer`` of ``str`` + The (name of) optimizer used to train and predict. + + lr: ``float`` + The learning rate of link prediction task. + + max_epoch: ``int`` + The max number of epochs in training. + + early_stopping_round: ``int`` + The round of early stop. + + device: ``torch.device`` or ``str`` + The device where model will be running on. + + init: ``bool`` + If True(False), the model will (not) be initialized. 
+ """ + + space = None + + def __init__( + self, + model: Union[BaseModel, str], + num_features, + num_classes, + optimizer=None, + lr=None, + max_epoch=None, + early_stopping_round=None, + weight_decay=1e-4, + device=None, + init=True, + feval=[Auc], + loss="binary_cross_entropy_with_logits", + *args, + **kwargs + ): + super(LinkPredictionTrainer, self).__init__(model) + + self.loss_type = loss + + if device is None: + device = "cpu" + + # init model + if isinstance(model, str): + assert model in MODEL_DICT, "Cannot parse model name " + model + self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) + elif isinstance(model, BaseModel): + self.model = model + + if type(optimizer) == str and optimizer.lower() == "adam": + self.optimizer = torch.optim.Adam + elif type(optimizer) == str and optimizer.lower() == "sgd": + self.optimizer = torch.optim.SGD + else: + self.optimizer = torch.optim.Adam + + self.num_features = num_features + self.num_classes = num_classes + self.lr = lr if lr is not None else 1e-4 + self.max_epoch = max_epoch if max_epoch is not None else 100 + self.early_stopping_round = ( + early_stopping_round if early_stopping_round is not None else 100 + ) + self.device = device + self.args = args + self.kwargs = kwargs + + self.feval = get_feval(feval) + + self.weight_decay = weight_decay + + self.early_stopping = EarlyStopping( + patience=early_stopping_round, verbose=False + ) + + self.valid_result = None + self.valid_result_prob = None + self.valid_score = None + + self.initialized = False + self.num_features = num_features + self.num_classes = num_classes + self.device = device + + self.space = [ + { + "parameterName": "max_epoch", + "type": "INTEGER", + "maxValue": 500, + "minValue": 10, + "scalingType": "LINEAR", + }, + { + "parameterName": "early_stopping_round", + "type": "INTEGER", + "maxValue": 30, + "minValue": 10, + "scalingType": "LINEAR", + }, + { + "parameterName": "lr", + "type": "DOUBLE", + "maxValue": 1e-1, + 
"minValue": 1e-4, + "scalingType": "LOG", + }, + { + "parameterName": "weight_decay", + "type": "DOUBLE", + "maxValue": 1e-2, + "minValue": 1e-4, + "scalingType": "LOG", + }, + ] + self.space += self.model.space + LinkPredictionTrainer.space = self.space + + self.hyperparams = { + "max_epoch": self.max_epoch, + "early_stopping_round": self.early_stopping_round, + "lr": self.lr, + "weight_decay": self.weight_decay, + } + self.hyperparams = {**self.hyperparams, **self.model.get_hyper_parameter()} + + if init is True: + self.initialize() + + def initialize(self): + # Initialize the auto model in trainer. + if self.initialized is True: + return + self.initialized = True + self.model.initialize() + + def get_model(self): + # Get auto model used in trainer. + return self.model + + @classmethod + def get_task_name(cls): + # Get task name, i.e., `LinkPrediction`. + return "LinkPrediction" + + def train_only(self, data, train_mask=None): + """ + The function of training on the given dataset and mask. + + Parameters + ---------- + data: The link prediction dataset used to be trained. It should consist of masks, including train_mask, and etc. + train_mask: The mask used in training stage. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A reference of current trainer. 
+ + """ + + # data.train_mask = data.val_mask = data.test_mask = data.y = None + # data = train_test_split_edges(data) + data = data.to(self.device) + # mask = data.train_mask if train_mask is None else train_mask + optimizer = self.optimizer( + self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + ) + scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + for epoch in range(1, self.max_epoch): + self.model.model.train() + + neg_edge_index = negative_sampling( + edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes, + num_neg_samples=data.train_pos_edge_index.size(1)) + + optimizer.zero_grad() + # res = self.model.model.forward(data) + z = self.model.model.encode(data) + link_logits = self.model.model.decode(z, data.train_pos_edge_index, neg_edge_index) + link_labels = self.get_link_labels(data.train_pos_edge_index, neg_edge_index) + # loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) + if hasattr(F, self.loss_type): + loss = getattr(F, self.loss_type)(link_logits, link_labels) + else: + raise TypeError("PyTorch does not support loss type {}".format(self.loss_type)) + + loss.backward() + optimizer.step() + scheduler.step() + + if type(self.feval) is list: + feval = self.feval[0] + else: + feval = self.feval + val_loss = self.evaluate([data], mask='val', feval=feval) + if feval.is_higher_better() is True: + val_loss = -val_loss + self.early_stopping(val_loss, self.model.model) + if self.early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", epoch) + self.early_stopping.load_checkpoint(self.model.model) + break + + def predict_only(self, data, test_mask=None): + """ + The function of predicting on the given dataset and mask. + + Parameters + ---------- + data: The link prediction dataset used to be predicted. + train_mask: The mask used in training stage. + + Returns + ------- + res: The result of predicting on the given dataset. 
+ + """ + data = data.to(self.device) + self.model.model.eval() + with torch.no_grad(): + z = self.model.model.encode(data) + return z + + def train(self, dataset, keep_valid_result=True): + """ + The function of training on the given dataset and keeping valid result. + + Parameters + ---------- + dataset: The link prediction dataset used to be trained. + + keep_valid_result: ``bool`` + If True(False), save the validation result after training. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A reference of current trainer. + + """ + data = dataset[0] + self.train_only(data) + if keep_valid_result: + self.valid_result = self.predict_only(data) + self.valid_result_prob = self.predict_proba(dataset, 'val') + self.valid_score = self.evaluate( + dataset, mask='val', feval=self.feval + ) + + def predict(self, dataset, mask=None): + """ + The function of predicting on the given dataset. + + Parameters + ---------- + dataset: The link prediction dataset used to be predicted. + + mask: ``train``, ``val``, or ``test``. + The dataset mask. + + Returns + ------- + The prediction result of ``predict_proba``. + """ + return self.predict_proba(dataset, mask=mask, in_log_format=False) + + def predict_proba(self, dataset, mask=None, in_log_format=False): + """ + The function of predicting the probability on the given dataset. + + Parameters + ---------- + dataset: The link prediction dataset used to be predicted. + + mask: ``train``, ``val``, or ``test``. + The dataset mask. + + in_log_format: ``bool``. + If True(False), the probability will (not) be log format. + + Returns + ------- + The prediction result. 
+ """ + data = dataset[0] + data = data.to(self.device) + if mask in ["train", "val", "test"]: + pos_edge_index = data[f'{mask}_pos_edge_index'] + neg_edge_index = data[f'{mask}_neg_edge_index'] + else: + pos_edge_index = data[f'test_pos_edge_index'] + neg_edge_index = data[f'test_neg_edge_index'] + + self.model.model.eval() + with torch.no_grad(): + z = self.predict_only(data) + link_logits = self.model.model.decode(z, pos_edge_index, neg_edge_index) + link_probs = link_logits.sigmoid() + + return link_probs + + def get_valid_predict(self): + # """Get the valid result.""" + return self.valid_result + + def get_valid_predict_proba(self): + # """Get the valid result (prediction probability).""" + return self.valid_result_prob + + def get_valid_score(self, return_major=True): + """ + The function of getting the valid score. + + Parameters + ---------- + return_major: ``bool``. + If True, the return only consists of the major result. + If False, the return consists of the all results. + + Returns + ------- + result: The valid score in training stage. + """ + if isinstance(self.feval, list): + if return_major: + return self.valid_score[0], self.feval[0].is_higher_better() + else: + return self.valid_score, [f.is_higher_better() for f in self.feval] + else: + return self.valid_score, self.feval.is_higher_better() + + def get_name_with_hp(self): + # """Get the name of hyperparameter.""" + name = "-".join( + [ + str(self.optimizer), + str(self.lr), + str(self.max_epoch), + str(self.early_stopping_round), + str(self.model), + str(self.device), + ] + ) + name = ( + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) + ) + return name + + def evaluate(self, dataset, mask=None, feval=None): + """ + The function of training on the given dataset and keeping valid result. + + Parameters + ---------- + dataset: The link prediction dataset used to be evaluated. + + mask: ``train``, ``val``, or ``test``. 
+ The dataset mask. + + feval: ``str``. + The evaluation method used in this function. + + Returns + ------- + res: The evaluation result on the given dataset. + + """ + data = dataset[0] + data = data.to(self.device) + test_mask = mask + if feval is None: + feval = self.feval + else: + feval = get_feval(feval) + + if mask in ["train", "val", "test"]: + pos_edge_index = data[f'{mask}_pos_edge_index'] + neg_edge_index = data[f'{mask}_neg_edge_index'] + else: + pos_edge_index = data[f'test_pos_edge_index'] + neg_edge_index = data[f'test_neg_edge_index'] + + self.model.model.eval() + with torch.no_grad(): + link_probs = self.predict_proba(dataset, mask) + link_labels = self.get_link_labels(pos_edge_index, neg_edge_index) + + if not isinstance(feval, list): + feval = [feval] + return_signle = True + else: + return_signle = False + + res = [] + for f in feval: + try: + res.append(f.evaluate(link_probs, link_labels)) + except: + res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy())) + if return_signle: + return res[0] + return res + + def to(self, new_device): + assert isinstance(new_device, torch.device) + self.device = new_device + if self.model is not None: + self.model.to(self.device) + + def duplicate_from_hyper_parameter(self, hp: dict, model=None, restricted=True): + """ + The function of duplicating a new instance from the given hyperparameter. + + Parameters + ---------- + hp: ``dict``. + The hyperparameter used in the new instance. + + model: The model used in the new instance of trainer. + + restricted: ``bool``. + If False(True), the hyperparameter should (not) be updated from origin hyperparameter. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A new instance of trainer. 
+ + """ + if not restricted: + origin_hp = deepcopy(self.hyperparams) + origin_hp.update(hp) + hp = origin_hp + if model is None: + model = self.model + model = model.from_hyper_parameter( + dict( + [ + x + for x in hp.items() + if x[0] in [y["parameterName"] for y in model.space] + ] + ) + ) + + ret = self.__class__( + model=model, + num_features=self.num_features, + num_classes=self.num_classes, + optimizer=self.optimizer, + lr=hp["lr"], + max_epoch=hp["max_epoch"], + early_stopping_round=hp["early_stopping_round"], + device=self.device, + weight_decay=hp["weight_decay"], + feval=self.feval, + init=True, + *self.args, + **self.kwargs + ) + + return ret + + def set_feval(self, feval): + # """Set the evaluation metrics.""" + self.feval = get_feval(feval) + + @property + def hyper_parameter_space(self): + # """Get the space of hyperparameter.""" + return self.space + + @hyper_parameter_space.setter + def hyper_parameter_space(self, space): + # """Set the space of hyperparameter.""" + self.space = space + LinkPredictionTrainer.space = space + + def get_hyper_parameter(self): + # """Get the hyperparameter in this trainer.""" + return self.hyperparams + + def get_link_labels(self, pos_edge_index, neg_edge_index): + E = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(E, dtype=torch.float, device=self.device) + link_labels[:pos_edge_index.size(1)] = 1. 
+ return link_labels \ No newline at end of file From 07d2329476aee23dc7343fe300af0cf3e2c73db1 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Mon, 26 Apr 2021 08:06:43 +0000 Subject: [PATCH 07/19] black style --- autogl/datasets/utils.py | 18 +- autogl/module/model/_model_registry.py | 2 +- autogl/module/model/base.py | 115 ++-- autogl/module/model/gcn.py | 74 +-- autogl/module/model/graph_sage.py | 60 ++- autogl/module/train/base.py | 138 ++--- autogl/module/train/evaluation.py | 23 +- .../module/train/graph_classification_full.py | 18 +- .../node_classification_sampled_trainer.py | 503 +++++++++--------- .../sampling/sampler/graphsaint_sampler.py | 56 +- .../sampling/sampler/neighbor_sampler.py | 84 +-- autogl/solver/classifier/node_classifier.py | 4 +- examples/graph_cv.py | 2 +- 13 files changed, 608 insertions(+), 489 deletions(-) diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index adc3923..b0708db 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -179,7 +179,9 @@ def random_splits_mask_class( return dataset -def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False): +def graph_cross_validation( + dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False +): r"""Cross validation for graph classification data, returning one fold with specific idx in autogl.datasets or pyg.Dataloader(default) Parameters @@ -197,7 +199,9 @@ def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42, s random_state for sklearn.model_selection.StratifiedKFold """ if stratify: - skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) + skf = StratifiedKFold( + n_splits=n_splits, shuffle=shuffle, random_state=random_seed + ) else: skf = KFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) idx_list = [] @@ -318,7 +322,9 @@ def graph_random_splits(dataset, train_ratio=0.2, val_ratio=0.4, seed=None): return dataset -def 
graph_get_split(dataset, mask="train", is_loader=True, batch_size=128, num_workers = 0): +def graph_get_split( + dataset, mask="train", is_loader=True, batch_size=128, num_workers=0 +): r"""Get train/test dataset/dataloader after cross validation. Parameters @@ -340,7 +346,11 @@ def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128, num_w dataset, "%s_split" % (mask) ), "Given dataset do not have %s split" % (mask) if is_loader: - return DataLoader(getattr(dataset, "%s_split" % (mask)), batch_size=batch_size, num_workers = num_workers) + return DataLoader( + getattr(dataset, "%s_split" % (mask)), + batch_size=batch_size, + num_workers=num_workers, + ) else: return getattr(dataset, "%s_split" % (mask)) diff --git a/autogl/module/model/_model_registry.py b/autogl/module/model/_model_registry.py index d8270eb..14aa2d9 100644 --- a/autogl/module/model/_model_registry.py +++ b/autogl/module/model/_model_registry.py @@ -14,7 +14,7 @@ def register_model(name): ) MODEL_DICT[name] = cls return cls - + return register_model_cls diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index 33cc5a9..965c306 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -9,6 +9,7 @@ import typing as _typing import torch import torch.nn.functional as F from copy import deepcopy + base_approach_logger: logging.Logger = logging.getLogger("BaseModel") @@ -49,7 +50,11 @@ class BaseModel: def to(self, device): if isinstance(device, (str, torch.device)): self.device = device - if hasattr(self, "model") and self.model is not None and isinstance(self.model, torch.nn.Module): + if ( + hasattr(self, "model") + and self.model is not None + and isinstance(self.model, torch.nn.Module) + ): self.model.to(self.device) return self @@ -95,28 +100,28 @@ class _BaseBaseModel: designed to implement some basic functionality of BaseModel. 
-- Designed by ZiXin Sun """ + @classmethod def __formulate_device( - cls, device: _typing.Union[str, torch.device] = ... + cls, device: _typing.Union[str, torch.device] = ... ) -> torch.device: - if ( - type(device) == torch.device or - (type(device) == str and device.strip().lower() != "auto") + if type(device) == torch.device or ( + type(device) == str and device.strip().lower() != "auto" ): return torch.device(device) elif torch.cuda.is_available() and torch.cuda.device_count() > 0: return torch.device("cuda") else: return torch.device("cpu") - + @property def device(self) -> torch.device: return self.__device - + @device.setter def device(self, __device: _typing.Union[str, torch.device, None]): self.__device: torch.device = self.__formulate_device(__device) - + @property def model(self) -> _typing.Optional[torch.nn.Module]: if self._model is None: @@ -124,19 +129,18 @@ class _BaseBaseModel: "property of model NOT initialized before accessing" ) return self._model - + @model.setter def model(self, _model: torch.nn.Module) -> None: if not isinstance(_model, torch.nn.Module): raise TypeError( - "the property of model MUST be an instance of " - "torch.nn.Module" + "the property of model MUST be an instance of " "torch.nn.Module" ) self._model = _model - + def _initialize(self): raise NotImplementedError - + def initialize(self) -> bool: """ Initialize the model in case that the model has NOT been initialized @@ -147,7 +151,7 @@ class _BaseBaseModel: self.__is_initialized = True return True return False - + # def to(self, *args, **kwargs): # """ # Due to the signature of to() method in class BaseApproach @@ -161,17 +165,18 @@ class _BaseBaseModel: # :return: self # """ # return super(_BaseBaseModel, self).to(*args, **kwargs) - + def forward(self, *args, **kwargs): if self.model is not None and isinstance(self.model, torch.nn.Module): return self.model(*args, **kwargs) else: raise NotImplementedError - + def __init__( - self, model: _typing.Optional[torch.nn.Module] 
= None, - initialize: bool = False, - device: _typing.Union[str, torch.device] = ... + self, + model: _typing.Optional[torch.nn.Module] = None, + initialize: bool = False, + device: _typing.Union[str, torch.device] = ..., ): if type(initialize) != bool: raise TypeError @@ -188,64 +193,65 @@ class _BaseModel(_BaseBaseModel, BaseModel): The upcoming root base class for Model, i.e. BaseModel -- Designed by ZiXin Sun """ + # todo: Deprecate and remove the legacy class "BaseModel", # then rename this class to "BaseModel", # correspondingly, this class will no longer extend # the legacy class "BaseModel" after the removal. def _initialize(self): raise NotImplementedError - + def to(self, device: torch.device): self.device = device if self.model is not None and isinstance(self.model, torch.nn.Module): self.model.to(self.device) return super().to(device) - + @property def space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: # todo: deprecate and remove in future major version return self.__hyper_parameter_space - + @property def hyper_parameter_space(self): return self.__hyper_parameter_space - + @hyper_parameter_space.setter def hyper_parameter_space( - self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]] ): self.__hyper_parameter_space = space - + @property def hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: return self.__hyper_parameter - + @hyper_parameter.setter def hyper_parameter(self, _hyper_parameter: _typing.Dict[str, _typing.Any]): if not isinstance(_hyper_parameter, dict): raise TypeError self.__hyper_parameter = _hyper_parameter - + def get_hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: """ todo: consider deprecating this trivial getter method in the future :return: copied hyper parameter """ return copy.deepcopy(self.__hyper_parameter) - + def __init__( - self, model: _typing.Optional[torch.nn.Module] = None, - initialize: bool = False, - 
hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., - hyper_parameter: _typing.Dict[str, _typing.Any] = ..., - device: _typing.Union[str, torch.device] = ... + self, + model: _typing.Optional[torch.nn.Module] = None, + initialize: bool = False, + hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., + hyper_parameter: _typing.Dict[str, _typing.Any] = ..., + device: _typing.Union[str, torch.device] = ..., ): if type(initialize) != bool: raise TypeError super(_BaseModel, self).__init__(model, initialize, device) - if ( - hyper_parameter_space != Ellipsis and - isinstance(hyper_parameter_space, _typing.Sequence) + if hyper_parameter_space != Ellipsis and isinstance( + hyper_parameter_space, _typing.Sequence ): self.__hyper_parameter_space: _typing.Sequence[ _typing.Dict[str, _typing.Any] @@ -266,27 +272,30 @@ class _BaseModel(_BaseBaseModel, BaseModel): class ClassificationModel(_BaseModel): def _initialize(self): raise NotImplementedError - + def from_hyper_parameter( - self, hyper_parameter: _typing.Dict[str, _typing.Any] + self, hyper_parameter: _typing.Dict[str, _typing.Any] ) -> "ClassificationModel": new_model: ClassificationModel = self.__class__( num_features=self.num_features, num_classes=self.num_classes, device=self.device, - init=False + init=False, ) _hyper_parameter = self.hyper_parameter _hyper_parameter.update(hyper_parameter) new_model.hyper_parameter = _hyper_parameter new_model.initialize() return new_model - + def __init__( - self, num_features: int = ..., num_classes: int = ..., - num_graph_features: int = ..., - device: _typing.Union[str, torch.device] = ..., - init: bool = False, **kwargs + self, + num_features: int = ..., + num_classes: int = ..., + num_graph_features: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, + **kwargs ): if "initialize" in kwargs: del kwargs["initialize"] @@ -308,11 +317,11 @@ class ClassificationModel(_BaseModel): self.__num_graph_features: int = 0 else: 
self.__num_graph_features: int = 0 - + @property def num_classes(self) -> int: return self.__num_classes - + @num_classes.setter def num_classes(self, __num_classes: int): if type(__num_classes) != int: @@ -320,11 +329,11 @@ class ClassificationModel(_BaseModel): if not __num_classes > 0: raise ValueError self.__num_classes = __num_classes if __num_classes > 0 else 0 - + @property def num_features(self) -> int: return self.__num_features - + @num_features.setter def num_features(self, __num_features: int): if type(__num_features) != int: @@ -332,27 +341,27 @@ class ClassificationModel(_BaseModel): if not __num_features > 0: raise ValueError self.__num_features = __num_features if __num_features > 0 else 0 - + def get_num_classes(self) -> int: # todo: consider replacing with property with getter and setter return self.__num_classes - + def set_num_classes(self, num_classes: int) -> None: # todo: consider replacing with property with getter and setter if type(num_classes) != int: raise TypeError self.__num_classes = num_classes if num_classes > 0 else 0 - + def get_num_features(self) -> int: # todo: consider replacing with property with getter and setter return self.__num_features - + def set_num_features(self, num_features: int): # todo: consider replacing with property with getter and setter if type(num_features) != int: raise TypeError self.__num_features = num_features if num_features > 0 else 0 - + def set_num_graph_features(self, num_graph_features: int): # todo: consider replacing with property with getter and setter if type(num_graph_features) != int: diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 4fa3594..73b91e0 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -11,9 +11,12 @@ LOGGER = get_logger("GCNModel") class GCN(torch.nn.Module): def __init__( - self, num_features: int, num_classes: int, - hidden_features: _typing.Sequence[int], - dropout: float, activation_name: str + self, + num_features: int, + 
num_classes: int, + hidden_features: _typing.Sequence[int], + dropout: float, + activation_name: str, ): super().__init__() self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() @@ -25,31 +28,33 @@ class GCN(torch.nn.Module): ) ) else: - self.__convolution_layers.append(torch_geometric.nn.GCNConv( - num_features, hidden_features[0], add_self_loops=False - )) + self.__convolution_layers.append( + torch_geometric.nn.GCNConv( + num_features, hidden_features[0], add_self_loops=False + ) + ) for i in range(len(hidden_features)): self.__convolution_layers.append( torch_geometric.nn.GCNConv( hidden_features[i], hidden_features[i + 1] - ) if i + 1 < len(hidden_features) - else torch_geometric.nn.GCNConv( - hidden_features[i], num_classes ) + if i + 1 < len(hidden_features) + else torch_geometric.nn.GCNConv(hidden_features[i], num_classes) ) self.__dropout: float = dropout self.__activation_name: str = activation_name - + def __layer_wise_forward(self, data): # todo: Implement this forward method # in case that data.edge_indexes property is provided # for Layer-wise and Node-wise sampled training raise NotImplementedError - + def __basic_forward( - self, x: torch.Tensor, - edge_index: torch.Tensor, - edge_weight: _typing.Optional[torch.Tensor] = None + self, + x: torch.Tensor, + edge_index: torch.Tensor, + edge_weight: _typing.Optional[torch.Tensor] = None, ) -> torch.Tensor: for layer_index in range(len(self.__convolution_layers)): x: torch.Tensor = self.__convolution_layers[layer_index]( @@ -57,31 +62,32 @@ class GCN(torch.nn.Module): ) if layer_index + 1 < len(self.__convolution_layers): x = activate_func(x, self.__activation_name) - x = torch.nn.functional.dropout(x, p=self.__dropout, training=self.training) + x = torch.nn.functional.dropout( + x, p=self.__dropout, training=self.training + ) return torch.nn.functional.log_softmax(x, dim=1) - + def forward(self, data) -> torch.Tensor: - if ( - hasattr(data, "edge_indexes") and - getattr(data, 
"edge_indexes") is not None - ): + if hasattr(data, "edge_indexes") and getattr(data, "edge_indexes") is not None: return self.__layer_wise_forward(data) else: if not (hasattr(data, "x") and hasattr(data, "edge_index")): raise AttributeError if not ( - type(getattr(data, "x")) == torch.Tensor and - type(getattr(data, "edge_index")) == torch.Tensor + type(getattr(data, "x")) == torch.Tensor + and type(getattr(data, "edge_index")) == torch.Tensor ): raise TypeError x: torch.Tensor = getattr(data, "x") edge_index: torch.LongTensor = getattr(data, "edge_index") if ( - hasattr(data, "edge_weight") and - type(getattr(data, "edge_weight")) == torch.Tensor and - getattr(data, "edge_weight").size() == (edge_index.size(1),) + hasattr(data, "edge_weight") + and type(getattr(data, "edge_weight")) == torch.Tensor + and getattr(data, "edge_weight").size() == (edge_index.size(1),) ): - edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight") + edge_weight: _typing.Optional[torch.Tensor] = getattr( + data, "edge_weight" + ) else: edge_weight: _typing.Optional[torch.Tensor] = None return self.__basic_forward(x, edge_index, edge_weight) @@ -120,18 +126,22 @@ class AutoGCN(ClassificationModel): """ def __init__( - self, num_features: int = ..., num_classes: int = ..., - device: _typing.Union[str, torch.device] = ..., - init: bool = False, **kwargs + self, + num_features: int = ..., + num_classes: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, + **kwargs ) -> None: super(AutoGCN, self).__init__( num_features, num_classes, device=device, init=init, **kwargs ) - + def _initialize(self): self.model = GCN( - self.num_features, self.num_classes, + self.num_features, + self.num_classes, self.hyper_parameter.get("hidden"), self.hyper_parameter.get("dropout"), - self.hyper_parameter.get("act") + self.hyper_parameter.get("act"), ).to(self.device) diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 
fbe8f6e..2fe0450 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -9,19 +9,23 @@ from .base import BaseModel, activate_func class GraphSAGE(torch.nn.Module): def __init__( - self, num_features: int, num_classes: int, - hidden_features: _typing.Sequence[int], - dropout: float, activation_name: str, - aggr: str = "mean", **kwargs + self, + num_features: int, + num_classes: int, + hidden_features: _typing.Sequence[int], + dropout: float, + activation_name: str, + aggr: str = "mean", + **kwargs ): super(GraphSAGE, self).__init__() if type(aggr) != str: raise TypeError if aggr not in ("add", "max", "mean"): aggr = "mean" - + self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() - + num_layers: int = len(hidden_features) + 1 if num_layers == 1: self.__convolution_layers.append( @@ -42,7 +46,7 @@ class GraphSAGE(torch.nn.Module): ) self.__dropout: float = dropout self.__activation_name: str = activation_name - + def __full_forward(self, data): x: torch.Tensor = getattr(data, "x") edge_index: torch.Tensor = getattr(data, "edge_index") @@ -52,24 +56,26 @@ class GraphSAGE(torch.nn.Module): x = activate_func(x, self.__activation_name) x = F.dropout(x, p=self.__dropout, training=self.training) return F.log_softmax(x, dim=1) - + def __distributed_forward(self, data): x: torch.Tensor = getattr(data, "x") edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") if len(edge_indexes) != len(self.__convolution_layers): raise AttributeError for layer_index in range(len(self.__convolution_layers)): - x: torch.Tensor = self.__convolution_layers[layer_index](x, edge_indexes[layer_index]) + x: torch.Tensor = self.__convolution_layers[layer_index]( + x, edge_indexes[layer_index] + ) if layer_index + 1 < len(self.__convolution_layers): x = activate_func(x, self.__activation_name) x = F.dropout(x, p=self.__dropout, training=self.training) return F.log_softmax(x, dim=1) - + def forward(self, data): if ( - 
hasattr(data, "edge_indexes") and - isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and - len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) + hasattr(data, "edge_indexes") + and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) + and len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) ): return self.__distributed_forward(data) else: @@ -79,15 +85,20 @@ class GraphSAGE(torch.nn.Module): @register_model("sage") class AutoSAGE(BaseModel): def __init__( - self, num_features: int = 1, num_classes: int = 1, - device: _typing.Optional[torch.device] = torch.device("cpu"), - init: bool = False, **kwargs + self, + num_features: int = 1, + num_classes: int = 1, + device: _typing.Optional[torch.device] = torch.device("cpu"), + init: bool = False, + **kwargs ): super(AutoSAGE, self).__init__(init) self.__num_features: int = num_features self.__num_classes: int = num_classes - self.__device: torch.device = device if device is not None else torch.device("cpu") - + self.__device: torch.device = ( + device if device is not None else torch.device("cpu") + ) + self.hyperparams = { "num_layers": 3, "hidden": [64, 32], @@ -97,26 +108,27 @@ class AutoSAGE(BaseModel): } self.params = { "num_features": self.__num_features, - "num_classes": self.__num_classes + "num_classes": self.__num_classes, } - + self._model: GraphSAGE = GraphSAGE( self.__num_features, self.__num_classes, [64, 32], 0.5, "relu" ) - + self._initialized: bool = False if init: self.initialize() - + @property def model(self) -> GraphSAGE: return self._model - + def initialize(self): """ Initialize model """ if not self._initialized: self._model: GraphSAGE = GraphSAGE( - self.__num_features, self.__num_classes, + self.__num_features, + self.__num_classes, hidden_features=self.hyperparams["hidden"], activation_name=self.hyperparams["act"], **self.hyperparams diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index 7ec47f1..af26fa4 100644 --- 
a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -83,15 +83,14 @@ class EarlyStopping: class BaseTrainer: def __init__( - self, - model: BaseModel, - device: _typing.Union[torch.device, str], - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Acc,), - loss: str = "nll_loss", + self, + model: BaseModel, + device: _typing.Union[torch.device, str], + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): """ The basic trainer. @@ -108,47 +107,50 @@ class BaseTrainer: """ super().__init__() self.model: BaseModel = model - if ( - type(device) == torch.device or - (type(device) == str and device.lower() != "auto") + if type(device) == torch.device or ( + type(device) == str and device.lower() != "auto" ): self.__device: torch.device = torch.device(device) else: self.__device: torch.device = torch.device( - "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + "cuda" + if torch.cuda.is_available() and torch.cuda.device_count() > 0 + else "cpu" ) self.init: bool = init self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(feval) self.loss: str = loss - + @property def device(self) -> torch.device: return self.__device - + @device.setter def device(self, __device: _typing.Union[torch.device, str]): - if ( - type(__device) == torch.device or - (type(__device) == str and __device.lower() != "auto") + if type(__device) == torch.device or ( + type(__device) == str and __device.lower() != "auto" ): self.__device: torch.device = torch.device(__device) else: self.__device: torch.device = torch.device( - "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + "cuda" + if torch.cuda.is_available() and torch.cuda.device_count() > 0 + else "cpu" ) - + @property def feval(self) -> _typing.Sequence[_typing.Type[Evaluation]]: return 
self.__feval - + @feval.setter def feval( - self, _feval: _typing.Union[ - _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] + self, + _feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ], ): self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(_feval) - + def to(self, device: torch.device): """ Transfer the trainer to another device @@ -168,7 +170,9 @@ class BaseTrainer: """Get auto model used in trainer.""" raise NotImplementedError() - def get_feval(self, return_major: bool = False) -> _typing.Union[ + def get_feval( + self, return_major: bool = False + ) -> _typing.Union[ _typing.Type[Evaluation], _typing.Sequence[_typing.Type[Evaluation]] ]: """ @@ -212,7 +216,7 @@ class BaseTrainer: pass def duplicate_from_hyper_parameter( - self, hp, model: _typing.Optional[BaseModel] = ... + self, hp, model: _typing.Optional[BaseModel] = ... ) -> "BaseTrainer": """Create a new trainer with the given hyper parameter.""" raise NotImplementedError() @@ -322,30 +326,30 @@ class BaseTrainer: class _BaseClassificationTrainer(BaseTrainer): """ Base class of trainer for classification tasks """ - + def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - device: _typing.Union[torch.device, str, None] = "auto", - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Acc,), - loss: str = "nll_loss", + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + device: _typing.Union[torch.device, str, None] = "auto", + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): self.num_features: int = num_features self.num_classes: int = num_classes - if ( - type(device) == torch.device or - (type(device) == str and device.lower() != "auto") + if type(device) == torch.device 
or ( + type(device) == str and device.lower() != "auto" ): __device: torch.device = torch.device(device) else: __device: torch.device = torch.device( - "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + "cuda" + if torch.cuda.is_available() and torch.cuda.device_count() > 0 + else "cpu" ) if type(model) == str: _model: BaseModel = ModelUniversalRegistry.get_model(model)( @@ -357,22 +361,23 @@ class _BaseClassificationTrainer(BaseTrainer): raise TypeError( f"Model argument only support str or BaseModel, got ${model}." ) - super(_BaseClassificationTrainer, self).__init__(_model, __device, init, feval, loss) + super(_BaseClassificationTrainer, self).__init__( + _model, __device, init, feval, loss + ) class BaseNodeClassificationTrainer(_BaseClassificationTrainer): def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - device: _typing.Union[torch.device, str, None] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Acc,), - loss: str = "nll_loss", + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + device: _typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): super(BaseNodeClassificationTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss @@ -381,18 +386,17 @@ class BaseNodeClassificationTrainer(_BaseClassificationTrainer): class BaseGraphClassificationTrainer(_BaseClassificationTrainer): def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - num_graph_features: int = 0, - device: _typing.Union[torch.device, str, None] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = 
(Acc,), - loss: str = "nll_loss", + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + num_graph_features: int = 0, + device: _typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): self.num_graph_features: int = num_graph_features super(BaseGraphClassificationTrainer, self).__init__( diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py index 63fd81a..c3ed320 100644 --- a/autogl/module/train/evaluation.py +++ b/autogl/module/train/evaluation.py @@ -13,12 +13,12 @@ class Evaluation: def get_eval_name() -> str: """ Expected to return the name of this evaluation method """ raise NotImplementedError - + @staticmethod def is_higher_better() -> bool: """ Expected to return whether this evaluation method is higher better (bool) """ return True - + @staticmethod def evaluate(predict, label) -> float: """ Expected to return the evaluation result (float) """ @@ -39,6 +39,7 @@ def register_evaluate(*name): ) EVALUATE_DICT[n] = cls return cls + return register_evaluate_cls @@ -54,22 +55,26 @@ def get_feval(feval): class EvaluationUniversalRegistry: @classmethod - def register_evaluation(cls, *names) -> _typing.Callable[ - [_typing.Type[Evaluation]], _typing.Type[Evaluation] - ]: + def register_evaluation( + cls, *names + ) -> _typing.Callable[[_typing.Type[Evaluation]], _typing.Type[Evaluation]]: def _register_evaluation( - _class: _typing.Type[Evaluation] + _class: _typing.Type[Evaluation], ) -> _typing.Type[Evaluation]: for n in names: if n in EVALUATE_DICT: - raise ValueError("Cannot register duplicate evaluator ({})".format(n)) + raise ValueError( + "Cannot register duplicate evaluator ({})".format(n) + ) if not issubclass(_class, Evaluation): raise ValueError( - "Evaluator ({}: {}) must extend Evaluation".format(n, cls.__name__) + "Evaluator ({}: {}) must 
extend Evaluation".format( + n, cls.__name__ + ) ) EVALUATE_DICT[n] = _class return _class - + return _register_evaluation diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index a04bf18..f6b32ae 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -100,7 +100,7 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): self.batch_size = batch_size if batch_size is not None else 64 self.num_workers = num_workers if num_workers is not None else 4 if self.num_workers > 0: - mp.set_start_method('fork', force=True) + mp.set_start_method("fork", force=True) self.early_stopping_round = ( early_stopping_round if early_stopping_round is not None else 100 ) @@ -305,10 +305,10 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): """ train_loader = utils.graph_get_split( - dataset, "train", batch_size=self.batch_size, num_workers = self.num_workers + dataset, "train", batch_size=self.batch_size, num_workers=self.num_workers ) # DataLoader(dataset['train'], batch_size=self.batch_size) valid_loader = utils.graph_get_split( - dataset, "val", batch_size=self.batch_size, num_workers = self.num_workers + dataset, "val", batch_size=self.batch_size, num_workers=self.num_workers ) # DataLoader(dataset['val'], batch_size=self.batch_size) self.train_only(train_loader, valid_loader) if keep_valid_result and valid_loader: @@ -332,7 +332,9 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): ------- The prediction result of ``predict_proba``. 
""" - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._predict_proba(loader, in_log_format=True).max(1)[1] def predict_proba(self, dataset, mask="test", in_log_format=False): @@ -353,7 +355,9 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): ------- The prediction result. """ - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._predict_proba(loader, in_log_format) def _predict_proba(self, loader, in_log_format=False): @@ -436,7 +440,9 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): res: The evaluation result on the given dataset. """ - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._evaluate(loader, feval) def _evaluate(self, loader, feval=None): diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 542b03a..cf91fc6 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -21,79 +21,90 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): for automatically training the node classification tasks with neighbour sampling """ - + def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - optimizer: _typing.Union[ - 
_typing.Type[torch.optim.Optimizer], str, None - ] = None, - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = None, + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, ) -> None: if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer elif type(optimizer) == str: if optimizer.lower() == "adam": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam elif optimizer.lower() == "adam" + "w": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.AdamW + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.AdamW elif optimizer.lower() == "sgd": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.SGD + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.SGD else: - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam else: - self._optimizer_class: 
_typing.Type[torch.optim.Optimizer] = torch.optim.Adam - + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + self._learning_rate: float = lr if lr > 0 else 1e-4 self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 - + self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") - + self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 - early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 - self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) + early_stopping_round: int = ( + early_stopping_round if early_stopping_round > 0 else 1e2 + ) + self._early_stopping = EarlyStopping( + patience=early_stopping_round, verbose=False + ) super(NodeClassificationNeighborSamplingTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) - + self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = [] - - self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] - + + self._hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] + self.__initialized: bool = False if init: self.initialize() - + def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": if self.__initialized: return self self.model.initialize() self.__initialized = True return self - + def get_model(self) -> BaseModel: return self.model - - def __train_only( - self, data - ) -> "NodeClassificationNeighborSamplingTrainer": + + def __train_only(self, data) -> "NodeClassificationNeighborSamplingTrainer": """ The function of training on the given dataset and mask. 
:param data: data of a specific graph @@ -102,38 +113,41 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.model.parameters(), - lr=self._learning_rate, weight_decay=self._weight_decay + lr=self._learning_rate, + weight_decay=self._weight_decay, ) if type(self._lr_scheduler_type) == str: if self._lr_scheduler_type.lower() == "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.StepLR = \ - torch.optim.lr_scheduler.StepLR( - optimizer, step_size=100, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.StepLR = ( + torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = \ + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=[30, 80], gamma=0.1 ) + ) elif self._lr_scheduler_type.lower() == "exponential" + "lr": - lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = \ - torch.optim.lr_scheduler.ExponentialLR( - optimizer, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( + torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): - lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = \ + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) - + ) + train_sampler: NeighborSampler = NeighborSampler( data, 
self.__sampling_sizes, batch_size=20 ) - + for current_epoch in range(self._max_epoch): self.model.model.train() """ epoch start """ @@ -147,20 +161,20 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) loss_function = getattr(torch.nn.functional, self.loss) loss: torch.Tensor = loss_function( - prediction[target_node_indexes], - data.y[target_node_indexes] + prediction[target_node_indexes], data.y[target_node_indexes] ) loss.backward() optimizer.step() - + if lr_scheduler is not None: lr_scheduler.step() - + """ Validate performance """ if hasattr(data, "val_mask") and getattr(data, "val_mask") is not None: - validation_results: _typing.Sequence[float] = \ - self.evaluate((data,), "val", [self.feval[0]]) - + validation_results: _typing.Sequence[float] = self.evaluate( + (data,), "val", [self.feval[0]] + ) + if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] else: @@ -172,7 +186,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): if hasattr(data, "val_mask") and data.val_mask is not None: self._early_stopping.load_checkpoint(self.model.model) return self - + def __predict_only(self, data): """ The function of predicting on the given data. @@ -184,7 +198,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): with torch.no_grad(): prediction = self.model.model(data) return prediction - + def train(self, dataset, keep_valid_result: bool = True): """ The function of training on the given dataset and keeping valid result. 
@@ -198,10 +212,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] self._valid_result_prob: torch.Tensor = prediction[data.val_mask] self._valid_score = self.evaluate(dataset, "val") - + def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, - in_log_format: bool = False + self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False ) -> torch.Tensor: """ The function of predicting the probability on the given dataset. @@ -224,29 +237,22 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): _mask = data.test_mask result = self.__predict_only(data)[_mask] return result if in_log_format else torch.exp(result) - + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: - return self.predict_proba( - dataset, mask, in_log_format=True - ).max(1)[1] - + return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] + def get_valid_predict(self) -> torch.Tensor: return self._valid_result - + def get_valid_predict_proba(self) -> torch.Tensor: return self._valid_result_prob - + def get_valid_score(self, return_major: bool = True): if return_major: - return ( - self._valid_score[0], - self.feval[0].is_higher_better() - ) + return (self._valid_score[0], self.feval[0].is_higher_better()) else: - return ( - self._valid_score, [f.is_higher_better() for f in self.feval] - ) - + return (self._valid_score, [f.is_higher_better() for f in self.feval]) + def get_name_with_hp(self) -> str: name = "-".join( [ @@ -259,25 +265,24 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ] ) name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) ) return name - + def 
evaluate( - self, - dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = None + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: data = dataset[0] data = data.to(self.device) @@ -295,53 +300,60 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): _mask = data.test_mask prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) y_ground_truth = data.y[_mask] - + results = [] for f in _feval: try: - results.append( - f.evaluate(prediction_probability, y_ground_truth) - ) + results.append(f.evaluate(prediction_probability, y_ground_truth)) except: results.append( - f.evaluate(prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy()) + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), + ) ) return results - + def to(self, device: torch.device): self.device = device if self.model is not None: self.model.to(self.device) - + def duplicate_from_hyper_parameter( - self, hp: _typing.Dict[str, _typing.Any], - model: _typing.Union[BaseModel, str, None] = None + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Union[BaseModel, str, None] = None, ) -> "NodeClassificationNeighborSamplingTrainer": - + if model is None or not isinstance(model, BaseModel): model = self.model model = model.from_hyper_parameter( dict( [ - x for x in hp.items() + x + for x in hp.items() if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] ] ) ) - + return NodeClassificationNeighborSamplingTrainer( - model, self.num_features, self.num_classes, + model, + self.num_features, + self.num_classes, self._optimizer_class, - device=self.device, init=True, - feval=self.feval, loss=self.loss, + device=self.device, + init=True, + feval=self.feval, + 
loss=self.loss, lr_scheduler_type=self._lr_scheduler_type, - **hp + **hp, ) - + @property def hyper_parameter_space(self): return self._hyper_parameter_space - + @hyper_parameter_space.setter def hyper_parameter_space(self, hp_space): self._hyper_parameter_space = hp_space @@ -350,50 +362,63 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): @register_trainer("NodeClassificationGraphSAINTTrainer") class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): def __init__( - self, - model: _typing.Union[BaseModel], - num_features: int, - num_classes: int, - optimizer: _typing.Union[ - _typing.Type[torch.optim.Optimizer], str, None - ], - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs + self, + model: _typing.Union[BaseModel], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None], + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, ) -> None: if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer elif type(optimizer) == str: if optimizer.lower() == "adam": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam elif 
optimizer.lower() == "adam" + "w": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.AdamW + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.AdamW elif optimizer.lower() == "sgd": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.SGD + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.SGD else: - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam else: - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam self._learning_rate: float = lr if lr > 0 else 1e-4 self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 - early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 - self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) - + early_stopping_round: int = ( + early_stopping_round if early_stopping_round > 0 else 1e2 + ) + self._early_stopping = EarlyStopping( + patience=early_stopping_round, verbose=False + ) + # Assign an empty initial hyper parameter space - self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] - + self._hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] + self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () @@ -401,7 +426,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): super(NodeClassificationGraphSAINTTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) - + """ Set hyper parameters """ if 
"num_subgraphs" not in kwargs: raise KeyError @@ -427,23 +452,23 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): self.__sampling_method_identifier: str = kwargs.get("sampling_method") if self.__sampling_method_identifier.lower() not in ("node", "edge"): self.__sampling_method_identifier: str = "node" - + self.__is_initialized: bool = False if init: self.initialize() - + def initialize(self): if self.__is_initialized: return self self.model.initialize() self.__is_initialized = True return self - + def to(self, device: torch.device): self.device = device if self.model is not None: self.model.to(self.device) - + def get_model(self): return self.model @@ -456,34 +481,37 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.parameters(), - lr=self._learning_rate, weight_decay=self._weight_decay + lr=self._learning_rate, + weight_decay=self._weight_decay, ) if type(self._lr_scheduler_type) == str: if self._lr_scheduler_type.lower() == "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.StepLR = \ - torch.optim.lr_scheduler.StepLR( - optimizer, step_size=100, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.StepLR = ( + torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = \ + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=[30, 80], gamma=0.1 ) + ) elif self._lr_scheduler_type.lower() == "exponential" + "lr": - lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = \ - torch.optim.lr_scheduler.ExponentialLR( - optimizer, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( + torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == 
"ReduceLROnPlateau".lower(): - lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = \ + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) - + ) + if self.__sampling_method_identifier.lower() == "edge": sub_graph_sampler = GraphSAINTRandomEdgeSampler( self.__sampling_budget, self.__num_subgraphs @@ -492,60 +520,58 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): sub_graph_sampler = GraphSAINTRandomNodeSampler( self.__sampling_budget, self.__num_subgraphs ) - + for current_epoch in range(self._max_epoch): self.model.model.train() """ epoch start """ """ Sample sub-graphs """ sub_graph_set = sub_graph_sampler.sample(data) - sub_graphs_loader: torch.utils.data.DataLoader = \ + sub_graphs_loader: torch.utils.data.DataLoader = ( torch.utils.data.DataLoader(sub_graph_set) + ) integral_alpha: torch.Tensor = getattr(sub_graph_set, "alpha") integral_lambda: torch.Tensor = getattr(sub_graph_set, "lambda") """ iterate sub-graphs """ for sub_graph_data in sub_graphs_loader: optimizer.zero_grad() - sampled_edge_indexes: torch.Tensor = \ - sub_graph_data.sampled_edge_indexes - sampled_node_indexes: torch.Tensor = \ - sub_graph_data.sampled_node_indexes - sampled_train_mask: torch.Tensor = \ - sub_graph_data.train_mask - + sampled_edge_indexes: torch.Tensor = sub_graph_data.sampled_edge_indexes + sampled_node_indexes: torch.Tensor = sub_graph_data.sampled_node_indexes + sampled_train_mask: torch.Tensor = sub_graph_data.train_mask + sampled_alpha = integral_alpha[sampled_edge_indexes] sub_graph_data.edge_weight = 1 / 
sampled_alpha prediction: torch.Tensor = self.model.model(sub_graph_data) - + if not hasattr(torch.nn.functional, self.loss): - raise TypeError( - f"PyTorch does not support loss type {self.loss}" - ) + raise TypeError(f"PyTorch does not support loss type {self.loss}") loss_func = getattr(torch.nn.functional, self.loss) unreduced_loss: torch.Tensor = loss_func( prediction[sampled_train_mask], data.y[sampled_train_mask], - reduction="none" + reduction="none", ) - + sampled_lambda: torch.Tensor = integral_lambda[sampled_node_indexes] sampled_train_lambda: torch.Tensor = sampled_lambda[sampled_train_mask] assert unreduced_loss.size() == sampled_train_lambda.size() - loss_weighted_sum: torch.Tensor = \ - torch.sum(unreduced_loss / sampled_train_lambda) + loss_weighted_sum: torch.Tensor = torch.sum( + unreduced_loss / sampled_train_lambda + ) loss_weighted_sum.backward() optimizer.step() - + if lr_scheduler is not None: lr_scheduler.step() - + """ Validate performance """ if ( - hasattr(data, "val_mask") and - type(getattr(data, "val_mask")) == torch.Tensor + hasattr(data, "val_mask") + and type(getattr(data, "val_mask")) == torch.Tensor ): - validation_results: _typing.Sequence[float] = \ - self.evaluate((data,), "val", [self.feval[0]]) + validation_results: _typing.Sequence[float] = self.evaluate( + (data,), "val", [self.feval[0]] + ) if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] else: @@ -557,7 +583,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): if hasattr(data, "val_mask") and data.val_mask is not None: self._early_stopping.load_checkpoint(self.model.model) return self - + def __predict_only(self, data): """ The function of predicting on the given data. 
@@ -569,10 +595,9 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): with torch.no_grad(): predicted_x: torch.Tensor = self.model.model(data) return predicted_x - + def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, - in_log_format=False + self, dataset, mask: _typing.Optional[str] = None, in_log_format=False ): """ The function of predicting the probability on the given dataset. @@ -595,17 +620,17 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): _mask: torch.Tensor = data.test_mask result = self.__predict_only(data)[_mask] return result if in_log_format else torch.exp(result) - + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] - + def evaluate( - self, dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = None + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: data = dataset[0] data = data.to(self.device) @@ -624,24 +649,22 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): _mask: torch.Tensor = data.test_mask else: _mask: torch.Tensor = data.test_mask - prediction_probability: torch.Tensor = \ - self.predict_proba(dataset, mask) + prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) y_ground_truth: torch.Tensor = data.y[_mask] - + eval_results = [] for f in _feval: try: - eval_results.append( - f.evaluate(prediction_probability, y_ground_truth) - ) + eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) except: eval_results.append( f.evaluate( - prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy() + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), ) ) 
return eval_results - + def train(self, dataset, keep_valid_result: bool = True): """ The function of training on the given dataset and keeping valid result. @@ -655,36 +678,36 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] self._valid_result_prob: torch.Tensor = prediction[data.val_mask] self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") - + def get_valid_predict(self) -> torch.Tensor: return self._valid_result - + def get_valid_predict_proba(self) -> torch.Tensor: return self._valid_result_prob - - def get_valid_score(self, return_major: bool = True) -> _typing.Tuple[ + + def get_valid_score( + self, return_major: bool = True + ) -> _typing.Tuple[ _typing.Union[float, _typing.Sequence[float]], - _typing.Union[bool, _typing.Sequence[bool]] + _typing.Union[bool, _typing.Sequence[bool]], ]: if return_major: return self._valid_score[0], self.feval[0].is_higher_better() else: - return ( - self._valid_score, [f.is_higher_better() for f in self.feval] - ) - + return (self._valid_score, [f.is_higher_better() for f in self.feval]) + @property def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: return self._hyper_parameter_space - + @hyper_parameter_space.setter def hyper_parameter_space( - self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] ) -> None: if not isinstance(hp_space, _typing.Sequence): raise TypeError self._hyper_parameter_space = hp_space - + def get_name_with_hp(self) -> str: name = "-".join( [ @@ -697,36 +720,42 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ] ) name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in 
self.model.get_hyper_parameter().items() + ] + ) ) return name - + def duplicate_from_hyper_parameter( - self, hp: _typing.Dict[str, _typing.Any], - model: _typing.Optional[BaseModel] = None + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None, ) -> "NodeClassificationGraphSAINTTrainer": if model is None or not isinstance(model, BaseModel): model: BaseModel = self.model model = model.from_hyper_parameter( dict( [ - x for x in hp.items() + x + for x in hp.items() if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] ] ) ) return NodeClassificationGraphSAINTTrainer( - model, self.num_features, self.num_classes, + model, + self.num_features, + self.num_classes, self._optimizer_class, - device=self.device, init=True, - feval=self.feval, loss=self.loss, + device=self.device, + init=True, + feval=self.feval, + loss=self.loss, lr_scheduler_type=self._lr_scheduler_type, - **hp + **hp, ) diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py index 9c5c978..27434e2 100644 --- a/autogl/module/train/sampling/sampler/graphsaint_sampler.py +++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py @@ -10,10 +10,10 @@ class _SubGraphSet(torch.utils.data.Dataset): self.__remaining_args: _typing.Sequence[_typing.Any] = args for key, value in kwargs.items(): setattr(self, key, value) - + def __len__(self) -> int: return len(self.__graphs) - + def __getitem__(self, index: int) -> _typing.Any: if not 0 <= index < len(self.__graphs): raise IndexError @@ -22,8 +22,12 @@ class _SubGraphSet(torch.utils.data.Dataset): class _GraphSAINTSubGraphSampler: def __init__( - self, sampler_class: _typing.Type[torch_geometric.data.GraphSAINTSampler], - budget: int, num_graphs: int = 1, walk_length: int = 1, num_workers: int = 0 + self, + sampler_class: _typing.Type[torch_geometric.data.GraphSAINTSampler], + budget: int, + num_graphs: int = 1, + walk_length: int = 1, 
+ num_workers: int = 0, ): """ :param sampler_class: class of torch_geometric.data.GraphSAINTSampler @@ -40,7 +44,7 @@ class _GraphSAINTSubGraphSampler: self.__num_graphs: int = num_graphs self.__walk_length: int = walk_length self.__num_workers: int = num_workers if num_workers > 0 else 0 - + def sample(self, _integral_data) -> _SubGraphSet: """ :param _integral_data: conventional data for an integral graph @@ -49,18 +53,23 @@ class _GraphSAINTSubGraphSampler: data = copy.copy(_integral_data) data.sampled_node_indexes = torch.arange(data.num_nodes, dtype=torch.int64) data.sampled_edge_indexes = torch.arange(data.num_edges, dtype=torch.int64) - if type(self.__sampler_class) == torch_geometric.data.GraphSAINTRandomWalkSampler: - _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = \ + if ( + type(self.__sampler_class) + == torch_geometric.data.GraphSAINTRandomWalkSampler + ): + _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = ( torch_geometric.data.GraphSAINTRandomWalkSampler( - data, self.__budget, self.__walk_length, self.__num_graphs, - num_workers=self.__num_workers + data, + self.__budget, + self.__walk_length, + self.__num_graphs, + num_workers=self.__num_workers, ) + ) else: - _sampler: torch_geometric.data.GraphSAINTSampler = \ - self.__sampler_class( - data, self.__budget, self.__num_graphs, - num_workers=self.__num_workers - ) + _sampler: torch_geometric.data.GraphSAINTSampler = self.__sampler_class( + data, self.__budget, self.__num_graphs, num_workers=self.__num_workers + ) """ Sample sub-graphs """ datalist: list = [d for d in _sampler] """ Compute the normalization """ @@ -73,12 +82,16 @@ class _GraphSAINTSubGraphSampler: [sub_graph.sampled_edge_indexes for sub_graph in datalist] ) for current_sampled_node_index in concatenated_sampled_nodes.unique(): - node_sampled_count[current_sampled_node_index] = \ - torch.where(concatenated_sampled_nodes == current_sampled_node_index)[0].size(0) + node_sampled_count[current_sampled_node_index] 
= torch.where( + concatenated_sampled_nodes == current_sampled_node_index + )[0].size(0) for current_sampled_edge_index in concatenated_sampled_edges.unique(): - edge_sampled_count[current_sampled_edge_index] = \ - torch.where(concatenated_sampled_edges == current_sampled_edge_index)[0].size(0) - _alpha: torch.Tensor = edge_sampled_count / node_sampled_count[data.edge_index[1]] + edge_sampled_count[current_sampled_edge_index] = torch.where( + concatenated_sampled_edges == current_sampled_edge_index + )[0].size(0) + _alpha: torch.Tensor = ( + edge_sampled_count / node_sampled_count[data.edge_index[1]] + ) _alpha[torch.isnan(_alpha) | torch.isinf(_alpha)] = 0 _lambda: torch.Tensor = node_sampled_count / self.__num_graphs return _SubGraphSet(datalist, **{"alpha": _alpha, "lambda": _lambda}) @@ -101,5 +114,8 @@ class GraphSAINTRandomEdgeSampler(_GraphSAINTSubGraphSampler): class GraphSAINTRandomWalkSampler(_GraphSAINTSubGraphSampler): def __init__(self, edge_budget: int, num_graphs: int = 1, walk_length: int = 4): super(GraphSAINTRandomWalkSampler, self).__init__( - torch_geometric.data.GraphSAINTRandomWalkSampler, edge_budget, num_graphs, walk_length + torch_geometric.data.GraphSAINTRandomWalkSampler, + edge_budget, + num_graphs, + walk_length, ) diff --git a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index 0e62a74..53a6b7c 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -9,37 +9,41 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): class _NodeIndexesDataset(torch.utils.data.Dataset): def __init__(self, node_indexes): self.__node_indexes: _typing.Sequence[int] = node_indexes - + def __getitem__(self, index) -> int: if not 0 <= index < len(self.__node_indexes): raise IndexError("Index out of range") else: return self.__node_indexes[index] - + def __len__(self) -> int: return 
len(self.__node_indexes) - + def __init__( - self, data, - sampling_sizes: _typing.Sequence[int], - target_node_indexes: _typing.Optional[_typing.Sequence[int]] = None, - batch_size: _typing.Optional[int] = 1, - *args, **kwargs + self, + data, + sampling_sizes: _typing.Sequence[int], + target_node_indexes: _typing.Optional[_typing.Sequence[int]] = None, + batch_size: _typing.Optional[int] = 1, + *args, + **kwargs ): self._data = data self.__sampling_sizes: _typing.Sequence[int] = sampling_sizes - + if not ( - target_node_indexes is not None and - isinstance(target_node_indexes, _typing.Sequence) + target_node_indexes is not None + and isinstance(target_node_indexes, _typing.Sequence) ): if hasattr(data, "train_mask"): - target_node_indexes: _typing.Sequence[int] = \ - torch.where(getattr(data, "train_mask"))[0] + target_node_indexes: _typing.Sequence[int] = torch.where( + getattr(data, "train_mask") + )[0] else: - target_node_indexes: _typing.Sequence[int] = \ - list(np.arange(0, data.x.shape[0])) - + target_node_indexes: _typing.Sequence[int] = list( + np.arange(0, data.x.shape[0]) + ) + self.__edge_index_map: _typing.Dict[ int, _typing.Union[torch.Tensor, _typing.Sequence[int]] ] = {} @@ -47,9 +51,11 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): super(NeighborSampler, self).__init__( self._NodeIndexesDataset(target_node_indexes), batch_size=batch_size if batch_size > 0 else 1, - collate_fn=self.__sample, *args, **kwargs + collate_fn=self.__sample, + *args, + **kwargs ) - + def __init_edge_index_map(self): self.__edge_index_map.clear() all_edge_index: torch.Tensor = getattr(self._data, "edge_index") @@ -58,12 +64,12 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): self.__edge_index_map[target_node_index] = torch.where( all_edge_index[1] == target_node_index )[0] - + def __iter__(self): return super(NeighborSampler, self).__iter__() - + def __sample( - self, target_nodes_indexes: _typing.List[int] + self, 
target_nodes_indexes: _typing.List[int] ) -> _typing.Tuple[torch.Tensor, _typing.List[torch.Tensor]]: """ Sample a sub-graph with neighborhood sampling @@ -71,14 +77,15 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): """ original_edge_index: torch.Tensor = self._data.edge_index edges_indexes: _typing.List[torch.Tensor] = [] - + current_target_nodes_indexes: _typing.List[int] = target_nodes_indexes for current_sampling_size in self.__sampling_sizes: current_edge_index: _typing.Optional[torch.Tensor] = None for current_target_node_index in current_target_nodes_indexes: if current_target_node_index in self.__edge_index_map: - all_indexes: torch.Tensor = \ - self.__edge_index_map.get(current_target_node_index) + all_indexes: torch.Tensor = self.__edge_index_map.get( + current_target_node_index + ) else: all_indexes: torch.Tensor = torch.where( original_edge_index[1] == current_target_node_index @@ -89,25 +96,38 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): ) if current_edge_index is not None: current_edge_index: torch.Tensor = torch.cat( - [current_edge_index, original_edge_index[:, sampled_indexes]], dim=1 + [ + current_edge_index, + original_edge_index[:, sampled_indexes], + ], + dim=1, ) else: - current_edge_index: torch.Tensor = original_edge_index[:, sampled_indexes] + current_edge_index: torch.Tensor = original_edge_index[ + :, sampled_indexes + ] else: all_indexes_list = all_indexes.tolist() random.shuffle(all_indexes_list) - shuffled_indexes_list: _typing.List[int] = \ - all_indexes_list[0: current_sampling_size] + shuffled_indexes_list: _typing.List[int] = all_indexes_list[ + 0:current_sampling_size + ] if current_edge_index is not None: current_edge_index: torch.Tensor = torch.cat( - [current_edge_index, original_edge_index[:, shuffled_indexes_list]], dim=1 + [ + current_edge_index, + original_edge_index[:, shuffled_indexes_list], + ], + dim=1, ) else: - current_edge_index: torch.Tensor = 
original_edge_index[:, shuffled_indexes_list] + current_edge_index: torch.Tensor = original_edge_index[ + :, shuffled_indexes_list + ] edges_indexes.append(current_edge_index) - + if len(edges_indexes) < len(self.__sampling_sizes): next_target_nodes_indexes: torch.Tensor = current_edge_index[0].unique() current_target_nodes_indexes = next_target_nodes_indexes.tolist() - + return torch.tensor(target_nodes_indexes), edges_indexes[::-1] diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 1d41d1a..cd0ed86 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -70,12 +70,10 @@ class AutoNodeClassifier(BaseClassifier): Default ``auto``. """ - # pylint: disable=W0102 - def __init__( self, feature_module=None, - graph_models=["gat", "gcn"], + graph_models=("gat", "gcn"), hpo_module="anneal", ensemble_module="voting", max_evals=50, diff --git a/examples/graph_cv.py b/examples/graph_cv.py index 49e409a..2dd938c 100644 --- a/examples/graph_cv.py +++ b/examples/graph_cv.py @@ -27,7 +27,7 @@ if __name__ == "__main__": choices=["mutag", "imdb-b", "imdb-m", "proteins", "collab"], ) parser.add_argument( - "--configs", default="../configs/graph_classification.yaml", help="config files" + "--configs", default="../configs/graphclf_full.yml", help="config files" ) parser.add_argument("--device", type=int, default=0, help="device to run on") parser.add_argument("--seed", type=int, default=0, help="random seed") From ee6e58e802c351b80eb37abec7648cb1d9646616 Mon Sep 17 00:00:00 2001 From: lihy96 Date: Thu, 29 Apr 2021 14:18:18 +0800 Subject: [PATCH 08/19] roc auc (gcn on cora) ~0.911 --- examples/link_prediction.py | 57 +++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 examples/link_prediction.py diff --git a/examples/link_prediction.py b/examples/link_prediction.py new file mode 100644 index 0000000..65236a4 --- /dev/null +++ 
b/examples/link_prediction.py @@ -0,0 +1,57 @@ +import os.path as osp +import sys +sys.path.insert(0, '../') +import torch +from torch_geometric.datasets import Planetoid +import torch_geometric.transforms as T +from sklearn.metrics import accuracy_score as acc +from sklearn.metrics import roc_auc_score +from autogl.module.train import LinkPredictionTrainer +import numpy as np +from torch_geometric.utils import train_test_split_edges + +dataset = 'Cora' +path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) +dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures()) + +print('len', len(dataset)) +print('num_class', dataset.num_classes) +print('num_node_features', dataset.num_node_features) + +a = [] +for _ in range(10): + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + data = dataset[0] + + data = data.to(device) + data.train_mask = data.val_mask = data.test_mask = data.y = None + data = train_test_split_edges(data) + + clf = LinkPredictionTrainer( + 'gcn', + num_features=dataset.num_node_features, + num_classes=dataset.num_classes, + max_epoch=100, + early_stopping_round=101, + feval=['auc'], + lr=0.01, + weight_decay=0, + lr_scheduler_type=None, + ) + clf.train([data], keep_valid_result=True) + print(clf.valid_score, end=',') + y = clf.predict([data], 'test') + y_ = y.cpu().numpy() + # acc_ = y.eq(data.y[data.test_mask]).sum().item() / data.test_mask.sum().item() + # print(acc_, end=',') + + pos_edge_index = data[f'test_pos_edge_index'] + neg_edge_index = data[f'test_neg_edge_index'] + link_labels = clf.get_link_labels(pos_edge_index, neg_edge_index) + label = link_labels.cpu().numpy() + ret = roc_auc_score(label, y_) + print(ret) + a.append(ret) +print(np.mean(a), np.std(a)) + + From 9cbd8af51aee2ea2647f572fdcc3e9b15355622c Mon Sep 17 00:00:00 2001 From: lihy96 Date: Thu, 29 Apr 2021 14:42:16 +0800 Subject: [PATCH 09/19] add encode and decode in gat and graphsage --- autogl/module/model/gat.py | 18 
++++++++++++++++++ autogl/module/model/graphsage.py | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/autogl/module/model/gat.py b/autogl/module/model/gat.py index 4a5a3f2..9f20530 100644 --- a/autogl/module/model/gat.py +++ b/autogl/module/model/gat.py @@ -82,6 +82,24 @@ class GAT(torch.nn.Module): return F.log_softmax(x, dim=1) + def encode(self, data): + x = data.x + for i in range(self.num_layer - 1): + x = self.convs[i](x, data.train_pos_edge_index) + if i != self.num_layer - 2: + x = activate_func(x, self.args["act"]) + # x = F.dropout(x, p=self.args["dropout"], training=self.training) + return x + + def decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + return logits + + def decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + @register_model("gat") class AutoGAT(BaseModel): diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py index 6c492a5..3472896 100644 --- a/autogl/module/model/graphsage.py +++ b/autogl/module/model/graphsage.py @@ -159,6 +159,24 @@ class GraphSAGE(torch.nn.Module): return F.log_softmax(x, dim=1) + def encode(self, data): + x = data.x + for i in range(self.num_layer - 1): + x = self.convs[i](x, data.train_pos_edge_index) + if i != self.num_layer - 2: + x = activate_func(x, self.args["act"]) + # x = F.dropout(x, p=self.args["dropout"], training=self.training) + return x + + def decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + return logits + + def decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + @register_model("sage") class AutoSAGE(BaseModel): From 3e9a11fa36be299dd3a9d4629687f0d5ff32ccbb Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 29 
Apr 2021 11:25:17 +0000 Subject: [PATCH 10/19] change the default num_workers to 0 --- autogl/module/train/graph_classification_full.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index f6b32ae..1f4bb4a 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -98,7 +98,7 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): self.lr = lr if lr is not None else 1e-4 self.max_epoch = max_epoch if max_epoch is not None else 100 self.batch_size = batch_size if batch_size is not None else 64 - self.num_workers = num_workers if num_workers is not None else 4 + self.num_workers = num_workers if num_workers is not None else 0 if self.num_workers > 0: mp.set_start_method("fork", force=True) self.early_stopping_round = ( From 5602bf794e15021b5721b4a8847f7611d069bd63 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Mon, 24 May 2021 21:45:30 +0800 Subject: [PATCH 11/19] adjust for new model --- autogl/module/train/link_prediction.py | 8 ++++---- examples/link_prediction.py | 9 ++------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py index fdc7844..3adcb95 100644 --- a/autogl/module/train/link_prediction.py +++ b/autogl/module/train/link_prediction.py @@ -1,13 +1,13 @@ -from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping +from . 
import register_trainer, BaseTrainer, Evaluation import torch from torch.optim.lr_scheduler import StepLR import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel -from .evaluate import Logloss, Acc, Auc +from .evaluation import Auc, EVALUATE_DICT +from .base import EarlyStopping from typing import Union from copy import deepcopy from torch_geometric.utils import negative_sampling -from torch_geometric.utils import train_test_split_edges from ...utils import get_logger @@ -73,7 +73,7 @@ class LinkPredictionTrainer(BaseTrainer): *args, **kwargs ): - super(LinkPredictionTrainer, self).__init__(model) + super().__init__(model, device, init, feval, loss) self.loss_type = loss diff --git a/examples/link_prediction.py b/examples/link_prediction.py index 65236a4..a9c9825 100644 --- a/examples/link_prediction.py +++ b/examples/link_prediction.py @@ -2,17 +2,12 @@ import os.path as osp import sys sys.path.insert(0, '../') import torch -from torch_geometric.datasets import Planetoid -import torch_geometric.transforms as T -from sklearn.metrics import accuracy_score as acc -from sklearn.metrics import roc_auc_score +from autogl.datasets import build_dataset_from_name from autogl.module.train import LinkPredictionTrainer import numpy as np from torch_geometric.utils import train_test_split_edges -dataset = 'Cora' -path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) -dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures()) +dataset = build_dataset_from_name('cora') print('len', len(dataset)) print('num_class', dataset.num_classes) From e2321300dfe3d1793799bf6bcc83c2f1578caba6 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sat, 29 May 2021 14:38:56 +0800 Subject: [PATCH 12/19] update lp solver --- autogl/datasets/utils.py | 7 + autogl/module/model/__init__.py | 3 +- autogl/module/model/gcn.py | 30 +- autogl/module/model/graph_sage.py | 2 +- autogl/module/model/graphsage.py | 2 +- autogl/module/train/__init__.py | 2 + 
autogl/module/train/base.py | 16 + autogl/module/train/link_prediction.py | 71 +- autogl/solver/classifier/link_predictor.py | 722 +++++++++++++++++++++ configs/lp_gcn_benchmark_small.yml | 61 ++ examples/link_prediction.py | 1 + examples/link_prediction_solver.py | 90 +++ 12 files changed, 944 insertions(+), 63 deletions(-) create mode 100644 autogl/solver/classifier/link_predictor.py create mode 100644 configs/lp_gcn_benchmark_small.yml create mode 100644 examples/link_prediction_solver.py diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index b0708db..7b5679c 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -2,9 +2,16 @@ from pdb import set_trace import torch import numpy as np from torch_geometric.data import DataLoader +from torch_geometric.utils import train_test_split_edges from sklearn.model_selection import StratifiedKFold, KFold +def split_edges(dataset, train_ratio, val_ratio): + datas = [data for data in dataset] + for i in range(len(datas)): + datas[i] = train_test_split_edges(datas[i], val_ratio, 1 - train_ratio - val_ratio) + dataset.data, dataset.slices = dataset.collate(datas) + def get_label_number(dataset): r"""Get the number of labels in this dataset as dict.""" label_num = {} diff --git a/autogl/module/model/__init__.py b/autogl/module/model/__init__.py index ef2a92d..42bdbc4 100644 --- a/autogl/module/model/__init__.py +++ b/autogl/module/model/__init__.py @@ -1,7 +1,8 @@ from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model from .base import BaseModel from .topkpool import AutoTopkpool -from .graph_sage import AutoSAGE +#from .graph_sage import AutoSAGE +from .graphsage import AutoSAGE from .gcn import AutoGCN from .gat import AutoGAT from .gin import AutoGIN diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 276a9e2..71ce274 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -3,7 +3,7 @@ import torch.nn.functional import 
torch_geometric import typing as _typing from . import register_model -from .base import activate_func, ClassificationModel +from .base import BaseModel, activate_func, ClassificationModel from ...utils import get_logger LOGGER = get_logger("GCNModel") @@ -94,10 +94,11 @@ class GCN(torch.nn.Module): def encode(self, data): x = data.x - for i in range(self.num_layer - 1): - x = self.convs[i](x, data.train_pos_edge_index) - if i != self.num_layer - 2: - x = activate_func(x, self.args["act"]) + num_layers = len(self.__convolution_layers) + for i in range(num_layers - 1): + x = self.__convolution_layers[i](x, data.train_pos_edge_index) + if i != num_layers - 2: + x = activate_func(x, self.__activation_name) # x = F.dropout(x, p=self.args["dropout"], training=self.training) return x @@ -112,8 +113,10 @@ class GCN(torch.nn.Module): +#@register_model("gcn") +#class AutoGCN(ClassificationModel): @register_model("gcn") -class AutoGCN(ClassificationModel): +class AutoGCN(BaseModel): r""" AutoGCN. 
The model used in this automodel is GCN, i.e., the graph convolutional network from the @@ -152,9 +155,10 @@ class AutoGCN(ClassificationModel): init: bool = False, **kwargs ) -> None: - super(AutoGCN, self).__init__( - num_features, num_classes, device=device, init=init, **kwargs - ) + super().__init__() + self.num_features = num_features + self.num_classes = num_classes + self.device = device self.params = { "features_num": self.num_features, @@ -210,11 +214,11 @@ class AutoGCN(ClassificationModel): if init is True: self.initialize() - def _initialize(self): + def initialize(self): self.model = GCN( self.num_features, self.num_classes, - self.hyper_parameter.get("hidden"), - self.hyper_parameter.get("dropout"), - self.hyper_parameter.get("act"), + self.hyperparams.get("hidden"), + self.hyperparams.get("dropout"), + self.hyperparams.get("act"), ).to(self.device) diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 2fe0450..a064585 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -82,7 +82,7 @@ class GraphSAGE(torch.nn.Module): return self.__full_forward(data) -@register_model("sage") +# @register_model("sage") class AutoSAGE(BaseModel): def __init__( self, diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py index b9245a6..5b09817 100644 --- a/autogl/module/model/graphsage.py +++ b/autogl/module/model/graphsage.py @@ -190,7 +190,7 @@ class GraphSAGE(torch.nn.Module): return (prob_adj > 0).nonzero(as_tuple=False).t() -# @register_model("sage") +@register_model("sage") class AutoSAGE(BaseModel): r""" AutoSAGE. 
The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 35f52b4..8e70ff2 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -4,6 +4,7 @@ from .base import ( Evaluation, BaseNodeClassificationTrainer, BaseGraphClassificationTrainer, + BaseLinkPredictionTrainer ) def register_trainer(name): @@ -30,6 +31,7 @@ __all__ = [ "Evaluation", "BaseGraphClassificationTrainer", "BaseNodeClassificationTrainer", + "BaseLinkPredictionTrainer", "GraphClassificationFullTrainer", "NodeClassificationFullTrainer", "LinkPredictionTrainer", diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index af26fa4..76c6a61 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -402,3 +402,19 @@ class BaseGraphClassificationTrainer(_BaseClassificationTrainer): super(BaseGraphClassificationTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) + +class BaseLinkPredictionTrainer(_BaseClassificationTrainer): + def __init__( + self, + model: _typing.Union[BaseModel, str], + num_features: int, + device: _typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", + ): + super(BaseLinkPredictionTrainer, self).__init__( + model, num_features, 2, device, init, feval, loss + ) \ No newline at end of file diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py index 3adcb95..5c17047 100644 --- a/autogl/module/train/link_prediction.py +++ b/autogl/module/train/link_prediction.py @@ -1,10 +1,10 @@ -from . import register_trainer, BaseTrainer, Evaluation +from . 
import register_trainer, Evaluation import torch from torch.optim.lr_scheduler import StepLR import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel from .evaluation import Auc, EVALUATE_DICT -from .base import EarlyStopping +from .base import EarlyStopping, BaseLinkPredictionTrainer from typing import Union from copy import deepcopy from torch_geometric.utils import negative_sampling @@ -23,8 +23,8 @@ def get_feval(feval): raise ValueError("feval argument of type", type(feval), "is not supported!") -@register_trainer("LinkPrediction") -class LinkPredictionTrainer(BaseTrainer): +@register_trainer("LinkPredictionFull") +class LinkPredictionTrainer(BaseLinkPredictionTrainer): """ The link prediction trainer. @@ -58,34 +58,21 @@ class LinkPredictionTrainer(BaseTrainer): def __init__( self, - model: Union[BaseModel, str], - num_features, - num_classes, + model: Union[BaseModel, str] = None, + num_features=None, optimizer=None, - lr=None, - max_epoch=None, - early_stopping_round=None, + lr=1e-4, + max_epoch=100, + early_stopping_round=101, weight_decay=1e-4, - device=None, + device='auto', init=True, feval=[Auc], loss="binary_cross_entropy_with_logits", *args, **kwargs ): - super().__init__(model, device, init, feval, loss) - - self.loss_type = loss - - if device is None: - device = "cpu" - - # init model - if isinstance(model, str): - assert model in MODEL_DICT, "Cannot parse model name " + model - self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) - elif isinstance(model, BaseModel): - self.model = model + super().__init__(model, num_features, device, init, feval, loss) if type(optimizer) == str and optimizer.lower() == "adam": self.optimizer = torch.optim.Adam @@ -94,19 +81,12 @@ class LinkPredictionTrainer(BaseTrainer): else: self.optimizer = torch.optim.Adam - self.num_features = num_features - self.num_classes = num_classes - self.lr = lr if lr is not None else 1e-4 - self.max_epoch = max_epoch if max_epoch is not None 
else 100 - self.early_stopping_round = ( - early_stopping_round if early_stopping_round is not None else 100 - ) + self.lr = lr + self.max_epoch = max_epoch + self.early_stopping_round = early_stopping_round self.device = device self.args = args self.kwargs = kwargs - - self.feval = get_feval(feval) - self.weight_decay = weight_decay self.early_stopping = EarlyStopping( @@ -118,8 +98,6 @@ class LinkPredictionTrainer(BaseTrainer): self.valid_score = None self.initialized = False - self.num_features = num_features - self.num_classes = num_classes self.device = device self.space = [ @@ -152,7 +130,7 @@ class LinkPredictionTrainer(BaseTrainer): "scalingType": "LOG", }, ] - self.space += self.model.space + LinkPredictionTrainer.space = self.space self.hyperparams = { @@ -161,7 +139,6 @@ class LinkPredictionTrainer(BaseTrainer): "lr": self.lr, "weight_decay": self.weight_decay, } - self.hyperparams = {**self.hyperparams, **self.model.get_hyper_parameter()} if init is True: self.initialize() @@ -171,6 +148,8 @@ class LinkPredictionTrainer(BaseTrainer): if self.initialized is True: return self.initialized = True + self.model.set_num_classes(self.num_classes) + self.model.set_num_features(self.num_features) self.model.initialize() def get_model(self): @@ -203,7 +182,7 @@ class LinkPredictionTrainer(BaseTrainer): data = data.to(self.device) # mask = data.train_mask if train_mask is None else train_mask optimizer = self.optimizer( - self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay ) scheduler = StepLR(optimizer, step_size=100, gamma=0.1) for epoch in range(1, self.max_epoch): @@ -219,10 +198,10 @@ class LinkPredictionTrainer(BaseTrainer): link_logits = self.model.model.decode(z, data.train_pos_edge_index, neg_edge_index) link_labels = self.get_link_labels(data.train_pos_edge_index, neg_edge_index) # loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) - if hasattr(F, 
self.loss_type): - loss = getattr(F, self.loss_type)(link_logits, link_labels) + if hasattr(F, self.loss): + loss = getattr(F, self.loss)(link_logits, link_labels) else: - raise TypeError("PyTorch does not support loss type {}".format(self.loss_type)) + raise TypeError("PyTorch does not support loss type {}".format(self.loss)) loss.backward() optimizer.step() @@ -440,10 +419,7 @@ class LinkPredictionTrainer(BaseTrainer): res = [] for f in feval: - try: - res.append(f.evaluate(link_probs, link_labels)) - except: - res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy())) + res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy())) if return_signle: return res[0] return res @@ -480,6 +456,8 @@ class LinkPredictionTrainer(BaseTrainer): hp = origin_hp if model is None: model = self.model + model.set_num_classes(self.num_classes) + model.set_num_features(self.num_features) model = model.from_hyper_parameter( dict( [ @@ -493,7 +471,6 @@ class LinkPredictionTrainer(BaseTrainer): ret = self.__class__( model=model, num_features=self.num_features, - num_classes=self.num_classes, optimizer=self.optimizer, lr=hp["lr"], max_epoch=hp["max_epoch"], diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py new file mode 100644 index 0000000..d2ef6b4 --- /dev/null +++ b/autogl/solver/classifier/link_predictor.py @@ -0,0 +1,722 @@ +""" +Auto Classfier for Node Classification +""" +import time +import json + +from copy import deepcopy + +import torch +import numpy as np +import yaml + +from .base import BaseClassifier +from ..base import _parse_hp_space, _initialize_single_model +from ...module.feature import FEATURE_DICT +from ...module.model import MODEL_DICT, BaseModel +from ...module.train import TRAINER_DICT, BaseLinkPredictionTrainer +from ...module.train import get_feval +from ..utils import Leaderboard, set_seed +from ...datasets import utils +from ...utils import get_logger + +LOGGER = 
get_logger("LinkPredictor") + + +class AutoLinkPredictor(BaseClassifier): + """ + Auto Link Predictor. + + Used to automatically solve the link prediction problems. + + Parameters + ---------- + feature_module: autogl.module.feature.BaseFeatureEngineer or str or None + The (name of) auto feature engineer used to process the given dataset. Default ``deepgl``. + Disable feature engineer by setting it to ``None``. + + graph_models: list of autogl.module.model.BaseModel or list of str + The (name of) models to be optimized as backbone. Default ``['gat', 'gcn']``. + + hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None + The (name of) hpo module used to search for best hyper parameters. Default ``anneal``. + Disable hpo by setting it to ``None``. + + ensemble_module: autogl.module.ensemble.BaseEnsembler or str or None + The (name of) ensemble module used to ensemble the multi-models found. Default ``voting``. + Disable ensemble by setting it to ``None``. + + max_evals: int (Optional) + If given, will set the number eval times the hpo module will use. + Only be effective when hpo_module is ``str``. Default ``None``. + + trainer_hp_space: list of dict (Optional) + trainer hp space or list of trainer hp spaces configuration. + If a single trainer hp is given, will specify the hp space of trainer for every model. + If a list of trainer hp is given, will specify every model with corrsponding + trainer hp space. + Default ``None``. + + model_hp_spaces: list of list of dict (Optional) + model hp space configuration. + If given, will specify every hp space of every passed model. Default ``None``. + + size: int (Optional) + The max models ensemble module will use. Default ``None``. + + device: torch.device or str + The device where model will be running on. If set to ``auto``, will use gpu when available. + You can also specify the device by directly giving ``gpu`` or ``cuda:0``, etc. + Default ``auto``. 
+ """ + + def __init__( + self, + feature_module=None, + graph_models=("gat", "gcn"), + hpo_module="anneal", + ensemble_module="voting", + max_evals=50, + default_trainer=None, + trainer_hp_space=None, + model_hp_spaces=None, + size=4, + device="auto", + ): + + super().__init__( + feature_module=feature_module, + graph_models=graph_models, + hpo_module=hpo_module, + ensemble_module=ensemble_module, + max_evals=max_evals, + default_trainer=default_trainer or "LinkPredictionFull", + trainer_hp_space=trainer_hp_space, + model_hp_spaces=model_hp_spaces, + size=size, + device=device, + ) + + # data to be kept when fit + self.dataset = None + + def _init_graph_module( + self, graph_models, num_features, feval, device, loss + ) -> "AutoLinkPredictor": + # load graph network module + self.graph_model_list = [] + if isinstance(graph_models, list): + for model in graph_models: + if isinstance(model, str): + if model in MODEL_DICT: + self.graph_model_list.append( + MODEL_DICT[model]( + num_classes=1, + num_features=num_features, + device=device, + init=False, + ) + ) + else: + raise KeyError("cannot find model %s" % (model)) + elif isinstance(model, type) and issubclass(model, BaseModel): + self.graph_model_list.append( + model( + num_classes=1, + num_features=num_features, + device=device, + init=False, + ) + ) + elif isinstance(model, BaseModel): + # setup the hp of num_classes and num_features + model.set_num_classes(1) + model.set_num_features(num_features) + self.graph_model_list.append(model.to(device)) + elif isinstance(model, BaseLinkPredictionTrainer): + # receive a trainer list, put trainer to list + assert ( + model.get_model() is not None + ), "Passed trainer should contain a model" + model.model.set_num_classes(1) + model.model.set_num_features(num_features) + model.update_parameters( + num_classes=1, + num_features=num_features, + loss=loss, + feval=feval, + device=device, + ) + self.graph_model_list.append(model) + else: + raise KeyError("cannot find graph 
network %s." % (model)) + else: + raise ValueError( + "need graph network to be (list of) str or a BaseModel class/instance, get", + graph_models, + "instead.", + ) + + # wrap all model_cls with specified trainer + for i, model in enumerate(self.graph_model_list): + # set model hp space + if self._model_hp_spaces is not None: + if self._model_hp_spaces[i] is not None: + if isinstance(model, BaseLinkPredictionTrainer): + model.model.hyper_parameter_space = self._model_hp_spaces[i] + else: + model.hyper_parameter_space = self._model_hp_spaces[i] + # initialize trainer if needed + if isinstance(model, BaseModel): + name = ( + self._default_trainer + if isinstance(self._default_trainer, str) + else self._default_trainer[i] + ) + model = TRAINER_DICT[name]( + model=model, + num_features=num_features, + loss=loss, + feval=feval, + device=device, + init=False, + ) + # set trainer hp space + if self._trainer_hp_space is not None: + if isinstance(self._trainer_hp_space[0], list): + current_hp_for_trainer = self._trainer_hp_space[i] + else: + current_hp_for_trainer = self._trainer_hp_space + model.hyper_parameter_space = current_hp_for_trainer + self.graph_model_list[i] = model + + return self + + # pylint: disable=arguments-differ + def fit( + self, + dataset, + time_limit=-1, + inplace=False, + train_split=None, + val_split=None, + evaluation_method="infer", + seed=None, + ) -> "AutoLinkPredictor": + """ + Fit current solver on given dataset. + + Parameters + ---------- + dataset: torch_geometric.data.dataset.Dataset + The dataset needed to fit on. This dataset must have only one graph. + + time_limit: int + The time limit of the whole fit process (in seconds). If set below 0, + will ignore time limit. Default ``-1``. + + inplace: bool + Whether we process the given dataset in inplace manner. Default ``False``. + Set it to True if you want to save memory by modifying the given dataset directly. 
+ + train_split: float or int (Optional) + The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to + use default train/val/test split in dataset, please set this to ``None``. + Default ``None``. + + val_split: float or int (Optional) + The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want + to use default train/val/test split in dataset, please set this to ``None``. + Default ``None``. + + evaluation_method: (list of) str or autogl.module.train.evaluation + A (list of) evaluation method for current solver. If ``infer``, will automatically + determine. Default ``infer``. + + seed: int (Optional) + The random seed. If set to ``None``, will run everything at random. + Default ``None``. + + Returns + ------- + self: autogl.solver.AutoNodeClassifier + A reference of current solver. + """ + set_seed(seed) + + if time_limit < 0: + time_limit = 3600 * 24 + time_begin = time.time() + + # initialize leaderboard + if evaluation_method == "infer": + if hasattr(dataset, "metric"): + evaluation_method = [dataset.metric] + else: + num_of_label = dataset.num_classes + if num_of_label == 2: + evaluation_method = ["auc"] + else: + evaluation_method = ["acc"] + assert isinstance(evaluation_method, list) + evaluator_list = get_feval(evaluation_method) + + self.leaderboard = Leaderboard( + [e.get_eval_name() for e in evaluator_list], + {e.get_eval_name(): e.is_higher_better() for e in evaluator_list}, + ) + + # set up the dataset + if train_split is not None and val_split is not None: + utils.split_edges(dataset, train_split, val_split) + else: + assert all([hasattr(dataset.data, f'{name}') for name in [ + 'train_pos_edge_index', + 'train_neg_adj_mask', + 'val_pos_edge_index', + 'val_neg_edge_index', + 'test_pos_edge_index', + 'test_neg_edge_index' + ]]), ( + "The dataset has no default train/val split! Please manually pass " + "train and val ratio." 
+ ) + LOGGER.info("Use the default train/val/test ratio in given dataset") + + # feature engineering + if self.feature_module is not None: + dataset = self.feature_module.fit_transform(dataset, inplace=inplace) + + self.dataset = dataset + assert self.dataset[0].x is not None, ( + "Does not support fit on non node-feature dataset!" + " Please add node features to dataset or specify feature engineers that generate" + " node features." + ) + + # initialize graph networks + self._init_graph_module( + self.gml, + num_features=self.dataset[0].x.shape[1], + feval=evaluator_list, + device=self.runtime_device, + loss="binary_cross_entropy_with_logits" if not hasattr(dataset, "loss") else dataset.loss, + ) + + # train the models and tune hpo + result_valid = [] + names = [] + for idx, model in enumerate(self.graph_model_list): + time_for_each_model = (time_limit - time.time() + time_begin) / ( + len(self.graph_model_list) - idx + ) + if self.hpo_module is None: + model.initialize() + model.train(self.dataset, True) + optimized = model + else: + optimized, _ = self.hpo_module.optimize( + trainer=model, dataset=self.dataset, time_limit=time_for_each_model + ) + # to save memory, all the trainer derived will be mapped to cpu + optimized.to(torch.device("cpu")) + name = optimized.get_name_with_hp() + "_idx%d" % (idx) + names.append(name) + performance_on_valid, _ = optimized.get_valid_score(return_major=False) + result_valid.append(optimized.get_valid_predict_proba().cpu().numpy()) + self.leaderboard.insert_model_performance( + name, + dict( + zip( + [e.get_eval_name() for e in evaluator_list], + performance_on_valid, + ) + ), + ) + self.trained_models[name] = optimized + + # fit the ensemble model + if self.ensemble_module is not None: + pos_edge_index, neg_edge_index = self.dataset[0].val_pos_edge_index, self.dataset[0].val_neg_edge_index + E = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(E, dtype=torch.float) + 
link_labels[:pos_edge_index.size(1)] = 1.
+
+        performance = self.ensemble_module.fit(
+            result_valid,
+            link_labels.detach().cpu().numpy(),
+            names,
+            evaluator_list,
+            n_classes=dataset.num_classes,
+        )
+        self.leaderboard.insert_model_performance(
+            "ensemble",
+            dict(zip([e.get_eval_name() for e in evaluator_list], performance)),
+        )
+
+        return self
+
+    def fit_predict(
+        self,
+        dataset,
+        time_limit=-1,
+        inplace=False,
+        train_split=None,
+        val_split=None,
+        evaluation_method="infer",
+        use_ensemble=True,
+        use_best=True,
+        name=None,
+    ) -> np.ndarray:
+        """
+        Fit current solver on given dataset and return the predicted value.
+
+        Parameters
+        ----------
+        dataset: torch_geometric.data.dataset.Dataset
+            The dataset needed to fit on. This dataset must have only one graph.
+
+        time_limit: int
+            The time limit of the whole fit process (in seconds).
+            If set below 0, will ignore time limit. Default ``-1``.
+
+        inplace: bool
+            Whether we process the given dataset in inplace manner. Default ``False``.
+            Set it to True if you want to save memory by modifying the given dataset directly.
+
+        train_split: float or int (Optional)
+            The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to
+            use default train/val/test split in dataset, please set this to ``None``.
+            Default ``None``.
+
+        val_split: float or int (Optional)
+            The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want
+            to use default train/val/test split in dataset, please set this to ``None``.
+            Default ``None``.
+
+        balanced: bool
+            Whether to create the train/valid/test split in a balanced way.
+            If set to ``True``, the train/valid will have the same number of different classes.
+            Default ``False``.
+
+        evaluation_method: (list of) str or autogl.module.train.evaluation
+            A (list of) evaluation method for current solver. If ``infer``, will automatically
+            determine. Default ``infer``.
+
+        use_ensemble: bool
+            Whether to use ensemble to do the predict.
Default ``True``. + + use_best: bool + Whether to use the best single model to do the predict. Will only be effective when + ``use_ensemble`` is ``False``. + Default ``True``. + + name: str or None + The name of model used to predict. Will only be effective when ``use_ensemble`` and + ``use_best`` both are ``False``. + Default ``None``. + + Returns + ------- + result: np.ndarray + An array of shape ``(N,)``, where ``N`` is the number of test nodes. The prediction + on given dataset. + """ + self.fit( + dataset=dataset, + time_limit=time_limit, + inplace=inplace, + train_split=train_split, + val_split=val_split, + evaluation_method=evaluation_method, + ) + return self.predict( + dataset=dataset, + inplaced=inplace, + inplace=inplace, + use_ensemble=use_ensemble, + use_best=use_best, + name=name, + ) + + def predict_proba( + self, + dataset=None, + inplaced=False, + inplace=False, + use_ensemble=True, + use_best=True, + name=None, + mask="test", + ) -> np.ndarray: + """ + Predict the node probability. + + Parameters + ---------- + dataset: torch_geometric.data.dataset.Dataset or None + The dataset needed to predict. If ``None``, will use the processed dataset passed + to ``fit()`` instead. Default ``None``. + + inplaced: bool + Whether the given dataset is processed. Only be effective when ``dataset`` + is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``, and + you pass the dataset again to this method, you should set this argument to ``True``. + Otherwise ``False``. Default ``False``. + + inplace: bool + Whether we process the given dataset in inplace manner. Default ``False``. Set it to + True if you want to save memory by modifying the given dataset directly. + + use_ensemble: bool + Whether to use ensemble to do the predict. Default ``True``. + + use_best: bool + Whether to use the best single model to do the predict. Will only be effective when + ``use_ensemble`` is ``False``. Default ``True``. 
+ + name: str or None + The name of model used to predict. Will only be effective when ``use_ensemble`` and + ``use_best`` both are ``False``. Default ``None``. + + mask: str + The data split to give prediction on. Default ``test``. + + Returns + ------- + result: np.ndarray + An array of shape ``(N,C,)``, where ``N`` is the number of test nodes and ``C`` is + the number of classes. The prediction on given dataset. + """ + if dataset is None: + dataset = self.dataset + assert dataset is not None, ( + "Please execute fit() first before" " predicting on remembered dataset" + ) + elif not inplaced and self.feature_module is not None: + dataset = self.feature_module.transform(dataset, inplace=inplace) + + if use_ensemble: + LOGGER.info("Ensemble argument on, will try using ensemble model.") + + if not use_ensemble and use_best: + LOGGER.info( + "Ensemble argument off and best argument on, will try using best model." + ) + + if (use_ensemble and self.ensemble_module is not None) or ( + not use_best and name == "ensemble" + ): + # we need to get all the prediction of every model trained + predict_result = [] + names = [] + for model_name in self.trained_models: + predict_result.append( + self._predict_proba_by_name(dataset, model_name, mask) + ) + names.append(model_name) + return self.ensemble_module.ensemble(predict_result, names) + + if use_ensemble and self.ensemble_module is None: + LOGGER.warning( + "Cannot use ensemble because no ensebmle module is given. " + "Will use best model instead." + ) + + if use_best or (use_ensemble and self.ensemble_module is None): + # just return the best model we have found + name = self.leaderboard.get_best_model() + return self._predict_proba_by_name(dataset, name, mask) + + if name is not None: + # return model performance by name + return self._predict_proba_by_name(dataset, name, mask) + + LOGGER.error( + "No model name is given while ensemble and best arguments are off." 
+ ) + raise ValueError( + "You need to specify a model name if you do not want use ensemble and best model." + ) + + def _predict_proba_by_name(self, dataset, name, mask="test"): + self.trained_models[name].to(self.runtime_device) + predicted = ( + self.trained_models[name].predict_proba(dataset, mask=mask).cpu().numpy() + ) + self.trained_models[name].to(torch.device("cpu")) + return predicted + + def predict( + self, + dataset=None, + inplaced=False, + inplace=False, + use_ensemble=True, + use_best=True, + name=None, + mask="test", + ) -> np.ndarray: + """ + Predict the node class number. + + Parameters + ---------- + dataset: torch_geometric.data.dataset.Dataset or None + The dataset needed to predict. If ``None``, will use the processed dataset passed + to ``fit()`` instead. Default ``None``. + + inplaced: bool + Whether the given dataset is processed. Only be effective when ``dataset`` + is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``, + and you pass the dataset again to this method, you should set this argument + to ``True``. Otherwise ``False``. Default ``False``. + + inplace: bool + Whether we process the given dataset in inplace manner. Default ``False``. + Set it to True if you want to save memory by modifying the given dataset directly. + + use_ensemble: bool + Whether to use ensemble to do the predict. Default ``True``. + + use_best: bool + Whether to use the best single model to do the predict. Will only be effective + when ``use_ensemble`` is ``False``. Default ``True``. + + name: str or None + The name of model used to predict. Will only be effective when ``use_ensemble`` + and ``use_best`` both are ``False``. Default ``None``. + + mask: str + The data split to give prediction on. Default ``test``. + + Returns + ------- + result: np.ndarray + An array of shape ``(N,)``, where ``N`` is the number of test nodes. + The prediction on given dataset. 
+ """ + proba = self.predict_proba( + dataset, inplaced, inplace, use_ensemble, use_best, name, mask + ) + return np.argmax(proba, axis=1) + + @classmethod + def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor": + """ + Load solver from config file. + + You can use this function to directly load a solver from predefined config dict + or config file path. Currently, only support file type of ``json`` or ``yaml``, + if you pass a path. + + Parameters + ---------- + path_or_dict: str or dict + The path to the config file or the config dictionary object + + filetype: str + The filetype the given file if the path is specified. Currently only support + ``json`` or ``yaml``. You can set to ``auto`` to automatically detect the file + type (from file name). Default ``auto``. + + Returns + ------- + solver: autogl.solver.AutoGraphClassifier + The solver that is created from given file or dictionary. + """ + assert filetype in ["auto", "yaml", "json"], ( + "currently only support yaml file or json file type, but get type " + + filetype + ) + if isinstance(path_or_dict, str): + if filetype == "auto": + if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): + filetype = "yaml" + elif path_or_dict.endswith(".json"): + filetype = "json" + else: + LOGGER.error( + "cannot parse the type of the given file name, " + "please manually set the file type" + ) + raise ValueError( + "cannot parse the type of the given file name, " + "please manually set the file type" + ) + if filetype == "yaml": + path_or_dict = yaml.load( + open(path_or_dict, "r").read(), Loader=yaml.FullLoader + ) + else: + path_or_dict = json.load(open(path_or_dict, "r")) + + path_or_dict = deepcopy(path_or_dict) + solver = cls(None, [], None, None) + fe_list = path_or_dict.pop("feature", None) + if fe_list is not None: + fe_list_ele = [] + for feature_engineer in fe_list: + name = feature_engineer.pop("name") + if name is not None: + 
fe_list_ele.append(FEATURE_DICT[name](**feature_engineer)) + if fe_list_ele != []: + solver.set_feature_module(fe_list_ele) + + models = path_or_dict.pop("models", [{"name": "gcn"}, {"name": "gat"}]) + model_hp_space = [ + _parse_hp_space(model.pop("hp_space", None)) for model in models + ] + model_list = [ + _initialize_single_model(model.pop("name"), model) for model in models + ] + + trainer = path_or_dict.pop("trainer", None) + default_trainer = "LinkPredictionFull" + trainer_space = None + if isinstance(trainer, dict): + # global default + default_trainer = trainer.pop("name", "LinkPredictionFull") + trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) + default_kwargs = {"num_features": None} + default_kwargs.update(trainer) + default_kwargs["init"] = False + for i in range(len(model_list)): + model = model_list[i] + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap + elif isinstance(trainer, list): + # sequential trainer definition + assert len(trainer) == len( + model_list + ), "The number of trainer and model does not match" + trainer_space = [] + for i in range(len(model_list)): + train, model = trainer[i], model_list[i] + default_trainer = train.pop("name", "LinkPredictionFull") + trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) + default_kwargs = {"num_features": None} + default_kwargs.update(train) + default_kwargs["init"] = False + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap + + solver.set_graph_models( + model_list, default_trainer, trainer_space, model_hp_space + ) + + hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) + if hpo_dict is not None: + name = hpo_dict.pop("name") + solver.set_hpo_module(name, **hpo_dict) + + ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"}) + if ensemble_dict is not None: + name = ensemble_dict.pop("name") + solver.set_ensemble_module(name, 
**ensemble_dict) + + return solver diff --git a/configs/lp_gcn_benchmark_small.yml b/configs/lp_gcn_benchmark_small.yml new file mode 100644 index 0000000..8fb9d8a --- /dev/null +++ b/configs/lp_gcn_benchmark_small.yml @@ -0,0 +1,61 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/examples/link_prediction.py b/examples/link_prediction.py index a9c9825..331ec31 100644 --- a/examples/link_prediction.py +++ b/examples/link_prediction.py @@ -6,6 +6,7 @@ from autogl.datasets import build_dataset_from_name from autogl.module.train import LinkPredictionTrainer import numpy as np from torch_geometric.utils import train_test_split_edges +from sklearn.metrics import roc_auc_score dataset = build_dataset_from_name('cora') diff --git a/examples/link_prediction_solver.py b/examples/link_prediction_solver.py new file mode 100644 index 0000000..a2051d7 --- /dev/null +++ b/examples/link_prediction_solver.py @@ -0,0 +1,90 @@ +import sys + +sys.path.append("../") +from autogl.datasets import 
build_dataset_from_name +from autogl.solver.classifier.link_predictor import AutoLinkPredictor +from autogl.module.train.evaluation import Auc +import yaml +import random +import torch +import numpy as np + +if __name__ == "__main__": + + from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + + parser = ArgumentParser( + "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", + default="cora", + type=str, + help="dataset to use", + choices=[ + "cora", + "pubmed", + "citeseer", + "coauthor_cs", + "coauthor_physics", + "amazon_computers", + "amazon_photo", + ], + ) + parser.add_argument( + "--configs", + type=str, + default="../configs/lp_gcn_benchmark_small.yml", + help="config to use", + ) + # following arguments will override parameters in the config file + parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods") + parser.add_argument( + "--max_eval", type=int, default=50, help="max hpo evaluation times" + ) + parser.add_argument("--seed", type=int, default=0, help="random seed") + parser.add_argument("--device", default=0, type=int, help="GPU device") + + args = parser.parse_args() + if torch.cuda.is_available(): + torch.cuda.set_device(args.device) + seed = args.seed + # set random seed + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + dataset = build_dataset_from_name(args.dataset) + + configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader) + configs["hpo"]["name"] = args.hpo + configs["hpo"]["max_evals"] = args.max_eval + autoClassifier = AutoLinkPredictor.from_config(configs) + + # train + autoClassifier.fit( + dataset, + time_limit=3600, + evaluation_method=[Auc], + seed=seed, + train_split=0.85, + val_split=0.05, + ) + autoClassifier.get_leaderboard().show() + + # test + 
predict_result = autoClassifier.predict_proba() + + pos_edge_index, neg_edge_index = dataset[0].test_pos_edge_index, dataset[0].test_neg_edge_index + E = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(E) + link_labels[:pos_edge_index.size(1)] = 1. + + print( + "test auc: %.4f" + % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy())) + ) From 5c88a3e57f91b90374317581e1f1a0409fae69a0 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sat, 29 May 2021 15:17:15 +0800 Subject: [PATCH 13/19] add comparison code --- benchmark/lp_pyg.py | 126 ++++++++++++++++++++++++++++++++++++++ benchmark/lp_reproduce.py | 82 +++++++++++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100644 benchmark/lp_pyg.py create mode 100644 benchmark/lp_reproduce.py diff --git a/benchmark/lp_pyg.py b/benchmark/lp_pyg.py new file mode 100644 index 0000000..32d8db4 --- /dev/null +++ b/benchmark/lp_pyg.py @@ -0,0 +1,126 @@ +import sys +sys.path.append('../') +import os.path as osp + +import torch +import torch.nn.functional as F +from sklearn.metrics import roc_auc_score + +from torch_geometric.utils import negative_sampling +from torch_geometric.nn import GCNConv, GATConv, SAGEConv +from torch_geometric.utils import train_test_split_edges + +import argparse +import pickle +import numpy as np + +parser = argparse.ArgumentParser() +parser.add_argument('--model', choices=['gcn', 'sage', 'gat'], type=str, default='gcn', help='model to train') +parser.add_argument('--dataset', choices=['cora', 'citseer', 'pubmed'], type=str, default='cora', help='dataset to evaluate') +parser.add_argument('--times', type=int, default=10, help='time to rerun') + +args = parser.parse_args() + +DIM = 64 +dataset = pickle.load(open(f'/DATA/DATANAS1/guancy/github/AutoGL/env/cache/{args.dataset}-edge.data', 'rb')) +data = dataset[0] +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +data = data.to(device) + +def _decode(z, pos_edge_index, neg_edge_index): 
+ edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + return (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + +class GNN(torch.nn.Module): + def __init__(self): + super().__init__() + + def encode(self, x, edge_index): + return self.conv2(self.conv1(x, edge_index).relu(), edge_index) + +class GCN(GNN): + def __init__(self, in_channels): + super().__init__() + self.conv1 = GCNConv(in_channels, 128) + self.conv2 = GCNConv(128, DIM) + +class GAT(GNN): + def __init__(self, in_channels): + super().__init__() + self.conv1 = GATConv(in_channels, 16, 8) + self.conv2 = GATConv(128, DIM // 8, 8) + +class SAGE(GNN): + def __init__(self, in_channels): + super().__init__() + self.conv1 = SAGEConv(in_channels, 128) + self.conv2 = SAGEConv(128, DIM) + +MODEL = { + 'gcn': GCN, + 'gat': GAT, + 'sage': SAGE +} + +scores = [] + +for t in range(args.times): + + model = MODEL[args.model](dataset.num_features).to(device) + optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01) + + def get_link_labels(pos_edge_index, neg_edge_index): + num_links = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(num_links, dtype=torch.float, device=device) + link_labels[:pos_edge_index.size(1)] = 1. 
+ return link_labels + + def train(data): + model.train() + + neg_edge_index = negative_sampling( + edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes, + num_neg_samples=data.train_pos_edge_index.size(1)) + + optimizer.zero_grad() + z = model.encode(data.x, data.train_pos_edge_index) + link_logits = _decode(z, data.train_pos_edge_index, neg_edge_index) + link_labels = get_link_labels(data.train_pos_edge_index, neg_edge_index) + loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) + loss.backward() + optimizer.step() + + return loss + + + @torch.no_grad() + def test(data): + model.eval() + + z = model.encode(data.x, data.train_pos_edge_index) + + results = [] + for prefix in ['val', 'test']: + pos_edge_index = data[f'{prefix}_pos_edge_index'] + neg_edge_index = data[f'{prefix}_neg_edge_index'] + link_logits = _decode(z, pos_edge_index, neg_edge_index) + link_probs = link_logits.sigmoid() + link_labels = get_link_labels(pos_edge_index, neg_edge_index) + results.append(roc_auc_score(link_labels.cpu(), link_probs.cpu())) + return results + + + best_val_auc = test_auc = 0 + for epoch in range(1, 101): + loss = train(data) + val_auc, tmp_test_auc = test(data) + if val_auc > best_val_auc: + best_val = val_auc + test_auc = tmp_test_auc + # print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, ' + # f'Test: {test_auc:.4f}') + + scores.append(test_auc) + print('time', t, test_auc) +print('mean', np.mean(scores), 'std', np.std(scores)) +open('lp_pyg.log', 'a').write('\t'.join([args.dataset, args.model, str(np.mean(scores)), str(np.std(scores)), '\n'])) diff --git a/benchmark/lp_reproduce.py b/benchmark/lp_reproduce.py new file mode 100644 index 0000000..1409f17 --- /dev/null +++ b/benchmark/lp_reproduce.py @@ -0,0 +1,82 @@ +""" +Used to reproduce the statistics from pyg +""" + +import sys +sys.path.append('../') +import pickle +import torch +import argparse +import numpy as np +from sklearn.metrics import roc_auc_score + +from 
autogl.module.train import LinkPredictionTrainer +# Fix data split + +parser = argparse.ArgumentParser() +parser.add_argument('--model', choices=['gcn', 'sage', 'gat'], type=str, default='gcn', help='model to train') +parser.add_argument('--dataset', choices=['cora', 'citseer', 'pubmed'], type=str, default='cora', help='dataset to evaluate') +parser.add_argument('--times', type=int, default=10, help='time to rerun') + +args = parser.parse_args() + +DIM = 64 +dataset = pickle.load(open(f'/DATA/DATANAS1/guancy/github/AutoGL/env/cache/{args.dataset}-edge.data', 'rb')) +data = dataset[0] +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +data = data.to(device) + +HP = { + 'gcn' : { + 'num_layers': 3, + 'hidden': [128, DIM], + 'dropout': 0.0, + 'act': 'relu' + }, + 'gat' : { + 'num_layers': 3, + 'hidden': [16, DIM // 8], + 'dropout': 0.0, + 'act': 'relu', + 'heads': 8 + }, + 'sage': { + 'num_layers': 3, + 'hidden': [128, DIM], + 'dropout': 0.0, + 'act': 'relu', + 'aggr': 'mean' + } +} + +scores = [] + +for t in range(args.times): + + trainer = LinkPredictionTrainer( + args.model, + num_features=dataset.num_features, + lr=0.01, + max_epoch=100, + early_stopping_round=101, + weight_decay=0, + device='cuda', + init=False, + feval='auc', + loss="binary_cross_entropy_with_logits", + ) + + trainer = trainer.duplicate_from_hyper_parameter(HP[args.model], restricted=False) + trainer.train([data], keep_valid_result=True) + y = trainer.predict([data], 'test') + y_ = y.cpu().numpy() + + pos_edge_index = data[f'test_pos_edge_index'] + neg_edge_index = data[f'test_neg_edge_index'] + link_labels = trainer.get_link_labels(pos_edge_index, neg_edge_index) + label = link_labels.cpu().numpy() + test_auc = roc_auc_score(label, y_) + scores.append(test_auc) + print('time', t, test_auc) +print('mean', np.mean(scores), 'std', np.std(scores)) +open('lp_reproduce.log', 'a').write('\t'.join([args.dataset, args.model, str(np.mean(scores)), str(np.std(scores)), '\n'])) From 
4878a0abe50e14aa04c220d601208935cf04069e Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Mon, 31 May 2021 13:19:39 +0800 Subject: [PATCH 14/19] adjust metrics input --- autogl/module/train/evaluation.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py index af3a79a..d36f708 100644 --- a/autogl/module/train/evaluation.py +++ b/autogl/module/train/evaluation.py @@ -120,6 +120,7 @@ class Auc(Evaluation): if len(predict.shape) == 1: pos_predict = predict else: + assert predict.shape[1] == 2, "Cannot use auc on given data with %d classes!" % (predict.shape[1]) pos_predict = predict[:, 1] return roc_auc_score(label, pos_predict) @@ -142,7 +143,11 @@ class Acc(Evaluation): """ Should return: the evaluation result (float) """ - return accuracy_score(label, np.argmax(predict, axis=1)) + if len(predict.shape) == 2: + predict = np.argmax(predict, axis=1) + else: + predict = [1 if p > 0.5 else 0 for p in predict] + return accuracy_score(label, predict) @register_evaluate("mrr") @@ -163,5 +168,9 @@ class Mrr(Evaluation): """ Should return: the evaluation result (float) """ - pos_predict = predict[:, 1] + if len(predict.shape) == 2: + assert predict.shape[1] == 2, "Cannot use mrr on given data with %d classes!" 
% (predict.shape[1]) + pos_predict = predict[:, 1] + else: + pos_predict = predict return label_ranking_average_precision_score(label, pos_predict) From a08843dc3b10a618dca706e10288ba00f7c6bed3 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Fri, 11 Jun 2021 22:03:34 +0800 Subject: [PATCH 15/19] fix performance bugs of gcn on link prediction --- autogl/module/model/gcn.py | 13 +++++-------- autogl/module/train/link_prediction.py | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 71ce274..7f279a7 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -1,6 +1,7 @@ import torch import torch.nn.functional import torch_geometric +from torch_geometric.nn import GCNConv import typing as _typing from . import register_model from .base import BaseModel, activate_func, ClassificationModel @@ -23,23 +24,19 @@ class GCN(torch.nn.Module): num_layers: int = len(hidden_features) + 1 if num_layers == 1: self.__convolution_layers.append( - torch_geometric.nn.GCNConv( - num_features, num_classes, add_self_loops=False - ) + GCNConv(num_features, num_classes) ) else: self.__convolution_layers.append( - torch_geometric.nn.GCNConv( - num_features, hidden_features[0], add_self_loops=False - ) + GCNConv(num_features, hidden_features[0]) ) for i in range(len(hidden_features)): self.__convolution_layers.append( - torch_geometric.nn.GCNConv( + GCNConv( hidden_features[i], hidden_features[i + 1] ) if i + 1 < len(hidden_features) - else torch_geometric.nn.GCNConv(hidden_features[i], num_classes) + else GCNConv(hidden_features[i], num_classes) ) self.__dropout: float = dropout self.__activation_name: str = activation_name diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py index 5c17047..a444ae3 100644 --- a/autogl/module/train/link_prediction.py +++ b/autogl/module/train/link_prediction.py @@ -217,8 +217,8 @@ class 
LinkPredictionTrainer(BaseLinkPredictionTrainer): self.early_stopping(val_loss, self.model.model) if self.early_stopping.early_stop: LOGGER.debug("Early stopping at %d", epoch) - self.early_stopping.load_checkpoint(self.model.model) break + self.early_stopping.load_checkpoint(self.model.model) def predict_only(self, data, test_mask=None): """ From c28d1c3a3bd9b6ec2b15a25fd789f5ae49e8e6f9 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 17 Jun 2021 16:20:04 +0000 Subject: [PATCH 16/19] fix ensemble bug --- autogl/solver/classifier/link_predictor.py | 13 +- benchmark/lp_pyg.py | 126 ------------------ benchmark/lp_reproduce.py | 82 ------------ configs/lp_benchmark.yml | 92 +++++++++++++ configs/lp_gat_benchmark.yml | 61 +++++++++ ...nchmark_small.yml => lp_gcn_benchmark.yml} | 0 configs/lp_sage_benchmark.yml | 69 ++++++++++ configs/nodeclf_gat_benchmark_large.yml | 2 +- 8 files changed, 233 insertions(+), 212 deletions(-) delete mode 100644 benchmark/lp_pyg.py delete mode 100644 benchmark/lp_reproduce.py create mode 100644 configs/lp_benchmark.yml create mode 100644 configs/lp_gat_benchmark.yml rename configs/{lp_gcn_benchmark_small.yml => lp_gcn_benchmark.yml} (100%) create mode 100644 configs/lp_sage_benchmark.yml diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py index d2ef6b4..040da36 100644 --- a/autogl/solver/classifier/link_predictor.py +++ b/autogl/solver/classifier/link_predictor.py @@ -192,6 +192,13 @@ class AutoLinkPredictor(BaseClassifier): return self + def _to_prob(self, sig_prob: np.ndarray): + nelements = len(sig_prob) + prob = np.zeros([nelements, 2]) + prob[:,0] = 1 - sig_prob + prob[:,1] = sig_prob + return prob + # pylint: disable=arguments-differ def fit( self, @@ -323,7 +330,7 @@ class AutoLinkPredictor(BaseClassifier): name = optimized.get_name_with_hp() + "_idx%d" % (idx) names.append(name) performance_on_valid, _ = optimized.get_valid_score(return_major=False) - 
result_valid.append(optimized.get_valid_predict_proba().cpu().numpy()) + result_valid.append(self._to_prob(optimized.get_valid_predict_proba().cpu().numpy())) self.leaderboard.insert_model_performance( name, dict( @@ -512,10 +519,10 @@ class AutoLinkPredictor(BaseClassifier): names = [] for model_name in self.trained_models: predict_result.append( - self._predict_proba_by_name(dataset, model_name, mask) + self._to_prob(self._predict_proba_by_name(dataset, model_name, mask)) ) names.append(model_name) - return self.ensemble_module.ensemble(predict_result, names) + return self.ensemble_module.ensemble(predict_result, names)[:,1] if use_ensemble and self.ensemble_module is None: LOGGER.warning( diff --git a/benchmark/lp_pyg.py b/benchmark/lp_pyg.py deleted file mode 100644 index 32d8db4..0000000 --- a/benchmark/lp_pyg.py +++ /dev/null @@ -1,126 +0,0 @@ -import sys -sys.path.append('../') -import os.path as osp - -import torch -import torch.nn.functional as F -from sklearn.metrics import roc_auc_score - -from torch_geometric.utils import negative_sampling -from torch_geometric.nn import GCNConv, GATConv, SAGEConv -from torch_geometric.utils import train_test_split_edges - -import argparse -import pickle -import numpy as np - -parser = argparse.ArgumentParser() -parser.add_argument('--model', choices=['gcn', 'sage', 'gat'], type=str, default='gcn', help='model to train') -parser.add_argument('--dataset', choices=['cora', 'citseer', 'pubmed'], type=str, default='cora', help='dataset to evaluate') -parser.add_argument('--times', type=int, default=10, help='time to rerun') - -args = parser.parse_args() - -DIM = 64 -dataset = pickle.load(open(f'/DATA/DATANAS1/guancy/github/AutoGL/env/cache/{args.dataset}-edge.data', 'rb')) -data = dataset[0] -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -data = data.to(device) - -def _decode(z, pos_edge_index, neg_edge_index): - edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) - return 
(z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) - -class GNN(torch.nn.Module): - def __init__(self): - super().__init__() - - def encode(self, x, edge_index): - return self.conv2(self.conv1(x, edge_index).relu(), edge_index) - -class GCN(GNN): - def __init__(self, in_channels): - super().__init__() - self.conv1 = GCNConv(in_channels, 128) - self.conv2 = GCNConv(128, DIM) - -class GAT(GNN): - def __init__(self, in_channels): - super().__init__() - self.conv1 = GATConv(in_channels, 16, 8) - self.conv2 = GATConv(128, DIM // 8, 8) - -class SAGE(GNN): - def __init__(self, in_channels): - super().__init__() - self.conv1 = SAGEConv(in_channels, 128) - self.conv2 = SAGEConv(128, DIM) - -MODEL = { - 'gcn': GCN, - 'gat': GAT, - 'sage': SAGE -} - -scores = [] - -for t in range(args.times): - - model = MODEL[args.model](dataset.num_features).to(device) - optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01) - - def get_link_labels(pos_edge_index, neg_edge_index): - num_links = pos_edge_index.size(1) + neg_edge_index.size(1) - link_labels = torch.zeros(num_links, dtype=torch.float, device=device) - link_labels[:pos_edge_index.size(1)] = 1. 
- return link_labels - - def train(data): - model.train() - - neg_edge_index = negative_sampling( - edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes, - num_neg_samples=data.train_pos_edge_index.size(1)) - - optimizer.zero_grad() - z = model.encode(data.x, data.train_pos_edge_index) - link_logits = _decode(z, data.train_pos_edge_index, neg_edge_index) - link_labels = get_link_labels(data.train_pos_edge_index, neg_edge_index) - loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) - loss.backward() - optimizer.step() - - return loss - - - @torch.no_grad() - def test(data): - model.eval() - - z = model.encode(data.x, data.train_pos_edge_index) - - results = [] - for prefix in ['val', 'test']: - pos_edge_index = data[f'{prefix}_pos_edge_index'] - neg_edge_index = data[f'{prefix}_neg_edge_index'] - link_logits = _decode(z, pos_edge_index, neg_edge_index) - link_probs = link_logits.sigmoid() - link_labels = get_link_labels(pos_edge_index, neg_edge_index) - results.append(roc_auc_score(link_labels.cpu(), link_probs.cpu())) - return results - - - best_val_auc = test_auc = 0 - for epoch in range(1, 101): - loss = train(data) - val_auc, tmp_test_auc = test(data) - if val_auc > best_val_auc: - best_val = val_auc - test_auc = tmp_test_auc - # print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, ' - # f'Test: {test_auc:.4f}') - - scores.append(test_auc) - print('time', t, test_auc) -print('mean', np.mean(scores), 'std', np.std(scores)) -open('lp_pyg.log', 'a').write('\t'.join([args.dataset, args.model, str(np.mean(scores)), str(np.std(scores)), '\n'])) diff --git a/benchmark/lp_reproduce.py b/benchmark/lp_reproduce.py deleted file mode 100644 index 1409f17..0000000 --- a/benchmark/lp_reproduce.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Used to reproduce the statistics from pyg -""" - -import sys -sys.path.append('../') -import pickle -import torch -import argparse -import numpy as np -from sklearn.metrics import roc_auc_score - -from 
autogl.module.train import LinkPredictionTrainer -# Fix data split - -parser = argparse.ArgumentParser() -parser.add_argument('--model', choices=['gcn', 'sage', 'gat'], type=str, default='gcn', help='model to train') -parser.add_argument('--dataset', choices=['cora', 'citseer', 'pubmed'], type=str, default='cora', help='dataset to evaluate') -parser.add_argument('--times', type=int, default=10, help='time to rerun') - -args = parser.parse_args() - -DIM = 64 -dataset = pickle.load(open(f'/DATA/DATANAS1/guancy/github/AutoGL/env/cache/{args.dataset}-edge.data', 'rb')) -data = dataset[0] -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -data = data.to(device) - -HP = { - 'gcn' : { - 'num_layers': 3, - 'hidden': [128, DIM], - 'dropout': 0.0, - 'act': 'relu' - }, - 'gat' : { - 'num_layers': 3, - 'hidden': [16, DIM // 8], - 'dropout': 0.0, - 'act': 'relu', - 'heads': 8 - }, - 'sage': { - 'num_layers': 3, - 'hidden': [128, DIM], - 'dropout': 0.0, - 'act': 'relu', - 'aggr': 'mean' - } -} - -scores = [] - -for t in range(args.times): - - trainer = LinkPredictionTrainer( - args.model, - num_features=dataset.num_features, - lr=0.01, - max_epoch=100, - early_stopping_round=101, - weight_decay=0, - device='cuda', - init=False, - feval='auc', - loss="binary_cross_entropy_with_logits", - ) - - trainer = trainer.duplicate_from_hyper_parameter(HP[args.model], restricted=False) - trainer.train([data], keep_valid_result=True) - y = trainer.predict([data], 'test') - y_ = y.cpu().numpy() - - pos_edge_index = data[f'test_pos_edge_index'] - neg_edge_index = data[f'test_neg_edge_index'] - link_labels = trainer.get_link_labels(pos_edge_index, neg_edge_index) - label = link_labels.cpu().numpy() - test_auc = roc_auc_score(label, y_) - scores.append(test_auc) - print('time', t, test_auc) -print('mean', np.mean(scores), 'std', np.std(scores)) -open('lp_reproduce.log', 'a').write('\t'.join([args.dataset, args.model, str(np.mean(scores)), str(np.std(scores)), '\n'])) diff 
--git a/configs/lp_benchmark.yml b/configs/lp_benchmark.yml new file mode 100644 index 0000000..f1ca373 --- /dev/null +++ b/configs/lp_benchmark.yml @@ -0,0 +1,92 @@ +ensemble: + name: voting +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn +- name: gat + hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/lp_gat_benchmark.yml b/configs/lp_gat_benchmark.yml new file mode 100644 index 0000000..abf8e28 --- /dev/null +++ b/configs/lp_gat_benchmark.yml @@ -0,0 +1,61 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random 
+models: +- name: gat + hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/lp_gcn_benchmark_small.yml b/configs/lp_gcn_benchmark.yml similarity index 100% rename from configs/lp_gcn_benchmark_small.yml rename to configs/lp_gcn_benchmark.yml diff --git a/configs/lp_sage_benchmark.yml b/configs/lp_sage_benchmark.yml new file mode 100644 index 0000000..de0b6ab --- /dev/null +++ b/configs/lp_sage_benchmark.yml @@ -0,0 +1,69 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- name: sage + hp_space: + - parameterName: num_layers + type: DISCRETE + feasiblePoints: 2,3 + + - parameterName: hidden + type: NUMERICAL_LIST + scalingType: LOG + numericalType: INTEGER + cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + + - parameterName: dropout + type: DOUBLE + scalingType: LINEAR + maxValue: 0.2 + minValue: 0.0 + + - parameterName: act + type: CATEGORICAL + feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + + - parameterName: agg + type: CATEGORICAL + feasiblePoints: 
["mean", "add", "max"] + +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/nodeclf_gat_benchmark_large.yml b/configs/nodeclf_gat_benchmark_large.yml index 1b5933f..fe47281 100644 --- a/configs/nodeclf_gat_benchmark_large.yml +++ b/configs/nodeclf_gat_benchmark_large.yml @@ -39,7 +39,7 @@ models: - tanh parameterName: act type: CATEGORICAL - name: gcn + name: gat trainer: hp_space: - maxValue: 400 From 34a452e25be732845a192d94fce7a185bf7df950 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 17 Jun 2021 16:21:33 +0000 Subject: [PATCH 17/19] black format --- autogl/datasets/utils.py | 5 +- autogl/module/model/__init__.py | 3 +- autogl/module/model/gcn.py | 17 ++----- autogl/module/train/__init__.py | 6 ++- autogl/module/train/base.py | 3 +- autogl/module/train/evaluation.py | 8 +++- autogl/module/train/link_prediction.py | 53 ++++++++++++---------- autogl/solver/classifier/link_predictor.py | 46 ++++++++++++------- 8 files changed, 83 insertions(+), 58 deletions(-) diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index 7b5679c..4885ea0 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -9,9 +9,12 @@ from sklearn.model_selection import StratifiedKFold, KFold def split_edges(dataset, train_ratio, val_ratio): datas = [data for data in dataset] for i in range(len(datas)): - datas[i] = train_test_split_edges(datas[i], val_ratio, 1 - train_ratio - val_ratio) + datas[i] = train_test_split_edges( + datas[i], val_ratio, 1 - train_ratio - val_ratio + ) dataset.data, dataset.slices = dataset.collate(datas) + def get_label_number(dataset): r"""Get the 
number of labels in this dataset as dict.""" label_num = {} diff --git a/autogl/module/model/__init__.py b/autogl/module/model/__init__.py index 42bdbc4..4bff9bb 100644 --- a/autogl/module/model/__init__.py +++ b/autogl/module/model/__init__.py @@ -1,7 +1,8 @@ from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model from .base import BaseModel from .topkpool import AutoTopkpool -#from .graph_sage import AutoSAGE + +# from .graph_sage import AutoSAGE from .graphsage import AutoSAGE from .gcn import AutoGCN from .gat import AutoGAT diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 7f279a7..e28bd80 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -23,18 +23,12 @@ class GCN(torch.nn.Module): self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() num_layers: int = len(hidden_features) + 1 if num_layers == 1: - self.__convolution_layers.append( - GCNConv(num_features, num_classes) - ) + self.__convolution_layers.append(GCNConv(num_features, num_classes)) else: - self.__convolution_layers.append( - GCNConv(num_features, hidden_features[0]) - ) + self.__convolution_layers.append(GCNConv(num_features, hidden_features[0])) for i in range(len(hidden_features)): self.__convolution_layers.append( - GCNConv( - hidden_features[i], hidden_features[i + 1] - ) + GCNConv(hidden_features[i], hidden_features[i + 1]) if i + 1 < len(hidden_features) else GCNConv(hidden_features[i], num_classes) ) @@ -109,9 +103,8 @@ class GCN(torch.nn.Module): return (prob_adj > 0).nonzero(as_tuple=False).t() - -#@register_model("gcn") -#class AutoGCN(ClassificationModel): +# @register_model("gcn") +# class AutoGCN(ClassificationModel): @register_model("gcn") class AutoGCN(BaseModel): r""" diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 8e70ff2..a7fe443 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -4,9 +4,10 @@ from .base import ( 
Evaluation, BaseNodeClassificationTrainer, BaseGraphClassificationTrainer, - BaseLinkPredictionTrainer + BaseLinkPredictionTrainer, ) + def register_trainer(name): def register_trainer_cls(cls): if name in TRAINER_DICT: @@ -20,6 +21,7 @@ def register_trainer(name): return register_trainer_cls + from .graph_classification_full import GraphClassificationFullTrainer from .node_classification_full import NodeClassificationFullTrainer from .link_prediction import LinkPredictionTrainer @@ -39,5 +41,5 @@ __all__ = [ "Auc", "Logloss", "Mrr", - "get_feval" + "get_feval", ] diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index 76c6a61..ba36e74 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -403,6 +403,7 @@ class BaseGraphClassificationTrainer(_BaseClassificationTrainer): model, num_features, num_classes, device, init, feval, loss ) + class BaseLinkPredictionTrainer(_BaseClassificationTrainer): def __init__( self, @@ -417,4 +418,4 @@ class BaseLinkPredictionTrainer(_BaseClassificationTrainer): ): super(BaseLinkPredictionTrainer, self).__init__( model, num_features, 2, device, init, feval, loss - ) \ No newline at end of file + ) diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py index d36f708..db16fd3 100644 --- a/autogl/module/train/evaluation.py +++ b/autogl/module/train/evaluation.py @@ -120,7 +120,9 @@ class Auc(Evaluation): if len(predict.shape) == 1: pos_predict = predict else: - assert predict.shape[1] == 2, "Cannot use auc on given data with %d classes!" % (predict.shape[1]) + assert ( + predict.shape[1] == 2 + ), "Cannot use auc on given data with %d classes!" % (predict.shape[1]) pos_predict = predict[:, 1] return roc_auc_score(label, pos_predict) @@ -169,7 +171,9 @@ class Mrr(Evaluation): Should return: the evaluation result (float) """ if len(predict.shape) == 2: - assert predict.shape[1] == 2, "Cannot use mrr on given data with %d classes!" 
% (predict.shape[1]) + assert ( + predict.shape[1] == 2 + ), "Cannot use mrr on given data with %d classes!" % (predict.shape[1]) pos_predict = predict[:, 1] else: pos_predict = predict diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py index a444ae3..56cf3fe 100644 --- a/autogl/module/train/link_prediction.py +++ b/autogl/module/train/link_prediction.py @@ -13,6 +13,7 @@ from ...utils import get_logger LOGGER = get_logger("link prediction trainer") + def get_feval(feval): if isinstance(feval, str): return EVALUATE_DICT[feval] @@ -65,12 +66,12 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): max_epoch=100, early_stopping_round=101, weight_decay=1e-4, - device='auto', + device="auto", init=True, feval=[Auc], loss="binary_cross_entropy_with_logits", *args, - **kwargs + **kwargs, ): super().__init__(model, num_features, device, init, feval, loss) @@ -189,19 +190,27 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): self.model.model.train() neg_edge_index = negative_sampling( - edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes, - num_neg_samples=data.train_pos_edge_index.size(1)) + edge_index=data.train_pos_edge_index, + num_nodes=data.num_nodes, + num_neg_samples=data.train_pos_edge_index.size(1), + ) optimizer.zero_grad() # res = self.model.model.forward(data) z = self.model.model.encode(data) - link_logits = self.model.model.decode(z, data.train_pos_edge_index, neg_edge_index) - link_labels = self.get_link_labels(data.train_pos_edge_index, neg_edge_index) + link_logits = self.model.model.decode( + z, data.train_pos_edge_index, neg_edge_index + ) + link_labels = self.get_link_labels( + data.train_pos_edge_index, neg_edge_index + ) # loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) if hasattr(F, self.loss): loss = getattr(F, self.loss)(link_logits, link_labels) else: - raise TypeError("PyTorch does not support loss type {}".format(self.loss)) + raise TypeError( + "PyTorch does 
not support loss type {}".format(self.loss) + ) loss.backward() optimizer.step() @@ -211,7 +220,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): feval = self.feval[0] else: feval = self.feval - val_loss = self.evaluate([data], mask='val', feval=feval) + val_loss = self.evaluate([data], mask="val", feval=feval) if feval.is_higher_better() is True: val_loss = -val_loss self.early_stopping(val_loss, self.model.model) @@ -261,10 +270,8 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): self.train_only(data) if keep_valid_result: self.valid_result = self.predict_only(data) - self.valid_result_prob = self.predict_proba(dataset, 'val') - self.valid_score = self.evaluate( - dataset, mask='val', feval=self.feval - ) + self.valid_result_prob = self.predict_proba(dataset, "val") + self.valid_score = self.evaluate(dataset, mask="val", feval=self.feval) def predict(self, dataset, mask=None): """ @@ -304,11 +311,11 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): data = dataset[0] data = data.to(self.device) if mask in ["train", "val", "test"]: - pos_edge_index = data[f'{mask}_pos_edge_index'] - neg_edge_index = data[f'{mask}_neg_edge_index'] + pos_edge_index = data[f"{mask}_pos_edge_index"] + neg_edge_index = data[f"{mask}_neg_edge_index"] else: - pos_edge_index = data[f'test_pos_edge_index'] - neg_edge_index = data[f'test_neg_edge_index'] + pos_edge_index = data[f"test_pos_edge_index"] + neg_edge_index = data[f"test_neg_edge_index"] self.model.model.eval() with torch.no_grad(): @@ -400,11 +407,11 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): feval = get_feval(feval) if mask in ["train", "val", "test"]: - pos_edge_index = data[f'{mask}_pos_edge_index'] - neg_edge_index = data[f'{mask}_neg_edge_index'] + pos_edge_index = data[f"{mask}_pos_edge_index"] + neg_edge_index = data[f"{mask}_neg_edge_index"] else: - pos_edge_index = data[f'test_pos_edge_index'] - neg_edge_index = data[f'test_neg_edge_index'] + pos_edge_index = 
data[f"test_pos_edge_index"] + neg_edge_index = data[f"test_neg_edge_index"] self.model.model.eval() with torch.no_grad(): @@ -480,7 +487,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): feval=self.feval, init=True, *self.args, - **self.kwargs + **self.kwargs, ) return ret @@ -507,5 +514,5 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): def get_link_labels(self, pos_edge_index, neg_edge_index): E = pos_edge_index.size(1) + neg_edge_index.size(1) link_labels = torch.zeros(E, dtype=torch.float, device=self.device) - link_labels[:pos_edge_index.size(1)] = 1. - return link_labels \ No newline at end of file + link_labels[: pos_edge_index.size(1)] = 1.0 + return link_labels diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py index 040da36..71fe591 100644 --- a/autogl/solver/classifier/link_predictor.py +++ b/autogl/solver/classifier/link_predictor.py @@ -195,8 +195,8 @@ class AutoLinkPredictor(BaseClassifier): def _to_prob(self, sig_prob: np.ndarray): nelements = len(sig_prob) prob = np.zeros([nelements, 2]) - prob[:,0] = 1 - sig_prob - prob[:,1] = sig_prob + prob[:, 0] = 1 - sig_prob + prob[:, 1] = sig_prob return prob # pylint: disable=arguments-differ @@ -277,14 +277,19 @@ class AutoLinkPredictor(BaseClassifier): if train_split is not None and val_split is not None: utils.split_edges(dataset, train_split, val_split) else: - assert all([hasattr(dataset.data, f'{name}') for name in [ - 'train_pos_edge_index', - 'train_neg_adj_mask', - 'val_pos_edge_index', - 'val_neg_edge_index', - 'test_pos_edge_index', - 'test_neg_edge_index' - ]]), ( + assert all( + [ + hasattr(dataset.data, f"{name}") + for name in [ + "train_pos_edge_index", + "train_neg_adj_mask", + "val_pos_edge_index", + "val_neg_edge_index", + "test_pos_edge_index", + "test_neg_edge_index", + ] + ] + ), ( "The dataset has no default train/val split! Please manually pass " "train and val ratio." 
) @@ -307,7 +312,9 @@ class AutoLinkPredictor(BaseClassifier): num_features=self.dataset[0].x.shape[1], feval=evaluator_list, device=self.runtime_device, - loss="binary_cross_entropy_with_logits" if not hasattr(dataset, "loss") else dataset.loss, + loss="binary_cross_entropy_with_logits" + if not hasattr(dataset, "loss") + else dataset.loss, ) # train the models and tune hpo @@ -330,7 +337,9 @@ class AutoLinkPredictor(BaseClassifier): name = optimized.get_name_with_hp() + "_idx%d" % (idx) names.append(name) performance_on_valid, _ = optimized.get_valid_score(return_major=False) - result_valid.append(self._to_prob(optimized.get_valid_predict_proba().cpu().numpy())) + result_valid.append( + self._to_prob(optimized.get_valid_predict_proba().cpu().numpy()) + ) self.leaderboard.insert_model_performance( name, dict( @@ -344,10 +353,13 @@ class AutoLinkPredictor(BaseClassifier): # fit the ensemble model if self.ensemble_module is not None: - pos_edge_index, neg_edge_index = self.dataset[0].val_pos_edge_index, self.dataset[0].val_neg_edge_index + pos_edge_index, neg_edge_index = ( + self.dataset[0].val_pos_edge_index, + self.dataset[0].val_neg_edge_index, + ) E = pos_edge_index.size(1) + neg_edge_index.size(1) link_labels = torch.zeros(E, dtype=torch.float) - link_labels[:pos_edge_index.size(1)] = 1. 
+ link_labels[: pos_edge_index.size(1)] = 1.0 performance = self.ensemble_module.fit( result_valid, @@ -519,10 +531,12 @@ class AutoLinkPredictor(BaseClassifier): names = [] for model_name in self.trained_models: predict_result.append( - self._to_prob(self._predict_proba_by_name(dataset, model_name, mask)) + self._to_prob( + self._predict_proba_by_name(dataset, model_name, mask) + ) ) names.append(model_name) - return self.ensemble_module.ensemble(predict_result, names)[:,1] + return self.ensemble_module.ensemble(predict_result, names)[:, 1] if use_ensemble and self.ensemble_module is None: LOGGER.warning( From 9815e39d82a76741873a10112f692ac87152e174 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sat, 19 Jun 2021 06:19:23 +0000 Subject: [PATCH 18/19] fix tuple bugs, add lp to solver __init__ --- autogl/solver/__init__.py | 9 +++++++-- autogl/solver/classifier/__init__.py | 8 +++++++- autogl/solver/classifier/graph_classifier.py | 2 +- autogl/solver/classifier/link_predictor.py | 8 ++++++-- autogl/solver/classifier/node_classifier.py | 2 +- 5 files changed, 22 insertions(+), 7 deletions(-) diff --git a/autogl/solver/__init__.py b/autogl/solver/__init__.py index 54172f8..0fae590 100644 --- a/autogl/solver/__init__.py +++ b/autogl/solver/__init__.py @@ -2,7 +2,12 @@ Auto solver for various graph tasks """ -from .classifier import AutoGraphClassifier, AutoNodeClassifier +from .classifier import AutoGraphClassifier, AutoNodeClassifier, AutoLinkPredictor from .utils import Leaderboard -__all__ = ["AutoNodeClassifier", "AutoGraphClassifier", "Leaderboard"] +__all__ = [ + "AutoNodeClassifier", + "AutoGraphClassifier", + "AutoLinkPredictor", + "Leaderboard", +] diff --git a/autogl/solver/classifier/__init__.py b/autogl/solver/classifier/__init__.py index fc74cd6..e30c582 100644 --- a/autogl/solver/classifier/__init__.py +++ b/autogl/solver/classifier/__init__.py @@ -5,5 +5,11 @@ Auto classifier for classification problems. 
from .base import BaseClassifier from .graph_classifier import AutoGraphClassifier from .node_classifier import AutoNodeClassifier +from .link_predictor import AutoLinkPredictor -__all__ = ["BaseClassifier", "AutoGraphClassifier", "AutoNodeClassifier"] +__all__ = [ + "BaseClassifier", + "AutoGraphClassifier", + "AutoNodeClassifier", + "AutoLinkPredictor", +] diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 7427e13..0ceb15d 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -111,7 +111,7 @@ class AutoGraphClassifier(BaseClassifier): ) -> "AutoGraphClassifier": # load graph network module self.graph_model_list = [] - if isinstance(graph_models, list): + if isinstance(graph_models, (list, tuple)): for model in graph_models: if isinstance(model, str): if model in MODEL_DICT: diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py index 71fe591..17b110e 100644 --- a/autogl/solver/classifier/link_predictor.py +++ b/autogl/solver/classifier/link_predictor.py @@ -105,7 +105,7 @@ class AutoLinkPredictor(BaseClassifier): ) -> "AutoLinkPredictor": # load graph network module self.graph_model_list = [] - if isinstance(graph_models, list): + if isinstance(graph_models, (list, tuple)): for model in graph_models: if isinstance(model, str): if model in MODEL_DICT: @@ -577,6 +577,7 @@ class AutoLinkPredictor(BaseClassifier): use_best=True, name=None, mask="test", + threshold=0.5, ) -> np.ndarray: """ Predict the node class number. @@ -611,6 +612,9 @@ class AutoLinkPredictor(BaseClassifier): mask: str The data split to give prediction on. Default ``test``. + threshold: float + The threshold to judge whether the edges are positive or not. 
+ Returns ------- result: np.ndarray @@ -620,7 +624,7 @@ class AutoLinkPredictor(BaseClassifier): proba = self.predict_proba( dataset, inplaced, inplace, use_ensemble, use_best, name, mask ) - return np.argmax(proba, axis=1) + return (proba > threshold).astype("int") @classmethod def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor": diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index cd0ed86..79a882f 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -105,7 +105,7 @@ class AutoNodeClassifier(BaseClassifier): ) -> "AutoNodeClassifier": # load graph network module self.graph_model_list = [] - if isinstance(graph_models, list): + if isinstance(graph_models, (list, tuple)): for model in graph_models: if isinstance(model, str): if model in MODEL_DICT: From 2b251671bf19fbb7670c123249721212e2d517d3 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Mon, 21 Jun 2021 04:16:19 +0000 Subject: [PATCH 19/19] move lp solver --- examples/link_prediction.py | 134 +++++++++++++++++++---------- examples/link_prediction_solver.py | 90 ------------------- 2 files changed, 87 insertions(+), 137 deletions(-) delete mode 100644 examples/link_prediction_solver.py diff --git a/examples/link_prediction.py b/examples/link_prediction.py index 331ec31..4de4393 100644 --- a/examples/link_prediction.py +++ b/examples/link_prediction.py @@ -1,53 +1,93 @@ -import os.path as osp import sys -sys.path.insert(0, '../') -import torch + +sys.path.append("../") from autogl.datasets import build_dataset_from_name -from autogl.module.train import LinkPredictionTrainer +from autogl.solver.classifier.link_predictor import AutoLinkPredictor +from autogl.module.train.evaluation import Auc +import yaml +import random +import torch import numpy as np -from torch_geometric.utils import train_test_split_edges -from sklearn.metrics import roc_auc_score - -dataset = 
build_dataset_from_name('cora') - -print('len', len(dataset)) -print('num_class', dataset.num_classes) -print('num_node_features', dataset.num_node_features) - -a = [] -for _ in range(10): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - data = dataset[0] - - data = data.to(device) - data.train_mask = data.val_mask = data.test_mask = data.y = None - data = train_test_split_edges(data) - - clf = LinkPredictionTrainer( - 'gcn', - num_features=dataset.num_node_features, - num_classes=dataset.num_classes, - max_epoch=100, - early_stopping_round=101, - feval=['auc'], - lr=0.01, - weight_decay=0, - lr_scheduler_type=None, + +if __name__ == "__main__": + + from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + + parser = ArgumentParser( + "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", + default="cora", + type=str, + help="dataset to use", + choices=[ + "cora", + "pubmed", + "citeseer", + "coauthor_cs", + "coauthor_physics", + "amazon_computers", + "amazon_photo", + ], + ) + parser.add_argument( + "--configs", + type=str, + default="../configs/lp_gcn_benchmark.yml", + help="config to use", + ) + # following arguments will override parameters in the config file + parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods") + parser.add_argument( + "--max_eval", type=int, default=50, help="max hpo evaluation times" ) - clf.train([data], keep_valid_result=True) - print(clf.valid_score, end=',') - y = clf.predict([data], 'test') - y_ = y.cpu().numpy() - # acc_ = y.eq(data.y[data.test_mask]).sum().item() / data.test_mask.sum().item() - # print(acc_, end=',') - - pos_edge_index = data[f'test_pos_edge_index'] - neg_edge_index = data[f'test_neg_edge_index'] - link_labels = clf.get_link_labels(pos_edge_index, neg_edge_index) - label = link_labels.cpu().numpy() - ret = roc_auc_score(label, y_) - print(ret) - a.append(ret) -print(np.mean(a), np.std(a)) + 
parser.add_argument("--seed", type=int, default=0, help="random seed") + parser.add_argument("--device", default=0, type=int, help="GPU device") + args = parser.parse_args() + if torch.cuda.is_available(): + torch.cuda.set_device(args.device) + seed = args.seed + # set random seed + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + dataset = build_dataset_from_name(args.dataset) + + configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader) + configs["hpo"]["name"] = args.hpo + configs["hpo"]["max_evals"] = args.max_eval + autoClassifier = AutoLinkPredictor.from_config(configs) + + # train + autoClassifier.fit( + dataset, + time_limit=3600, + evaluation_method=[Auc], + seed=seed, + train_split=0.85, + val_split=0.05, + ) + autoClassifier.get_leaderboard().show() + + # test + predict_result = autoClassifier.predict_proba() + + pos_edge_index, neg_edge_index = ( + dataset[0].test_pos_edge_index, + dataset[0].test_neg_edge_index, + ) + E = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(E) + link_labels[: pos_edge_index.size(1)] = 1.0 + + print( + "test auc: %.4f" + % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy())) + ) diff --git a/examples/link_prediction_solver.py b/examples/link_prediction_solver.py deleted file mode 100644 index a2051d7..0000000 --- a/examples/link_prediction_solver.py +++ /dev/null @@ -1,90 +0,0 @@ -import sys - -sys.path.append("../") -from autogl.datasets import build_dataset_from_name -from autogl.solver.classifier.link_predictor import AutoLinkPredictor -from autogl.module.train.evaluation import Auc -import yaml -import random -import torch -import numpy as np - -if __name__ == "__main__": - - from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter - - parser = ArgumentParser( - "auto link prediction", 
formatter_class=ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--dataset", - default="cora", - type=str, - help="dataset to use", - choices=[ - "cora", - "pubmed", - "citeseer", - "coauthor_cs", - "coauthor_physics", - "amazon_computers", - "amazon_photo", - ], - ) - parser.add_argument( - "--configs", - type=str, - default="../configs/lp_gcn_benchmark_small.yml", - help="config to use", - ) - # following arguments will override parameters in the config file - parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods") - parser.add_argument( - "--max_eval", type=int, default=50, help="max hpo evaluation times" - ) - parser.add_argument("--seed", type=int, default=0, help="random seed") - parser.add_argument("--device", default=0, type=int, help="GPU device") - - args = parser.parse_args() - if torch.cuda.is_available(): - torch.cuda.set_device(args.device) - seed = args.seed - # set random seed - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - dataset = build_dataset_from_name(args.dataset) - - configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader) - configs["hpo"]["name"] = args.hpo - configs["hpo"]["max_evals"] = args.max_eval - autoClassifier = AutoLinkPredictor.from_config(configs) - - # train - autoClassifier.fit( - dataset, - time_limit=3600, - evaluation_method=[Auc], - seed=seed, - train_split=0.85, - val_split=0.05, - ) - autoClassifier.get_leaderboard().show() - - # test - predict_result = autoClassifier.predict_proba() - - pos_edge_index, neg_edge_index = dataset[0].test_pos_edge_index, dataset[0].test_neg_edge_index - E = pos_edge_index.size(1) + neg_edge_index.size(1) - link_labels = torch.zeros(E) - link_labels[:pos_edge_index.size(1)] = 1. 
- - print( - "test auc: %.4f" - % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy())) - )