diff --git a/autogl/module/model/dgl/__init__.py b/autogl/module/model/dgl/__init__.py
index 8b13789..7a45f5e 100644
--- a/autogl/module/model/dgl/__init__.py
+++ b/autogl/module/model/dgl/__init__.py
@@ -1 +1,24 @@
+from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model
+from .base import BaseModel
+from .topkpool import AutoTopkpool
 
+# from .graph_sage import AutoSAGE
+from .graphsage import AutoSAGE
+from .graph_saint import GraphSAINTAggregationModel
+from .gcn import AutoGCN
+from .gat import AutoGAT
+from .gin import AutoGIN
+from .gin_dgl import GIN
+
+__all__ = [  # NOTE(review): MODEL_DICT is imported above but is not exported here
+    "ModelUniversalRegistry",
+    "register_model",
+    "BaseModel",
+    "AutoTopkpool",
+    "AutoSAGE",
+    "GraphSAINTAggregationModel",
+    "AutoGCN",
+    "AutoGAT",
+    "AutoGIN",
+    "GIN",
+]
diff --git a/autogl/module/model/dgl/_model_registry.py b/autogl/module/model/dgl/_model_registry.py
new file mode 100644
index 0000000..14aa2d9
--- /dev/null
+++ b/autogl/module/model/dgl/_model_registry.py
@@ -0,0 +1,28 @@
+import typing as _typing
+from .base import BaseModel
+
+MODEL_DICT: _typing.Dict[str, _typing.Type[BaseModel]] = {}
+
+
+def register_model(name):
+    """Decorator factory: register a BaseModel subclass as ``name`` (else ValueError)."""
+    def register_model_cls(cls):
+        if name in MODEL_DICT:
+            raise ValueError("Cannot register duplicate model ({})".format(name))
+        if not issubclass(cls, BaseModel):
+            template = "Model ({}: {}) must extend BaseModel"
+            raise ValueError(template.format(name, cls.__name__))
+        MODEL_DICT[name] = cls
+        return cls  # returned unchanged, so the decorated class is still usable
+
+    return register_model_cls
+
+
+class ModelUniversalRegistry:
+    """Lookup facade over ``MODEL_DICT``; ``get_model`` raises TypeError/KeyError."""
+    @classmethod
+    def get_model(cls, name: str) -> _typing.Type[BaseModel]:
+        if not isinstance(name, str):
+            raise TypeError("model name must be a str, got {!r}".format(name))
+        # A plain subscript raises KeyError(name), so the missing key is reported.
+        return MODEL_DICT[name]
diff --git a/autogl/module/model/dgl/base.py b/autogl/module/model/dgl/base.py
new file mode 100644
index 0000000..99f2c2c
--- /dev/null
+++ b/autogl/module/model/dgl/base.py
@@ -0,0 +1,413 @@
+"""
+auto graph model
+a list of models with their hyper parameters
+NOTE: neural architecture search (NAS) maybe included here
+"""
+import copy
+import logging
+import typing as _typing
+import torch
+import torch.nn.functional as F
+from copy import deepcopy
+
+base_approach_logger: logging.Logger = logging.getLogger("BaseModel")
+
+
+def activate_func(x, func):
+    """Apply the activation named ``func`` to ``x``; ``""`` returns ``x`` unchanged."""
+    if func == "tanh":
+        return torch.tanh(x)
+    if hasattr(F, func):
+        # any callable exposed by torch.nn.functional under that name
+        activation = getattr(F, func)
+        return activation(x)
+    if func != "":
+        raise TypeError("PyTorch does not support activation function {}".format(func))
+    return x
+
+
+class BaseModel:
+    """Legacy model base: hyper-parameter, device and dimension bookkeeping only."""
+
+    def __init__(self, init=False, *args, **kwargs):
+        super(BaseModel, self).__init__()  # every argument is accepted and ignored
+
+    def get_hyper_parameter(self):
+        return deepcopy(self.hyperparams)  # a copy, so callers cannot mutate ours
+
+    @property
+    def hyper_parameter_space(self):
+        return self.space  # alias of the ``space`` attribute
+
+    @hyper_parameter_space.setter
+    def hyper_parameter_space(self, space):
+        self.space = space
+
+    def initialize(self):
+        """No-op hook in this base class."""
+
+    def forward(self):
+        """No-op hook in this base class."""
+
+    def to(self, device):
+        """Record ``device`` (str or torch.device), move ``self.model``, return self."""
+        if isinstance(device, (str, torch.device)):
+            self.device = device
+        has_model = hasattr(self, "model") and self.model is not None
+        if has_model and isinstance(self.model, torch.nn.Module):
+            self.model.to(self.device)
+        return self
+
+    def from_hyper_parameter(self, hp):
+        """Return a new initialized instance of the same class with ``hp`` merged in."""
+        clone = self.__class__(
+            num_features=self.num_features,
+            num_classes=self.num_classes,
+            device=self.device,
+            init=False,
+        )
+        clone.hyperparams.update(hp)
+        clone.params.update(self.params)
+        clone.initialize()
+        return clone
+
+    def get_num_classes(self):
+        return self.num_classes
+
+    def set_num_classes(self, num_classes):
+        self.num_classes = num_classes
+        self.params["num_class"] = num_classes  # mirrored into the model arguments
+
+    def get_num_features(self):
+        return self.num_features
+
+    def set_num_features(self, num_features):
+        self.num_features = num_features
+        self.params["features_num"] = self.num_features
+
+    def set_num_graph_features(self, num_graph_features):
+        message = "Cannot set graph features for tasks other than graph classification"
+        assert hasattr(self, "num_graph_features"), message
+        self.num_graph_features = num_graph_features
+        self.params["num_graph_features"] = num_graph_features
+
+
+class _BaseBaseModel:
+    # todo: after renaming the experimental base class _BaseModel to BaseModel,
+    #       rename this class to _BaseModel
+    """
+    Foundation of the upcoming model base class: it holds the target device,
+    the wrapped ``torch.nn.Module`` and a run-once initialization flag.
+    --  Designed by ZiXin Sun
+    """
+
+    def __init__(
+        self,
+        model: _typing.Optional[torch.nn.Module] = None,
+        initialize: bool = False,
+        device: _typing.Union[str, torch.device] = ...,
+    ):
+        """
+        :param model: module to wrap; stored as given, so None is accepted here
+        :param initialize: must be a real bool; True calls initialize() at once
+        :param device: str or torch.device; "auto" or the default selects one
+        :raises TypeError: when ``initialize`` is not a bool
+        """
+        if type(initialize) != bool:
+            raise TypeError
+        super(_BaseBaseModel, self).__init__()
+        self.__device: torch.device = self.__formulate_device(device)
+        self._model: _typing.Optional[torch.nn.Module] = model
+        self.__is_initialized: bool = False
+        if initialize:
+            self.initialize()
+
+    @classmethod
+    def __formulate_device(
+        cls, device: _typing.Union[str, torch.device] = ...
+    ) -> torch.device:
+        """
+        Normalize ``device`` into a torch.device. A torch.device, or a str other
+        than "auto", is used as given; anything else means cuda if usable, else cpu.
+        """
+        is_explicit_name = type(device) == str and device.strip().lower() != "auto"
+        if type(device) == torch.device or is_explicit_name:
+            return torch.device(device)
+        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
+            return torch.device("cuda")
+        return torch.device("cpu")
+
+    @property
+    def device(self) -> torch.device:
+        """The device produced by ``__formulate_device``."""
+        return self.__device
+
+    @device.setter
+    def device(self, __device: _typing.Union[str, torch.device, None]):
+        # normalized on assignment, so the stored value is always a torch.device
+        self.__device: torch.device = self.__formulate_device(__device)
+
+    @property
+    def model(self) -> _typing.Optional[torch.nn.Module]:
+        """The wrapped module, or None (a debug record is logged in that case)."""
+        wrapped = self._model
+        if wrapped is None:
+            note = "property of model NOT initialized before accessing"
+            base_approach_logger.debug(note)
+        return wrapped
+
+    @model.setter
+    def model(self, _model: torch.nn.Module) -> None:
+        if not isinstance(_model, torch.nn.Module):
+            message = "the property of model MUST be an instance of torch.nn.Module"
+            raise TypeError(message)
+        self._model = _model
+
+    def _initialize(self):
+        """Subclass hook performing the actual initialization; abstract here."""
+        raise NotImplementedError
+
+    def initialize(self) -> bool:
+        """
+        Run ``self._initialize()`` unless it already ran for this instance.
+        :return: True when ``_initialize()`` was invoked by this call, else False
+        """
+        if self.__is_initialized:
+            return False
+        self._initialize()
+        self.__is_initialized = True
+        return True
+
+    # NOTE: this class defines no to(); a former pass-through override (meant to
+    #       bypass the signature mismatch with torch.nn.Module.to) is disabled.
+
+    def forward(self, *args, **kwargs):
+        """Call the wrapped module with the given arguments, if there is one."""
+        if self.model is None or not isinstance(self.model, torch.nn.Module):
+            raise NotImplementedError
+        return self.model(*args, **kwargs)
+
+
+class _BaseModel(_BaseBaseModel, BaseModel):
+    """
+    The upcoming root base class for Model, i.e. BaseModel
+    --  Designed by ZiXin Sun
+    """
+
+    # todo: Deprecate and remove the legacy class "BaseModel",
+    #       then rename this class to "BaseModel",
+    #       correspondingly, this class will no longer extend
+    #       the legacy class "BaseModel" after the removal.
+    def _initialize(self):
+        raise NotImplementedError
+
+    def to(self, device: torch.device):
+        self.device = device
+        if self.model is not None and isinstance(self.model, torch.nn.Module):
+            self.model.to(self.device)
+        return super().to(device)  # resolves to the legacy BaseModel.to()
+
+    @property
+    def space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]:
+        # todo: deprecate and remove in future major version
+        return self.__hyper_parameter_space
+
+    @property
+    def hyper_parameter_space(self):
+        return self.__hyper_parameter_space
+
+    @hyper_parameter_space.setter
+    def hyper_parameter_space(
+        self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]]
+    ):
+        self.__hyper_parameter_space = space
+
+    @property
+    def hyper_parameter(self) -> _typing.Dict[str, _typing.Any]:
+        return self.__hyper_parameter  # the live dict, not a copy
+
+    @hyper_parameter.setter
+    def hyper_parameter(self, _hyper_parameter: _typing.Dict[str, _typing.Any]):
+        if not isinstance(_hyper_parameter, dict):
+            raise TypeError
+        self.__hyper_parameter = _hyper_parameter
+
+    def get_hyper_parameter(self) -> _typing.Dict[str, _typing.Any]:
+        """
+        todo: consider deprecating this trivial getter method in the future
+        :return: copied hyper parameter
+        """
+        return copy.deepcopy(self.__hyper_parameter)
+
+    def __init__(
+        self,
+        model: _typing.Optional[torch.nn.Module] = None,
+        initialize: bool = False,
+        hyper_parameter_space: _typing.Sequence[_typing.Any] = ...,
+        hyper_parameter: _typing.Dict[str, _typing.Any] = ...,
+        device: _typing.Union[str, torch.device] = ...,
+    ):
+        """
+        :param model, device: forwarded unchanged to _BaseBaseModel
+        :param initialize: real bool (else TypeError); True runs initialize() last
+        :param hyper_parameter_space: kept when it is a Sequence, otherwise []
+        :param hyper_parameter: kept when it is a dict, otherwise {}
+        """
+        if type(initialize) != bool:
+            raise TypeError
+        # Pass False upwards: the parent would otherwise call _initialize() before
+        # the hyper-parameter attributes assigned below exist.
+        super(_BaseModel, self).__init__(model, False, device)
+        space_ok = isinstance(hyper_parameter_space, _typing.Sequence)
+        self.__hyper_parameter_space = hyper_parameter_space if space_ok else []
+        values_ok = isinstance(hyper_parameter, dict)
+        self.__hyper_parameter = hyper_parameter if values_ok else {}
+        if initialize:
+            self.initialize()
+
+    def from_hyper_parameter(self, hyper_parameter: _typing.Dict[str, _typing.Any]):
+        raise NotImplementedError
+
+
+class ClassificationModel(_BaseModel):
+    """_BaseModel for classification; keeps feature/class counts as ints >= 0."""
+
+    @staticmethod
+    def __clamp(value) -> int:
+        """Return ``value`` when it is a positive plain int, otherwise 0."""
+        return value if type(value) == int and value > 0 else 0
+
+    def _initialize(self):
+        raise NotImplementedError
+
+    def from_hyper_parameter(
+        self, hyper_parameter: _typing.Dict[str, _typing.Any]
+    ) -> "ClassificationModel":
+        """Return a new initialized model: own values overridden by the argument."""
+        new_model: ClassificationModel = self.__class__(
+            num_features=self.num_features,
+            num_classes=self.num_classes,
+            device=self.device,
+            init=False,
+        )
+        # Merge into a copy so that this model's own dict is neither changed nor shared.
+        merged = copy.deepcopy(self.hyper_parameter)
+        merged.update(hyper_parameter)
+        new_model.hyper_parameter = merged
+        new_model.initialize()
+        return new_model
+
+    def __init__(
+        self,
+        num_features: int = ...,
+        num_classes: int = ...,
+        num_graph_features: int = ...,
+        device: _typing.Union[str, torch.device] = ...,
+        hyper_parameter_space: _typing.Sequence[_typing.Any] = ...,
+        hyper_parameter: _typing.Dict[str, _typing.Any] = ...,
+        init: bool = False,
+        **kwargs
+    ):
+        """
+        Store each ``num_*`` as given when it is a positive int, else 0; forward the
+        rest to _BaseModel. ``init`` (real bool, else TypeError) initializes last.
+        """
+        if type(init) != bool:
+            raise TypeError
+        kwargs.pop("initialize", None)  # ``init`` is the only switch honoured here
+        # initialize=False: _initialize() must not run before the counts below exist
+        super(ClassificationModel, self).__init__(
+            initialize=False,
+            hyper_parameter_space=hyper_parameter_space,
+            hyper_parameter=hyper_parameter,
+            device=device,
+            **kwargs
+        )
+        self.__num_classes: int = self.__clamp(num_classes)
+        self.__num_features: int = self.__clamp(num_features)
+        self.__num_graph_features: int = self.__clamp(num_graph_features)
+        if init:
+            self.initialize()
+
+    def __repr__(self) -> str:
+        import yaml
+        return yaml.dump(self.hyper_parameter)
+
+    @property
+    def num_classes(self) -> int:
+        return self.__num_classes
+
+    @num_classes.setter
+    def num_classes(self, __num_classes: int):
+        if type(__num_classes) != int:
+            raise TypeError
+        if not __num_classes > 0:
+            raise ValueError
+        self.__num_classes = __num_classes
+
+    @property
+    def num_features(self) -> int:
+        return self.__num_features
+
+    @num_features.setter
+    def num_features(self, __num_features: int):
+        if type(__num_features) != int:
+            raise TypeError
+        if not __num_features > 0:
+            raise ValueError
+        self.__num_features = __num_features
+
+    def get_num_classes(self) -> int:
+        # todo: consider replacing with property with getter and setter
+        return self.__num_classes
+
+    def set_num_classes(self, num_classes: int) -> None:
+        # todo: consider replacing with property with getter and setter
+        if type(num_classes) != int:
+            raise TypeError
+        self.__num_classes = self.__clamp(num_classes)
+
+    def get_num_features(self) -> int:
+        # todo: consider replacing with property with getter and setter
+        return self.__num_features
+
+    def set_num_features(self, num_features: int):
+        # todo: consider replacing with property with getter and setter
+        if type(num_features) != int:
+            raise TypeError
+        self.__num_features = self.__clamp(num_features)
+
+    def set_num_graph_features(self, num_graph_features: int):
+        # todo: consider replacing with property with getter and setter
+        if type(num_graph_features) != int:
+            raise TypeError
+        self.__num_graph_features = self.__clamp(num_graph_features)
+
+
+class _ClassificationModel(torch.nn.Module):  # encode -> decode module interface
+    def __init__(self):
+        super().__init__()
+
+    def cls_forward(self, data) -> torch.Tensor:
+        return self.cls_decode(self.cls_encode(data))
+
+    def cls_encode(self, data) -> torch.Tensor:
+        raise NotImplementedError  # abstract: subclasses turn ``data`` into a tensor
+
+    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
+        raise NotImplementedError  # abstract: subclasses map the encoding to output
+
+
+class ClassificationSupportedSequentialModel(_ClassificationModel):
+    def __init__(self):
+        super().__init__()
+
+    def cls_encode(self, data) -> torch.Tensor:
+        raise NotImplementedError  # abstract, restated from _ClassificationModel
+
+    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
+        raise NotImplementedError  # abstract, restated from _ClassificationModel
+
+    @property
+    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
+        raise NotImplementedError  # abstract; presumably the ordered encoder layers
diff --git a/autogl/module/model/dgl/dataloader_gin.py b/autogl/module/model/dgl/dataloader_gin.py
new file mode 100644
index 0000000..0721b3a
--- /dev/null
+++ b/autogl/module/model/dgl/dataloader_gin.py
@@ -0,0 +1,85 @@
+"""
+PyTorch compatible dataloader
+"""
+
+
+import math
+import numpy as np
+import torch
+from torch.utils.data.sampler import SubsetRandomSampler
+from sklearn.model_selection import StratifiedKFold
+import dgl
+from dgl.dataloading import GraphDataLoader
+
+
+class GINDataLoader():
+    """Split ``dataset`` once into a training and a validation GraphDataLoader.
+
+    ``dataset`` must yield pairs whose second item is the label. ``split_name``
+    selects 'fold10' (stratified 10-fold, fold ``fold_idx``) or 'rand' (leading
+    ``split_ratio`` share trains); any other value raises NotImplementedError.
+    """
+
+    def __init__(self,
+                 dataset,
+                 batch_size,
+                 device,
+                 collate_fn=None,
+                 seed=0,
+                 shuffle=True,
+                 split_name='fold10',
+                 fold_idx=0,
+                 split_ratio=0.7):
+        self.shuffle = shuffle
+        self.seed = seed
+        # torch.device() accepts a device object as well as a string like "cuda:0"
+        on_cuda = 'cuda' in torch.device(device).type
+        self.kwargs = {'pin_memory': True} if on_cuda else {}
+
+        labels = [l for _, l in dataset]
+        if split_name == 'fold10':
+            train_idx, valid_idx = self._split_fold10(
+                labels, fold_idx, seed, shuffle)
+        elif split_name == 'rand':
+            train_idx, valid_idx = self._split_rand(
+                labels, split_ratio, seed, shuffle)
+        else:
+            raise NotImplementedError()
+
+        self.train_loader = GraphDataLoader(
+            dataset, sampler=SubsetRandomSampler(train_idx),
+            batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)
+        self.valid_loader = GraphDataLoader(
+            dataset, sampler=SubsetRandomSampler(valid_idx),
+            batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)
+
+    def train_valid_loader(self):
+        """Return the ``(train_loader, valid_loader)`` pair."""
+        return self.train_loader, self.valid_loader
+
+    def _split_fold10(self, labels, fold_idx=0, seed=0, shuffle=True):
+        """Return (train_idx, valid_idx) of fold ``fold_idx`` of a stratified 10-fold."""
+        assert 0 <= fold_idx < 10, "fold_idx must be from 0 to 9."
+
+        # scikit-learn rejects a random_state when shuffle is False
+        skf = StratifiedKFold(
+            n_splits=10, shuffle=shuffle, random_state=seed if shuffle else None)
+        folds = list(skf.split(np.zeros(len(labels)), labels))    # split(x, y)
+        train_idx, valid_idx = folds[fold_idx]
+
+        print("train_set : test_set = %d : %d" % (len(train_idx), len(valid_idx)))
+        return train_idx, valid_idx
+
+    def _split_rand(self, labels, split_ratio=0.7, seed=0, shuffle=True):
+        """Return (train_idx, valid_idx); the leading ``split_ratio`` share trains."""
+        num_entries = len(labels)
+        indices = list(range(num_entries))
+        if shuffle:  # honour the flag: with False the split keeps dataset order
+            np.random.seed(seed)
+            np.random.shuffle(indices)
+        split = int(math.floor(split_ratio * num_entries))
+        train_idx, valid_idx = indices[:split], indices[split:]
+
+        print("train_set : test_set = %d : %d" % (len(train_idx), len(valid_idx)))
+        return train_idx, valid_idx
+
diff --git a/autogl/module/model/dgl/gat.py b/autogl/module/model/dgl/gat.py
new file mode 100644
index 0000000..d153685
--- /dev/null
+++ b/autogl/module/model/dgl/gat.py
@@ -0,0 +1,223 @@
+import torch
+import torch.nn.functional as F
+from torch_geometric.nn import GATConv
+from . import register_model
+from .base import BaseModel, activate_func
+from ....utils import get_logger
+
+LOGGER = get_logger("GATModel")
+
+
+def set_default(args, d):
+    """Copy into ``args`` every entry of ``d`` whose key is absent; return ``args``."""
+    for key in [name for name in d if name not in args]:
+        args[key] = d[key]
+    return args
+
+
+class GAT(torch.nn.Module):
+    """Stack of ``GATConv`` layers configured from the ``args`` mapping.
+
+    ``args`` must contain "features_num", "num_class", "num_layers", "hidden",
+    "heads", "dropout" and "act". All layers but the last use ``heads`` heads;
+    the last one maps to "num_class" with ``heads=1, concat=False``.
+    """
+
+    def __init__(self, args):
+        super(GAT, self).__init__()
+        self.args = args
+        required = {
+            "features_num",
+            "num_class",
+            "num_layers",
+            "hidden",
+            "heads",
+            "dropout",
+            "act",
+        }
+        # Check before the first lookup so a missing "num_layers" gets the same
+        # message as any other key; sorted() keeps that message deterministic.
+        missing_keys = sorted(required - set(self.args.keys()))
+        if len(missing_keys) > 0:
+            raise Exception("Missing keys: %s." % ",".join(missing_keys))
+        self.num_layer = int(self.args["num_layers"])
+
+        if not self.num_layer == len(self.args["hidden"]) + 1:
+            LOGGER.warning(
+                "Warning: layer size does not match the length of hidden units"
+            )
+        self.convs = torch.nn.ModuleList()
+        self.convs.append(
+            GATConv(
+                self.args["features_num"],
+                self.args["hidden"][0],
+                heads=self.args["heads"],
+                dropout=self.args["dropout"],
+            )
+        )
+        last_dim = self.args["hidden"][0] * self.args["heads"]
+        for i in range(self.num_layer - 2):
+            self.convs.append(
+                GATConv(
+                    last_dim,
+                    self.args["hidden"][i + 1],
+                    heads=self.args["heads"],
+                    dropout=self.args["dropout"],
+                )
+            )
+            last_dim = self.args["hidden"][i + 1] * self.args["heads"]
+        self.convs.append(
+            GATConv(
+                last_dim,
+                self.args["num_class"],
+                heads=1,
+                concat=False,
+                dropout=self.args["dropout"],
+            )
+        )
+
+    def forward(self, data):
+        """Return log_softmax (dim 1) of the stacked layers applied to ``data.x``."""
+        # Plain attribute access: a missing ``x``/``edge_index`` fails right here
+        # with an AttributeError instead of a later unbound-variable error.
+        x = data.x
+        edge_index = data.edge_index
+        edge_weight = getattr(data, "edge_weight", None)
+
+        for i in range(self.num_layer):
+            x = F.dropout(x, p=self.args["dropout"], training=self.training)
+            # NOTE(review): the meaning of GATConv's third positional argument
+            # depends on the torch_geometric version - confirm it accepts weights.
+            x = self.convs[i](x, edge_index, edge_weight)
+            if i != self.num_layer - 1:
+                x = activate_func(x, self.args["act"])
+
+        return F.log_softmax(x, dim=1)
+
+    def lp_encode(self, data):
+        """Run every layer except the last over ``data.train_pos_edge_index``."""
+        x = data.x
+        for i in range(self.num_layer - 1):
+            x = self.convs[i](x, data.train_pos_edge_index)
+            if i != self.num_layer - 2:
+                x = activate_func(x, self.args["act"])
+        return x
+
+    def lp_decode(self, z, pos_edge_index, neg_edge_index):
+        """Score positive then negative edges by the dot product of their endpoints."""
+        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
+        return (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
+
+    def lp_decode_all(self, z):
+        """Return, one per column, the index pairs whose dot product is positive."""
+        prob_adj = z @ z.t()
+        return (prob_adj > 0).nonzero(as_tuple=False).t()
+
+
+@register_model("gat")
+class AutoGAT(BaseModel):
+    r"""
+    AutoGAT. The model used in this automodel is GAT, i.e., the graph attentional network from the `"Graph Attention Networks"
+    <https://arxiv.org/abs/1710.10903>`_ paper. The layer is
+
+    .. math::
+        \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} +
+        \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}
+
+    where the attention coefficients :math:`\alpha_{i,j}` are computed as
+
+    .. math::
+        \alpha_{i,j} =
+        \frac{
+        \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top}
+        [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j]
+        \right)\right)}
+        {\sum_{k \in \mathcal{N}(i) \cup \{ i \}}
+        \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top}
+        [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k]
+        \right)\right)}.
+
+    Parameters
+    ----------
+    num_features: `int`.
+        The dimension of features; ``None`` is stored as 0.
+
+    num_classes: `int`.
+        The number of classes; converted with ``int()``, ``None`` is stored as 0.
+
+    device: `torch.device` or `str`
+        The device where model will be running on; ``None`` means ``"cpu"``.
+
+    init: `bool`.
+        The model is built immediately only when this is exactly ``True``.
+
+    args: Other keyword arguments; accepted but not used by this constructor.
+    """
+
+    def __init__(
+        self, num_features=None, num_classes=None, device=None, init=False, **args
+    ):
+        super(AutoGAT, self).__init__()
+        self.num_features = 0 if num_features is None else num_features
+        self.num_classes = 0 if num_classes is None else int(num_classes)
+        self.device = "cpu" if device is None else device
+        self.init = True
+
+        self.params = dict(
+            features_num=self.num_features,
+            num_class=self.num_classes,
+        )
+        self.space = [
+            dict(
+                parameterName="num_layers",
+                type="DISCRETE",
+                feasiblePoints="2,3,4",
+            ),
+            dict(
+                parameterName="hidden",
+                type="NUMERICAL_LIST",
+                numericalType="INTEGER",
+                length=3,
+                minValue=[8, 8, 8],
+                maxValue=[64, 64, 64],
+                scalingType="LOG",
+                cutPara=("num_layers",),
+                cutFunc=lambda x: x[0] - 1,
+            ),
+            dict(
+                parameterName="dropout",
+                type="DOUBLE",
+                maxValue=0.8,
+                minValue=0.2,
+                scalingType="LINEAR",
+            ),
+            dict(
+                parameterName="heads",
+                type="DISCRETE",
+                feasiblePoints="2,4,8,16",
+            ),
+            dict(
+                parameterName="act",
+                type="CATEGORICAL",
+                feasiblePoints=["leaky_relu", "relu", "elu", "tanh"],
+            ),
+        ]
+
+        self.hyperparams = dict(
+            num_layers=2,
+            hidden=[32],
+            heads=4,
+            dropout=0.2,
+            act="leaky_relu",
+        )
+
+        self.initialized = False
+        if init is True:
+            self.initialize()
+
+    def initialize(self):
+        """Build the GAT module on ``self.device`` once; later calls do nothing."""
+        if not self.initialized:
+            self.initialized = True
+            merged_args = {**self.params, **self.hyperparams}
+            self.model = GAT(merged_args).to(self.device)
diff --git a/autogl/module/model/dgl/gcn.py b/autogl/module/model/dgl/gcn.py
new file mode 100644
index 0000000..30bc1a7
--- /dev/null
+++ b/autogl/module/model/dgl/gcn.py
@@ -0,0 +1,408 @@
+import torch
+import torch.nn.functional
+import typing as _typing
+
+from torch_geometric.nn.conv import GCNConv
+import autogl.data
+from . import register_model
+from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel
+from ....utils import get_logger
+
+LOGGER = get_logger("GCNModel")
+
+
+class GCN(ClassificationSupportedSequentialModel):
+    """
+    Multi-layer graph convolutional network assembled from ``GCNConv`` layers.
+
+    The layer widths are ``num_features -> hidden_features... -> num_classes``.
+    Every layer except the last one applies ``activation_name``; the last
+    layer never applies an activation.
+
+    Parameters
+    ----------
+    num_features: ``int``
+        Width of the input node features.
+    num_classes: ``int``
+        Width of the output of the last layer.
+    hidden_features: sequence of ``int``
+        Widths of the hidden layers; may be empty (single-layer network).
+    activation_name: ``str``
+        Name handed to ``activate_func`` after every non-final layer.
+    dropout: real number, ``None``, ``...`` or a sequence
+        * a real number is clamped to ``[0, 1]`` and used after every layer
+          except the last one;
+        * ``None`` / ``...`` disables dropout everywhere;
+        * a sequence must hold exactly one entry (real number or ``None``)
+          per layer, i.e. ``len(hidden_features) + 1`` entries.
+    add_self_loops, normalize: ``bool``
+        Forwarded to every ``GCNConv``.
+
+    Raises
+    ------
+    TypeError
+        If ``dropout`` is of an unsupported type, or is a sequence with the
+        wrong length or with an item that is neither a real number nor ``None``.
+    """
+
+    class _GCNLayer(torch.nn.Module):
+        """A single ``GCNConv`` followed by an optional activation and dropout."""
+
+        def __init__(
+            self,
+            input_channels: int,
+            output_channels: int,
+            add_self_loops: bool = True,
+            normalize: bool = True,
+            activation_name: _typing.Optional[str] = ...,
+            dropout_probability: _typing.Optional[float] = ...,
+        ):
+            super().__init__()
+            self._convolution: GCNConv = GCNConv(
+                input_channels,
+                output_channels,
+                add_self_loops=bool(add_self_loops),
+                normalize=bool(normalize),
+            )
+            # Anything that is not a string (None, the ``...`` sentinel, etc.)
+            # means "no activation".
+            self._activation_name: _typing.Optional[str] = (
+                activation_name if isinstance(activation_name, str) else None
+            )
+            # Accept any real number (ints included) but not bool, which is a
+            # subclass of int; everything else means "no dropout".
+            if isinstance(dropout_probability, (int, float)) and not isinstance(
+                dropout_probability, bool
+            ):
+                self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout(
+                    min(max(float(dropout_probability), 0.0), 1.0)
+                )
+            else:
+                self._dropout: _typing.Optional[torch.nn.Dropout] = None
+
+        def forward(self, data, enable_activation: bool = True) -> torch.Tensor:
+            """
+            Run convolution, then activation (if configured and enabled), then dropout.
+
+            Reads ``data.x``, ``data.edge_index`` and, when present,
+            ``data.edge_weight``.  An ``edge_weight`` that is not a plain
+            ``torch.Tensor`` or whose length does not match ``edge_index`` is
+            ignored.  Raises ``TypeError`` when ``x`` or ``edge_index`` is not
+            a plain ``torch.Tensor``.
+            """
+            x: torch.Tensor = getattr(data, "x")
+            edge_index: torch.LongTensor = getattr(data, "edge_index")
+            edge_weight: _typing.Optional[torch.Tensor] = getattr(
+                data, "edge_weight", None
+            )
+            # Validate the arguments
+            if not type(x) == type(edge_index) == torch.Tensor:
+                raise TypeError
+            if edge_weight is not None and (
+                type(edge_weight) != torch.Tensor
+                or edge_index.size() != (2, edge_weight.size(0))
+            ):
+                edge_weight: _typing.Optional[torch.Tensor] = None
+
+            x: torch.Tensor = self._convolution.forward(x, edge_index, edge_weight)
+            if self._activation_name is not None and enable_activation:
+                x: torch.Tensor = activate_func(x, self._activation_name)
+            if self._dropout is not None:
+                x: torch.Tensor = self._dropout.forward(x)
+            return x
+
+    def __init__(
+        self,
+        num_features: int,
+        num_classes: int,
+        hidden_features: _typing.Sequence[int],
+        activation_name: str,
+        dropout: _typing.Union[
+            _typing.Optional[float], _typing.Sequence[_typing.Optional[float]]
+        ] = None,
+        add_self_loops: bool = True,
+        normalize: bool = True,
+    ):
+        layer_count: int = len(hidden_features) + 1
+        if isinstance(dropout, _typing.Sequence):
+            if len(dropout) != layer_count:
+                raise TypeError(
+                    "When the dropout argument is a sequence, "
+                    "The sequence length must equal to the number of layers to construct."
+                )
+            for _dropout in dropout:
+                if _dropout is not None and (
+                    isinstance(_dropout, bool)
+                    or not isinstance(_dropout, (int, float))
+                ):
+                    raise TypeError(
+                        "When the dropout argument is a sequence, "
+                        "every item in the sequence must be float or None"
+                    )
+            dropout_list: _typing.Sequence[_typing.Optional[float]] = dropout
+        elif isinstance(dropout, (int, float)) and not isinstance(dropout, bool):
+            # A scalar applies to every layer except the output layer.
+            dropout = min(max(float(dropout), 0.0), 1.0)
+            dropout_list: _typing.Sequence[_typing.Optional[float]] = [
+                dropout for _ in range(len(hidden_features))
+            ] + [None]
+        elif dropout is None or dropout is Ellipsis:
+            dropout_list: _typing.Sequence[_typing.Optional[float]] = [
+                None for _ in range(layer_count)
+            ]
+        else:
+            raise TypeError(
+                "The provided dropout argument must be a float number or None or "
+                "a sequence in which each item is either a float Number or None."
+            )
+        super().__init__()
+        layer_widths = [num_features, *hidden_features, num_classes]
+        self.__sequential_encoding_layers: torch.nn.ModuleList = (
+            torch.nn.ModuleList()
+        )
+        for layer_index in range(layer_count):
+            is_output_layer = layer_index == layer_count - 1
+            # Keyword arguments are used on purpose: passing the dropout value
+            # positionally would land in the ``activation_name`` slot.
+            self.__sequential_encoding_layers.append(
+                self._GCNLayer(
+                    layer_widths[layer_index],
+                    layer_widths[layer_index + 1],
+                    add_self_loops,
+                    normalize,
+                    activation_name=None if is_output_layer else activation_name,
+                    dropout_probability=dropout_list[layer_index],
+                )
+            )
+
+    @property
+    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
+        """The ordered list of GCN layers."""
+        return self.__sequential_encoding_layers
+
+    def __extract_edge_indexes_and_weights(
+        self, data
+    ) -> _typing.Union[
+        _typing.Sequence[
+            _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]
+        ],
+        _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]],
+    ]:
+        """
+        Pick the edge structure to use for encoding.
+
+        Returns a list with one ``(edge_index, edge_weight)`` tuple per layer
+        when ``data.edge_indexes`` is a sequence of int64 tensors whose length
+        equals the number of layers; otherwise returns a single
+        ``(edge_index, edge_weight)`` tuple built from ``data.edge_index`` and
+        the optional ``data.edge_weight``.  Per-layer weights are taken from
+        ``data.edge_weights`` only when it is a sequence of matching length.
+        """
+
+        def _compose_edge_index_and_weight(
+            _edge_index: torch.LongTensor,
+            _edge_weight: _typing.Optional[torch.Tensor] = None,
+        ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]:
+            if type(_edge_index) != torch.Tensor or _edge_index.dtype != torch.int64:
+                raise TypeError
+            # A weight of the wrong type or length is dropped, not rejected.
+            if _edge_weight is not None and (
+                type(_edge_weight) != torch.Tensor
+                or _edge_index.size() != (2, _edge_weight.size(0))
+            ):
+                _edge_weight: _typing.Optional[torch.Tensor] = None
+            return _edge_index, _edge_weight
+
+        if not (
+            hasattr(data, "edge_indexes")
+            and isinstance(getattr(data, "edge_indexes"), _typing.Sequence)
+            and len(getattr(data, "edge_indexes"))
+            == len(self.__sequential_encoding_layers)
+        ):
+            return _compose_edge_index_and_weight(
+                getattr(data, "edge_index"), getattr(data, "edge_weight", None)
+            )
+        for _layer_edge_index in getattr(data, "edge_indexes"):
+            if (
+                type(_layer_edge_index) != torch.Tensor
+                or _layer_edge_index.dtype != torch.int64
+            ):
+                # One unusable per-layer index invalidates the whole list.
+                return _compose_edge_index_and_weight(
+                    getattr(data, "edge_index"), getattr(data, "edge_weight", None)
+                )
+
+        if (
+            hasattr(data, "edge_weights")
+            and isinstance(getattr(data, "edge_weights"), _typing.Sequence)
+            and len(getattr(data, "edge_weights"))
+            == len(self.__sequential_encoding_layers)
+        ):
+            return [
+                _compose_edge_index_and_weight(_edge_index, _edge_weight)
+                for _edge_index, _edge_weight in zip(
+                    getattr(data, "edge_indexes"), getattr(data, "edge_weights")
+                )
+            ]
+        else:
+            return [
+                _compose_edge_index_and_weight(_layer_edge_index)
+                for _layer_edge_index in getattr(data, "edge_indexes")
+            ]
+
+    def cls_encode(self, data) -> torch.Tensor:
+        """Run ``data.x`` through every layer and return the final representation."""
+        edge_indexes_and_weights: _typing.Union[
+            _typing.Sequence[
+                _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]
+            ],
+            _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]],
+        ] = self.__extract_edge_indexes_and_weights(data)
+
+        if (not isinstance(edge_indexes_and_weights, tuple)) and isinstance(
+            edge_indexes_and_weights[0], tuple
+        ):
+            # edge_indexes_and_weights is a sequence of (edge_index, edge_weight)
+            assert len(edge_indexes_and_weights) == len(
+                self.__sequential_encoding_layers
+            )
+            x: torch.Tensor = getattr(data, "x")
+            for _edge_index_and_weight, gcn in zip(
+                edge_indexes_and_weights, self.__sequential_encoding_layers
+            ):
+                _temp_data = autogl.data.Data(x=x, edge_index=_edge_index_and_weight[0])
+                _temp_data.edge_weight = _edge_index_and_weight[1]
+                x = gcn(_temp_data)
+            return x
+        else:
+            # edge_indexes_and_weights is a single (edge_index, edge_weight)
+            x = getattr(data, "x")
+            for gcn in self.__sequential_encoding_layers:
+                _temp_data = autogl.data.Data(
+                    x=x, edge_index=edge_indexes_and_weights[0]
+                )
+                _temp_data.edge_weight = edge_indexes_and_weights[1]
+                x = gcn(_temp_data)
+            return x
+
+    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
+        """Turn encoded features into log-probabilities over dimension 1."""
+        return torch.nn.functional.log_softmax(x, dim=1)
+
+    def lp_encode(self, data):
+        """
+        Encode nodes for link prediction using all layers but the last one.
+
+        The second-to-last layer is run with its activation disabled.  The
+        ``[-2]`` index below means the model needs at least two layers.
+        """
+        x: torch.Tensor = getattr(data, "x")
+        for i in range(len(self.__sequential_encoding_layers) - 2):
+            x = self.__sequential_encoding_layers[i](
+                autogl.data.Data(x, getattr(data, "edge_index"))
+            )
+        x = self.__sequential_encoding_layers[-2](
+            autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False
+        )
+        return x
+
+    def lp_decode(self, z, pos_edge_index, neg_edge_index):
+        """Score positive and negative edges by the dot product of their endpoint embeddings."""
+        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
+        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
+        return logits
+
+    def lp_decode_all(self, z):
+        """Return, as a ``2 x E`` index tensor, every node pair whose embedding dot product is positive."""
+        prob_adj = z @ z.t()
+        return (prob_adj > 0).nonzero(as_tuple=False).t()
+
+
+@register_model("gcn")
+class AutoGCN(BaseModel):
+    r"""
+    AutoGCN.
+    The model used in this automodel is GCN, i.e., the graph convolutional network from the
+    `"Semi-supervised Classification with Graph Convolutional
+    Networks" <https://arxiv.org/abs/1609.02907>`_ paper. The layer is
+
+    .. math::
+
+        \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
+        \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},
+
+    where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
+    adjacency matrix with inserted self-loops and
+    :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.
+
+    Parameters
+    ----------
+    num_features: ``int``
+        The dimension of features.
+
+    num_classes: ``int``
+        The number of classes.
+
+    device: ``torch.device`` or ``str``
+        The device where model will be running on. When left unset
+        (``...`` or ``None``) the model falls back to ``"cpu"``.
+
+    init: `bool`.
+        If True(False), the model will (not) be initialized.
+    """
+
+    def __init__(
+        self,
+        num_features: int = ...,
+        num_classes: int = ...,
+        device: _typing.Union[str, torch.device] = ...,
+        init: bool = False,
+        **kwargs
+    ) -> None:
+        super().__init__()
+        self.num_features = num_features
+        self.num_classes = num_classes
+        # The ``...`` sentinel cannot be handed to ``Module.to``; fall back to
+        # the CPU the same way the sibling auto-models do.
+        self.device = (
+            device if device is not None and device is not Ellipsis else "cpu"
+        )
+
+        self.params = {
+            "features_num": self.num_features,
+            "num_class": self.num_classes,
+        }
+        self.space = [
+            {
+                "parameterName": "add_self_loops",
+                "type": "CATEGORICAL",
+                "feasiblePoints": [1],
+            },
+            {
+                "parameterName": "normalize",
+                "type": "CATEGORICAL",
+                "feasiblePoints": [1],
+            },
+            {
+                "parameterName": "num_layers",
+                "type": "DISCRETE",
+                "feasiblePoints": "2,3,4",
+            },
+            {
+                "parameterName": "hidden",
+                "type": "NUMERICAL_LIST",
+                "numericalType": "INTEGER",
+                "length": 3,
+                "minValue": [8, 8, 8],
+                "maxValue": [128, 128, 128],
+                "scalingType": "LOG",
+                "cutPara": ("num_layers",),
+                "cutFunc": lambda x: x[0] - 1,
+            },
+            {
+                "parameterName": "dropout",
+                "type": "DOUBLE",
+                "maxValue": 0.8,
+                "minValue": 0.2,
+                "scalingType": "LINEAR",
+            },
+            {
+                "parameterName": "act",
+                "type": "CATEGORICAL",
+                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
+            },
+        ]
+
+        # Initial point of the hyper-parameter search.  ``dropout`` is written
+        # as a float because ``GCN`` validates the type of this argument.
+        self.hyperparams = {
+            "num_layers": 3,
+            "hidden": [128, 64],
+            "dropout": 0.0,
+            "act": "relu",
+        }
+
+        self.initialized = False
+        if init is True:
+            self.initialize()
+
+    def initialize(self):
+        """Build the ``GCN`` from the current hyper-parameters; only the first call has an effect."""
+        if self.initialized:
+            return
+        self.initialized = True
+        dropout = self.hyperparams.get("dropout", None)
+        # Integer values such as 0 or 1 are valid probabilities; normalise
+        # them to float so the type validation in ``GCN`` accepts them.
+        if isinstance(dropout, (int, float)) and not isinstance(dropout, bool):
+            dropout = float(dropout)
+        self.model = GCN(
+            self.num_features,
+            self.num_classes,
+            self.hyperparams.get("hidden"),
+            self.hyperparams.get("act"),
+            dropout,
+            bool(self.hyperparams.get("add_self_loops", True)),
+            bool(self.hyperparams.get("normalize", True)),
+        ).to(self.device)
diff --git a/autogl/module/model/dgl/gin.py b/autogl/module/model/dgl/gin.py
new file mode 100644
index 0000000..52a495a
--- /dev/null
+++ b/autogl/module/model/dgl/gin.py
@@ -0,0 +1,232 @@
+import torch
+import torch.nn.functional as F
+from torch.nn import Linear, ReLU, Sequential, LeakyReLU, Tanh, ELU
+from torch_geometric.nn import GINConv, global_add_pool
+from torch.nn import BatchNorm1d
+from . import register_model
+from .base import BaseModel, activate_func
+from copy import deepcopy
+from ....utils import get_logger
+
+LOGGER = get_logger("GINModel")
+
+
+def set_default(args, d):
+    """Copy into ``args`` every entry of ``d`` whose key is absent, then return ``args``.
+
+    ``args`` is modified in place; keys already present keep their value.
+    """
+    for name, fallback in d.items():
+        args.setdefault(name, fallback)
+    return args
+
+
+class GIN(torch.nn.Module):
+    """
+    Graph-level classifier: a stack of ``GINConv`` layers with batch norm,
+    global add pooling, and two fully connected layers.
+
+    ``args`` is a mapping that must contain ``features_num``, ``num_class``,
+    ``num_graph_features``, ``num_layers``, ``hidden``, ``dropout``, ``act``,
+    ``mlp_layers`` and ``eps``.  With ``num_layers = L`` the model builds
+    ``L - 2`` convolutions whose widths are ``hidden[0] .. hidden[L - 3]``;
+    ``hidden[L - 2]`` is the width of the first fully connected layer.
+
+    Raises
+    ------
+    Exception
+        If any required key is missing from ``args``.
+    AssertionError
+        If ``num_layers`` is smaller than 3.
+    """
+
+    def __init__(self, args):
+        super(GIN, self).__init__()
+        self.args = args
+
+        # Check for missing keys before reading any of them, so the caller
+        # gets the full list instead of a bare KeyError on the first one.
+        required_keys = (
+            "features_num",
+            "num_class",
+            "num_graph_features",
+            "num_layers",
+            "hidden",
+            "dropout",
+            "act",
+            "mlp_layers",
+            "eps",
+        )
+        missing_keys = [key for key in required_keys if key not in self.args]
+        if len(missing_keys) > 0:
+            raise Exception("Missing keys: %s." % ",".join(missing_keys))
+
+        self.num_layer = int(self.args["num_layers"])
+        assert self.num_layer > 2, "Number of layers in GIN should not less than 3"
+        if not self.num_layer == len(self.args["hidden"]) + 1:
+            # ``warn`` is a deprecated alias that newer Python versions removed.
+            LOGGER.warning(
+                "Warning: layer size does not match the length of hidden units"
+            )
+        self.num_graph_features = self.args["num_graph_features"]
+
+        if self.args["act"] == "leaky_relu":
+            act = LeakyReLU()
+        elif self.args["act"] == "relu":
+            act = ReLU()
+        elif self.args["act"] == "elu":
+            act = ELU()
+        elif self.args["act"] == "tanh":
+            act = Tanh()
+        else:
+            # Unknown names fall back to ReLU inside the per-layer MLPs.
+            act = ReLU()
+
+        # The search space encodes this flag as the strings "True"/"False";
+        # comparing the string form also accepts a real boolean True.
+        train_eps = str(self.args["eps"]) == "True"
+
+        hidden = self.args["hidden"]
+        mlp_layers = self.args["mlp_layers"]
+
+        def build_mlp(in_width, out_width):
+            # One input projection followed by (mlp_layers - 1) activation/linear pairs.
+            layers = [Linear(in_width, out_width)]
+            for _ in range(mlp_layers - 1):
+                layers.append(act)
+                layers.append(Linear(out_width, out_width))
+            return Sequential(*layers)
+
+        self.convs = torch.nn.ModuleList()
+        self.bns = torch.nn.ModuleList()
+
+        self.convs.append(
+            GINConv(build_mlp(self.args["features_num"], hidden[0]), train_eps=train_eps)
+        )
+        self.bns.append(BatchNorm1d(hidden[0]))
+
+        for i in range(self.num_layer - 3):
+            self.convs.append(
+                GINConv(build_mlp(hidden[i], hidden[i + 1]), train_eps=train_eps)
+            )
+            self.bns.append(BatchNorm1d(hidden[i + 1]))
+
+        # Graph-level features, if any, are concatenated to the pooled
+        # representation before the first fully connected layer.
+        self.fc1 = Linear(
+            hidden[self.num_layer - 3] + self.num_graph_features,
+            hidden[self.num_layer - 2],
+        )
+        self.fc2 = Linear(hidden[self.num_layer - 2], self.args["num_class"])
+
+    def forward(self, data):
+        """
+        Return per-graph log-probabilities (``log_softmax`` over dimension 1).
+
+        Reads ``data.x``, ``data.edge_index`` and ``data.batch``; ``data.gf``
+        is read only when ``num_graph_features`` is positive.
+        """
+        x, edge_index, batch = data.x, data.edge_index, data.batch
+
+        if self.num_graph_features > 0:
+            graph_feature = data.gf
+
+        for i in range(self.num_layer - 2):
+            x = self.convs[i](x, edge_index)
+            x = activate_func(x, self.args["act"])
+            x = self.bns[i](x)
+
+        x = global_add_pool(x, batch)
+        if self.num_graph_features > 0:
+            x = torch.cat([x, graph_feature], dim=-1)
+        x = self.fc1(x)
+        x = activate_func(x, self.args["act"])
+        x = F.dropout(x, p=self.args["dropout"], training=self.training)
+
+        x = self.fc2(x)
+
+        return F.log_softmax(x, dim=1)
+
+
+@register_model("gin")
+class AutoGIN(BaseModel):
+    r"""
+    AutoGIN. Auto-model wrapper around GIN, the graph isomorphism network from the
+    `"How Powerful are Graph Neural Networks?" <https://arxiv.org/abs/1810.00826>`_
+    paper. The layer is
+
+    .. math::
+        \mathbf{x}^{\prime}_i = h_{\mathbf{\Theta}} \left( (1 + \epsilon) \cdot
+        \mathbf{x}_i + \sum_{j \in \mathcal{N}(i)} \mathbf{x}_j \right)
+
+    or
+
+    .. math::
+        \mathbf{X}^{\prime} = h_{\mathbf{\Theta}} \left( \left( \mathbf{A} +
+        (1 + \epsilon) \cdot \mathbf{I} \right) \cdot \mathbf{X} \right),
+
+    here :math:`h_{\mathbf{\Theta}}` denotes a neural network, *i.e.* an MLP.
+
+    Parameters
+    ----------
+    num_features: `int`.
+        The dimension of features. Stored as 0 when omitted.
+
+    num_classes: `int`.
+        The number of classes. Stored as 0 when omitted.
+
+    device: `torch.device` or `str`
+        The device where model will be running on. ``"cpu"`` when omitted.
+
+    init: `bool`.
+        If True(False), the model will (not) be initialized.
+
+    num_graph_features: `int`.
+        The number of graph-level features. Stored as 0 when omitted.
+    """
+
+    def __init__(
+        self,
+        num_features=None,
+        num_classes=None,
+        device=None,
+        init=False,
+        num_graph_features=None,
+        **args
+    ):
+
+        super().__init__()
+        self.num_features = 0 if num_features is None else num_features
+        self.num_classes = 0 if num_classes is None else int(num_classes)
+        self.num_graph_features = (
+            0 if num_graph_features is None else int(num_graph_features)
+        )
+        self.device = "cpu" if device is None else device
+        # Stored unconditionally; the ``init`` argument itself is only
+        # consulted at the end of this constructor.
+        self.init = True
+
+        self.params = dict(
+            features_num=self.num_features,
+            num_class=self.num_classes,
+            num_graph_features=self.num_graph_features,
+        )
+
+        # Hyper-parameter search space, one descriptor per tunable value.
+        hidden_slots = 5
+        activation_names = ["leaky_relu", "relu", "elu", "tanh"]
+        depth_space = {
+            "parameterName": "num_layers",
+            "type": "DISCRETE",
+            "feasiblePoints": "4,5,6",
+        }
+        hidden_space = {
+            "parameterName": "hidden",
+            "type": "NUMERICAL_LIST",
+            "numericalType": "INTEGER",
+            "length": hidden_slots,
+            "minValue": [8] * hidden_slots,
+            "maxValue": [64] * hidden_slots,
+            "scalingType": "LOG",
+            "cutPara": ("num_layers",),
+            "cutFunc": lambda cut_values: cut_values[0] - 1,
+        }
+        dropout_space = {
+            "parameterName": "dropout",
+            "type": "DOUBLE",
+            "maxValue": 0.9,
+            "minValue": 0.1,
+            "scalingType": "LINEAR",
+        }
+        activation_space = {
+            "parameterName": "act",
+            "type": "CATEGORICAL",
+            "feasiblePoints": activation_names,
+        }
+        eps_space = {
+            "parameterName": "eps",
+            "type": "CATEGORICAL",
+            "feasiblePoints": ["True", "False"],
+        }
+        mlp_depth_space = {
+            "parameterName": "mlp_layers",
+            "type": "DISCRETE",
+            "feasiblePoints": "2,3,4",
+        }
+        self.space = [
+            depth_space,
+            hidden_space,
+            dropout_space,
+            activation_space,
+            eps_space,
+            mlp_depth_space,
+        ]
+
+        # Hyper-parameter values used until a search replaces them.
+        self.hyperparams = dict(
+            num_layers=3,
+            hidden=[64, 32],
+            dropout=0.5,
+            act="relu",
+            eps="True",
+            mlp_layers=2,
+        )
+
+        self.initialized = False
+        if init is True:
+            self.initialize()
+
+    def initialize(self):
+        """Build the wrapped ``GIN`` network on the first call; later calls do nothing."""
+        if not self.initialized:
+            self.initialized = True
+            # Hyper-parameters override fixed parameters on a key collision.
+            model_arguments = {**self.params, **self.hyperparams}
+            self.model = GIN(model_arguments).to(self.device)
diff --git a/autogl/module/model/dgl/gin_dgl.py b/autogl/module/model/dgl/gin_dgl.py
new file mode 100644
index 0000000..8a5408e
--- /dev/null
+++ b/autogl/module/model/dgl/gin_dgl.py
@@ -0,0 +1,171 @@
+"""
+How Powerful are Graph Neural Networks
+https://arxiv.org/abs/1810.00826
+https://openreview.net/forum?id=ryGs6iA5Km
+Author's implementation: https://github.com/weihua916/powerful-gnns
+"""
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from dgl.nn.pytorch.conv import GINConv
+from dgl.nn.pytorch.glob import SumPooling, AvgPooling, MaxPooling
+
+
+class ApplyNodeFunc(nn.Module):
+    """Node update: run the wrapped MLP, then batch normalisation, then ReLU."""
+
+    def __init__(self, mlp):
+        """Wrap ``mlp``; its ``output_dim`` attribute sizes the batch-norm layer."""
+        super().__init__()
+        self.mlp = mlp
+        self.bn = nn.BatchNorm1d(mlp.output_dim)
+
+    def forward(self, h):
+        """Return ``relu(bn(mlp(h)))``."""
+        return F.relu(self.bn(self.mlp(h)))
+
+
+class MLP(nn.Module):
+    """Multi-layer perceptron whose last layer is linear (no activation)."""
+
+    def __init__(self, num_layers, input_dim, hidden_dim, output_dim):
+        """Construct the linear layers and their batch norms.
+
+        Parameters
+        ----------
+        num_layers: int
+            The number of linear layers; 1 yields a single linear map
+        input_dim: int
+            The dimensionality of input features
+        hidden_dim: int
+            The dimensionality of hidden units at ALL layers
+        output_dim: int
+            The dimensionality of the output of the last layer
+
+        Raises
+        ------
+        ValueError
+            If ``num_layers`` is smaller than 1.
+        """
+        super().__init__()
+        self.linear_or_not = True  # flipped below for the multi-layer case
+        self.num_layers = num_layers
+        self.output_dim = output_dim
+
+        if num_layers < 1:
+            raise ValueError("number of layers should be positive!")
+        if num_layers == 1:
+            # Plain linear model: nothing else to build.
+            self.linear = nn.Linear(input_dim, output_dim)
+            return
+
+        self.linear_or_not = False
+        # Consecutive entries of ``widths`` are the (in, out) sizes of each layer.
+        widths = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
+        self.linears = torch.nn.ModuleList(
+            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]
+        )
+        # One batch norm per hidden activation; the output layer has none.
+        self.batch_norms = torch.nn.ModuleList(
+            [nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)]
+        )
+
+    def forward(self, x):
+        """Apply linear -> batch norm -> ReLU for each hidden layer, then the final linear layer."""
+        if self.linear_or_not:
+            return self.linear(x)
+        h = x
+        # zip stops at the shorter list, so the last linear layer is left for the return.
+        for linear, norm in zip(self.linears, self.batch_norms):
+            h = F.relu(norm(linear(h)))
+        return self.linears[-1](h)
+
+
+class GIN(nn.Module):
+    """GIN model that sums pooled per-layer scores into one prediction."""
+
+    def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
+                 output_dim, final_dropout, learn_eps, graph_pooling_type,
+                 neighbor_pooling_type):
+        """Set up the GIN layers, per-layer prediction heads and graph pooling.
+
+        Parameters
+        ----------
+        num_layers: int
+            The number of representations scored (input included); the
+            network holds ``num_layers - 1`` GIN layers
+        num_mlp_layers: int
+            The number of linear layers in mlps
+        input_dim: int
+            The dimensionality of input features
+        hidden_dim: int
+            The dimensionality of hidden units at ALL layers
+        output_dim: int
+            The number of classes for prediction
+        final_dropout: float
+            dropout ratio applied to every per-layer score
+        learn_eps: boolean
+            Forwarded to each ``GINConv`` as its learn-epsilon flag
+        graph_pooling_type: str
+            how to aggregate entire nodes in a graph (sum, mean or max)
+        neighbor_pooling_type: str
+            how to aggregate neighbors; forwarded to each ``GINConv``
+
+        Raises
+        ------
+        NotImplementedError
+            If ``graph_pooling_type`` is not 'sum', 'mean' or 'max'.
+        """
+        super().__init__()
+        self.num_layers = num_layers
+        self.learn_eps = learn_eps
+
+        # GIN layers and the batch norm applied after each of them
+        self.ginlayers = torch.nn.ModuleList()
+        self.batch_norms = torch.nn.ModuleList()
+
+        for depth in range(self.num_layers - 1):
+            # Only the first layer consumes the raw input width.
+            mlp_input_dim = input_dim if depth == 0 else hidden_dim
+            mlp = MLP(num_mlp_layers, mlp_input_dim, hidden_dim, hidden_dim)
+            conv = GINConv(
+                ApplyNodeFunc(mlp), neighbor_pooling_type, 0, self.learn_eps)
+            self.ginlayers.append(conv)
+            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
+
+        # One linear head per representation (the input plus each layer
+        # output), mapping the pooled representation to a prediction score
+        self.linears_prediction = torch.nn.ModuleList()
+        for depth in range(num_layers):
+            head_input_dim = input_dim if depth == 0 else hidden_dim
+            self.linears_prediction.append(
+                nn.Linear(head_input_dim, output_dim))
+
+        self.drop = nn.Dropout(final_dropout)
+
+        if graph_pooling_type == 'sum':
+            self.pool = SumPooling()
+        elif graph_pooling_type == 'mean':
+            self.pool = AvgPooling()
+        elif graph_pooling_type == 'max':
+            self.pool = MaxPooling()
+        else:
+            raise NotImplementedError
+
+    def forward(self, g, h):
+        """Return the sum over layers of the dropped-out scores of the pooled representations."""
+        # representations to score: the input first, then every layer output
+        hidden_rep = [h]
+
+        for conv, norm in zip(self.ginlayers, self.batch_norms):
+            h = F.relu(norm(conv(g, h)))
+            hidden_rep.append(h)
+
+        score_over_layer = 0
+
+        # pool all nodes of each graph at every depth and accumulate the scores
+        for depth, representation in enumerate(hidden_rep):
+            pooled = self.pool(g, representation)
+            score_over_layer += self.drop(self.linears_prediction[depth](pooled))
+
+        return score_over_layer
diff --git a/autogl/module/model/dgl/ginparser.py b/autogl/module/model/dgl/ginparser.py
new file mode 100644
index 0000000..280aa12
--- /dev/null
+++ b/autogl/module/model/dgl/ginparser.py
@@ -0,0 +1,81 @@
+"""Parser for arguments
+
+Put all arguments in one file and group similar arguments
+"""
+import argparse
+
+
+class Parser():
+    """Command-line options for the GIN example, parsed at construction time.
+
+    The parsed namespace is available as ``self.args``.
+    """
+
+    def __init__(self, description):
+        """Create the argument parser with ``description`` and parse the command line."""
+        self.parser = argparse.ArgumentParser(description=description)
+        self.args = None
+        self._parse()
+
+    def _parse(self):
+        """Register every option, then parse the command line into ``self.args``."""
+        # (flag, keyword arguments) in registration order, grouped by concern.
+        option_table = (
+            # dataset
+            ('--dataset', dict(
+                type=str, default="MUTAG",
+                choices=['MUTAG', 'COLLAB', 'IMDBBINARY', 'IMDBMULTI'],
+                help='name of dataset (default: MUTAG)')),
+            ('--batch_size', dict(
+                type=int, default=32,
+                help='batch size for training and validation (default: 32)')),
+            ('--fold_idx', dict(
+                type=int, default=0,
+                help='the index(<10) of fold in 10-fold validation.')),
+            ('--filename', dict(
+                type=str, default="",
+                help='output file')),
+            # device
+            ('--disable-cuda', dict(
+                action='store_true',
+                help='Disable CUDA')),
+            ('--device', dict(
+                type=int, default=0,
+                help='which gpu device to use (default: 0)')),
+            # net
+            ('--num_layers', dict(
+                type=int, default=5,
+                help='number of layers (default: 5)')),
+            ('--num_mlp_layers', dict(
+                type=int, default=2,
+                help='number of MLP layers(default: 2). 1 means linear model.')),
+            ('--hidden_dim', dict(
+                type=int, default=64,
+                help='number of hidden units (default: 64)')),
+            # graph
+            ('--graph_pooling_type', dict(
+                type=str,
+                default="sum", choices=["sum", "mean", "max"],
+                help='type of graph pooling: sum, mean or max')),
+            ('--neighbor_pooling_type', dict(
+                type=str,
+                default="sum", choices=["sum", "mean", "max"],
+                help='type of neighboring pooling: sum, mean or max')),
+            ('--learn_eps', dict(
+                action="store_true",
+                help='learn the epsilon weighting')),
+            # learning
+            ('--seed', dict(
+                type=int, default=0,
+                help='random seed (default: 0)')),
+            ('--epochs', dict(
+                type=int, default=350,
+                help='number of epochs to train (default: 350)')),
+            ('--lr', dict(
+                type=float, default=0.01,
+                help='learning rate (default: 0.01)')),
+            ('--final_dropout', dict(
+                type=float, default=0.5,
+                help='final layer dropout (default: 0.5)')),
+        )
+        for flag, options in option_table:
+            self.parser.add_argument(flag, **options)
+
+        # done
+        self.args = self.parser.parse_args()
diff --git a/autogl/module/model/dgl/graph_saint.py b/autogl/module/model/dgl/graph_saint.py
new file mode 100644
index 0000000..1b4bc7a
--- /dev/null
+++ b/autogl/module/model/dgl/graph_saint.py
@@ -0,0 +1,407 @@
+import typing as _typing
+import torch.nn.functional
+from torch_geometric.nn.conv import MessagePassing
+from torch_sparse import SparseTensor, matmul
+
+from . import register_model
+from .base import ClassificationModel, ClassificationSupportedSequentialModel
+
+
+class _GraphSAINTAggregationLayers:
+    class MultiOrderAggregationLayer(torch.nn.Module):
+        class Order0Aggregator(torch.nn.Module):  # order-0 branch: linear -> activation -> optional batch norm on x alone
+            def __init__(
+                self,
+                input_dimension: int,
+                output_dimension: int,
+                bias: bool = True,
+                activation: _typing.Optional[str] = "ReLU",
+                batch_norm: bool = True,
+            ):
+                super().__init__()
+                if not type(input_dimension) == type(output_dimension) == int:
+                    raise TypeError  # both dimensions must be exactly int
+                if not (input_dimension > 0 and output_dimension > 0):
+                    raise ValueError  # both dimensions must be positive
+                if not type(bias) == bool:
+                    raise TypeError  # bias must be exactly bool
+                self.__linear_transform = torch.nn.Linear(
+                    input_dimension, output_dimension, bias
+                )
+                self.__linear_transform.reset_parameters()
+                if type(activation) == str:  # resolve the activation function by name
+                    if activation.lower() == "ReLU".lower():
+                        self.__activation = torch.nn.functional.relu
+                    elif activation.lower() == "elu":
+                        self.__activation = torch.nn.functional.elu
+                    elif hasattr(torch.nn.functional, activation) and callable(  # any other callable of torch.nn.functional (case-sensitive lookup)
+                        getattr(torch.nn.functional, activation)
+                    ):
+                        self.__activation = getattr(torch.nn.functional, activation)
+                    else:
+                        self.__activation = lambda x: x  # unrecognised name: identity, no error is raised
+                else:
+                    self.__activation = lambda x: x  # non-str value (e.g. None): identity
+                if type(batch_norm) != bool:
+                    raise TypeError  # batch_norm must be exactly bool
+                else:
+                    self.__optional_batch_normalization: _typing.Optional[
+                        torch.nn.BatchNorm1d
+                    ] = (
+                        torch.nn.BatchNorm1d(output_dimension, 1e-8)  # second positional argument is eps
+                        if batch_norm
+                        else None
+                    )
+
+            def forward(
+                self,
+                x: _typing.Union[
+                    torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]
+                ],
+                _edge_index: torch.Tensor,  # not used by this method
+                _edge_weight: _typing.Optional[torch.Tensor] = None,  # not used by this method
+                _size: _typing.Optional[_typing.Tuple[int, int]] = None,  # not used by this method
+            ) -> torch.Tensor:
+                __output: torch.Tensor = self.__linear_transform(x)  # x is fed straight to torch.nn.Linear
+                if self.__activation is not None and callable(self.__activation):
+                    __output: torch.Tensor = self.__activation(__output)
+                if self.__optional_batch_normalization is not None and isinstance(
+                    self.__optional_batch_normalization, torch.nn.BatchNorm1d
+                ):
+                    __output: torch.Tensor = self.__optional_batch_normalization(  # skipped when batch_norm was False
+                        __output
+                    )
+                return __output
+
+        class Order1Aggregator(MessagePassing):  # order-1 branch: propagate over edges, then linear -> activation -> optional batch norm
+            def __init__(
+                self,
+                input_dimension: int,
+                output_dimension: int,
+                bias: bool = True,
+                activation: _typing.Optional[str] = "ReLU",
+                batch_norm: bool = True,
+            ):
+                super().__init__(aggr="add")  # messages are aggregated with "add"
+                if not type(input_dimension) == type(output_dimension) == int:
+                    raise TypeError  # both dimensions must be exactly int
+                if not (input_dimension > 0 and output_dimension > 0):
+                    raise ValueError  # both dimensions must be positive
+                if not type(bias) == bool:
+                    raise TypeError  # bias must be exactly bool
+                self.__linear_transform = torch.nn.Linear(
+                    input_dimension, output_dimension, bias
+                )
+                self.__linear_transform.reset_parameters()
+                if type(activation) == str:  # resolve the activation function by name
+                    if activation.lower() == "ReLU".lower():
+                        self.__activation = torch.nn.functional.relu
+                    elif activation.lower() == "elu":
+                        self.__activation = torch.nn.functional.elu
+                    elif hasattr(torch.nn.functional, activation) and callable(  # any other callable of torch.nn.functional (case-sensitive lookup)
+                        getattr(torch.nn.functional, activation)
+                    ):
+                        self.__activation = getattr(torch.nn.functional, activation)
+                    else:
+                        self.__activation = lambda x: x  # unrecognised name: identity, no error is raised
+                else:
+                    self.__activation = lambda x: x  # non-str value (e.g. None): identity
+                if type(batch_norm) != bool:
+                    raise TypeError  # batch_norm must be exactly bool
+                else:
+                    self.__optional_batch_normalization: _typing.Optional[
+                        torch.nn.BatchNorm1d
+                    ] = (
+                        torch.nn.BatchNorm1d(output_dimension, 1e-8)  # second positional argument is eps
+                        if batch_norm
+                        else None
+                    )
+
+            def forward(
+                self,
+                x: _typing.Union[
+                    torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]
+                ],
+                _edge_index: torch.Tensor,
+                _edge_weight: _typing.Optional[torch.Tensor] = None,
+                _size: _typing.Optional[_typing.Tuple[int, int]] = None,
+            ) -> torch.Tensor:
+                # A lone tensor is expanded to the pair (x, x) before being handed to propagate().
+                if type(x) == torch.Tensor:
+                    x: _typing.Tuple[torch.Tensor, torch.Tensor] = (x, x)
+
+                __output = self.propagate(
+                    _edge_index, x=x, edge_weight=_edge_weight, size=_size
+                )
+                __output: torch.Tensor = self.__linear_transform(__output)  # the linear map is applied after aggregation
+                if self.__activation is not None and callable(self.__activation):
+                    __output: torch.Tensor = self.__activation(__output)
+                if self.__optional_batch_normalization is not None and isinstance(
+                    self.__optional_batch_normalization, torch.nn.BatchNorm1d
+                ):
+                    __output: torch.Tensor = self.__optional_batch_normalization(  # skipped when batch_norm was False
+                        __output
+                    )
+                return __output
+
+            def message(
+                self, x_j: torch.Tensor, edge_weight: _typing.Optional[torch.Tensor]
+            ) -> torch.Tensor:
+                return x_j if edge_weight is None else edge_weight.view(-1, 1) * x_j  # scale each message by its edge weight when one is given
+
+            def message_and_aggregate(
+                self,
+                adj_t: SparseTensor,
+                x: _typing.Union[
+                    torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]
+                ],
+            ) -> torch.Tensor:
+                return matmul(adj_t, x[0], reduce=self.aggr)  # sparse path: torch_sparse.matmul of adj_t with the first element of x
+
+        @property
+        def integral_output_dimension(self) -> int:  # per-order width times the number of orders (order + 1)
+            return self._each_order_output_dimension * (1 + self._order)
+
+        def __init__(
+            self,
+            _input_dimension: int,
+            _each_order_output_dimension: int,
+            _order: int,
+            bias: bool = True,
+            activation: _typing.Optional[str] = "ReLU",
+            batch_norm: bool = True,
+            _dropout: _typing.Optional[float] = ...,  # Ellipsis default is not a float, so it means "no dropout"
+        ):
+            super().__init__()
+            if not (
+                type(_input_dimension) == type(_order) == int
+                and type(_each_order_output_dimension) == int
+            ):
+                raise TypeError  # all three must be exactly int
+            if _input_dimension <= 0 or _each_order_output_dimension <= 0:
+                raise ValueError  # dimensions must be positive
+            if _order not in (0, 1):
+                raise ValueError("Unsupported order number")
+            self._input_dimension: int = _input_dimension
+            self._each_order_output_dimension: int = _each_order_output_dimension
+            self._order: int = _order
+            if type(bias) != bool:
+                raise TypeError
+            self.__order0_transform = self.Order0Aggregator(  # the order-0 branch is always built
+                self._input_dimension,
+                self._each_order_output_dimension,
+                bias,
+                activation,
+                batch_norm,
+            )
+            if _order == 1:
+                self.__order1_transform = self.Order1Aggregator(  # the order-1 branch exists only when _order == 1
+                    self._input_dimension,
+                    self._each_order_output_dimension,
+                    bias,
+                    activation,
+                    batch_norm,
+                )
+            else:
+                self.__order1_transform = None
+            if _dropout is not None and type(_dropout) == float:  # only an exact float enables dropout
+                if _dropout < 0:
+                    _dropout = 0  # clamp the probability into [0, 1]
+                if _dropout > 1:
+                    _dropout = 1
+                self.__optional_dropout: _typing.Optional[
+                    torch.nn.Dropout
+                ] = torch.nn.Dropout(_dropout)
+            else:
+                self.__optional_dropout: _typing.Optional[torch.nn.Dropout] = None  # None, Ellipsis or any non-float: no dropout
+
+        def _forward(
+            self,
+            x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]],
+            edge_index: torch.Tensor,
+            edge_weight: _typing.Optional[torch.Tensor] = None,
+            size: _typing.Optional[_typing.Tuple[int, int]] = None,
+        ) -> torch.Tensor:
+            if self.__order1_transform is not None and isinstance(  # order == 1: run both branches
+                self.__order1_transform, self.Order1Aggregator
+            ):
+                __output: torch.Tensor = torch.cat(  # concatenate order-0 and order-1 outputs along dim 1
+                    [
+                        self.__order0_transform(x, edge_index, edge_weight, size),
+                        self.__order1_transform(x, edge_index, edge_weight, size),
+                    ],
+                    dim=1,
+                )
+            else:
+                __output: torch.Tensor = self.__order0_transform(  # order == 0: only the order-0 branch
+                    x, edge_index, edge_weight, size
+                )
+            if self.__optional_dropout is not None and isinstance(
+                self.__optional_dropout, torch.nn.Dropout
+            ):
+                __output: torch.Tensor = self.__optional_dropout(__output)  # dropout is applied last, when configured
+            return __output
+
+        def forward(self, data) -> torch.Tensor:  # unpack x / edge_index / optional edge_weight from data and delegate to _forward
+            x: torch.Tensor = getattr(data, "x")
+            if type(x) != torch.Tensor:
+                raise TypeError  # data.x must be exactly a torch.Tensor
+            edge_index: torch.LongTensor = getattr(data, "edge_index")
+            if type(edge_index) != torch.Tensor:
+                raise TypeError  # data.edge_index must be exactly a torch.Tensor
+            edge_weight: _typing.Optional[torch.Tensor] = getattr(
+                data, "edge_weight", None
+            )
+            if edge_weight is not None and type(edge_weight) != torch.Tensor:
+                raise TypeError  # edge_weight may be absent or None, otherwise it must be a torch.Tensor
+            return self._forward(x, edge_index, edge_weight)  # size is left at its default (None)
+
+    class WrappedDropout(torch.nn.Module):
+        """Adapter that feeds a ``torch.nn.Dropout`` either a tensor or an object's ``x`` tensor."""
+
+        def __init__(self, dropout_module: torch.nn.Dropout):
+            super().__init__()
+            self.__dropout_module: torch.nn.Dropout = dropout_module
+
+        def forward(self, tenser_or_data) -> torch.Tensor:
+            if type(tenser_or_data) == torch.Tensor:
+                features = tenser_or_data
+            elif type(getattr(tenser_or_data, "x", None)) == torch.Tensor:
+                features = getattr(tenser_or_data, "x")
+            else:
+                raise TypeError  # neither a tensor nor an object carrying a tensor ``x``
+            return self.__dropout_module(features)
+
+
+class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialModel):  # pre-dropout + stacked multi-order layers + linear classifier
+    def __init__(
+        self,
+        num_features: int,
+        num_classes: int,
+        _output_dimension_for_each_order: int,
+        _layers_order_list: _typing.Sequence[int],  # one aggregation order (0 or 1) per layer
+        _pre_dropout: float,
+        _layers_dropout: _typing.Union[float, _typing.Sequence[float]],  # one value for all layers, or one per layer
+        activation: _typing.Optional[str] = "ReLU",
+        bias: bool = True,
+        batch_norm: bool = True,
+        normalize: bool = True,  # L2-normalise the encoding before the classifier
+    ):
+        super(GraphSAINTMultiOrderAggregationModel, self).__init__()
+        if type(_output_dimension_for_each_order) != int:
+            raise TypeError
+        if not _output_dimension_for_each_order > 0:
+            raise ValueError
+        self._layers_order_list: _typing.Sequence[int] = _layers_order_list
+        # Normalise _layers_dropout into a per-layer sequence stored on self.
+        if isinstance(_layers_dropout, _typing.Sequence):
+            if len(_layers_dropout) != len(_layers_order_list):
+                raise ValueError  # a sequence must supply exactly one value per layer
+            else:
+                self._layers_dropout: _typing.Sequence[float] = _layers_dropout
+        elif type(_layers_dropout) == float:
+            if _layers_dropout < 0:
+                _layers_dropout = 0.0  # stay float: the layer ignores non-float dropout values
+            if _layers_dropout > 1:
+                _layers_dropout = 1.0
+            self._layers_dropout: _typing.Sequence[float] = [
+                _layers_dropout for _ in _layers_order_list
+            ]
+        else:
+            raise TypeError
+        if type(_pre_dropout) != float:
+            raise TypeError
+        else:
+            if _pre_dropout < 0:
+                _pre_dropout = 0
+            if _pre_dropout > 1:
+                _pre_dropout = 1
+        self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList(
+            (
+                _GraphSAINTAggregationLayers.WrappedDropout(  # dropout applied to the raw input features
+                    torch.nn.Dropout(_pre_dropout)
+                ),
+                _GraphSAINTAggregationLayers.MultiOrderAggregationLayer(
+                    num_features,
+                    _output_dimension_for_each_order,
+                    _layers_order_list[0],
+                    bias,
+                    activation,
+                    batch_norm,
+                    self._layers_dropout[0],  # index the normalised list; the raw argument may be a bare float
+                ),
+            )
+        )
+        for _layer_index in range(1, len(_layers_order_list)):
+            self.__sequential_encoding_layers.append(
+                _GraphSAINTAggregationLayers.MultiOrderAggregationLayer(
+                    self.__sequential_encoding_layers[-1].integral_output_dimension,  # input width = previous layer's output width
+                    _output_dimension_for_each_order,
+                    _layers_order_list[_layer_index],
+                    bias,
+                    activation,
+                    batch_norm,
+                    self._layers_dropout[_layer_index],  # index the normalised list; the raw argument may be a bare float
+                )
+            )
+        self.__apply_normalize: bool = normalize
+        self.__linear_transform: torch.nn.Linear = torch.nn.Linear(
+            self.__sequential_encoding_layers[-1].integral_output_dimension,
+            num_classes,
+            bias,
+        )
+        self.__linear_transform.reset_parameters()
+
+    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:  # optional L2 normalise -> linear -> log_softmax over dim 1
+        if self.__apply_normalize:
+            x: torch.Tensor = torch.nn.functional.normalize(x, p=2, dim=1)
+        return torch.nn.functional.log_softmax(self.__linear_transform(x), dim=1)
+
+    def cls_encode(self, data) -> torch.Tensor:  # run every encoding layer in order and return the final features
+        if type(getattr(data, "x")) != torch.Tensor:
+            raise TypeError
+        if type(getattr(data, "edge_index")) != torch.Tensor:
+            raise TypeError
+        if (
+            getattr(data, "edge_weight", None) is not None
+            and type(getattr(data, "edge_weight")) != torch.Tensor
+        ):
+            raise TypeError
+        for encoding_layer in self.__sequential_encoding_layers:
+            setattr(data, "x", encoding_layer(data))  # NOTE: overwrites data.x in place with each layer's output
+        return getattr(data, "x")
+
+    @property
+    def sequential_encoding_layers(self) -> torch.nn.ModuleList:  # read-only view of the encoder stack
+        return self.__sequential_encoding_layers
+
+
+@register_model("GraphSAINTAggregationModel")  # registered under this name via register_model
+class GraphSAINTAggregationModel(ClassificationModel):  # wrapper that builds GraphSAINTMultiOrderAggregationModel from hyper parameters
+    def __init__(
+        self,
+        num_features: int = ...,
+        num_classes: int = ...,
+        device: _typing.Union[str, torch.device] = ...,
+        init: bool = False,
+        **kwargs
+    ):
+        super(GraphSAINTAggregationModel, self).__init__(  # all arguments are forwarded unchanged to the base class
+            num_features, num_classes, device=device, init=init, **kwargs
+        )
+        # todo: Initialize with default hyper parameter space and hyper parameter
+
+    def _initialize(self):
+        """ Initialize model """
+        self.model = GraphSAINTMultiOrderAggregationModel(
+            self.num_features,
+            self.num_classes,
+            self.hyper_parameter.get("output_dimension_for_each_order"),  # no fallback given here — presumably a dict-style get; TODO confirm
+            self.hyper_parameter.get("layers_order_list"),  # no fallback given here
+            self.hyper_parameter.get("pre_dropout"),  # no fallback given here
+            self.hyper_parameter.get("layers_dropout"),  # no fallback given here
+            self.hyper_parameter.get("activation", "ReLU"),
+            bool(self.hyper_parameter.get("bias", True)),
+            bool(self.hyper_parameter.get("batch_norm", True)),
+            bool(self.hyper_parameter.get("normalize", True)),
+        ).to(self.device)  # the built model is moved to self.device
diff --git a/autogl/module/model/dgl/graphsage.py b/autogl/module/model/dgl/graphsage.py
new file mode 100644
index 0000000..6cf5a17
--- /dev/null
+++ b/autogl/module/model/dgl/graphsage.py
@@ -0,0 +1,306 @@
+import torch
+import typing as _typing
+
+from torch_geometric.nn.conv import SAGEConv
+import torch.nn.functional
+import autogl.data
+from . import register_model
+from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel
+from ....utils import get_logger
+
+LOGGER = get_logger("SAGEModel")
+
+
+class GraphSAGE(ClassificationSupportedSequentialModel):  # stack of SAGEConv layers with classification and link-prediction heads
+    class _SAGELayer(torch.nn.Module):  # one SAGEConv followed by optional activation and optional dropout
+        def __init__(
+            self,
+            input_channels: int,
+            output_channels: int,
+            aggr: str,
+            activation_name: _typing.Optional[str] = ...,  # Ellipsis, None or non-str: no activation
+            dropout_probability: _typing.Optional[float] = ...,  # Ellipsis, None or non-float: no dropout
+        ):
+            super().__init__()
+            self._convolution: SAGEConv = SAGEConv(
+                input_channels, output_channels, aggr=aggr
+            )
+            if (
+                activation_name is not Ellipsis
+                and activation_name is not None
+                and type(activation_name) == str
+            ):
+                self._activation_name: _typing.Optional[str] = activation_name
+            else:
+                self._activation_name: _typing.Optional[str] = None
+            if (
+                dropout_probability is not Ellipsis
+                and dropout_probability is not None
+                and type(dropout_probability) == float
+            ):
+                if dropout_probability < 0:
+                    dropout_probability = 0  # clamp the probability into [0, 1]
+                if dropout_probability > 1:
+                    dropout_probability = 1
+                self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout(
+                    dropout_probability
+                )
+            else:
+                self._dropout: _typing.Optional[torch.nn.Dropout] = None
+
+        def forward(self, data, enable_activation: bool = True) -> torch.Tensor:
+            x: torch.Tensor = getattr(data, "x")
+            edge_index: torch.Tensor = getattr(data, "edge_index")
+            if type(x) != torch.Tensor or type(edge_index) != torch.Tensor:
+                raise TypeError  # both must be exactly torch.Tensor
+
+            x: torch.Tensor = self._convolution.forward(x, edge_index)
+            if self._activation_name is not None and enable_activation:  # callers may switch the activation off per call
+                x: torch.Tensor = activate_func(x, self._activation_name)
+            if self._dropout is not None:
+                x: torch.Tensor = self._dropout.forward(x)
+            return x
+
+    def __init__(
+        self,
+        num_features: int,
+        num_classes: int,
+        hidden_features: _typing.Sequence[int],  # widths of the hidden layers; may be empty
+        activation_name: str,
+        layers_dropout: _typing.Union[
+            _typing.Optional[float], _typing.Sequence[_typing.Optional[float]]
+        ] = None,  # one value for every hidden layer, or one entry per layer (hidden layers + output layer)
+        aggr: str = "mean",
+    ):
+        super().__init__()
+        if not type(num_features) == type(num_classes) == int:
+            raise TypeError
+        if not isinstance(hidden_features, _typing.Sequence):
+            raise TypeError
+        for hidden_feature in hidden_features:
+            if type(hidden_feature) != int:
+                raise TypeError
+            elif hidden_feature <= 0:
+                raise ValueError
+        if isinstance(layers_dropout, _typing.Sequence):
+            if len(layers_dropout) != (len(hidden_features) + 1):  # one entry per layer, output layer included
+                raise TypeError
+            for d in layers_dropout:
+                if d is not None and type(d) != float:
+                    raise TypeError
+            _layers_dropout: _typing.Sequence[_typing.Optional[float]] = layers_dropout
+        elif layers_dropout is None or type(layers_dropout) == float:
+            _layers_dropout: _typing.Sequence[_typing.Optional[float]] = [
+                layers_dropout for _ in range(len(hidden_features))
+            ] + [None]  # a scalar applies to hidden layers only; the output layer gets no dropout
+        else:
+            raise TypeError
+        if not type(activation_name) == type(aggr) == str:
+            raise TypeError
+        if aggr not in ("add", "max", "mean"):
+            aggr = "mean"  # unknown aggregators silently fall back to "mean"
+
+        if len(hidden_features) == 0:  # no hidden layer: a single layer maps features straight to classes
+            self.__sequential_encoding_layers: torch.nn.ModuleList = (
+                torch.nn.ModuleList(
+                    [
+                        self._SAGELayer(
+                            num_features,
+                            num_classes,
+                            aggr,
+                            activation_name,
+                            _layers_dropout[0],
+                        )
+                    ]
+                )
+            )
+        else:
+            self.__sequential_encoding_layers: torch.nn.ModuleList = (
+                torch.nn.ModuleList(
+                    [
+                        self._SAGELayer(
+                            num_features,
+                            hidden_features[0],
+                            aggr,
+                            activation_name,
+                            _layers_dropout[0],
+                        )
+                    ]
+                )
+            )
+            for i in range(len(hidden_features)):
+                if i + 1 < len(hidden_features):
+                    self.__sequential_encoding_layers.append(
+                        self._SAGELayer(
+                            hidden_features[i],
+                            hidden_features[i + 1],
+                            aggr,
+                            activation_name,
+                            _layers_dropout[i + 1],
+                        )
+                    )
+                else:  # output layer: no activation; dropout must go by keyword or it lands in the activation_name slot
+                    self.__sequential_encoding_layers.append(
+                        self._SAGELayer(
+                            hidden_features[i],
+                            num_classes,
+                            aggr,
+                            dropout_probability=_layers_dropout[i + 1],
+                        )
+                    )
+
+    @property
+    def sequential_encoding_layers(self) -> torch.nn.ModuleList:  # read-only view of the layer stack
+        return self.__sequential_encoding_layers
+
+    def cls_encode(self, data) -> torch.Tensor:
+        if (  # layer-wise mode: data.edge_indexes holds one edge index per layer
+            hasattr(data, "edge_indexes")
+            and isinstance(getattr(data, "edge_indexes"), _typing.Sequence)
+            and len(getattr(data, "edge_indexes"))
+            == len(self.__sequential_encoding_layers)
+        ):
+            for __edge_index in getattr(data, "edge_indexes"):
+                if type(__edge_index) != torch.Tensor:
+                    raise TypeError
+            """ Layer-wise encode """
+            x: torch.Tensor = getattr(data, "x")
+            for i, __edge_index in enumerate(getattr(data, "edge_indexes")):
+                x: torch.Tensor = self.__sequential_encoding_layers[i](
+                    autogl.data.Data(x=x, edge_index=__edge_index)
+                )
+            return x
+        else:  # otherwise every layer uses the same data.edge_index
+            x: torch.Tensor = getattr(data, "x")
+            for i in range(len(self.__sequential_encoding_layers)):
+                x = self.__sequential_encoding_layers[i](
+                    autogl.data.Data(x, getattr(data, "edge_index"))
+                )
+            return x
+
+    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.nn.functional.log_softmax(x, dim=1)  # log-probabilities over dim 1
+
+    def lp_encode(self, data):  # link prediction: use all layers but the last, final one without activation
+        x: torch.Tensor = getattr(data, "x")
+        for i in range(len(self.__sequential_encoding_layers) - 2):
+            x = self.__sequential_encoding_layers[i](
+                autogl.data.Data(x, getattr(data, "edge_index"))
+            )
+        x = self.__sequential_encoding_layers[-2](  # NOTE(review): raises IndexError when the model has a single layer
+            autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False
+        )
+        return x
+
+    def lp_decode(self, z, pos_edge_index, neg_edge_index):
+        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
+        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)  # inner product of the two endpoint embeddings
+        return logits
+
+    def lp_decode_all(self, z):
+        prob_adj = z @ z.t()  # pairwise inner products of all rows of z
+        return (prob_adj > 0).nonzero(as_tuple=False).t()  # index pairs whose score is positive
+
+
+@register_model("sage")  # registered under the name "sage" via register_model
+class AutoSAGE(BaseModel):
+    r"""
+    AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on
+    Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper. The layer is
+
+    .. math::
+
+        \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot
+        \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j
+
+    Parameters
+    ----------
+    num_features: `int`.
+        The dimension of features.
+
+    num_classes: `int`.
+        The number of classes.
+
+    device: `torch.device` or `str`
+        The device where model will be running on.
+
+    init: `bool`.
+        If True(False), the model will (not) be initialized.
+
+    """
+
+    def __init__(
+        self, num_features=None, num_classes=None, device=None, init=False, **args
+    ):  # **args is accepted but not read in this method
+
+        super(AutoSAGE, self).__init__()
+
+        self.num_features = num_features if num_features is not None else 0  # None falls back to 0
+        self.num_classes = int(num_classes) if num_classes is not None else 0  # None falls back to 0
+        self.device = device if device is not None else "cpu"
+        self.init = True  # NOTE(review): always True; the ``init`` argument only gates initialize() below
+
+        self.params = {
+            "features_num": self.num_features,
+            "num_class": self.num_classes,
+        }
+        self.space = [  # hyper-parameter search space, one dict per tunable parameter
+            {
+                "parameterName": "num_layers",
+                "type": "DISCRETE",
+                "feasiblePoints": "2,3,4",
+            },
+            {
+                "parameterName": "hidden",
+                "type": "NUMERICAL_LIST",
+                "numericalType": "INTEGER",
+                "length": 3,
+                "minValue": [8, 8, 8],
+                "maxValue": [128, 128, 128],
+                "scalingType": "LOG",
+                "cutPara": ("num_layers",),
+                "cutFunc": lambda x: x[0] - 1,  # presumably trims "hidden" to num_layers - 1 entries — TODO confirm against the HPO module
+            },
+            {
+                "parameterName": "dropout",
+                "type": "DOUBLE",
+                "maxValue": 0.8,
+                "minValue": 0.2,
+                "scalingType": "LINEAR",
+            },
+            {
+                "parameterName": "act",
+                "type": "CATEGORICAL",
+                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
+            },
+            {
+                "parameterName": "agg",
+                "type": "CATEGORICAL",
+                "feasiblePoints": ["mean", "add", "max"],
+            },
+        ]
+
+        self.hyperparams = {  # default hyper parameters used by initialize()
+            "num_layers": 3,
+            "hidden": [64, 32],
+            "dropout": 0.5,
+            "act": "relu",
+            "agg": "mean",
+        }
+
+        self.initialized = False
+        if init is True:  # only the literal True triggers eager initialisation
+            self.initialize()
+
+    def initialize(self):
+        if self.initialized:
+            return  # already built: do nothing on repeated calls
+        self.initialized = True
+        self.model = GraphSAGE(
+            self.num_features,
+            self.num_classes,
+            self.hyperparams.get("hidden"),
+            self.hyperparams.get("act", "relu"),
+            self.hyperparams.get("dropout", None),  # passed as GraphSAGE's layers_dropout
+            self.hyperparams.get("agg", "mean"),
+        ).to(self.device)
diff --git a/autogl/module/model/dgl/topkpool.py b/autogl/module/model/dgl/topkpool.py
new file mode 100644
index 0000000..c1872bd
--- /dev/null
+++ b/autogl/module/model/dgl/topkpool.py
@@ -0,0 +1,169 @@
+import torch
+import torch.nn.functional as F
+from torch_geometric.nn import GraphConv, TopKPooling
+from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
+from . import register_model
+from .base import BaseModel, activate_func
+from ....utils import get_logger
+
+LOGGER = get_logger("TopkModel")
+
+
+def set_default(args, d):
+    """Fill ``args`` in place with the entries of ``d`` it lacks; return ``args``."""
+    for key, fallback in d.items():
+        args.setdefault(key, fallback)
+    return args
+
+
+class Topkpool(torch.nn.Module):
+    """Three GraphConv/TopKPooling stages followed by a three-layer linear head.
+
+    ``args`` is a mapping that must contain ``features_num``, ``num_class``,
+    ``num_graph_features``, ``ratio``, ``dropout`` and ``act``.
+    """
+
+    _REQUIRED_KEYS = (
+        "features_num", "num_class", "num_graph_features", "ratio", "dropout", "act",
+    )
+
+    def __init__(self, args):
+        super(Topkpool, self).__init__()
+        self.args = args
+
+        # Keep declaration order so the error text is stable across runs.
+        present = set(self.args.keys())
+        missing_keys = [key for key in self._REQUIRED_KEYS if key not in present]
+        if missing_keys:
+            raise Exception("Missing keys: %s." % ",".join(missing_keys))
+
+        self.num_features = self.args["features_num"]
+        self.num_classes = self.args["num_class"]
+        self.ratio = self.args["ratio"]
+        self.dropout = self.args["dropout"]
+        self.num_graph_features = self.args["num_graph_features"]
+
+        self.conv1 = GraphConv(self.num_features, 128)
+        self.pool1 = TopKPooling(128, ratio=self.ratio)
+        self.conv2 = GraphConv(128, 128)
+        self.pool2 = TopKPooling(128, ratio=self.ratio)
+        self.conv3 = GraphConv(128, 128)
+        self.pool3 = TopKPooling(128, ratio=self.ratio)
+
+        # 256 = max-pool (128) concatenated with mean-pool (128) readout.
+        self.lin1 = torch.nn.Linear(256 + self.num_graph_features, 128)
+        self.lin2 = torch.nn.Linear(128, 64)
+        self.lin3 = torch.nn.Linear(64, self.num_classes)
+
+    def forward(self, data):
+        """Return log-softmax class scores for the batched graphs in ``data``.
+
+        Reads ``data.x``, ``data.edge_index`` and ``data.batch``; ``data.gf`` is
+        read only when ``num_graph_features`` is positive.
+        """
+        x, edge_index, batch = data.x, data.edge_index, data.batch
+        if self.num_graph_features > 0:
+            graph_feature = data.gf
+
+        stages = (
+            (self.conv1, self.pool1), (self.conv2, self.pool2), (self.conv3, self.pool3),
+        )
+        readouts = []
+        for conv, pool in stages:
+            x = F.relu(conv(x, edge_index))
+            x, edge_index, _, batch, _, _ = pool(x, edge_index, None, batch)
+            # Stage readout: global max-pool and mean-pool side by side.
+            readouts.append(torch.cat([gmp(x, batch), gap(x, batch)], dim=1))
+
+        x = readouts[0] + readouts[1] + readouts[2]
+        if self.num_graph_features > 0:
+            x = torch.cat([x, graph_feature], dim=-1)
+        x = activate_func(self.lin1(x), self.args["act"])
+        x = F.dropout(x, p=self.dropout, training=self.training)
+        x = activate_func(self.lin2(x), self.args["act"])
+        return F.log_softmax(self.lin3(x), dim=-1)
+
+
+@register_model("topkpool")
+class AutoTopkpool(BaseModel):
+    r"""
+    AutoTopkpool. The model used in this automodel is from https://arxiv.org/abs/1905.05178, https://arxiv.org/abs/1905.02850
+
+    Parameters
+    ----------
+    num_features: `int`.
+        The dimension of features.
+
+    num_classes: `int`.
+        The number of classes.
+
+    device: `torch.device` or `str`
+        The device where model will be running on.
+
+    init: `bool`.
+        If True(False), the model will (not) be initialized.
+
+    num_graph_features: `int`.
+        Width of the graph-level feature vector; 0 when omitted.
+
+    """
+
+    def __init__(
+        self,
+        num_features=None,
+        num_classes=None,
+        device=None,
+        init=False,
+        num_graph_features=None,
+        **args
+    ):
+        super(AutoTopkpool, self).__init__()
+        debug_text = "topkpool __init__ get params num_graph_features {}"
+        LOGGER.debug(debug_text.format(num_graph_features))
+        # Unspecified sizes fall back to 0, an unspecified device to "cpu".
+        self.num_features = 0 if num_features is None else num_features
+        self.num_classes = 0 if num_classes is None else int(num_classes)
+        self.num_graph_features = (
+            0 if num_graph_features is None else int(num_graph_features)
+        )
+        self.device = "cpu" if device is None else device
+        self.init = True
+
+        # Dataset-derived settings passed to Topkpool alongside the hyperparameters.
+        self.params = dict(
+            features_num=self.num_features,
+            num_class=self.num_classes,
+            num_graph_features=self.num_graph_features,
+        )
+        # Search space: two DOUBLE ranges followed by the activation choice.
+        self.space = [
+            {
+                "parameterName": name,
+                "type": "DOUBLE",
+                "maxValue": 0.9,
+                "minValue": 0.1,
+                "scalingType": "LINEAR",
+            }
+            for name in ("ratio", "dropout")
+        ] + [
+            {
+                "parameterName": "act",
+                "type": "CATEGORICAL",
+                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
+            },
+        ]
+
+        # Default value for each entry of ``space``.
+        self.hyperparams = {"ratio": 0.8, "dropout": 0.5, "act": "relu"}
+
+        self.initialized = False
+        if init is True:
+            self.initialize()
+
+    def initialize(self):
+        """Instantiate Topkpool once and move it to ``self.device``."""
+        if not self.initialized:
+            self.initialized = True
+            LOGGER.debug("topkpool initialize with parameters {}".format(self.params))
+            merged = {**self.params, **self.hyperparams}
+            self.model = Topkpool(merged).to(self.device)
diff --git a/test/model_glf/gclf_dgl.py b/test/model_glf/gclf_dgl.py
new file mode 100644
index 0000000..a3d16b6
--- /dev/null
+++ b/test/model_glf/gclf_dgl.py
@@ -0,0 +1,176 @@
+import os
+import sys
+import logging
+logging.basicConfig(level=logging.INFO)
+from tqdm import tqdm
+
+sys.path.append("../../")  # two levels above the cwd: the repo root when run from test/model_glf
+print(os.getcwd())
+os.environ["AUTOGL_BACKEND"] = "dgl"  # presumably read by autogl.backend on import -- TODO confirm
+#os.environ["AUTOGL_BACKEND"] = "pyg"
+from autogl.backend import DependentBackend
+import dgl
+from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset, GINDataset
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+
+from autogl.module.model.ginparser import Parser
+from autogl.module.model.dataloader_gin import GINDataLoader
+from autogl.module.model import GIN
+
+from pdb import set_trace
+import numpy as np
+from autogl.solver.utils import set_seed
+set_seed(202106)
+
+
+def train(args, net, trainloader, optimizer, criterion, epoch):
+    """Run one optimisation pass over ``trainloader``.
+
+    Returns the sum of the per-batch losses divided by ``len(trainloader)``.
+    """
+    net.train()
+
+    num_batches = len(trainloader)
+    loss_sum = 0
+    # position=2 keeps this bar clear of the epoch bars main() draws at 3-5
+    progress = tqdm(range(num_batches), unit='batch', position=2, file=sys.stdout)
+
+    for _, (graphs, labels) in zip(progress, trainloader):
+        labels = labels.to(args.device)
+        graphs = graphs.to(args.device)
+        # 'attr' is popped from the node data and passed to the network explicitly
+        feat = graphs.ndata.pop('attr')
+        loss = criterion(net(graphs, feat), labels)
+        loss_sum += loss.item()
+
+        # backprop
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        # report
+        progress.set_description('epoch-{}'.format(epoch))
+    progress.close()
+
+    return loss_sum / num_batches
+
+
+def eval_net(args, net, dataloader, criterion):
+    """Return ``(mean loss, accuracy)`` of ``net`` over ``dataloader``.
+
+    Runs in eval mode without autograd tracking and leaves ``net`` in train
+    mode afterwards.
+    """
+    net.eval()
+
+    total = 0
+    total_loss = 0
+    total_correct = 0
+
+    # Evaluation needs no gradients; skipping the autograd graph saves memory.
+    with torch.no_grad():
+        for graphs, labels in dataloader:
+            graphs = graphs.to(args.device)
+            labels = labels.to(args.device)
+            outputs = net(graphs, graphs.ndata.pop('attr'))
+            predicted = torch.max(outputs, 1)[1]
+            total += len(labels)
+            total_correct += (predicted == labels).sum().item()
+            # assumes criterion returns the batch mean, so re-weight by batch size
+            total_loss += criterion(outputs, labels).item() * len(labels)
+
+    net.train()
+    return 1.0*total_loss / total, 1.0*total_correct / total
+
+
+def main(args):
+    """Train a GIN model on a ``GINDataset`` and validate it after every epoch.
+
+    ``args.device`` is overwritten with the resolved ``torch.device``. When
+    ``args.filename`` is non-empty, the settings and metrics are appended to it
+    after each epoch.
+    """
+    # set up seeds, args.seed supported
+    torch.manual_seed(seed=args.seed)
+    np.random.seed(seed=args.seed)
+
+    if not args.disable_cuda and torch.cuda.is_available():
+        args.device = torch.device("cuda:" + str(args.device))
+        torch.cuda.manual_seed_all(seed=args.seed)
+    else:
+        args.device = torch.device("cpu")
+
+    dataset = GINDataset(args.dataset, not args.learn_eps)
+
+    # or split_name='rand', split_ratio=0.7
+    loaders = GINDataLoader(
+        dataset, batch_size=args.batch_size, device=args.device,
+        seed=args.seed, shuffle=True,
+        split_name='fold10', fold_idx=args.fold_idx)
+    trainloader, validloader = loaders.train_valid_loader()
+
+    model = GIN(
+        args.num_layers, args.num_mlp_layers,
+        dataset.dim_nfeats, args.hidden_dim, dataset.gclasses,
+        args.final_dropout, args.learn_eps,
+        args.graph_pooling_type, args.neighbor_pooling_type)
+    model = model.to(args.device)
+
+    criterion = nn.CrossEntropyLoss()  # default reduce is true
+    optimizer = optim.Adam(model.parameters(), lr=args.lr)
+    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
+
+    # it's not cost-effective to handle the cursor and init 0
+    # https://stackoverflow.com/a/23121189
+    def epoch_bar(row):
+        return tqdm(
+            range(args.epochs), unit="epoch", position=row, ncols=0, file=sys.stdout)
+
+    tbar, vbar, lrbar = epoch_bar(3), epoch_bar(4), epoch_bar(5)
+
+    for epoch, _, _ in zip(tbar, vbar, lrbar):
+        train(args, model, trainloader, optimizer, criterion, epoch)
+        scheduler.step()
+
+        train_loss, train_acc = eval_net(args, model, trainloader, criterion)
+        tbar.set_description(
+            'train set - average loss: {:.4f}, accuracy: {:.0f}%'
+            .format(train_loss, 100. * train_acc))
+
+        valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
+        vbar.set_description(
+            'valid set - average loss: {:.4f}, accuracy: {:.0f}%'
+            .format(valid_loss, 100. * valid_acc))
+
+        if args.filename != "":
+            # one line of settings, then one line of metrics, per epoch
+            with open(args.filename, 'a') as f:
+                f.write('%s %s %s %s' % (
+                    args.dataset,
+                    args.learn_eps,
+                    args.neighbor_pooling_type,
+                    args.graph_pooling_type
+                ))
+                f.write("\n")
+                f.write("%f %f %f %f" % (
+                    train_loss, train_acc, valid_loss, valid_acc))
+                f.write("\n")
+
+        eps_values = [layer.eps.data.item() for layer in model.ginlayers]
+        lrbar.set_description(
+            "Learning eps with learn_eps={}: {}".format(args.learn_eps, eps_values))
+
+    for bar in (tbar, vbar, lrbar):
+        bar.close()
+
+
+if __name__ == '__main__':
+    # Script entry point: build the run configuration with Parser, echo it,
+    # then hand it to main().
+    args = Parser(description='GIN').args
+    print('show all arguments configuration...', args, sep='\n')
+    main(args)
+