diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index 965c306..9695eb5 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -294,13 +294,15 @@ class ClassificationModel(_BaseModel): num_classes: int = ..., num_graph_features: int = ..., device: _typing.Union[str, torch.device] = ..., + hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., init: bool = False, **kwargs ): if "initialize" in kwargs: del kwargs["initialize"] super(ClassificationModel, self).__init__( - initialize=init, device=device, **kwargs + initialize=init, hyper_parameter_space=hyper_parameter_space, + device=device, **kwargs ) if num_classes != Ellipsis and type(num_classes) == int: self.__num_classes: int = num_classes if num_classes > 0 else 0 diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 73b91e0..dbe0f8a 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -17,6 +17,7 @@ class GCN(torch.nn.Module): hidden_features: _typing.Sequence[int], dropout: float, activation_name: str, + add_self_loops: bool = True ): super().__init__() self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() @@ -24,13 +25,13 @@ class GCN(torch.nn.Module): if num_layers == 1: self.__convolution_layers.append( torch_geometric.nn.GCNConv( - num_features, num_classes, add_self_loops=False + num_features, num_classes, add_self_loops=add_self_loops ) ) else: self.__convolution_layers.append( torch_geometric.nn.GCNConv( - num_features, hidden_features[0], add_self_loops=False + num_features, hidden_features[0], add_self_loops=add_self_loops ) ) for i in range(len(hidden_features)): @@ -44,11 +45,31 @@ class GCN(torch.nn.Module): self.__dropout: float = dropout self.__activation_name: str = activation_name - def __layer_wise_forward(self, data): - # todo: Implement this forward method - # in case that data.edge_indexes property is provided - # for Layer-wise and Node-wise sampled training - raise NotImplementedError + def __layer_wise_forward( + self, x: torch.Tensor, + edge_indexes: _typing.Sequence[torch.Tensor], + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] + ) -> torch.Tensor: + assert len(edge_indexes) == len(edge_weights) == len(self.__convolution_layers) + for edge_index in edge_indexes: + if type(edge_index) != torch.Tensor: + raise TypeError + if edge_index.size(0) != 2: + raise ValueError + for edge_weight in edge_weights: + if not (edge_weight is None or type(edge_weight) == torch.Tensor): + raise TypeError + + for layer_index in range(len(self.__convolution_layers)): + x: torch.Tensor = self.__convolution_layers[layer_index]( + x, edge_indexes[layer_index], edge_weights[layer_index] + ) + if layer_index + 1 < len(self.__convolution_layers): + x = activate_func(x, self.__activation_name) + x = torch.nn.functional.dropout( + x, p=self.__dropout, training=self.training + ) + return torch.nn.functional.log_softmax(x, dim=1) def __basic_forward( self, @@ -68,8 +89,27 @@ class GCN(torch.nn.Module): return torch.nn.functional.log_softmax(x, dim=1) def forward(self, data) -> torch.Tensor: - if hasattr(data, "edge_indexes") and getattr(data, "edge_indexes") is not None: - return self.__layer_wise_forward(data) + if ( + hasattr(data, "edge_indexes") and + isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and + len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) + ): + edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") + if ( + hasattr(data, "edge_weights") and + 
isinstance(getattr(data, "edge_weights"), _typing.Sequence) and + len(getattr(data, "edge_weights")) == len(self.__convolution_layers) + ): + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = ( + getattr(data, "edge_weights") + ) + else: + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = [ + None for _ in range(len(self.__convolution_layers)) + ] + return self.__layer_wise_forward( + getattr(data, "x"), edge_indexes, edge_weights + ) else: if not (hasattr(data, "x") and hasattr(data, "edge_index")): raise AttributeError @@ -133,8 +173,45 @@ class AutoGCN(ClassificationModel): init: bool = False, **kwargs ) -> None: + default_hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [ + { + "parameterName": "add_self_loops", + "type": "CATEGORICAL", + "feasiblePoints": [1], + }, + { + "parameterName": "num_layers", + "type": "DISCRETE", + "feasiblePoints": "2,3,4", + }, + { + "parameterName": "hidden", + "type": "NUMERICAL_LIST", + "numericalType": "INTEGER", + "length": 3, + "minValue": [8, 8, 8], + "maxValue": [128, 128, 128], + "scalingType": "LOG", + "cutPara": ("num_layers",), + "cutFunc": lambda x: x[0] - 1, + }, + { + "parameterName": "dropout", + "type": "DOUBLE", + "maxValue": 0.8, + "minValue": 0.2, + "scalingType": "LINEAR", + }, + { + "parameterName": "act", + "type": "CATEGORICAL", + "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], + }, + ] + super(AutoGCN, self).__init__( - num_features, num_classes, device=device, init=init, **kwargs + num_features, num_classes, device=device, + hyper_parameter_space=default_hp_space, init=init, **kwargs ) def _initialize(self): @@ -144,4 +221,8 @@ class AutoGCN(ClassificationModel): self.hyper_parameter.get("hidden"), self.hyper_parameter.get("dropout"), self.hyper_parameter.get("act"), + add_self_loops=( + "add_self_loops" in self.hyper_parameter + and self.hyper_parameter.get("add_self_loops") + ) ).to(self.device) diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 2fe0450..1405b20 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -1,10 +1,10 @@ import typing as _typing import torch -import torch.nn.functional as F +import torch.nn.functional from torch_geometric.nn.conv import SAGEConv from . 
import register_model -from .base import BaseModel, activate_func +from .base import ClassificationModel, activate_func class GraphSAGE(torch.nn.Module): @@ -15,8 +15,7 @@ class GraphSAGE(torch.nn.Module): hidden_features: _typing.Sequence[int], dropout: float, activation_name: str, - aggr: str = "mean", - **kwargs + aggr: str = "mean" ): super(GraphSAGE, self).__init__() if type(aggr) != str: @@ -47,90 +46,173 @@ class GraphSAGE(torch.nn.Module): self.__dropout: float = dropout self.__activation_name: str = activation_name - def __full_forward(self, data): - x: torch.Tensor = getattr(data, "x") - edge_index: torch.Tensor = getattr(data, "edge_index") + def __basic_forward( + self, + x: torch.Tensor, + edge_index: torch.Tensor, + edge_weight: _typing.Optional[torch.Tensor] = None, + ) -> torch.Tensor: for layer_index in range(len(self.__convolution_layers)): - x: torch.Tensor = self.__convolution_layers[layer_index](x, edge_index) + x: torch.Tensor = self.__convolution_layers[layer_index]( + x, edge_index, edge_weight + ) if layer_index + 1 < len(self.__convolution_layers): x = activate_func(x, self.__activation_name) - x = F.dropout(x, p=self.__dropout, training=self.training) - return F.log_softmax(x, dim=1) - - def __distributed_forward(self, data): - x: torch.Tensor = getattr(data, "x") - edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") - if len(edge_indexes) != len(self.__convolution_layers): - raise AttributeError + x = torch.nn.functional.dropout( + x, p=self.__dropout, training=self.training + ) + return torch.nn.functional.log_softmax(x, dim=1) + + def __layer_wise_forward( + self, x: torch.Tensor, + edge_indexes: _typing.Sequence[torch.Tensor], + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] + ) -> torch.Tensor: + assert len(edge_indexes) == len(edge_weights) == len(self.__convolution_layers) + for edge_index in edge_indexes: + if type(edge_index) != torch.Tensor: + raise TypeError + if edge_index.size(0) != 2: + raise ValueError + for edge_weight in edge_weights: + if not (edge_weight is None or type(edge_weight) == torch.Tensor): + raise TypeError + for layer_index in range(len(self.__convolution_layers)): x: torch.Tensor = self.__convolution_layers[layer_index]( x, edge_indexes[layer_index] ) if layer_index + 1 < len(self.__convolution_layers): x = activate_func(x, self.__activation_name) - x = F.dropout(x, p=self.__dropout, training=self.training) - return F.log_softmax(x, dim=1) + x = torch.nn.functional.dropout(x, p=self.__dropout, training=self.training) + return torch.nn.functional.log_softmax(x, dim=1) - def forward(self, data): + def forward(self, data) -> torch.Tensor: if ( - hasattr(data, "edge_indexes") - and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) - and len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) + hasattr(data, "edge_indexes") and + isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and + len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) ): - return self.__distributed_forward(data) + edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") + if ( + hasattr(data, "edge_weights") and + isinstance(getattr(data, "edge_weights"), _typing.Sequence) and + len(getattr(data, "edge_weights")) == len(self.__convolution_layers) + ): + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = ( + getattr(data, "edge_weights") + ) + else: + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = [ + None for _ in 
range(len(self.__convolution_layers)) + ] + return self.__layer_wise_forward( + getattr(data, "x"), edge_indexes, edge_weights + ) else: - return self.__full_forward(data) + if not (hasattr(data, "x") and hasattr(data, "edge_index")): + raise AttributeError + if not ( + type(getattr(data, "x")) == torch.Tensor + and type(getattr(data, "edge_index")) == torch.Tensor + ): + raise TypeError + x: torch.Tensor = getattr(data, "x") + edge_index: torch.LongTensor = getattr(data, "edge_index") + if ( + hasattr(data, "edge_weight") + and type(getattr(data, "edge_weight")) == torch.Tensor + and getattr(data, "edge_weight").size() == (edge_index.size(1),) + ): + edge_weight: _typing.Optional[torch.Tensor] = getattr( + data, "edge_weight" + ) + else: + edge_weight: _typing.Optional[torch.Tensor] = None + return self.__basic_forward(x, edge_index, edge_weight) @register_model("sage") -class AutoSAGE(BaseModel): - def __init__( - self, - num_features: int = 1, - num_classes: int = 1, - device: _typing.Optional[torch.device] = torch.device("cpu"), - init: bool = False, - **kwargs - ): - super(AutoSAGE, self).__init__(init) - self.__num_features: int = num_features - self.__num_classes: int = num_classes - self.__device: torch.device = ( - device if device is not None else torch.device("cpu") - ) +class AutoSAGE(ClassificationModel): + r""" + AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on + Large Graphs" `_ paper. The layer is - self.hyperparams = { - "num_layers": 3, - "hidden": [64, 32], - "dropout": 0.5, - "act": "relu", - "aggr": "mean", - } - self.params = { - "num_features": self.__num_features, - "num_classes": self.__num_classes, - } - - self._model: GraphSAGE = GraphSAGE( - self.__num_features, self.__num_classes, [64, 32], 0.5, "relu" - ) + .. math:: + + \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot + \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j + + Parameters + ---------- + num_features: `int`. + The dimension of features. + + num_classes: `int`. + The number of classes. - self._initialized: bool = False - if init: - self.initialize() + device: `torch.device` or `str` + The device where model will be running on. - @property - def model(self) -> GraphSAGE: - return self._model + init: `bool`. + If True(False), the model will (not) be initialized. 
+ """ + + def __init__( + self, + num_features: int = ..., + num_classes: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, + **kwargs + ): + default_hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [ + { + "parameterName": "num_layers", + "type": "DISCRETE", + "feasiblePoints": "2,3,4", + }, + { + "parameterName": "hidden", + "type": "NUMERICAL_LIST", + "numericalType": "INTEGER", + "length": 3, + "minValue": [8, 8, 8], + "maxValue": [128, 128, 128], + "scalingType": "LOG", + "cutPara": ("num_layers",), + "cutFunc": lambda x: x[0] - 1, + }, + { + "parameterName": "dropout", + "type": "DOUBLE", + "maxValue": 0.8, + "minValue": 0.2, + "scalingType": "LINEAR", + }, + { + "parameterName": "act", + "type": "CATEGORICAL", + "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], + }, + { + "parameterName": "aggr", + "type": "CATEGORICAL", + "feasiblePoints": ["mean", "add", "max"], + }, + ] + super(AutoSAGE, self).__init__( + num_features, num_classes, device=device, + hyper_parameter_space=default_hp_space, init=init, **kwargs + ) - def initialize(self): + def _initialize(self): """ Initialize model """ - if not self._initialized: - self._model: GraphSAGE = GraphSAGE( - self.__num_features, - self.__num_classes, - hidden_features=self.hyperparams["hidden"], - activation_name=self.hyperparams["act"], - **self.hyperparams - ).to(self.__device) - self._initialized = True + self.model = GraphSAGE( + self.num_features, + self.num_classes, + self.hyper_parameter.get("hidden"), + self.hyper_parameter.get("dropout"), + self.hyper_parameter.get("act"), + self.hyper_parameter.get("aggr") + ).to(self.device) diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py deleted file mode 100644 index ac541b8..0000000 --- a/autogl/module/model/graphsage.py +++ /dev/null @@ -1,270 +0,0 @@ -import torch -from . import register_model -from .base import BaseModel, activate_func - -from typing import Union, Tuple -from torch_geometric.typing import OptPairTensor, Adj, Size - -from torch import Tensor -from torch.nn import Linear -import torch.nn.functional as F -from torch_sparse import SparseTensor, matmul -from torch_geometric.nn.conv import MessagePassing -from ...utils import get_logger - -LOGGER = get_logger("SAGEModel") - - -class SAGEConv(MessagePassing): - r"""Modified from SAGEConv in Pytorch Geometric - The GraphSAGE operator from the `"Inductive Representation Learning on - Large Graphs" `_ paper - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot - \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j - Args: - in_channels (int or tuple): Size of each input sample. A tuple - corresponds to the sizes of source and target dimensionalities. - out_channels (int): Size of each output sample. - normalize (bool, optional): If set to :obj:`True`, output features - will be :math:`\ell_2`-normalized, *i.e.*, - :math:`\frac{\mathbf{x}^{\prime}_i} - {\| \mathbf{x}^{\prime}_i \|_2}`. - (default: :obj:`False`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - **kwargs (optional): Additional arguments of - :class:`torch_geometric.nn.conv.MessagePassing`. 
- """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - normalize: bool = False, - bias: bool = True, - aggr: str = "mean", - **kwargs - ): - super(SAGEConv, self).__init__(aggr=aggr, **kwargs) - - self.in_channels = in_channels - self.out_channels = out_channels - self.normalize = normalize - - if isinstance(in_channels, int): - in_channels = (in_channels, in_channels) - - self.lin_l = Linear(in_channels[0], out_channels, bias=bias) - self.lin_r = Linear(in_channels[1], out_channels, bias=False) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_l.reset_parameters() - self.lin_r.reset_parameters() - - def forward( - self, x: Union[Tensor, OptPairTensor], edge_index: Adj, size: Size = None - ) -> Tensor: - """""" - if isinstance(x, Tensor): - x: OptPairTensor = (x, x) - - # propagate_type: (x: OptPairTensor) - out = self.propagate(edge_index, x=x, size=size) - out = self.lin_l(out) - - x_r = x[1] - if x_r is not None: - out += self.lin_r(x_r) - - if self.normalize: - out = F.normalize(out, p=2.0, dim=-1) - - return out - - def message(self, x_j: Tensor) -> Tensor: - return x_j - - def message_and_aggregate(self, adj_t: SparseTensor, x: OptPairTensor) -> Tensor: - adj_t = adj_t.set_value(None, layout=None) - return matmul(adj_t, x[0], reduce=self.aggr) - - def __repr__(self): - return "{}({}, {})".format( - self.__class__.__name__, self.in_channels, self.out_channels - ) - - -def set_default(args, d): - for k, v in d.items(): - if k not in args: - args[k] = v - return args - - -class GraphSAGE(torch.nn.Module): - def __init__(self, args): - super(GraphSAGE, self).__init__() - self.args = args - agg = self.args["agg"] - self.num_layer = int(self.args["num_layers"]) - if not self.num_layer == len(self.args["hidden"]) + 1: - LOGGER.warn("Warning: layer size does not match the length of hidden units") - - missing_keys = list( - set( - [ - "features_num", - "num_class", - "num_layers", - "hidden", - "dropout", - "act", - "agg", - ] - ) - - set(self.args.keys()) - ) - if len(missing_keys) > 0: - raise Exception("Missing keys: %s." % ",".join(missing_keys)) - - self.convs = torch.nn.ModuleList() - self.convs.append( - SAGEConv(self.args["features_num"], self.args["hidden"][0], aggr=agg) - ) - for i in range(self.num_layer - 2): - self.convs.append( - SAGEConv(self.args["hidden"][i], self.args["hidden"][i + 1], aggr=agg) - ) - self.convs.append( - SAGEConv( - self.args["hidden"][self.num_layer - 2], - self.args["num_class"], - aggr=agg, - ) - ) - - def forward(self, data): - try: - x = data.x - except: - print("no x") - pass - try: - edge_index = data.edge_index - except: - print("no index") - pass - try: - edge_weight = data.edge_weight - except: - edge_weight = None - pass - - for i in range(self.num_layer): - x = self.convs[i](x, edge_index, edge_weight) - if i != self.num_layer - 1: - x = activate_func(x, self.args["act"]) - x = F.dropout(x, p=self.args["dropout"], training=self.training) - - return F.log_softmax(x, dim=1) - - -# @register_model("sage") -class AutoSAGE(BaseModel): - r""" - AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on - Large Graphs" `_ paper. The layer is - - .. math:: - - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot - \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j - - Parameters - ---------- - num_features: `int`. - The dimension of features. - - num_classes: `int`. - The number of classes. 
- - device: `torch.device` or `str` - The device where model will be running on. - - init: `bool`. - If True(False), the model will (not) be initialized. - - """ - - def __init__( - self, num_features=None, num_classes=None, device=None, init=False, **args - ): - - super(AutoSAGE, self).__init__() - - self.num_features = num_features if num_features is not None else 0 - self.num_classes = int(num_classes) if num_classes is not None else 0 - self.device = device if device is not None else "cpu" - self.init = True - - self.params = { - "features_num": self.num_features, - "num_class": self.num_classes, - } - self.space = [ - { - "parameterName": "num_layers", - "type": "DISCRETE", - "feasiblePoints": "2,3,4", - }, - { - "parameterName": "hidden", - "type": "NUMERICAL_LIST", - "numericalType": "INTEGER", - "length": 3, - "minValue": [8, 8, 8], - "maxValue": [128, 128, 128], - "scalingType": "LOG", - "cutPara": ("num_layers",), - "cutFunc": lambda x: x[0] - 1, - }, - { - "parameterName": "dropout", - "type": "DOUBLE", - "maxValue": 0.8, - "minValue": 0.2, - "scalingType": "LINEAR", - }, - { - "parameterName": "act", - "type": "CATEGORICAL", - "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], - }, - { - "parameterName": "agg", - "type": "CATEGORICAL", - "feasiblePoints": ["mean", "add", "max"], - }, - ] - - self.hyperparams = { - "num_layers": 3, - "hidden": [64, 32], - "dropout": 0.5, - "act": "relu", - "agg": "mean", - } - - self.initialized = False - if init is True: - self.initialize() - - def initialize(self): - # """Initialize model.""" - if self.initialized: - return - self.initialized = True - self.model = GraphSAGE({**self.params, **self.hyperparams}).to(self.device) diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index cf91fc6..8f727f9 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -9,6 +9,9 @@ from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation from ..evaluation import get_feval, Logloss from ..sampling.sampler.neighbor_sampler import NeighborSampler from ..sampling.sampler.graphsaint_sampler import * +from ..sampling.sampler.layer_dependent_importance_sampler import ( + LayerDependentImportanceSampler +) from ...model import BaseModel LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") @@ -366,7 +369,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): model: _typing.Union[BaseModel], num_features: int, num_classes: int, - optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None], + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., lr: float = 1e-4, max_epoch: int = 100, early_stopping_round: int = 100, @@ -428,30 +431,16 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ) """ Set hyper parameters """ - if "num_subgraphs" not in kwargs: - raise KeyError - elif type(kwargs.get("num_subgraphs")) != int: - raise TypeError - elif not kwargs.get("num_subgraphs") > 0: - raise ValueError - else: - self.__num_subgraphs: int = kwargs.get("num_subgraphs") - if "sampling_budget" not in kwargs: - raise KeyError - elif type(kwargs.get("sampling_budget")) != int: - raise TypeError - elif not kwargs.get("sampling_budget") > 0: - raise ValueError + 
self.__num_subgraphs: int = kwargs.get("num_subgraphs") + self.__sampling_budget: int = kwargs.get("sampling_budget") + if ( + kwargs.get("sampling_method") is not None + and type(kwargs.get("sampling_method")) == str + and kwargs.get("sampling_method") in ("node", "edge") + ): + self.__sampling_method_identifier: str = kwargs.get("sampling_method") else: - self.__sampling_budget: int = kwargs.get("sampling_budget") - if "sampling_method" not in kwargs: - self.__sampling_method_identifier: str = "node" - elif type(kwargs.get("sampling_method")) != str: self.__sampling_method_identifier: str = "node" - else: - self.__sampling_method_identifier: str = kwargs.get("sampling_method") - if self.__sampling_method_identifier.lower() not in ("node", "edge"): - self.__sampling_method_identifier: str = "node" self.__is_initialized: bool = False if init: @@ -480,7 +469,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): """ data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( - self.model.parameters(), + self.model.model.parameters(), lr=self._learning_rate, weight_decay=self._weight_decay, ) @@ -694,7 +683,9 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): if return_major: return self._valid_score[0], self.feval[0].is_higher_better() else: - return (self._valid_score, [f.is_higher_better() for f in self.feval]) + return ( + self._valid_score, [f.is_higher_better() for f in self.feval] + ) @property def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: @@ -759,3 +750,377 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): lr_scheduler_type=self._lr_scheduler_type, **hp, ) + + +@register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer") +class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer): + def __init__( + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, + ) -> None: + if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer + elif type(optimizer) == str: + if optimizer.lower() == "adam": + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + elif optimizer.lower() == "adam" + "w": + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.AdamW + elif optimizer.lower() == "sgd": + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.SGD + else: + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + else: + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + self._learning_rate: float = lr if lr > 0 else 1e-4 + self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type + self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 + self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 + self._early_stopping = EarlyStopping( + 
patience=early_stopping_round if early_stopping_round > 0 else 1e2, + verbose=False + ) + """ Assign an empty initial hyper parameter space """ + self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] + + self._valid_result: torch.Tensor = torch.zeros(0) + self._valid_result_prob: torch.Tensor = torch.zeros(0) + self._valid_score: _typing.Sequence[float] = () + + super(NodeClassificationLayerDependentImportanceSamplingTrainer, self).__init__( + model, num_features, num_classes, device, init, feval, loss + ) + + """ Set hyper parameters """ + " Configure num_layers " + self.__num_layers: int = kwargs.get("num_layers") + " Configure sampled_node_size_budget " + self.__sampled_node_size_budget: int = ( + kwargs.get("sampled_node_size_budget") + ) + + self.__is_initialized: bool = False + if init: + self.initialize() + + def initialize(self): + if self.__is_initialized: + return self + self.model.initialize() + self.__is_initialized = True + return self + + def to(self, device: torch.device): + self.device = device + if self.model is not None: + self.model.to(self.device) + + def get_model(self): + return self.model + + def __train_only(self, data): + """ + The function of training on the given dataset and mask. + :param data: data of a specific graph + :return: self + """ + optimizer: torch.optim.Optimizer = self._optimizer_class( + self.model.model.parameters(), + lr=self._learning_rate, + weight_decay=self._weight_decay + ) + + if type(self._lr_scheduler_type) == str: + if self._lr_scheduler_type.lower() == "step" + "lr": + lr_scheduler: torch.optim.lr_scheduler.StepLR = ( + torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) + ) + elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( + torch.optim.lr_scheduler.MultiStepLR( + optimizer, milestones=[30, 80], gamma=0.1 + ) + ) + elif self._lr_scheduler_type.lower() == "exponential" + "lr": + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( + torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) + ) + elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( + torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + ) + else: + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( + torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) + else: + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( + torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) + + sampled_node_size_budget: int = self.__sampled_node_size_budget + num_layers: int = self.__num_layers + + __layer_dependent_importance_sampler: LayerDependentImportanceSampler = ( + LayerDependentImportanceSampler(data.edge_index) + ) + __top_layer_target_nodes_indexes: torch.LongTensor = ( + torch.where(data.train_mask)[0].unique() + ) + for current_epoch in range(self._max_epoch): + self.model.model.train() + optimizer.zero_grad() + """ epoch start """ + " sample graphs " + __layers: _typing.Sequence[ + _typing.Tuple[torch.Tensor, torch.Tensor] + ] = __layer_dependent_importance_sampler.sample( + __top_layer_target_nodes_indexes, + [sampled_node_size_budget for _ in range(num_layers)] + ) + data.edge_indexes = [layer[0] for layer in __layers] + data.edge_weights = [layer[1] for layer in __layers] + data = data.to(self.device) + + result: torch.Tensor = self.model.model.forward(data) + if hasattr(torch.nn.functional, self.loss): + loss_function = getattr( + 
torch.nn.functional, self.loss + ) + loss_value: torch.Tensor = loss_function( + result[data.train_mask], + data.y[data.train_mask] + ) + else: + raise TypeError( + f"PyTorch does not support loss type {self.loss}" + ) + + loss_value.backward() + optimizer.step() + if self._lr_scheduler_type: + lr_scheduler.step() + + if ( + hasattr(data, "val_mask") and + getattr(data, "val_mask") is not None and + type(getattr(data, "val_mask")) == torch.Tensor + ): + validation_results: _typing.Sequence[float] = self.evaluate( + (data,), "val", [self.feval[0]] + ) + if self.feval[0].is_higher_better(): + validation_loss: float = -validation_results[0] + else: + validation_loss: float = validation_results[0] + self._early_stopping(validation_loss, self.model.model) + if self._early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", current_epoch) + break + if ( + hasattr(data, "val_mask") and + getattr(data, "val_mask") is not None and + type(getattr(data, "val_mask")) == torch.Tensor + ): + self._early_stopping.load_checkpoint(self.model.model) + + def __predict_only(self, data) -> torch.Tensor: + """ + The function of predicting on the given data. + :param data: data of a specific graph + :return: the result of prediction on the given dataset + """ + data = data.to(self.device) + self.model.model.eval() + with torch.no_grad(): + predicted_x: torch.Tensor = self.model.model(data) + return predicted_x + + def predict_proba( + self, dataset, mask: _typing.Optional[str]=None, + in_log_format: bool=False + ): + """ + The function of predicting the probability on the given dataset. + :param dataset: The node classification dataset used to be predicted. + :param mask: + :param in_log_format: + :return: + """ + data = dataset[0].to(self.device) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + result = self.__predict_only(data)[_mask] + return result if in_log_format else torch.exp(result) + + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: + return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] + + def evaluate( + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, + ) -> _typing.Sequence[float]: + data = dataset[0] + data = data.to(self.device) + if feval is None: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval + else: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) + y_ground_truth: torch.Tensor = data.y[_mask] + + eval_results = [] + for f in _feval: + try: + eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) + except: + eval_results.append( + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), + ) + ) + return eval_results + + def 
train(self, dataset, keep_valid_result: bool = True): + """ + The function of training on the given dataset and keeping valid result. + :param dataset: + :param keep_valid_result: Whether to save the validation result after training + """ + data = dataset[0] + self.__train_only(data) + if keep_valid_result: + prediction: torch.Tensor = self.__predict_only(data) + self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] + self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") + + def get_valid_predict(self) -> torch.Tensor: + return self._valid_result + + def get_valid_predict_proba(self) -> torch.Tensor: + return self._valid_result_prob + + def get_valid_score( + self, return_major: bool = True + ) -> _typing.Union[ + _typing.Tuple[float, bool], + _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] + ]: + if return_major: + return self._valid_score[0], self.feval[0].is_higher_better() + else: + return self._valid_score, [f.is_higher_better() for f in self.feval] + + @property + def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: + return self._hyper_parameter_space + + @hyper_parameter_space.setter + def hyper_parameter_space( + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + ) -> None: + if not isinstance(hp_space, _typing.Sequence): + raise TypeError + self._hyper_parameter_space = hp_space + + def get_name_with_hp(self) -> str: + name = "-".join( + [ + str(self._optimizer_class), + str(self._learning_rate), + str(self._max_epoch), + str(self._early_stopping.patience), + str(self.model), + str(self.device), + ] + ) + name = ( + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) + ) + return name + + def duplicate_from_hyper_parameter( + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None, + ) -> "NodeClassificationLayerDependentImportanceSamplingTrainer": + if model is None or not isinstance(model, BaseModel): + model: BaseModel = self.model + model = model.from_hyper_parameter( + dict( + [ + x + for x in hp.items() + if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] + ] + ) + ) + return NodeClassificationLayerDependentImportanceSamplingTrainer( + model, + self.num_features, + self.num_classes, + self._optimizer_class, + device=self.device, + init=True, + feval=self.feval, + loss=self.loss, + lr_scheduler_type=self._lr_scheduler_type, + **hp, + ) diff --git a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py new file mode 100644 index 0000000..a46ba56 --- /dev/null +++ b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py @@ -0,0 +1,215 @@ +import numpy as np +import torch +import torch.utils.data +import typing as _typing +import torch_geometric + + +class LayerDependentImportanceSampler: + class _Utility: + @classmethod + def compute_edge_weights(cls, __all_edge_index_with_self_loops: torch.LongTensor) -> torch.Tensor: + __out_degree: torch.Tensor = \ + torch_geometric.utils.degree(__all_edge_index_with_self_loops[0]) + __in_degree: torch.Tensor = \ + torch_geometric.utils.degree(__all_edge_index_with_self_loops[1]) + + # temp_tensor: torch.Tensor = torch.zeros_like(__all_edge_index_with_self_loops) + # temp_tensor[0] = __out_degree[__all_edge_index_with_self_loops[0]] + # 
temp_tensor[1] = __in_degree[__all_edge_index_with_self_loops[1]] + temp_tensor: torch.Tensor = torch.stack( + [ + __out_degree[__all_edge_index_with_self_loops[0]], + __in_degree[__all_edge_index_with_self_loops[1]] + ] + ) + temp_tensor: torch.Tensor = 1.0 / temp_tensor + temp_tensor[torch.isinf(temp_tensor)] = 0.0 + return temp_tensor[0] * temp_tensor[1] + + @classmethod + def get_candidate_source_nodes_probabilities( + cls, all_candidate_edge_indexes: torch.Tensor, + all_edge_index_with_self_loops: torch.Tensor, + all_edge_weights: torch.Tensor + ) -> _typing.Tuple[torch.LongTensor, torch.Tensor]: + """ + :param all_candidate_edge_indexes: + :param all_edge_index_with_self_loops: integral edge index with self-loops + :param all_edge_weights: + :return: (all_source_nodes_indexes, all_source_nodes_probabilities) + """ + _all_candidate_edges: torch.Tensor = \ + all_edge_index_with_self_loops[:, all_candidate_edge_indexes] + _all_candidate_edges_weights: torch.Tensor = \ + all_edge_weights[all_candidate_edge_indexes] + + all_candidate_source_nodes_indexes: torch.LongTensor = _all_candidate_edges[0].unique() + all_candidate_source_nodes_probabilities: torch.Tensor = torch.tensor( + [ + torch.sum( + _all_candidate_edges_weights[_all_candidate_edges[0] == _current_source_node_index] + ).item() / torch.sum(_all_candidate_edges_weights).item() + for _current_source_node_index in all_candidate_source_nodes_indexes.tolist() + ] + ) + assert ( + all_candidate_source_nodes_indexes.size() == + all_candidate_source_nodes_probabilities.size() + ) + return all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities + + @classmethod + def filter_selected_edges_by_source_nodes_and_target_nodes( + cls, all_edges_with_self_loops: torch.Tensor, + selected_source_node_indexes: torch.LongTensor, + selected_target_node_indexes: torch.LongTensor + ) -> torch.Tensor: + """ + :param all_edges_with_self_loops: all edges with self loops + :param selected_source_node_indexes: selected source node indexes + :param selected_target_node_indexes: selected target node indexes + :return: filtered edge indexes + """ + selected_edges_mask_for_source_nodes: torch.Tensor = torch.zeros( + all_edges_with_self_loops.size(1), dtype=torch.bool + ) + selected_edges_mask_for_source_nodes[ + torch.cat([ + torch.where(all_edges_with_self_loops[0] == __current_selected_source_node_index)[0] + for __current_selected_source_node_index in selected_source_node_indexes.unique().tolist() + ]).unique() + ] = True + selected_edges_mask_for_target_nodes: torch.Tensor = torch.zeros( + all_edges_with_self_loops.size(1), dtype=torch.bool + ) + selected_edges_mask_for_target_nodes[ + torch.cat([ + torch.where(all_edges_with_self_loops[1] == __current_selected_target_node_index)[0] + for __current_selected_target_node_index in selected_target_node_indexes.unique().tolist() + ]) + ] = True + return torch.where( + selected_edges_mask_for_source_nodes & selected_edges_mask_for_target_nodes + )[0] + + def __init__(self, all_edge_index: torch.LongTensor): + self.__all_edge_index_with_self_loops: torch.LongTensor = \ + torch_geometric.utils.add_remaining_self_loops(all_edge_index)[0] + self.__all_edge_weights: torch.Tensor = \ + self._Utility.compute_edge_weights(self.__all_edge_index_with_self_loops) + + def __sample_layer( + self, target_nodes_indexes: torch.LongTensor, + sampled_node_size_budget: int + ) -> _typing.Tuple[torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor]: + """ + :param target_nodes_indexes: + node indexes 
for target nodes in the top layer or nodes sampled in upper layer + :param sampled_node_size_budget: + :return: (Tensor, Tensor, LongTensor, LongTensor) + """ + all_candidate_edge_indexes: torch.LongTensor = torch.cat( + [ + torch.where(self.__all_edge_index_with_self_loops[1] == current_target_node_index)[0] + for current_target_node_index in target_nodes_indexes.unique().tolist() + ] + ).unique() + __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ + self._Utility.get_candidate_source_nodes_probabilities( + all_candidate_edge_indexes, + self.__all_edge_index_with_self_loops, + self.__all_edge_weights + ) + assert __all_candidate_source_nodes_indexes.size() == all_candidate_source_nodes_probabilities.size() + + """ Sampling """ + if sampled_node_size_budget < __all_candidate_source_nodes_indexes.numel(): + selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes[ + torch.from_numpy( + np.unique(np.random.choice( + np.arange(__all_candidate_source_nodes_indexes.numel()), sampled_node_size_budget, + p=all_candidate_source_nodes_probabilities.numpy() + )) + ).unique() + ].unique() + else: + selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes + + __selected_edges_indexes: torch.LongTensor = ( + self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes( + self.__all_edge_index_with_self_loops, + selected_source_node_indexes, target_nodes_indexes + ) + ).unique() + + non_normalized_selected_edges_weight: torch.Tensor = ( + self.__all_edge_weights[__selected_edges_indexes] / ( + selected_source_node_indexes.numel() * torch.tensor( + [ + all_candidate_source_nodes_probabilities[ + __all_candidate_source_nodes_indexes == current_source_node_index + ].item() + for current_source_node_index + in self.__all_edge_index_with_self_loops[0, __selected_edges_indexes].tolist() + ] + ) + ) + ) + + def __normalize_edges_weight_by_target_nodes( + __edge_index: torch.Tensor, __edge_weight: torch.Tensor + ) -> torch.Tensor: + if __edge_index.size(1) != __edge_weight.numel(): + raise ValueError + for current_target_node_index in __edge_index[1].unique().tolist(): + __current_mask_for_edges: torch.BoolTensor = ( + __edge_index[1] == current_target_node_index + ) + __edge_weight[__current_mask_for_edges] = ( + __edge_weight[__current_mask_for_edges] / ( + torch.sum(__edge_weight[__current_mask_for_edges]) + ) + ) + return __edge_weight + + normalized_selected_edges_weight: torch.Tensor = __normalize_edges_weight_by_target_nodes( + self.__all_edge_index_with_self_loops[:, __selected_edges_indexes], + non_normalized_selected_edges_weight + ) + return ( + self.__all_edge_index_with_self_loops[:, __selected_edges_indexes], + normalized_selected_edges_weight, + selected_source_node_indexes, + __selected_edges_indexes + ) + + def sample( + self, __top_layer_target_nodes_indexes: torch.LongTensor, + sampling_node_size_budgets: _typing.Sequence[int] + ) -> _typing.Sequence[_typing.Tuple[torch.Tensor, torch.Tensor]]: + """ + :param __top_layer_target_nodes_indexes: indexes of target nodes for the top layer + :param sampling_node_size_budgets: + :return: + """ + if type(__top_layer_target_nodes_indexes) != torch.Tensor: + raise TypeError + if not isinstance(sampling_node_size_budgets, _typing.Sequence): + raise TypeError + if len(sampling_node_size_budgets) == 0: + raise ValueError + + layers: _typing.List[_typing.Tuple[torch.Tensor, torch.Tensor]] = [] + upper_layer_sampled_node_indexes: torch.LongTensor = 
__top_layer_target_nodes_indexes + for current_sampled_node_size_budget in sampling_node_size_budgets[::-1]: + _sampling_result: _typing.Tuple[ + torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor + ] = self.__sample_layer(upper_layer_sampled_node_indexes, current_sampled_node_size_budget) + current_layer_edge_index: torch.Tensor = _sampling_result[0] + current_layer_edge_weight: torch.Tensor = _sampling_result[1] + layers.append((current_layer_edge_index, current_layer_edge_weight)) + + upper_layer_sampled_node_indexes: torch.LongTensor = _sampling_result[2] + + return layers[::-1] diff --git a/configs/nodeclf_ladies_gcn.yml b/configs/nodeclf_ladies_gcn.yml new file mode 100644 index 0000000..83c7e6f --- /dev/null +++ b/configs/nodeclf_ladies_gcn.yml @@ -0,0 +1,65 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- hp_space: + - feasiblePoints: + - 0 + parameterName: add_self_loops + type: CATEGORICAL + - feasiblePoints: 5,5 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 4 + maxValue: 256 + minValue: 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn +trainer: + name: NodeClassificationLayerDependentImportanceSamplingTrainer + hp_space: + - feasiblePoints: 128,256,512 + parameterName: sampled_node_size_budget + type: DISCRETE + - maxValue: 300 + minValue: 100 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 30 + minValue: 10 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.01 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 0.0005 + minValue: 0.0001 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/nodeclf_sage_benchmark_large.yml b/configs/nodeclf_sage_benchmark_large.yml index 8b7c2a5..2cdf556 100644 --- a/configs/nodeclf_sage_benchmark_large.yml +++ b/configs/nodeclf_sage_benchmark_large.yml @@ -29,10 +29,10 @@ models: parameterName: dropout scalingType: LINEAR type: DOUBLE - - feasiblePoints": + - feasiblePoints: - mean - parameterName: aggr, - type: CATEGORICAL, + parameterName: aggr + type: CATEGORICAL - feasiblePoints: - leaky_relu - relu diff --git a/configs/nodeclf_sage_benchmark_small.yml b/configs/nodeclf_sage_benchmark_small.yml index 2bd0ffe..9bd7aaa 100644 --- a/configs/nodeclf_sage_benchmark_small.yml +++ b/configs/nodeclf_sage_benchmark_small.yml @@ -29,12 +29,12 @@ models: parameterName: dropout scalingType: LINEAR type: DOUBLE - - feasiblePoints": + - feasiblePoints: - mean - add - max - parameterName: agg, - type: CATEGORICAL, + parameterName: aggr + type: CATEGORICAL - feasiblePoints: - leaky_relu - relu
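Taken together, the pieces above define a LADIES-style sampling path: LayerDependentImportanceSampler draws one (edge_index, edge_weight) pair per layer, and the rewritten GCN.forward dispatches to the new layer-wise forward whenever the data object carries matching edge_indexes / edge_weights sequences. Below is a minimal sketch of that plumbing using only APIs introduced in this diff; the toy graph, sizes, and budgets are illustrative, and it assumes the GCN constructor builds len(hidden_features) + 1 convolution layers as the surrounding context suggests.

import torch
from torch_geometric.data import Data

from autogl.module.model.gcn import GCN
from autogl.module.train.sampling.sampler.layer_dependent_importance_sampler import (
    LayerDependentImportanceSampler,
)

# Toy graph (illustrative only): 100 nodes, 500 random directed edges.
num_nodes, num_features, num_classes = 100, 16, 4
edge_index = torch.randint(0, num_nodes, (2, 500))

# Two GCNConv layers: num_features -> 32 -> num_classes.
model = GCN(
    num_features, num_classes,
    hidden_features=[32], dropout=0.5, activation_name="relu",
)
model.eval()

# One (edge_index, edge_weight) pair per layer, sampled from the top down;
# the budget list must be as long as the model has convolution layers.
sampler = LayerDependentImportanceSampler(edge_index)
target_nodes = torch.arange(10)  # e.g. a mini-batch of training nodes
layers = sampler.sample(target_nodes, [20, 20])

data = Data(x=torch.randn(num_nodes, num_features), edge_index=edge_index)
data.edge_indexes = [layer[0] for layer in layers]  # triggers the layer-wise forward
data.edge_weights = [layer[1] for layer in layers]

log_probs = model(data)               # log-softmax over classes for all nodes
batch_log_probs = log_probs[target_nodes]

This is the same shape of data the new trainer builds each epoch before calling self.model.model.forward(data).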
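A hypothetical end-to-end call into the new trainer itself is sketched below. It assumes autogl.module.model re-exports AutoGCN, uses build_dataset_from_name from autogl.datasets for a PyG-style dataset whose dataset[0] carries x, y, edge_index and the train/val/test masks, and assumes the model's default hyper-parameters give a three-layer GCN; the learning rate, epochs, and budget are illustrative.

import torch
from autogl.datasets import build_dataset_from_name
from autogl.module.model import AutoGCN  # assumed re-export
from autogl.module.train.node_classification_trainer.node_classification_sampled_trainer import (
    NodeClassificationLayerDependentImportanceSamplingTrainer,
)

dataset = build_dataset_from_name("cora")
data = dataset[0]
num_features, num_classes = data.x.size(1), int(data.y.max()) + 1

model = AutoGCN(
    num_features=num_features, num_classes=num_classes,
    device="cpu", init=False,
)

# num_layers must match the depth of the instantiated GCN so that the
# sampled edge_indexes sequence lines up with its convolution layers;
# num_layers and sampled_node_size_budget are consumed from **kwargs.
trainer = NodeClassificationLayerDependentImportanceSamplingTrainer(
    model, num_features, num_classes,
    optimizer="adam", lr=1e-2, max_epoch=200, early_stopping_round=20,
    device=torch.device("cpu"), init=True, loss="nll_loss",
    num_layers=3, sampled_node_size_budget=256,
)
trainer.train(dataset)
print(trainer.evaluate(dataset, mask="test"))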