Reproduce LADIES, a layer-wise sampling approach; assign a default hyperparameter space for each model; fix bugs in configs. Planning major refactorings for the upcoming minor unstable version. tags/v0.3.1
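For context on what is being reproduced: LADIES (Zou et al., NeurIPS 2019, "Layer-Dependent Importance Sampling for Training Deep and Large Graph Convolutional Networks") samples the nodes of each GNN layer conditioned on the nodes already fixed for the layer above. A hedged sketch of the key quantity, with $\hat{A} = \tilde{D}^{-1/2}(A + I)\tilde{D}^{-1/2}$ the self-looped normalized adjacency and $V_{l+1}$ the upper-layer node set:

$$
p^{(l)}\left(u \mid V_{l+1}\right) \;=\; \frac{\sum_{v \in V_{l+1}} \hat{A}_{vu}^{2}}{\sum_{u'} \sum_{v \in V_{l+1}} \hat{A}_{vu'}^{2}}
$$

The sampler introduced below follows this recipe: `compute_edge_weights` gives each self-looped edge $(u, v)$ the weight $1/(d_{\mathrm{out}}(u)\, d_{\mathrm{in}}(v))$, which on an undirected graph equals $\hat{A}_{vu}^{2} = 1/(d_u d_v)$, and `get_candidate_source_nodes_probabilities` normalizes each candidate source node's summed incident weights into a distribution.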
| @@ -294,13 +294,15 @@ class ClassificationModel(_BaseModel): | |||
| num_classes: int = ..., | |||
| num_graph_features: int = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., | |||
| init: bool = False, | |||
| **kwargs | |||
| ): | |||
| if "initialize" in kwargs: | |||
| del kwargs["initialize"] | |||
| super(ClassificationModel, self).__init__( | |||
| initialize=init, device=device, **kwargs | |||
| initialize=init, hyper_parameter_space=hyper_parameter_space, | |||
| device=device, **kwargs | |||
| ) | |||
| if num_classes != Ellipsis and type(num_classes) == int: | |||
| self.__num_classes: int = num_classes if num_classes > 0 else 0 | |||
| @@ -17,6 +17,7 @@ class GCN(torch.nn.Module): | |||
| hidden_features: _typing.Sequence[int], | |||
| dropout: float, | |||
| activation_name: str, | |||
| add_self_loops: bool = True | |||
| ): | |||
| super().__init__() | |||
| self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() | |||
| @@ -24,13 +25,13 @@ class GCN(torch.nn.Module): | |||
| if num_layers == 1: | |||
| self.__convolution_layers.append( | |||
| torch_geometric.nn.GCNConv( | |||
| num_features, num_classes, add_self_loops=False | |||
| num_features, num_classes, add_self_loops=add_self_loops | |||
| ) | |||
| ) | |||
| else: | |||
| self.__convolution_layers.append( | |||
| torch_geometric.nn.GCNConv( | |||
| num_features, hidden_features[0], add_self_loops=False | |||
| num_features, hidden_features[0], add_self_loops=add_self_loops | |||
| ) | |||
| ) | |||
| for i in range(len(hidden_features)): | |||
| @@ -44,11 +45,31 @@ class GCN(torch.nn.Module): | |||
| self.__dropout: float = dropout | |||
| self.__activation_name: str = activation_name | |||
| def __layer_wise_forward(self, data): | |||
| # todo: Implement this forward method | |||
| # in case that data.edge_indexes property is provided | |||
| # for Layer-wise and Node-wise sampled training | |||
| raise NotImplementedError | |||
| def __layer_wise_forward( | |||
| self, x: torch.Tensor, | |||
| edge_indexes: _typing.Sequence[torch.Tensor], | |||
| edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] | |||
| ) -> torch.Tensor: | |||
| assert len(edge_indexes) == len(edge_weights) == len(self.__convolution_layers) | |||
| for edge_index in edge_indexes: | |||
| if type(edge_index) != torch.Tensor: | |||
| raise TypeError | |||
| if edge_index.size(0) != 2: | |||
| raise ValueError | |||
| for edge_weight in edge_weights: | |||
| if not (edge_weight is None or type(edge_weight) == torch.Tensor): | |||
| raise TypeError | |||
| for layer_index in range(len(self.__convolution_layers)): | |||
| x: torch.Tensor = self.__convolution_layers[layer_index]( | |||
| x, edge_indexes[layer_index], edge_weights[layer_index] | |||
| ) | |||
| if layer_index + 1 < len(self.__convolution_layers): | |||
| x = activate_func(x, self.__activation_name) | |||
| x = torch.nn.functional.dropout( | |||
| x, p=self.__dropout, training=self.training | |||
| ) | |||
| return torch.nn.functional.log_softmax(x, dim=1) | |||
| def __basic_forward( | |||
| self, | |||
| @@ -68,8 +89,27 @@ class GCN(torch.nn.Module): | |||
| return torch.nn.functional.log_softmax(x, dim=1) | |||
| def forward(self, data) -> torch.Tensor: | |||
| if hasattr(data, "edge_indexes") and getattr(data, "edge_indexes") is not None: | |||
| return self.__layer_wise_forward(data) | |||
| if ( | |||
| hasattr(data, "edge_indexes") and | |||
| isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and | |||
| len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) | |||
| ): | |||
| edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") | |||
| if ( | |||
| hasattr(data, "edge_weights") and | |||
| isinstance(getattr(data, "edge_weights"), _typing.Sequence) and | |||
| len(getattr(data, "edge_weights")) == len(self.__convolution_layers) | |||
| ): | |||
| edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = ( | |||
| getattr(data, "edge_weights") | |||
| ) | |||
| else: | |||
| edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = [ | |||
| None for _ in range(len(self.__convolution_layers)) | |||
| ] | |||
| return self.__layer_wise_forward( | |||
| getattr(data, "x"), edge_indexes, edge_weights | |||
| ) | |||
| else: | |||
| if not (hasattr(data, "x") and hasattr(data, "edge_index")): | |||
| raise AttributeError | |||
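A minimal sketch of how the new dispatch is exercised (the two-layer setup, tensor shapes, and constructor argument order are illustrative assumptions, not taken from the repository's tests; `GCN` is the class defined above):

```python
import torch
from torch_geometric.data import Data

# Assumed constructor order, matching AutoGCN._initialize below:
# (num_features, num_classes, hidden_features, dropout, activation_name)
gcn_model = GCN(16, 7, [32], 0.5, "relu")  # two convolution layers

# One sampled bipartite graph per convolution layer triggers the
# layer-wise branch of forward(); anything else falls back to the
# plain (x, edge_index) path guarded by the AttributeError check.
data = Data(x=torch.randn(100, 16))
data.edge_indexes = [torch.randint(0, 100, (2, 400)) for _ in range(2)]
data.edge_weights = [None, None]  # missing or ill-typed weights become None
output = gcn_model(data)          # log-probabilities, shape (100, 7)
```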
| @@ -133,8 +173,45 @@ class AutoGCN(ClassificationModel): | |||
| init: bool = False, | |||
| **kwargs | |||
| ) -> None: | |||
| default_hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [ | |||
| { | |||
| "parameterName": "add_self_loops", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": [1], | |||
| }, | |||
| { | |||
| "parameterName": "num_layers", | |||
| "type": "DISCRETE", | |||
| "feasiblePoints": "2,3,4", | |||
| }, | |||
| { | |||
| "parameterName": "hidden", | |||
| "type": "NUMERICAL_LIST", | |||
| "numericalType": "INTEGER", | |||
| "length": 3, | |||
| "minValue": [8, 8, 8], | |||
| "maxValue": [128, 128, 128], | |||
| "scalingType": "LOG", | |||
| "cutPara": ("num_layers",), | |||
| "cutFunc": lambda x: x[0] - 1, | |||
| }, | |||
| { | |||
| "parameterName": "dropout", | |||
| "type": "DOUBLE", | |||
| "maxValue": 0.8, | |||
| "minValue": 0.2, | |||
| "scalingType": "LINEAR", | |||
| }, | |||
| { | |||
| "parameterName": "act", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], | |||
| }, | |||
| ] | |||
| super(AutoGCN, self).__init__( | |||
| num_features, num_classes, device=device, init=init, **kwargs | |||
| num_features, num_classes, device=device, | |||
| hyper_parameter_space=default_hp_space, init=init, **kwargs | |||
| ) | |||
| def _initialize(self): | |||
| @@ -144,4 +221,8 @@ class AutoGCN(ClassificationModel): | |||
| self.hyper_parameter.get("hidden"), | |||
| self.hyper_parameter.get("dropout"), | |||
| self.hyper_parameter.get("act"), | |||
| add_self_loops=( | |||
| "add_self_loops" in self.hyper_parameter | |||
| and self.hyper_parameter.get("add_self_loops") | |||
| ) | |||
| ).to(self.device) | |||
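As a hedged illustration of how `cutPara`/`cutFunc` shape the space, one point that could be drawn from `default_hp_space` (the concrete values are invented for illustration):

```python
# Hypothetical sampled point; the HPO module is expected to trim "hidden"
# to cutFunc((num_layers,)) == num_layers - 1 entries before construction.
sampled_hp = {
    "add_self_loops": 1,  # CATEGORICAL over [1]
    "num_layers": 3,      # DISCRETE over "2,3,4"
    "hidden": [64, 32],   # NUMERICAL_LIST, log-scaled in [8, 128], cut to 2
    "dropout": 0.5,       # DOUBLE in [0.2, 0.8]
    "act": "relu",        # CATEGORICAL
}
```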
| @@ -1,10 +1,10 @@ | |||
| import typing as _typing | |||
| import torch | |||
| import torch.nn.functional as F | |||
| import torch.nn.functional | |||
| from torch_geometric.nn.conv import SAGEConv | |||
| from . import register_model | |||
| from .base import BaseModel, activate_func | |||
| from .base import ClassificationModel, activate_func | |||
| class GraphSAGE(torch.nn.Module): | |||
| @@ -15,8 +15,7 @@ class GraphSAGE(torch.nn.Module): | |||
| hidden_features: _typing.Sequence[int], | |||
| dropout: float, | |||
| activation_name: str, | |||
| aggr: str = "mean", | |||
| **kwargs | |||
| aggr: str = "mean" | |||
| ): | |||
| super(GraphSAGE, self).__init__() | |||
| if type(aggr) != str: | |||
| @@ -47,90 +46,173 @@ class GraphSAGE(torch.nn.Module): | |||
| self.__dropout: float = dropout | |||
| self.__activation_name: str = activation_name | |||
| def __full_forward(self, data): | |||
| x: torch.Tensor = getattr(data, "x") | |||
| edge_index: torch.Tensor = getattr(data, "edge_index") | |||
| def __basic_forward( | |||
| self, | |||
| x: torch.Tensor, | |||
| edge_index: torch.Tensor, | |||
| edge_weight: _typing.Optional[torch.Tensor] = None, | |||
| ) -> torch.Tensor: | |||
| for layer_index in range(len(self.__convolution_layers)): | |||
| x: torch.Tensor = self.__convolution_layers[layer_index](x, edge_index) | |||
| x: torch.Tensor = self.__convolution_layers[layer_index]( | |||
| x, edge_index, edge_weight | |||
| ) | |||
| if layer_index + 1 < len(self.__convolution_layers): | |||
| x = activate_func(x, self.__activation_name) | |||
| x = F.dropout(x, p=self.__dropout, training=self.training) | |||
| return F.log_softmax(x, dim=1) | |||
| def __distributed_forward(self, data): | |||
| x: torch.Tensor = getattr(data, "x") | |||
| edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") | |||
| if len(edge_indexes) != len(self.__convolution_layers): | |||
| raise AttributeError | |||
| x = torch.nn.functional.dropout( | |||
| x, p=self.__dropout, training=self.training | |||
| ) | |||
| return torch.nn.functional.log_softmax(x, dim=1) | |||
| def __layer_wise_forward( | |||
| self, x: torch.Tensor, | |||
| edge_indexes: _typing.Sequence[torch.Tensor], | |||
| edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] | |||
| ) -> torch.Tensor: | |||
| assert len(edge_indexes) == len(edge_weights) == len(self.__convolution_layers) | |||
| for edge_index in edge_indexes: | |||
| if type(edge_index) != torch.Tensor: | |||
| raise TypeError | |||
| if edge_index.size(0) != 2: | |||
| raise ValueError | |||
| for edge_weight in edge_weights: | |||
| if not (edge_weight is None or type(edge_weight) == torch.Tensor): | |||
| raise TypeError | |||
| for layer_index in range(len(self.__convolution_layers)): | |||
| x: torch.Tensor = self.__convolution_layers[layer_index]( | |||
| x, edge_indexes[layer_index] | |||
| ) | |||
| if layer_index + 1 < len(self.__convolution_layers): | |||
| x = activate_func(x, self.__activation_name) | |||
| x = F.dropout(x, p=self.__dropout, training=self.training) | |||
| return F.log_softmax(x, dim=1) | |||
| x = torch.nn.functional.dropout(x, p=self.__dropout, training=self.training) | |||
| return torch.nn.functional.log_softmax(x, dim=1) | |||
| def forward(self, data): | |||
| def forward(self, data) -> torch.Tensor: | |||
| if ( | |||
| hasattr(data, "edge_indexes") | |||
| and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) | |||
| and len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) | |||
| hasattr(data, "edge_indexes") and | |||
| isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and | |||
| len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) | |||
| ): | |||
| return self.__distributed_forward(data) | |||
| edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") | |||
| if ( | |||
| hasattr(data, "edge_weights") and | |||
| isinstance(getattr(data, "edge_weights"), _typing.Sequence) and | |||
| len(getattr(data, "edge_weights")) == len(self.__convolution_layers) | |||
| ): | |||
| edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = ( | |||
| getattr(data, "edge_weights") | |||
| ) | |||
| else: | |||
| edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = [ | |||
| None for _ in range(len(self.__convolution_layers)) | |||
| ] | |||
| return self.__layer_wise_forward( | |||
| getattr(data, "x"), edge_indexes, edge_weights | |||
| ) | |||
| else: | |||
| return self.__full_forward(data) | |||
| if not (hasattr(data, "x") and hasattr(data, "edge_index")): | |||
| raise AttributeError | |||
| if not ( | |||
| type(getattr(data, "x")) == torch.Tensor | |||
| and type(getattr(data, "edge_index")) == torch.Tensor | |||
| ): | |||
| raise TypeError | |||
| x: torch.Tensor = getattr(data, "x") | |||
| edge_index: torch.LongTensor = getattr(data, "edge_index") | |||
| if ( | |||
| hasattr(data, "edge_weight") | |||
| and type(getattr(data, "edge_weight")) == torch.Tensor | |||
| and getattr(data, "edge_weight").size() == (edge_index.size(1),) | |||
| ): | |||
| edge_weight: _typing.Optional[torch.Tensor] = getattr( | |||
| data, "edge_weight" | |||
| ) | |||
| else: | |||
| edge_weight: _typing.Optional[torch.Tensor] = None | |||
| return self.__basic_forward(x, edge_index, edge_weight) | |||
| @register_model("sage") | |||
| class AutoSAGE(BaseModel): | |||
| def __init__( | |||
| self, | |||
| num_features: int = 1, | |||
| num_classes: int = 1, | |||
| device: _typing.Optional[torch.device] = torch.device("cpu"), | |||
| init: bool = False, | |||
| **kwargs | |||
| ): | |||
| super(AutoSAGE, self).__init__(init) | |||
| self.__num_features: int = num_features | |||
| self.__num_classes: int = num_classes | |||
| self.__device: torch.device = ( | |||
| device if device is not None else torch.device("cpu") | |||
| ) | |||
| self.hyperparams = { | |||
| "num_layers": 3, | |||
| "hidden": [64, 32], | |||
| "dropout": 0.5, | |||
| "act": "relu", | |||
| "aggr": "mean", | |||
| } | |||
| self.params = { | |||
| "num_features": self.__num_features, | |||
| "num_classes": self.__num_classes, | |||
| } | |||
| self._model: GraphSAGE = GraphSAGE( | |||
| self.__num_features, self.__num_classes, [64, 32], 0.5, "relu" | |||
| ) | |||
| self._initialized: bool = False | |||
| if init: | |||
| self.initialize() | |||
| @property | |||
| def model(self) -> GraphSAGE: | |||
| return self._model | |||
| class AutoSAGE(ClassificationModel): | |||
| r""" | |||
| AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on | |||
| Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper. The layer is | |||
| .. math:: | |||
| \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot | |||
| \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j | |||
| Parameters | |||
| ---------- | |||
| num_features: `int`. | |||
| The dimension of features. | |||
| num_classes: `int`. | |||
| The number of classes. | |||
| device: `torch.device` or `str` | |||
| The device on which the model will run. | |||
| init: `bool`. | |||
| If True, the model will be initialized; if False, it will not. | |||
| """ | |||
| def __init__( | |||
| self, | |||
| num_features: int = ..., | |||
| num_classes: int = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| init: bool = False, | |||
| **kwargs | |||
| ): | |||
| default_hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [ | |||
| { | |||
| "parameterName": "num_layers", | |||
| "type": "DISCRETE", | |||
| "feasiblePoints": "2,3,4", | |||
| }, | |||
| { | |||
| "parameterName": "hidden", | |||
| "type": "NUMERICAL_LIST", | |||
| "numericalType": "INTEGER", | |||
| "length": 3, | |||
| "minValue": [8, 8, 8], | |||
| "maxValue": [128, 128, 128], | |||
| "scalingType": "LOG", | |||
| "cutPara": ("num_layers",), | |||
| "cutFunc": lambda x: x[0] - 1, | |||
| }, | |||
| { | |||
| "parameterName": "dropout", | |||
| "type": "DOUBLE", | |||
| "maxValue": 0.8, | |||
| "minValue": 0.2, | |||
| "scalingType": "LINEAR", | |||
| }, | |||
| { | |||
| "parameterName": "act", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], | |||
| }, | |||
| { | |||
| "parameterName": "aggr", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": ["mean", "add", "max"], | |||
| }, | |||
| ] | |||
| super(AutoSAGE, self).__init__( | |||
| num_features, num_classes, device=device, | |||
| hyper_parameter_space=default_hp_space, init=init, **kwargs | |||
| ) | |||
| def initialize(self): | |||
| def _initialize(self): | |||
| """ Initialize model """ | |||
| if not self._initialized: | |||
| self._model: GraphSAGE = GraphSAGE( | |||
| self.__num_features, | |||
| self.__num_classes, | |||
| hidden_features=self.hyperparams["hidden"], | |||
| activation_name=self.hyperparams["act"], | |||
| **self.hyperparams | |||
| ).to(self.__device) | |||
| self._initialized = True | |||
| self.model = GraphSAGE( | |||
| self.num_features, | |||
| self.num_classes, | |||
| self.hyper_parameter.get("hidden"), | |||
| self.hyper_parameter.get("dropout"), | |||
| self.hyper_parameter.get("act"), | |||
| self.hyper_parameter.get("aggr") | |||
| ).to(self.device) | |||
| @@ -1,270 +0,0 @@ | |||
| import torch | |||
| from . import register_model | |||
| from .base import BaseModel, activate_func | |||
| from typing import Union, Tuple | |||
| from torch_geometric.typing import OptPairTensor, Adj, Size | |||
| from torch import Tensor | |||
| from torch.nn import Linear | |||
| import torch.nn.functional as F | |||
| from torch_sparse import SparseTensor, matmul | |||
| from torch_geometric.nn.conv import MessagePassing | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger("SAGEModel") | |||
| class SAGEConv(MessagePassing): | |||
| r"""Modified from SAGEConv in Pytorch Geometric <https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/nn/conv/sage_conv.py> | |||
| The GraphSAGE operator from the `"Inductive Representation Learning on | |||
| Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper | |||
| .. math:: | |||
| \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot | |||
| \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j | |||
| Args: | |||
| in_channels (int or tuple): Size of each input sample. A tuple | |||
| corresponds to the sizes of source and target dimensionalities. | |||
| out_channels (int): Size of each output sample. | |||
| normalize (bool, optional): If set to :obj:`True`, output features | |||
| will be :math:`\ell_2`-normalized, *i.e.*, | |||
| :math:`\frac{\mathbf{x}^{\prime}_i} | |||
| {\| \mathbf{x}^{\prime}_i \|_2}`. | |||
| (default: :obj:`False`) | |||
| bias (bool, optional): If set to :obj:`False`, the layer will not learn | |||
| an additive bias. (default: :obj:`True`) | |||
| **kwargs (optional): Additional arguments of | |||
| :class:`torch_geometric.nn.conv.MessagePassing`. | |||
| """ | |||
| def __init__( | |||
| self, | |||
| in_channels: Union[int, Tuple[int, int]], | |||
| out_channels: int, | |||
| normalize: bool = False, | |||
| bias: bool = True, | |||
| aggr: str = "mean", | |||
| **kwargs | |||
| ): | |||
| super(SAGEConv, self).__init__(aggr=aggr, **kwargs) | |||
| self.in_channels = in_channels | |||
| self.out_channels = out_channels | |||
| self.normalize = normalize | |||
| if isinstance(in_channels, int): | |||
| in_channels = (in_channels, in_channels) | |||
| self.lin_l = Linear(in_channels[0], out_channels, bias=bias) | |||
| self.lin_r = Linear(in_channels[1], out_channels, bias=False) | |||
| self.reset_parameters() | |||
| def reset_parameters(self): | |||
| self.lin_l.reset_parameters() | |||
| self.lin_r.reset_parameters() | |||
| def forward( | |||
| self, x: Union[Tensor, OptPairTensor], edge_index: Adj, size: Size = None | |||
| ) -> Tensor: | |||
| """""" | |||
| if isinstance(x, Tensor): | |||
| x: OptPairTensor = (x, x) | |||
| # propagate_type: (x: OptPairTensor) | |||
| out = self.propagate(edge_index, x=x, size=size) | |||
| out = self.lin_l(out) | |||
| x_r = x[1] | |||
| if x_r is not None: | |||
| out += self.lin_r(x_r) | |||
| if self.normalize: | |||
| out = F.normalize(out, p=2.0, dim=-1) | |||
| return out | |||
| def message(self, x_j: Tensor) -> Tensor: | |||
| return x_j | |||
| def message_and_aggregate(self, adj_t: SparseTensor, x: OptPairTensor) -> Tensor: | |||
| adj_t = adj_t.set_value(None, layout=None) | |||
| return matmul(adj_t, x[0], reduce=self.aggr) | |||
| def __repr__(self): | |||
| return "{}({}, {})".format( | |||
| self.__class__.__name__, self.in_channels, self.out_channels | |||
| ) | |||
| def set_default(args, d): | |||
| for k, v in d.items(): | |||
| if k not in args: | |||
| args[k] = v | |||
| return args | |||
| class GraphSAGE(torch.nn.Module): | |||
| def __init__(self, args): | |||
| super(GraphSAGE, self).__init__() | |||
| self.args = args | |||
| agg = self.args["agg"] | |||
| self.num_layer = int(self.args["num_layers"]) | |||
| if not self.num_layer == len(self.args["hidden"]) + 1: | |||
| LOGGER.warn("Warning: layer size does not match the length of hidden units") | |||
| missing_keys = list( | |||
| set( | |||
| [ | |||
| "features_num", | |||
| "num_class", | |||
| "num_layers", | |||
| "hidden", | |||
| "dropout", | |||
| "act", | |||
| "agg", | |||
| ] | |||
| ) | |||
| - set(self.args.keys()) | |||
| ) | |||
| if len(missing_keys) > 0: | |||
| raise Exception("Missing keys: %s." % ",".join(missing_keys)) | |||
| self.convs = torch.nn.ModuleList() | |||
| self.convs.append( | |||
| SAGEConv(self.args["features_num"], self.args["hidden"][0], aggr=agg) | |||
| ) | |||
| for i in range(self.num_layer - 2): | |||
| self.convs.append( | |||
| SAGEConv(self.args["hidden"][i], self.args["hidden"][i + 1], aggr=agg) | |||
| ) | |||
| self.convs.append( | |||
| SAGEConv( | |||
| self.args["hidden"][self.num_layer - 2], | |||
| self.args["num_class"], | |||
| aggr=agg, | |||
| ) | |||
| ) | |||
| def forward(self, data): | |||
| try: | |||
| x = data.x | |||
| except: | |||
| print("no x") | |||
| pass | |||
| try: | |||
| edge_index = data.edge_index | |||
| except: | |||
| print("no index") | |||
| pass | |||
| try: | |||
| edge_weight = data.edge_weight | |||
| except: | |||
| edge_weight = None | |||
| pass | |||
| for i in range(self.num_layer): | |||
| x = self.convs[i](x, edge_index, edge_weight) | |||
| if i != self.num_layer - 1: | |||
| x = activate_func(x, self.args["act"]) | |||
| x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
| return F.log_softmax(x, dim=1) | |||
| # @register_model("sage") | |||
| class AutoSAGE(BaseModel): | |||
| r""" | |||
| AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on | |||
| Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper. The layer is | |||
| .. math:: | |||
| \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot | |||
| \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j | |||
| Parameters | |||
| ---------- | |||
| num_features: `int`. | |||
| The dimension of features. | |||
| num_classes: `int`. | |||
| The number of classes. | |||
| device: `torch.device` or `str` | |||
| The device where model will be running on. | |||
| init: `bool`. | |||
| If True(False), the model will (not) be initialized. | |||
| """ | |||
| def __init__( | |||
| self, num_features=None, num_classes=None, device=None, init=False, **args | |||
| ): | |||
| super(AutoSAGE, self).__init__() | |||
| self.num_features = num_features if num_features is not None else 0 | |||
| self.num_classes = int(num_classes) if num_classes is not None else 0 | |||
| self.device = device if device is not None else "cpu" | |||
| self.init = True | |||
| self.params = { | |||
| "features_num": self.num_features, | |||
| "num_class": self.num_classes, | |||
| } | |||
| self.space = [ | |||
| { | |||
| "parameterName": "num_layers", | |||
| "type": "DISCRETE", | |||
| "feasiblePoints": "2,3,4", | |||
| }, | |||
| { | |||
| "parameterName": "hidden", | |||
| "type": "NUMERICAL_LIST", | |||
| "numericalType": "INTEGER", | |||
| "length": 3, | |||
| "minValue": [8, 8, 8], | |||
| "maxValue": [128, 128, 128], | |||
| "scalingType": "LOG", | |||
| "cutPara": ("num_layers",), | |||
| "cutFunc": lambda x: x[0] - 1, | |||
| }, | |||
| { | |||
| "parameterName": "dropout", | |||
| "type": "DOUBLE", | |||
| "maxValue": 0.8, | |||
| "minValue": 0.2, | |||
| "scalingType": "LINEAR", | |||
| }, | |||
| { | |||
| "parameterName": "act", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], | |||
| }, | |||
| { | |||
| "parameterName": "agg", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": ["mean", "add", "max"], | |||
| }, | |||
| ] | |||
| self.hyperparams = { | |||
| "num_layers": 3, | |||
| "hidden": [64, 32], | |||
| "dropout": 0.5, | |||
| "act": "relu", | |||
| "agg": "mean", | |||
| } | |||
| self.initialized = False | |||
| if init is True: | |||
| self.initialize() | |||
| def initialize(self): | |||
| # """Initialize model.""" | |||
| if self.initialized: | |||
| return | |||
| self.initialized = True | |||
| self.model = GraphSAGE({**self.params, **self.hyperparams}).to(self.device) | |||
| @@ -9,6 +9,9 @@ from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation | |||
| from ..evaluation import get_feval, Logloss | |||
| from ..sampling.sampler.neighbor_sampler import NeighborSampler | |||
| from ..sampling.sampler.graphsaint_sampler import * | |||
| from ..sampling.sampler.layer_dependent_importance_sampler import ( | |||
| LayerDependentImportanceSampler | |||
| ) | |||
| from ...model import BaseModel | |||
| LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") | |||
| @@ -366,7 +369,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): | |||
| model: _typing.Union[BaseModel], | |||
| num_features: int, | |||
| num_classes: int, | |||
| optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None], | |||
| optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., | |||
| lr: float = 1e-4, | |||
| max_epoch: int = 100, | |||
| early_stopping_round: int = 100, | |||
| @@ -428,30 +431,16 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): | |||
| ) | |||
| """ Set hyper parameters """ | |||
| if "num_subgraphs" not in kwargs: | |||
| raise KeyError | |||
| elif type(kwargs.get("num_subgraphs")) != int: | |||
| raise TypeError | |||
| elif not kwargs.get("num_subgraphs") > 0: | |||
| raise ValueError | |||
| else: | |||
| self.__num_subgraphs: int = kwargs.get("num_subgraphs") | |||
| if "sampling_budget" not in kwargs: | |||
| raise KeyError | |||
| elif type(kwargs.get("sampling_budget")) != int: | |||
| raise TypeError | |||
| elif not kwargs.get("sampling_budget") > 0: | |||
| raise ValueError | |||
| self.__num_subgraphs: int = kwargs.get("num_subgraphs") | |||
| self.__sampling_budget: int = kwargs.get("sampling_budget") | |||
| if ( | |||
| kwargs.get("sampling_method") is not None | |||
| and type(kwargs.get("sampling_method")) == str | |||
| and kwargs.get("sampling_method") in ("node", "edge") | |||
| ): | |||
| self.__sampling_method_identifier: str = kwargs.get("sampling_method") | |||
| else: | |||
| self.__sampling_budget: int = kwargs.get("sampling_budget") | |||
| if "sampling_method" not in kwargs: | |||
| self.__sampling_method_identifier: str = "node" | |||
| elif type(kwargs.get("sampling_method")) != str: | |||
| self.__sampling_method_identifier: str = "node" | |||
| else: | |||
| self.__sampling_method_identifier: str = kwargs.get("sampling_method") | |||
| if self.__sampling_method_identifier.lower() not in ("node", "edge"): | |||
| self.__sampling_method_identifier: str = "node" | |||
| self.__is_initialized: bool = False | |||
| if init: | |||
| @@ -480,7 +469,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): | |||
| """ | |||
| data = data.to(self.device) | |||
| optimizer: torch.optim.Optimizer = self._optimizer_class( | |||
| self.model.parameters(), | |||
| self.model.model.parameters(), | |||
| lr=self._learning_rate, | |||
| weight_decay=self._weight_decay, | |||
| ) | |||
| @@ -694,7 +683,9 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): | |||
| if return_major: | |||
| return self._valid_score[0], self.feval[0].is_higher_better() | |||
| else: | |||
| return (self._valid_score, [f.is_higher_better() for f in self.feval]) | |||
| return ( | |||
| self._valid_score, [f.is_higher_better() for f in self.feval] | |||
| ) | |||
| @property | |||
| def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: | |||
| @@ -759,3 +750,377 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): | |||
| lr_scheduler_type=self._lr_scheduler_type, | |||
| **hp, | |||
| ) | |||
| @register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer") | |||
| class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer): | |||
| def __init__( | |||
| self, | |||
| model: _typing.Union[BaseModel, str], | |||
| num_features: int, | |||
| num_classes: int, | |||
| optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., | |||
| lr: float = 1e-4, | |||
| max_epoch: int = 100, | |||
| early_stopping_round: int = 100, | |||
| weight_decay: float = 1e-4, | |||
| device: _typing.Optional[torch.device] = None, | |||
| init: bool = True, | |||
| feval: _typing.Union[ | |||
| _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] | |||
| ] = (Logloss,), | |||
| loss: str = "nll_loss", | |||
| lr_scheduler_type: _typing.Optional[str] = None, | |||
| **kwargs, | |||
| ) -> None: | |||
| if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): | |||
| self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer | |||
| elif type(optimizer) == str: | |||
| if optimizer.lower() == "adam": | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.Adam | |||
| elif optimizer.lower() == "adam" + "w": | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.AdamW | |||
| elif optimizer.lower() == "sgd": | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.SGD | |||
| else: | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.Adam | |||
| else: | |||
| self._optimizer_class: _typing.Type[ | |||
| torch.optim.Optimizer | |||
| ] = torch.optim.Adam | |||
| self._learning_rate: float = lr if lr > 0 else 1e-4 | |||
| self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type | |||
| self._max_epoch: int = max_epoch if max_epoch > 0 else 100 | |||
| self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 | |||
| self._early_stopping = EarlyStopping( | |||
| patience=early_stopping_round if early_stopping_round > 0 else 100, | |||
| verbose=False | |||
| ) | |||
| """ Assign an empty initial hyper parameter space """ | |||
| self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] | |||
| self._valid_result: torch.Tensor = torch.zeros(0) | |||
| self._valid_result_prob: torch.Tensor = torch.zeros(0) | |||
| self._valid_score: _typing.Sequence[float] = () | |||
| super(NodeClassificationLayerDependentImportanceSamplingTrainer, self).__init__( | |||
| model, num_features, num_classes, device, init, feval, loss | |||
| ) | |||
| """ Set hyper parameters """ | |||
| " Configure num_layers " | |||
| self.__num_layers: int = kwargs.get("num_layers") | |||
| " Configure sampled_node_size_budget " | |||
| self.__sampled_node_size_budget: int = ( | |||
| kwargs.get("sampled_node_size_budget") | |||
| ) | |||
| self.__is_initialized: bool = False | |||
| if init: | |||
| self.initialize() | |||
| def initialize(self): | |||
| if self.__is_initialized: | |||
| return self | |||
| self.model.initialize() | |||
| self.__is_initialized = True | |||
| return self | |||
| def to(self, device: torch.device): | |||
| self.device = device | |||
| if self.model is not None: | |||
| self.model.to(self.device) | |||
| def get_model(self): | |||
| return self.model | |||
| def __train_only(self, data): | |||
| """ | |||
| The function of training on the given dataset and mask. | |||
| :param data: data of a specific graph | |||
| :return: self | |||
| """ | |||
| optimizer: torch.optim.Optimizer = self._optimizer_class( | |||
| self.model.model.parameters(), | |||
| lr=self._learning_rate, | |||
| weight_decay=self._weight_decay | |||
| ) | |||
| if type(self._lr_scheduler_type) == str: | |||
| if self._lr_scheduler_type.lower() == "step" + "lr": | |||
| lr_scheduler: torch.optim.lr_scheduler.StepLR = ( | |||
| torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) | |||
| ) | |||
| elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": | |||
| lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( | |||
| torch.optim.lr_scheduler.MultiStepLR( | |||
| optimizer, milestones=[30, 80], gamma=0.1 | |||
| ) | |||
| ) | |||
| elif self._lr_scheduler_type.lower() == "exponential" + "lr": | |||
| lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( | |||
| torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) | |||
| ) | |||
| elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): | |||
| lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( | |||
| torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") | |||
| ) | |||
| else: | |||
| lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( | |||
| torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) | |||
| ) | |||
| else: | |||
| lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( | |||
| torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) | |||
| ) | |||
| sampled_node_size_budget: int = self.__sampled_node_size_budget | |||
| num_layers: int = self.__num_layers | |||
| __layer_dependent_importance_sampler: LayerDependentImportanceSampler = ( | |||
| LayerDependentImportanceSampler(data.edge_index) | |||
| ) | |||
| __top_layer_target_nodes_indexes: torch.LongTensor = ( | |||
| torch.where(data.train_mask)[0].unique() | |||
| ) | |||
| for current_epoch in range(self._max_epoch): | |||
| self.model.model.train() | |||
| optimizer.zero_grad() | |||
| """ epoch start """ | |||
| " sample graphs " | |||
| __layers: _typing.Sequence[ | |||
| _typing.Tuple[torch.Tensor, torch.Tensor] | |||
| ] = __layer_dependent_importance_sampler.sample( | |||
| __top_layer_target_nodes_indexes, | |||
| [sampled_node_size_budget for _ in range(num_layers)] | |||
| ) | |||
| data.edge_indexes = [layer[0] for layer in __layers] | |||
| data.edge_weights = [layer[1] for layer in __layers] | |||
| data = data.to(self.device) | |||
| result: torch.Tensor = self.model.model.forward(data) | |||
| if hasattr(torch.nn.functional, self.loss): | |||
| loss_function = getattr( | |||
| torch.nn.functional, self.loss | |||
| ) | |||
| loss_value: torch.Tensor = loss_function( | |||
| result[data.train_mask], | |||
| data.y[data.train_mask] | |||
| ) | |||
| else: | |||
| raise TypeError( | |||
| f"PyTorch does not support loss type {self.loss}" | |||
| ) | |||
| loss_value.backward() | |||
| optimizer.step() | |||
| if self._lr_scheduler_type: | |||
| lr_scheduler.step() | |||
| if ( | |||
| hasattr(data, "val_mask") and | |||
| getattr(data, "val_mask") is not None and | |||
| type(getattr(data, "val_mask")) == torch.Tensor | |||
| ): | |||
| validation_results: _typing.Sequence[float] = self.evaluate( | |||
| (data,), "val", [self.feval[0]] | |||
| ) | |||
| if self.feval[0].is_higher_better(): | |||
| validation_loss: float = -validation_results[0] | |||
| else: | |||
| validation_loss: float = validation_results[0] | |||
| self._early_stopping(validation_loss, self.model.model) | |||
| if self._early_stopping.early_stop: | |||
| LOGGER.debug("Early stopping at %d", current_epoch) | |||
| break | |||
| if ( | |||
| hasattr(data, "val_mask") and | |||
| getattr(data, "val_mask") is not None and | |||
| type(getattr(data, "val_mask")) == torch.Tensor | |||
| ): | |||
| self._early_stopping.load_checkpoint(self.model.model) | |||
| def __predict_only(self, data) -> torch.Tensor: | |||
| """ | |||
| The function of predicting on the given data. | |||
| :param data: data of a specific graph | |||
| :return: the result of prediction on the given dataset | |||
| """ | |||
| data = data.to(self.device) | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| predicted_x: torch.Tensor = self.model.model(data) | |||
| return predicted_x | |||
| def predict_proba( | |||
| self, dataset, mask: _typing.Optional[str] = None, | |||
| in_log_format: bool = False | |||
| ): | |||
| """ | |||
| The function of predicting the probability on the given dataset. | |||
| :param dataset: The node classification dataset used to be predicted. | |||
| :param mask: | |||
| :param in_log_format: | |||
| :return: | |||
| """ | |||
| data = dataset[0].to(self.device) | |||
| if mask is not None and type(mask) == str: | |||
| if mask.lower() == "train": | |||
| _mask: torch.Tensor = data.train_mask | |||
| elif mask.lower() == "test": | |||
| _mask: torch.Tensor = data.test_mask | |||
| elif mask.lower() == "val": | |||
| _mask: torch.Tensor = data.val_mask | |||
| else: | |||
| _mask: torch.Tensor = data.test_mask | |||
| else: | |||
| _mask: torch.Tensor = data.test_mask | |||
| result = self.__predict_only(data)[_mask] | |||
| return result if in_log_format else torch.exp(result) | |||
| def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: | |||
| return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] | |||
| def evaluate( | |||
| self, | |||
| dataset, | |||
| mask: _typing.Optional[str] = None, | |||
| feval: _typing.Union[ | |||
| None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] | |||
| ] = None, | |||
| ) -> _typing.Sequence[float]: | |||
| data = dataset[0] | |||
| data = data.to(self.device) | |||
| if feval is None: | |||
| _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval | |||
| else: | |||
| _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) | |||
| if mask is not None and type(mask) == str: | |||
| if mask.lower() == "train": | |||
| _mask: torch.Tensor = data.train_mask | |||
| elif mask.lower() == "test": | |||
| _mask: torch.Tensor = data.test_mask | |||
| elif mask.lower() == "val": | |||
| _mask: torch.Tensor = data.val_mask | |||
| else: | |||
| _mask: torch.Tensor = data.test_mask | |||
| else: | |||
| _mask: torch.Tensor = data.test_mask | |||
| prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) | |||
| y_ground_truth: torch.Tensor = data.y[_mask] | |||
| eval_results = [] | |||
| for f in _feval: | |||
| try: | |||
| eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) | |||
| except: | |||
| eval_results.append( | |||
| f.evaluate( | |||
| prediction_probability.cpu().numpy(), | |||
| y_ground_truth.cpu().numpy(), | |||
| ) | |||
| ) | |||
| return eval_results | |||
| def train(self, dataset, keep_valid_result: bool = True): | |||
| """ | |||
| The function of training on the given dataset and keeping valid result. | |||
| :param dataset: | |||
| :param keep_valid_result: Whether to save the validation result after training | |||
| """ | |||
| data = dataset[0] | |||
| self.__train_only(data) | |||
| if keep_valid_result: | |||
| prediction: torch.Tensor = self.__predict_only(data) | |||
| self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] | |||
| self._valid_result_prob: torch.Tensor = prediction[data.val_mask] | |||
| self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") | |||
| def get_valid_predict(self) -> torch.Tensor: | |||
| return self._valid_result | |||
| def get_valid_predict_proba(self) -> torch.Tensor: | |||
| return self._valid_result_prob | |||
| def get_valid_score( | |||
| self, return_major: bool = True | |||
| ) -> _typing.Union[ | |||
| _typing.Tuple[float, bool], | |||
| _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] | |||
| ]: | |||
| if return_major: | |||
| return self._valid_score[0], self.feval[0].is_higher_better() | |||
| else: | |||
| return self._valid_score, [f.is_higher_better() for f in self.feval] | |||
| @property | |||
| def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: | |||
| return self._hyper_parameter_space | |||
| @hyper_parameter_space.setter | |||
| def hyper_parameter_space( | |||
| self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] | |||
| ) -> None: | |||
| if not isinstance(hp_space, _typing.Sequence): | |||
| raise TypeError | |||
| self._hyper_parameter_space = hp_space | |||
| def get_name_with_hp(self) -> str: | |||
| name = "-".join( | |||
| [ | |||
| str(self._optimizer_class), | |||
| str(self._learning_rate), | |||
| str(self._max_epoch), | |||
| str(self._early_stopping.patience), | |||
| str(self.model), | |||
| str(self.device), | |||
| ] | |||
| ) | |||
| name = ( | |||
| name | |||
| + "|" | |||
| + "-".join( | |||
| [ | |||
| str(x[0]) + "-" + str(x[1]) | |||
| for x in self.model.get_hyper_parameter().items() | |||
| ] | |||
| ) | |||
| ) | |||
| return name | |||
| def duplicate_from_hyper_parameter( | |||
| self, | |||
| hp: _typing.Dict[str, _typing.Any], | |||
| model: _typing.Optional[BaseModel] = None, | |||
| ) -> "NodeClassificationLayerDependentImportanceSamplingTrainer": | |||
| if model is None or not isinstance(model, BaseModel): | |||
| model: BaseModel = self.model | |||
| model = model.from_hyper_parameter( | |||
| dict( | |||
| [ | |||
| x | |||
| for x in hp.items() | |||
| if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] | |||
| ] | |||
| ) | |||
| ) | |||
| return NodeClassificationLayerDependentImportanceSamplingTrainer( | |||
| model, | |||
| self.num_features, | |||
| self.num_classes, | |||
| self._optimizer_class, | |||
| device=self.device, | |||
| init=True, | |||
| feval=self.feval, | |||
| loss=self.loss, | |||
| lr_scheduler_type=self._lr_scheduler_type, | |||
| **hp, | |||
| ) | |||
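A hedged construction sketch for the new trainer; `auto_model` and `dataset` are assumed to come from the surrounding AutoGL pipeline, and the `num_features`/`num_classes` attribute names on the dataset are assumptions:

```python
trainer = NodeClassificationLayerDependentImportanceSamplingTrainer(
    auto_model,                    # a BaseModel instance, e.g. an AutoGCN
    num_features=dataset.num_features,
    num_classes=dataset.num_classes,
    optimizer="adam",              # string names resolve to torch.optim classes
    lr=1e-2,
    num_layers=2,                  # consumed from **kwargs in __init__
    sampled_node_size_budget=256,  # consumed from **kwargs in __init__
)
trainer.train(dataset)
print(trainer.evaluate(dataset, "test"))
```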
| @@ -0,0 +1,215 @@ | |||
| import numpy as np | |||
| import torch | |||
| import torch.utils.data | |||
| import typing as _typing | |||
| import torch_geometric | |||
| class LayerDependentImportanceSampler: | |||
| class _Utility: | |||
| @classmethod | |||
| def compute_edge_weights(cls, __all_edge_index_with_self_loops: torch.LongTensor) -> torch.Tensor: | |||
| __out_degree: torch.Tensor = \ | |||
| torch_geometric.utils.degree(__all_edge_index_with_self_loops[0]) | |||
| __in_degree: torch.Tensor = \ | |||
| torch_geometric.utils.degree(__all_edge_index_with_self_loops[1]) | |||
| # temp_tensor: torch.Tensor = torch.zeros_like(__all_edge_index_with_self_loops) | |||
| # temp_tensor[0] = __out_degree[__all_edge_index_with_self_loops[0]] | |||
| # temp_tensor[1] = __in_degree[__all_edge_index_with_self_loops[1]] | |||
| temp_tensor: torch.Tensor = torch.stack( | |||
| [ | |||
| __out_degree[__all_edge_index_with_self_loops[0]], | |||
| __in_degree[__all_edge_index_with_self_loops[1]] | |||
| ] | |||
| ) | |||
| temp_tensor: torch.Tensor = 1.0 / temp_tensor | |||
| temp_tensor[torch.isinf(temp_tensor)] = 0.0 | |||
| return temp_tensor[0] * temp_tensor[1] | |||
| @classmethod | |||
| def get_candidate_source_nodes_probabilities( | |||
| cls, all_candidate_edge_indexes: torch.Tensor, | |||
| all_edge_index_with_self_loops: torch.Tensor, | |||
| all_edge_weights: torch.Tensor | |||
| ) -> _typing.Tuple[torch.LongTensor, torch.Tensor]: | |||
| """ | |||
| :param all_candidate_edge_indexes: | |||
| :param all_edge_index_with_self_loops: integral edge index with self-loops | |||
| :param all_edge_weights: | |||
| :return: (all_source_nodes_indexes, all_source_nodes_probabilities) | |||
| """ | |||
| _all_candidate_edges: torch.Tensor = \ | |||
| all_edge_index_with_self_loops[:, all_candidate_edge_indexes] | |||
| _all_candidate_edges_weights: torch.Tensor = \ | |||
| all_edge_weights[all_candidate_edge_indexes] | |||
| all_candidate_source_nodes_indexes: torch.LongTensor = _all_candidate_edges[0].unique() | |||
| all_candidate_source_nodes_probabilities: torch.Tensor = torch.tensor( | |||
| [ | |||
| torch.sum( | |||
| _all_candidate_edges_weights[_all_candidate_edges[0] == _current_source_node_index] | |||
| ).item() / torch.sum(_all_candidate_edges_weights).item() | |||
| for _current_source_node_index in all_candidate_source_nodes_indexes.tolist() | |||
| ] | |||
| ) | |||
| assert ( | |||
| all_candidate_source_nodes_indexes.size() == | |||
| all_candidate_source_nodes_probabilities.size() | |||
| ) | |||
| return all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities | |||
| @classmethod | |||
| def filter_selected_edges_by_source_nodes_and_target_nodes( | |||
| cls, all_edges_with_self_loops: torch.Tensor, | |||
| selected_source_node_indexes: torch.LongTensor, | |||
| selected_target_node_indexes: torch.LongTensor | |||
| ) -> torch.Tensor: | |||
| """ | |||
| :param all_edges_with_self_loops: all edges with self loops | |||
| :param selected_source_node_indexes: selected source node indexes | |||
| :param selected_target_node_indexes: selected target node indexes | |||
| :return: filtered edge indexes | |||
| """ | |||
| selected_edges_mask_for_source_nodes: torch.Tensor = torch.zeros( | |||
| all_edges_with_self_loops.size(1), dtype=torch.bool | |||
| ) | |||
| selected_edges_mask_for_source_nodes[ | |||
| torch.cat([ | |||
| torch.where(all_edges_with_self_loops[0] == __current_selected_source_node_index)[0] | |||
| for __current_selected_source_node_index in selected_source_node_indexes.unique().tolist() | |||
| ]).unique() | |||
| ] = True | |||
| selected_edges_mask_for_target_nodes: torch.Tensor = torch.zeros( | |||
| all_edges_with_self_loops.size(1), dtype=torch.bool | |||
| ) | |||
| selected_edges_mask_for_target_nodes[ | |||
| torch.cat([ | |||
| torch.where(all_edges_with_self_loops[1] == __current_selected_target_node_index)[0] | |||
| for __current_selected_target_node_index in selected_target_node_indexes.unique().tolist() | |||
| ]) | |||
| ] = True | |||
| return torch.where( | |||
| selected_edges_mask_for_source_nodes & selected_edges_mask_for_target_nodes | |||
| )[0] | |||
| def __init__(self, all_edge_index: torch.LongTensor): | |||
| self.__all_edge_index_with_self_loops: torch.LongTensor = \ | |||
| torch_geometric.utils.add_remaining_self_loops(all_edge_index)[0] | |||
| self.__all_edge_weights: torch.Tensor = \ | |||
| self._Utility.compute_edge_weights(self.__all_edge_index_with_self_loops) | |||
| def __sample_layer( | |||
| self, target_nodes_indexes: torch.LongTensor, | |||
| sampled_node_size_budget: int | |||
| ) -> _typing.Tuple[torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor]: | |||
| """ | |||
| :param target_nodes_indexes: | |||
| node indexes for target nodes in the top layer or nodes sampled in upper layer | |||
| :param sampled_node_size_budget: | |||
| :return: (Tensor, Tensor, LongTensor, LongTensor) | |||
| """ | |||
| all_candidate_edge_indexes: torch.LongTensor = torch.cat( | |||
| [ | |||
| torch.where(self.__all_edge_index_with_self_loops[1] == current_target_node_index)[0] | |||
| for current_target_node_index in target_nodes_indexes.unique().tolist() | |||
| ] | |||
| ).unique() | |||
| __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ | |||
| self._Utility.get_candidate_source_nodes_probabilities( | |||
| all_candidate_edge_indexes, | |||
| self.__all_edge_index_with_self_loops, | |||
| self.__all_edge_weights | |||
| ) | |||
| assert __all_candidate_source_nodes_indexes.size() == all_candidate_source_nodes_probabilities.size() | |||
| """ Sampling """ | |||
| if sampled_node_size_budget < __all_candidate_source_nodes_indexes.numel(): | |||
| selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes[ | |||
| torch.from_numpy( | |||
| np.unique(np.random.choice( | |||
| np.arange(__all_candidate_source_nodes_indexes.numel()), sampled_node_size_budget, | |||
| p=all_candidate_source_nodes_probabilities.numpy() | |||
| )) | |||
| ).unique() | |||
| ].unique() | |||
| else: | |||
| selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes | |||
| __selected_edges_indexes: torch.LongTensor = ( | |||
| self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes( | |||
| self.__all_edge_index_with_self_loops, | |||
| selected_source_node_indexes, target_nodes_indexes | |||
| ) | |||
| ).unique() | |||
| non_normalized_selected_edges_weight: torch.Tensor = ( | |||
| self.__all_edge_weights[__selected_edges_indexes] / ( | |||
| selected_source_node_indexes.numel() * torch.tensor( | |||
| [ | |||
| all_candidate_source_nodes_probabilities[ | |||
| __all_candidate_source_nodes_indexes == current_source_node_index | |||
| ].item() | |||
| for current_source_node_index | |||
| in self.__all_edge_index_with_self_loops[0, __selected_edges_indexes].tolist() | |||
| ] | |||
| ) | |||
| ) | |||
| ) | |||
| def __normalize_edges_weight_by_target_nodes( | |||
| __edge_index: torch.Tensor, __edge_weight: torch.Tensor | |||
| ) -> torch.Tensor: | |||
| if __edge_index.size(1) != __edge_weight.numel(): | |||
| raise ValueError | |||
| for current_target_node_index in __edge_index[1].unique().tolist(): | |||
| __current_mask_for_edges: torch.BoolTensor = ( | |||
| __edge_index[1] == current_target_node_index | |||
| ) | |||
| __edge_weight[__current_mask_for_edges] = ( | |||
| __edge_weight[__current_mask_for_edges] / ( | |||
| torch.sum(__edge_weight[__current_mask_for_edges]) | |||
| ) | |||
| ) | |||
| return __edge_weight | |||
| normalized_selected_edges_weight: torch.Tensor = __normalize_edges_weight_by_target_nodes( | |||
| self.__all_edge_index_with_self_loops[:, __selected_edges_indexes], | |||
| non_normalized_selected_edges_weight | |||
| ) | |||
| return ( | |||
| self.__all_edge_index_with_self_loops[:, __selected_edges_indexes], | |||
| normalized_selected_edges_weight, | |||
| selected_source_node_indexes, | |||
| __selected_edges_indexes | |||
| ) | |||
| def sample( | |||
| self, __top_layer_target_nodes_indexes: torch.LongTensor, | |||
| sampling_node_size_budgets: _typing.Sequence[int] | |||
| ) -> _typing.Sequence[_typing.Tuple[torch.Tensor, torch.Tensor]]: | |||
| """ | |||
| :param __top_layer_target_nodes_indexes: indexes of target nodes for the top layer | |||
| :param sampling_node_size_budgets: | |||
| :return: | |||
| """ | |||
| if type(__top_layer_target_nodes_indexes) != torch.Tensor: | |||
| raise TypeError | |||
| if not isinstance(sampling_node_size_budgets, _typing.Sequence): | |||
| raise TypeError | |||
| if len(sampling_node_size_budgets) == 0: | |||
| raise ValueError | |||
| layers: _typing.List[_typing.Tuple[torch.Tensor, torch.Tensor]] = [] | |||
| upper_layer_sampled_node_indexes: torch.LongTensor = __top_layer_target_nodes_indexes | |||
| for current_sampled_node_size_budget in sampling_node_size_budgets[::-1]: | |||
| _sampling_result: _typing.Tuple[ | |||
| torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor | |||
| ] = self.__sample_layer(upper_layer_sampled_node_indexes, current_sampled_node_size_budget) | |||
| current_layer_edge_index: torch.Tensor = _sampling_result[0] | |||
| current_layer_edge_weight: torch.Tensor = _sampling_result[1] | |||
| layers.append((current_layer_edge_index, current_layer_edge_weight)) | |||
| upper_layer_sampled_node_indexes: torch.LongTensor = _sampling_result[2] | |||
| return layers[::-1] | |||
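A hedged usage sketch for the sampler on a toy graph (the graph size and budgets are illustrative); `sample` returns one `(edge_index, edge_weight)` pair per layer, ordered from the input layer upward:

```python
import torch

edge_index = torch.randint(0, 50, (2, 200))           # random directed edges
sampler = LayerDependentImportanceSampler(edge_index)
target_nodes = torch.arange(10)                       # top-layer training nodes
layers = sampler.sample(target_nodes, [25, 25])       # one budget per GNN layer

edge_indexes = [layer_edge_index for layer_edge_index, _ in layers]
edge_weights = [layer_edge_weight for _, layer_edge_weight in layers]
```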
| @@ -0,0 +1,65 @@ | |||
| ensemble: | |||
| name: null | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| - hp_space: | |||
| - feasiblePoints: | |||
| - 0 | |||
| parameterName: add_self_loops | |||
| type: CATEGORICAL | |||
| - feasiblePoints: 5,5 | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 4 | |||
| maxValue: 256 | |||
| minValue: 64 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.8 | |||
| minValue: 0.2 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gcn | |||
| trainer: | |||
| name: NodeClassificationLayerDependentImportanceSamplingTrainer | |||
| hp_space: | |||
| - feasiblePoints: 128,256,512 | |||
| parameterName: sampled_node_size_budget | |||
| type: DISCRETE | |||
| - maxValue: 300 | |||
| minValue: 100 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 30 | |||
| minValue: 10 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.01 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 0.0005 | |||
| minValue: 0.0001 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
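Assuming this YAML is consumed through the same solver entry point as the repository's other configs (the exact v0.3.x API is not shown in this diff, so treat this as a sketch; the file name is hypothetical):

```python
from autogl.solver import AutoNodeClassifier

solver = AutoNodeClassifier.from_config("ladies_gcn.yml")
```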
| @@ -29,10 +29,10 @@ models: | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints": | |||
| - feasiblePoints: | |||
| - mean | |||
| parameterName: aggr, | |||
| type: CATEGORICAL, | |||
| parameterName: aggr | |||
| type: CATEGORICAL | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| @@ -29,12 +29,12 @@ models: | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints": | |||
| - feasiblePoints: | |||
| - mean | |||
| - add | |||
| - max | |||
| parameterName: agg, | |||
| type: CATEGORICAL, | |||
| parameterName: aggr | |||
| type: CATEGORICAL | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||