node

4 years ago · 4dfdea77b6
--- a/autogl/module/model/dgl/init.py
+++ b/autogl/module/model/dgl/init.py
@@ -9,6 +9,9 @@ from .gcn import AutoGCN
 from .gat import AutoGAT
 from .gin import AutoGIN
 from .gin_dgl import GIN
 from .gcn_dgl import GCN
 from .graphsage_dgl import GraphSAGE
 from .gat_dgl import GAT

 __all__ = [
    "ModelUniversalRegistry",
@@ -21,4 +24,7 @@ __all__ = [
    "AutoGAT",
    "AutoGIN",
    "GIN",
    "GCN",
    "GraphSAGE",
    "GAT"
 ]
--- a/autogl/module/model/dgl/gat_dgl.py
+++ b/autogl/module/model/dgl/gat_dgl.py
@@ -0,0 +1,216 @@
 import torch
 import torch.nn.functional as F
 from dgl.nn.pytorch.conv import GATConv
 from . import register_model
 from .base import BaseModel, activate_func
 from ....utils import get_logger

 LOGGER = get_logger("GATModel")


 def set_default(args, d):
    """Fill ``args`` in place with the entries of ``d`` whose keys it lacks.

    Keys already present in ``args`` keep their current value. The same
    ``args`` object is returned for convenience.
    """
    absent_keys = [key for key in d if key not in args]
    for key in absent_keys:
        args[key] = d[key]
    return args


 class GAT(torch.nn.Module):
    """Stack of DGL ``GATConv`` layers for node classification / link prediction.

    Parameters
    ----------
    args : mapping
        Must provide ``features_num``, ``num_class``, ``num_layers``,
        ``hidden`` (sequence of hidden sizes), ``heads``, ``dropout`` and
        ``act``. Hidden layers use ``heads`` attention heads whose outputs are
        concatenated; the output layer uses a single head.

    Raises
    ------
    ValueError
        If any required key is absent from ``args``.
    """

    _REQUIRED_KEYS = (
        "features_num",
        "num_class",
        "num_layers",
        "hidden",
        "heads",
        "dropout",
        "act",
    )

    def __init__(self, args):
        super(GAT, self).__init__()
        self.args = args

        # Validate before touching any key so that a missing entry produces one
        # readable message instead of a bare KeyError. Sorted for a stable text.
        missing_keys = sorted(set(self._REQUIRED_KEYS) - set(self.args.keys()))
        if missing_keys:
            raise ValueError("Missing keys: %s." % ",".join(missing_keys))

        self.num_layer = int(self.args["num_layers"])
        hidden = self.args["hidden"]
        heads = self.args["heads"]
        attn_drop = self.args["dropout"]

        if self.num_layer != len(hidden) + 1:
            LOGGER.warning(
                "Warning: layer size does not match the length of hidden units"
            )

        self.convs = torch.nn.ModuleList()
        self.convs.append(
            GATConv(
                self.args["features_num"],
                hidden[0],
                num_heads=heads,
                attn_drop=attn_drop,
            )
        )
        # Heads are concatenated, so the next layer sees hidden * heads inputs.
        last_dim = hidden[0] * heads
        for i in range(self.num_layer - 2):
            self.convs.append(
                GATConv(
                    last_dim,
                    hidden[i + 1],
                    num_heads=heads,
                    attn_drop=attn_drop,
                )
            )
            last_dim = hidden[i + 1] * heads
        self.convs.append(
            GATConv(
                last_dim,
                self.args["num_class"],
                num_heads=1,
                attn_drop=attn_drop,
            )
        )

    def forward(self, data):
        """Return per-node log-probabilities for the graph ``data``.

        ``data`` is used as the graph argument of every ``GATConv`` and must
        carry the input features in ``data.ndata['x']``.

        Raises
        ------
        KeyError
            If ``data.ndata`` has no ``'x'`` entry.
        """
        try:
            x = data.ndata["x"]
        except KeyError as err:
            raise KeyError(
                "GAT.forward expects node features in data.ndata['x']"
            ) from err

        for i in range(self.num_layer):
            x = F.dropout(x, p=self.args["dropout"], training=self.training)
            x = self.convs[i](data, x)
            # GATConv yields (nodes, heads, out_feats); concatenate the heads.
            x = x.flatten(start_dim=1)
            if i != self.num_layer - 1:
                x = activate_func(x, self.args["act"])

        return F.log_softmax(x, dim=1)

    def lp_encode(self, data):
        """Encode nodes with every layer except the output layer.

        No activation is applied after the last encoding layer.
        """
        # NOTE(review): the layers are DGL convolutions, so they are called as
        # (graph, features) on ``data`` itself; the caller is assumed to pass a
        # graph that holds only the training edges -- confirm against callers.
        x = data.ndata["x"]
        for i in range(self.num_layer - 1):
            x = self.convs[i](data, x)
            # Concatenate the attention heads.
            x = x.flatten(start_dim=1)
            if i != self.num_layer - 2:
                x = activate_func(x, self.args["act"])
        return x

    def lp_decode(self, z, pos_edge_index, neg_edge_index):
        """Score candidate edges by the inner product of their endpoint embeddings.

        The positive and negative edge indices are concatenated along the last
        dimension; row 0 and row 1 of the result index ``z``.
        """
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def lp_decode_all(self, z):
        """Return, as a 2-row tensor, all node pairs whose inner product is positive."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()


@register_model("gat")
 class AutoGAT(BaseModel):
    r"""
    AutoGAT. The model used in this automodel is GAT, i.e., the graph attentional network from the `"Graph Attention Networks"
    <https://arxiv.org/abs/1710.10903>`_ paper. The layer is

    .. math::
        \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} +
        \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}

    where the attention coefficients :math:`\alpha_{i,j}` are computed as

    .. math::
        \alpha_{i,j} =
        \frac{
        \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top}
        [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j]
        \right)\right)}
        {\sum_{k \in \mathcal{N}(i) \cup \{ i \}}
        \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top}
        [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k]
        \right)\right)}.

    Parameters
    ----------
    num_features: `int`.
        The dimension of features. Stored as 0 when omitted.

    num_classes: `int`.
        The number of classes. Stored as 0 when omitted.

    device: `torch.device` or `str`
        The device where model will be running on. Defaults to ``"cpu"``.

    init: `bool`.
        If exactly ``True``, the model is built immediately via
        :meth:`initialize`; otherwise construction is deferred.

    args: Other keyword arguments. They are accepted but not used by the
        code in this class.
    """

    def __init__(
        self, num_features=None, num_classes=None, device=None, init=False, **args
    ):
        super(AutoGAT, self).__init__()
        self.num_features = num_features if num_features is not None else 0
        self.num_classes = int(num_classes) if num_classes is not None else 0
        self.device = device if device is not None else "cpu"
        # NOTE(review): set to True regardless of the ``init`` argument --
        # confirm this is intended.
        self.init = True

        # Fixed (non-searched) constructor arguments of the GAT network.
        self.params = {
            "features_num": self.num_features,
            "num_class": self.num_classes,
        }
        # Hyper-parameter search space. The entry schema is defined by the
        # hyper-parameter optimizer, which is not visible in this file.
        self.space = [
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [64, 64, 64],
                "scalingType": "LOG",
                # presumably trims ``hidden`` to num_layers - 1 entries;
                # verify against the optimizer that consumes cutPara/cutFunc.
                "cutPara": ("num_layers",),
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "heads",
                "type": "DISCRETE",
                "feasiblePoints": "2,4,8,16",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
        ]

        # Default hyper-parameters used until a search overrides them.
        self.hyperparams = {
            "num_layers": 2,
            "hidden": [32],
            "heads": 4,
            "dropout": 0.2,
            "act": "leaky_relu",
        }

        self.initialized = False
        # Identity check: only the literal ``True`` triggers eager construction.
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the GAT network once and move it to ``self.device``.

        Subsequent calls are no-ops because ``self.initialized`` is set on the
        first call.
        """
        if self.initialized:
            return
        self.initialized = True
        # Fixed params and hyper-params are merged into the single mapping
        # that the GAT constructor validates.
        self.model = GAT({**self.params, **self.hyperparams}).to(self.device)
--- a/autogl/module/model/dgl/gcn_dgl.py
+++ b/autogl/module/model/dgl/gcn_dgl.py
@@ -3,26 +3,15 @@ import torch.nn.functional
 import typing as _typing

 from dgl.nn.pytorch.conv import GraphConv
 from dgl import remove_self_loop, add_self_loop
 import autogl.data
 from . import register_model
 from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel
 from ....utils import get_logger

 LOGGER = get_logger("GCNModel")

 def add_self_loop(edge_index,num_nodes,edge_weight=None, fill_value=1.):
    """Append one self-loop per node to an edge index and, if given, its weights.

    ``edge_index`` is concatenated along dimension 1 with a ``(2, num_nodes)``
    tensor of ``(i, i)`` pairs, so it is presumably shaped ``(2, num_edges)``
    -- confirm against callers. When ``edge_weight`` is provided it must have
    one entry per edge; ``num_nodes`` weights equal to ``fill_value`` are
    appended to it.

    Returns the tuple ``(edge_index, edge_weight)``; ``edge_weight`` stays
    ``None`` when none was passed in.

    NOTE(review): this name is also imported from ``dgl`` earlier in the same
    file view, so one definition shadows the other -- confirm which is meant.
    """
    N = num_nodes
    # Node ids 0..N-1 duplicated into two rows, i.e. the edges (i, i).
    loop_index = torch.arange(0, N, dtype=torch.long, device=edge_index.device)
    loop_index = loop_index.unsqueeze(0).repeat(2, 1)

    if edge_weight is not None:
        # One weight per existing edge is required before extending.
        assert edge_weight.numel() == edge_index.size(1)
        loop_weight = edge_weight.new_full((N, ), fill_value)
        edge_weight = torch.cat([edge_weight, loop_weight], dim=0)

    edge_index = torch.cat([edge_index, loop_index], dim=1)
    # NOTE(review): the next assignment sits at module indentation inside this
    # function; it looks like residue of the patch rendering rather than part
    # of the function -- confirm.
 LOGGER = get_logger("GCNModel")

    return edge_index, edge_weight

 class GCN(ClassificationSupportedSequentialModel):
    class _GCNLayer(torch.nn.Module):
@@ -68,24 +57,12 @@ class GCN(ClassificationSupportedSequentialModel):
        def forward(self, data, enable_activation: bool = True) -> torch.Tensor:
            
            x: torch.Tensor = data.ndata['feat']
            edge_index: torch.LongTensor = data.edges
            
            if self.add_self_loops:
                edge_index, edge_weight = add_self_loop(edge_index, x.size(0), edge_weight)
                data = remove_self_loop(data)
                data = add_self_loop(data)

            
            # edge_weight: _typing.Optional[torch.Tensor] = getattr(
            #     data, "edge_weight", None
            # )
            # """ Validate the arguments """
            # if not type(x) == type(edge_index) == torch.Tensor:
            #     raise TypeError
            # if edge_weight is not None and (
            #     type(edge_weight) != torch.Tensor
            #     or edge_index.size() != (2, edge_weight.size(0))
            # ):
            #     edge_weight: _typing.Optional[torch.Tensor] = None
            

            x: torch.Tensor = self._convolution.forward(data, x)
            if self._activation_name is not None and enable_activation:
                x: torch.Tensor = activate_func(x, self._activation_name)
@@ -218,19 +195,21 @@ class GCN(ClassificationSupportedSequentialModel):
            and len(getattr(data, "edge_indexes"))
            == len(self.__sequential_encoding_layers)
        ):
            if not data.edata.has_key('edge_weights'):
                data.edata['edge_weights']=None
            return __compose_edge_index_and_weight(
                getattr(data, "edge_index"), getattr(data, "edge_weight", None)
                data.edges(), data.edata['edge_weights']
            )
        for __edge_index in getattr(data, "edge_indexes"):
            if type(__edge_index) != torch.Tensor or __edge_index.dtype != torch.int64:
                return __compose_edge_index_and_weight(
                    getattr(data, "edge_index"), getattr(data, "edge_weight", None)
                )
        # for __edge_index in getattr(data, "edge_indexes"):
        #     if type(__edge_index) != torch.Tensor or __edge_index.dtype != torch.int64:
        #         return __compose_edge_index_and_weight(
        #             data.edges(), getattr(data, "edge_weight", None)
        #         )

        if (
            hasattr(data, "edge_weights")
            and isinstance(getattr(data, "edge_weights"), _typing.Sequence)
            and len(getattr(data, "edge_weights"))
            data.edata.has_key('edge_weights')
            and isinstance(data.edata['edge_weights'], _typing.Sequence)
            and len(data.edata.has_key('edge_weights'))
            == len(self.__sequential_encoding_layers)
        ):
            return [
@@ -260,7 +239,7 @@ class GCN(ClassificationSupportedSequentialModel):
            assert len(edge_indexes_and_weights) == len(
                self.__sequential_encoding_layers
            )
            x: torch.Tensor = getattr(data, "x")
            x: torch.Tensor = data.ndata['x']
            for _edge_index_and_weight, gcn in zip(
                edge_indexes_and_weights, self.__sequential_encoding_layers
            ):
@@ -270,7 +249,7 @@ class GCN(ClassificationSupportedSequentialModel):
            return x
        else:
            """ edge_indexes_and_weights is (edge_index, edge_weight) """
            x = getattr(data, "x")
            x = data.ndata['x']
            for gcn in self.__sequential_encoding_layers:
                _temp_data = autogl.data.Data(
                    x=x, edge_index=edge_indexes_and_weights[0]
@@ -283,13 +262,13 @@ class GCN(ClassificationSupportedSequentialModel):
        return torch.nn.functional.log_softmax(x, dim=1)

    def lp_encode(self, data):
        x: torch.Tensor = getattr(data, "x")
        x: torch.Tensor = data.ndata['x']
        for i in range(len(self.__sequential_encoding_layers) - 2):
            x = self.__sequential_encoding_layers[i](
                autogl.data.Data(x, getattr(data, "edge_index"))
                autogl.data.Data(x, data.edges())
            )
        x = self.__sequential_encoding_layers[-2](
            autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False
            autogl.data.Data(x, data.edges()), enable_activation=False
        )
        return x

--- a/autogl/module/model/dgl/graphsage_dgl.py
+++ b/autogl/module/model/dgl/graphsage_dgl.py
@@ -0,0 +1,303 @@
 import torch
 import typing as _typing

 from dgl.nn.pytorch.conv import SAGEConv
 import torch.nn.functional
 import autogl.data
 from . import register_model
 from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel
 from ....utils import get_logger

 LOGGER = get_logger("SAGEModel")


 class GraphSAGE(ClassificationSupportedSequentialModel):
    """GraphSAGE encoder assembled from DGL ``SAGEConv`` layers.

    Parameters
    ----------
    num_features : int
        Size of the input node features.
    num_classes : int
        Size of the output of the last layer.
    hidden_features : sequence of int
        Positive output sizes of the hidden layers; may be empty, in which
        case a single layer maps ``num_features`` to ``num_classes``.
    activation_name : str
        Activation passed to ``activate_func`` after every layer except the
        output layer of a multi-layer network.
    layers_dropout : float, None, or sequence of (float or None)
        A scalar applies to every hidden layer (the output layer gets none).
        A sequence must have ``len(hidden_features) + 1`` entries, one per layer.
    aggr : str
        ``SAGEConv`` aggregator type. Unsupported names fall back to ``"mean"``.

    Raises
    ------
    TypeError
        On arguments of the wrong type or a dropout sequence of wrong length.
    ValueError
        On a non-positive hidden size.
    """

    # Aggregator names accepted by DGL's SAGEConv.
    _VALID_AGGREGATORS = ("mean", "gcn", "pool", "lstm")

    class _SAGELayer(torch.nn.Module):
        """One ``SAGEConv`` followed by an optional activation and dropout."""

        def __init__(
            self,
            input_channels: int,
            output_channels: int,
            aggr: str,
            activation_name: _typing.Optional[str] = ...,
            dropout_probability: _typing.Optional[float] = ...,
        ):
            super().__init__()
            self._convolution: SAGEConv = SAGEConv(
                input_channels, output_channels, aggregator_type=aggr
            )
            # Anything that is not a string (None, Ellipsis, ...) disables the
            # activation.
            self._activation_name: _typing.Optional[str] = (
                activation_name if isinstance(activation_name, str) else None
            )
            # Only a float enables dropout; it is clamped into [0, 1].
            if isinstance(dropout_probability, float):
                clamped = min(max(dropout_probability, 0.0), 1.0)
                self._dropout: _typing.Optional[torch.nn.Dropout] = (
                    torch.nn.Dropout(clamped)
                )
            else:
                self._dropout: _typing.Optional[torch.nn.Dropout] = None

        def forward(
            self,
            data,
            enable_activation: bool = True,
            x: _typing.Optional[torch.Tensor] = None,
        ) -> torch.Tensor:
            """Apply the layer on graph ``data``.

            ``x`` are the input features; when omitted they are read from
            ``data.ndata['x']``. Passing ``x`` lets stacked layers chain their
            outputs without writing intermediate features back to the graph.
            """
            if x is None:
                x = data.ndata["x"]
            x = self._convolution(data, x)
            if self._activation_name is not None and enable_activation:
                x = activate_func(x, self._activation_name)
            if self._dropout is not None:
                x = self._dropout(x)
            return x

    def __init__(
        self,
        num_features: int,
        num_classes: int,
        hidden_features: _typing.Sequence[int],
        activation_name: str,
        layers_dropout: _typing.Union[
            _typing.Optional[float], _typing.Sequence[_typing.Optional[float]]
        ] = None,
        aggr: str = "mean",
    ):
        super().__init__()
        if not (isinstance(num_features, int) and isinstance(num_classes, int)):
            raise TypeError("num_features and num_classes must be int")
        if not isinstance(hidden_features, _typing.Sequence):
            raise TypeError("hidden_features must be a sequence of int")
        for hidden_feature in hidden_features:
            if not isinstance(hidden_feature, int):
                raise TypeError("hidden_features must contain only int")
            if hidden_feature <= 0:
                raise ValueError("hidden_features must be positive")

        if isinstance(layers_dropout, _typing.Sequence):
            if len(layers_dropout) != (len(hidden_features) + 1):
                raise TypeError(
                    "layers_dropout needs len(hidden_features) + 1 entries"
                )
            for d in layers_dropout:
                if d is not None and not isinstance(d, float):
                    raise TypeError("layers_dropout entries must be float or None")
            _layers_dropout: _typing.Sequence[_typing.Optional[float]] = layers_dropout
        elif layers_dropout is None or isinstance(layers_dropout, float):
            # A scalar covers the hidden layers only; the last layer gets none.
            _layers_dropout: _typing.Sequence[_typing.Optional[float]] = [
                layers_dropout for _ in range(len(hidden_features))
            ] + [None]
        else:
            raise TypeError("layers_dropout must be float, None or a sequence")

        if not (isinstance(activation_name, str) and isinstance(aggr, str)):
            raise TypeError("activation_name and aggr must be str")
        if aggr not in self._VALID_AGGREGATORS:
            LOGGER.warning(
                "Unsupported SAGEConv aggregator %r, falling back to 'mean'", aggr
            )
            aggr = "mean"

        if len(hidden_features) == 0:
            layers = [
                self._SAGELayer(
                    num_features,
                    num_classes,
                    aggr,
                    activation_name,
                    _layers_dropout[0],
                )
            ]
        else:
            widths = [num_features, *hidden_features]
            layers = [
                self._SAGELayer(
                    widths[i],
                    widths[i + 1],
                    aggr,
                    activation_name,
                    _layers_dropout[i],
                )
                for i in range(len(hidden_features))
            ]
            # Output layer: no activation. The dropout is passed in its own
            # slot so that an explicitly requested value is honoured instead
            # of landing in the activation-name parameter.
            layers.append(
                self._SAGELayer(
                    hidden_features[-1],
                    num_classes,
                    aggr,
                    None,
                    _layers_dropout[-1],
                )
            )
        self.__sequential_encoding_layers: torch.nn.ModuleList = (
            torch.nn.ModuleList(layers)
        )

    @property
    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
        """The stacked layers, in application order."""
        return self.__sequential_encoding_layers

    def cls_encode(self, data) -> torch.Tensor:
        """Run every layer on graph ``data``, starting from ``data.ndata['x']``."""
        x: torch.Tensor = data.ndata["x"]
        for layer in self.__sequential_encoding_layers:
            # The graph supplies the structure; features are threaded explicitly.
            x = layer(data, x=x)
        return x

    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
        """Turn encoded features into log-probabilities over dimension 1."""
        return torch.nn.functional.log_softmax(x, dim=1)

    def lp_encode(self, data):
        """Encode nodes with all layers but the last; no activation at the end.

        Raises ``IndexError`` when the model has fewer than two layers.
        """
        x: torch.Tensor = data.ndata["x"]
        layers = list(self.__sequential_encoding_layers)
        for layer in layers[:-2]:
            x = layer(data, x=x)
        return layers[-2](data, enable_activation=False, x=x)

    def lp_decode(self, z, pos_edge_index, neg_edge_index):
        """Score candidate edges by the inner product of their endpoint embeddings.

        The positive and negative edge indices are concatenated along the last
        dimension; row 0 and row 1 of the result index ``z``.
        """
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def lp_decode_all(self, z):
        """Return, as a 2-row tensor, all node pairs whose inner product is positive."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()


@register_model("sage")
class AutoSAGE(BaseModel):
    r"""
    AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on
    Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper. The layer is

    .. math::

        \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot
        \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j

    Parameters
    ----------
    num_features: `int`.
        The dimension of features. Stored as 0 when omitted.

    num_classes: `int`.
        The number of classes. Stored as 0 when omitted.

    device: `torch.device` or `str`
        The device where model will be running on. Defaults to ``"cpu"``.

    init: `bool`.
        If exactly ``True``, the model is built immediately via
        :meth:`initialize`; otherwise construction is deferred.

    args: Other keyword arguments. They are accepted but not used by the
        code in this class.
    """

    def __init__(
        self, num_features=None, num_classes=None, device=None, init=False, **args
    ):

        super(AutoSAGE, self).__init__()

        self.num_features = num_features if num_features is not None else 0
        self.num_classes = int(num_classes) if num_classes is not None else 0
        self.device = device if device is not None else "cpu"
        # NOTE(review): set to True regardless of the ``init`` argument --
        # confirm this is intended.
        self.init = True

        self.params = {
            "features_num": self.num_features,
            "num_class": self.num_classes,
        }
        # Hyper-parameter search space. The entry schema is defined by the
        # hyper-parameter optimizer, which is not visible in this file.
        self.space = [
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [128, 128, 128],
                "scalingType": "LOG",
                "cutPara": ("num_layers",),
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
            {
                "parameterName": "agg",
                "type": "CATEGORICAL",
                # Aggregator names understood by DGL's SAGEConv; the PyG names
                # "add" and "max" are not valid aggregator types there.
                "feasiblePoints": ["mean", "gcn", "pool", "lstm"],
            },
        ]

        # Default hyper-parameters used until a search overrides them.
        self.hyperparams = {
            "num_layers": 3,
            "hidden": [64, 32],
            "dropout": 0.5,
            "act": "relu",
            "agg": "mean",
        }

        self.initialized = False
        # Identity check: only the literal ``True`` triggers eager construction.
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the GraphSAGE network once and move it to ``self.device``.

        Subsequent calls are no-ops because ``self.initialized`` is set on the
        first call.
        """
        if self.initialized:
            return
        self.initialized = True
        self.model = GraphSAGE(
            self.num_features,
            self.num_classes,
            self.hyperparams.get("hidden"),
            self.hyperparams.get("act", "relu"),
            self.hyperparams.get("dropout", None),
            self.hyperparams.get("agg", "mean"),
        ).to(self.device)
--- a/test/model_nlf/nclf_dgl.py
+++ b/test/model_nlf/nclf_dgl.py
@@ -7,7 +7,7 @@ from tqdm import tqdm
 sys.path.append("../../")
 print(os.getcwd())
 os.environ["AUTOGL_BACKEND"] = "dgl"
 #os.environ["AUTOGL_BACKEND"] = "pyg"
 # os.environ["AUTOGL_BACKEND"] = "pyg"
 from autogl.backend import DependentBackend
 import dgl
 from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset, GINDataset