Browse Source

node

tags/v0.3.1
Beini Frozenmad 4 years ago
parent
commit
4dfdea77b6
5 changed files with 547 additions and 43 deletions
  1. +6
    -0
      autogl/module/model/dgl/__init__.py
  2. +216
    -0
      autogl/module/model/dgl/gat_dgl.py
  3. +21
    -42
      autogl/module/model/dgl/gcn_dgl.py
  4. +303
    -0
      autogl/module/model/dgl/graphsage_dgl.py
  5. +1
    -1
      test/model_nlf/nclf_dgl.py

+ 6
- 0
autogl/module/model/dgl/__init__.py View File

@@ -9,6 +9,9 @@ from .gcn import AutoGCN
from .gat import AutoGAT
from .gin import AutoGIN
from .gin_dgl import GIN
from .gcn_dgl import GCN
from .graphsage_dgl import GraphSAGE
from .gat_dgl import GAT

__all__ = [
"ModelUniversalRegistry",
@@ -21,4 +24,7 @@ __all__ = [
"AutoGAT",
"AutoGIN",
"GIN",
"GCN",
"GraphSAGE",
"GAT"
]

+ 216
- 0
autogl/module/model/dgl/gat_dgl.py View File

@@ -0,0 +1,216 @@
import torch
import torch.nn.functional as F
from dgl.nn.pytorch.conv import GATConv
from . import register_model
from .base import BaseModel, activate_func
from ....utils import get_logger

LOGGER = get_logger("GATModel")


def set_default(args, d):
    """Fill ``args`` in place with the entries of ``d`` it does not yet have.

    Keys already present in ``args`` keep their current value; only missing
    keys are copied over from ``d``. The (mutated) ``args`` mapping is
    returned for convenience.
    """
    for key, fallback in d.items():
        if key in args:
            # An explicit value always wins over the default.
            continue
        args[key] = fallback
    return args


class GAT(torch.nn.Module):
    """Multi-layer graph attention network built from DGL ``GATConv`` layers.

    Parameters
    ----------
    args : dict
        Model configuration. The keys ``features_num``, ``num_class``,
        ``num_layers``, ``hidden``, ``heads``, ``dropout`` and ``act`` are
        required.

    Raises
    ------
    Exception
        If any of the required keys is missing from ``args``.
    """

    _REQUIRED_KEYS = (
        "features_num",
        "num_class",
        "num_layers",
        "hidden",
        "heads",
        "dropout",
        "act",
    )

    def __init__(self, args):
        super(GAT, self).__init__()
        self.args = args

        # Validate first: reading ``args["num_layers"]`` before this check
        # would surface a bare KeyError instead of the descriptive message.
        missing_keys = sorted(set(self._REQUIRED_KEYS) - set(self.args.keys()))
        if missing_keys:
            raise Exception("Missing keys: %s." % ",".join(missing_keys))

        self.num_layer = int(self.args["num_layers"])
        hidden = self.args["hidden"]
        heads = self.args["heads"]
        dropout = self.args["dropout"]

        if self.num_layer != len(hidden) + 1:
            LOGGER.warning(
                "Warning: layer size does not match the length of hidden units"
            )

        self.convs = torch.nn.ModuleList()
        # Input layer.
        self.convs.append(
            GATConv(
                self.args["features_num"],
                hidden[0],
                num_heads=heads,
                attn_drop=dropout,
            )
        )
        # Heads are concatenated between layers, so the next layer sees
        # ``hidden * heads`` input features.
        last_dim = hidden[0] * heads
        for i in range(self.num_layer - 2):
            self.convs.append(
                GATConv(
                    last_dim,
                    hidden[i + 1],
                    num_heads=heads,
                    attn_drop=dropout,
                )
            )
            last_dim = hidden[i + 1] * heads
        # Output layer: a single head producing one score per class.
        self.convs.append(
            GATConv(
                last_dim,
                self.args["num_class"],
                num_heads=1,
                attn_drop=dropout,
            )
        )

    def forward(self, data):
        """Return per-node log-probabilities for the graph ``data``.

        ``data`` must expose the node features as ``data.ndata['x']``.

        Raises
        ------
        KeyError
            If ``data.ndata`` has no ``'x'`` entry.
        """
        try:
            x = data.ndata["x"]
        except KeyError as err:
            # Fail loudly here rather than with a NameError further down.
            raise KeyError("Input graph has no node feature 'x' in ndata.") from err
        for i in range(self.num_layer):
            x = F.dropout(x, p=self.args["dropout"], training=self.training)
            # GATConv yields (N, num_heads, out_feats); concatenate the heads
            # into a flat (N, num_heads * out_feats) feature matrix.
            x = self.convs[i](data, x).flatten(1)
            if i != self.num_layer - 1:
                x = activate_func(x, self.args["act"])

        return F.log_softmax(x, dim=1)

    def lp_encode(self, data):
        """Encode nodes for link prediction using all but the output layer.

        No activation is applied after the last encoding layer.
        """
        # NOTE(review): assumes ``data`` already contains only the training
        # (positive) edges -- TODO confirm against the link-prediction trainer.
        x = data.ndata["x"]
        for i in range(self.num_layer - 1):
            # DGL convolutions are called as ``conv(graph, feat)``.
            x = self.convs[i](data, x).flatten(1)
            if i != self.num_layer - 2:
                x = activate_func(x, self.args["act"])
        return x

    def lp_decode(self, z, pos_edge_index, neg_edge_index):
        """Score candidate edges by the dot product of their endpoint embeddings.

        Positive and negative edge indices are concatenated along the last
        dimension; one logit is returned per edge, positives first.
        """
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def lp_decode_all(self, z):
        """Return the index pairs ``(i, j)`` whose embedding dot product is positive."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()


@register_model("gat")
class AutoGAT(BaseModel):
    r"""
    AutoGAT: an automodel wrapping GAT, the graph attentional network from the
    `"Graph Attention Networks" <https://arxiv.org/abs/1710.10903>`_ paper.
    Each layer computes

    .. math::
        \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} +
        \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}

    with attention coefficients :math:`\alpha_{i,j}` given by

    .. math::
        \alpha_{i,j} =
        \frac{
        \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top}
        [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j]
        \right)\right)}
        {\sum_{k \in \mathcal{N}(i) \cup \{ i \}}
        \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top}
        [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k]
        \right)\right)}.

    Parameters
    ----------
    num_features: `int`.
        The dimension of features. Stored as 0 when omitted.

    num_classes: `int`.
        The number of classes. Stored as 0 when omitted.

    device: `torch.device` or `str`
        The device where model will be running on. Defaults to ``"cpu"``.

    init: `bool`.
        If True(False), the model will (not) be initialized.

    args: Other parameters (accepted but not used by this constructor).
    """

    def __init__(
        self, num_features=None, num_classes=None, device=None, init=False, **args
    ):
        super(AutoGAT, self).__init__()
        if num_features is None:
            num_features = 0
        self.num_features = num_features
        self.num_classes = 0 if num_classes is None else int(num_classes)
        self.device = "cpu" if device is None else device
        # NOTE(review): set to True regardless of the ``init`` argument --
        # confirm this is what BaseModel expects.
        self.init = True

        # Fixed, data-dependent parameters handed to the network.
        self.params = dict(
            features_num=self.num_features,
            num_class=self.num_classes,
        )

        # Hyper-parameter search space, one entry per tunable parameter.
        depth_space = {
            "parameterName": "num_layers",
            "type": "DISCRETE",
            "feasiblePoints": "2,3,4",
        }
        hidden_space = {
            "parameterName": "hidden",
            "type": "NUMERICAL_LIST",
            "numericalType": "INTEGER",
            "length": 3,
            "minValue": [8, 8, 8],
            "maxValue": [64, 64, 64],
            "scalingType": "LOG",
            # The list is cut to ``num_layers - 1`` entries.
            "cutPara": ("num_layers",),
            "cutFunc": lambda cut_values: cut_values[0] - 1,
        }
        dropout_space = {
            "parameterName": "dropout",
            "type": "DOUBLE",
            "maxValue": 0.8,
            "minValue": 0.2,
            "scalingType": "LINEAR",
        }
        heads_space = {
            "parameterName": "heads",
            "type": "DISCRETE",
            "feasiblePoints": "2,4,8,16",
        }
        activation_space = {
            "parameterName": "act",
            "type": "CATEGORICAL",
            "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
        }
        self.space = [
            depth_space,
            hidden_space,
            dropout_space,
            heads_space,
            activation_space,
        ]

        # Defaults used until a search overrides them.
        self.hyperparams = {
            "num_layers": 2,
            "hidden": [32],
            "heads": 4,
            "dropout": 0.2,
            "act": "leaky_relu",
        }

        self.initialized = False
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the GAT network once and move it to ``self.device``.

        Subsequent calls are no-ops.
        """
        if not self.initialized:
            self.initialized = True
            # Hyper-parameters override fixed parameters on key clashes.
            model_args = dict(self.params)
            model_args.update(self.hyperparams)
            self.model = GAT(model_args).to(self.device)

+ 21
- 42
autogl/module/model/dgl/gcn_dgl.py View File

@@ -3,26 +3,15 @@ import torch.nn.functional
import typing as _typing

from dgl.nn.pytorch.conv import GraphConv
from dgl import remove_self_loop, add_self_loop
import autogl.data
from . import register_model
from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel
from ....utils import get_logger

LOGGER = get_logger("GCNModel")

def add_self_loop(edge_index,num_nodes,edge_weight=None, fill_value=1.):
N = num_nodes
loop_index = torch.arange(0, N, dtype=torch.long, device=edge_index.device)
loop_index = loop_index.unsqueeze(0).repeat(2, 1)

if edge_weight is not None:
assert edge_weight.numel() == edge_index.size(1)
loop_weight = edge_weight.new_full((N, ), fill_value)
edge_weight = torch.cat([edge_weight, loop_weight], dim=0)

edge_index = torch.cat([edge_index, loop_index], dim=1)
LOGGER = get_logger("GCNModel")

return edge_index, edge_weight

class GCN(ClassificationSupportedSequentialModel):
class _GCNLayer(torch.nn.Module):
@@ -68,24 +57,12 @@ class GCN(ClassificationSupportedSequentialModel):
def forward(self, data, enable_activation: bool = True) -> torch.Tensor:
x: torch.Tensor = data.ndata['feat']
edge_index: torch.LongTensor = data.edges
if self.add_self_loops:
edge_index, edge_weight = add_self_loop(edge_index, x.size(0), edge_weight)
data = remove_self_loop(data)
data = add_self_loop(data)

# edge_weight: _typing.Optional[torch.Tensor] = getattr(
# data, "edge_weight", None
# )
# """ Validate the arguments """
# if not type(x) == type(edge_index) == torch.Tensor:
# raise TypeError
# if edge_weight is not None and (
# type(edge_weight) != torch.Tensor
# or edge_index.size() != (2, edge_weight.size(0))
# ):
# edge_weight: _typing.Optional[torch.Tensor] = None

x: torch.Tensor = self._convolution.forward(data, x)
if self._activation_name is not None and enable_activation:
x: torch.Tensor = activate_func(x, self._activation_name)
@@ -218,19 +195,21 @@ class GCN(ClassificationSupportedSequentialModel):
and len(getattr(data, "edge_indexes"))
== len(self.__sequential_encoding_layers)
):
if not data.edata.has_key('edge_weights'):
data.edata['edge_weights']=None
return __compose_edge_index_and_weight(
getattr(data, "edge_index"), getattr(data, "edge_weight", None)
data.edges(), data.edata['edge_weights']
)
for __edge_index in getattr(data, "edge_indexes"):
if type(__edge_index) != torch.Tensor or __edge_index.dtype != torch.int64:
return __compose_edge_index_and_weight(
getattr(data, "edge_index"), getattr(data, "edge_weight", None)
)
# for __edge_index in getattr(data, "edge_indexes"):
# if type(__edge_index) != torch.Tensor or __edge_index.dtype != torch.int64:
# return __compose_edge_index_and_weight(
# data.edges(), getattr(data, "edge_weight", None)
# )

if (
hasattr(data, "edge_weights")
and isinstance(getattr(data, "edge_weights"), _typing.Sequence)
and len(getattr(data, "edge_weights"))
data.edata.has_key('edge_weights')
and isinstance(data.edata['edge_weights'], _typing.Sequence)
and len(data.edata.has_key('edge_weights'))
== len(self.__sequential_encoding_layers)
):
return [
@@ -260,7 +239,7 @@ class GCN(ClassificationSupportedSequentialModel):
assert len(edge_indexes_and_weights) == len(
self.__sequential_encoding_layers
)
x: torch.Tensor = getattr(data, "x")
x: torch.Tensor = data.ndata['x']
for _edge_index_and_weight, gcn in zip(
edge_indexes_and_weights, self.__sequential_encoding_layers
):
@@ -270,7 +249,7 @@ class GCN(ClassificationSupportedSequentialModel):
return x
else:
""" edge_indexes_and_weights is (edge_index, edge_weight) """
x = getattr(data, "x")
x = data.ndata['x']
for gcn in self.__sequential_encoding_layers:
_temp_data = autogl.data.Data(
x=x, edge_index=edge_indexes_and_weights[0]
@@ -283,13 +262,13 @@ class GCN(ClassificationSupportedSequentialModel):
return torch.nn.functional.log_softmax(x, dim=1)

def lp_encode(self, data):
x: torch.Tensor = getattr(data, "x")
x: torch.Tensor = data.ndata['x']
for i in range(len(self.__sequential_encoding_layers) - 2):
x = self.__sequential_encoding_layers[i](
autogl.data.Data(x, getattr(data, "edge_index"))
autogl.data.Data(x, data.edges())
)
x = self.__sequential_encoding_layers[-2](
autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False
autogl.data.Data(x, data.edges()), enable_activation=False
)
return x



+ 303
- 0
autogl/module/model/dgl/graphsage_dgl.py View File

@@ -0,0 +1,303 @@
import torch
import typing as _typing

from dgl.nn.pytorch.conv import SAGEConv
import torch.nn.functional
import autogl.data
from . import register_model
from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel
from ....utils import get_logger

LOGGER = get_logger("SAGEModel")


class GraphSAGE(ClassificationSupportedSequentialModel):
    """GraphSAGE model assembled from DGL ``SAGEConv`` layers.

    Parameters
    ----------
    num_features : int
        Dimension of the input node features.
    num_classes : int
        Dimension of the output of the last layer.
    hidden_features : sequence of int
        Output size of every hidden layer; each value must be positive.
    activation_name : str
        Name forwarded to ``activate_func`` after each activated layer.
    layers_dropout : float, None or sequence of (float or None)
        A single value is applied after every hidden layer (the output layer
        gets none). A sequence must hold ``len(hidden_features) + 1`` entries,
        one per layer including the output layer.
    aggr : str
        ``SAGEConv`` aggregator type. Values DGL does not support fall back
        to ``"mean"``.

    Raises
    ------
    TypeError
        If an argument has an unexpected type, or a dropout sequence has the
        wrong length.
    ValueError
        If a hidden size is not positive.
    """

    # Aggregator types accepted by dgl.nn.pytorch.conv.SAGEConv.
    _VALID_AGGREGATORS = ("mean", "gcn", "pool", "lstm")

    class _SAGELayer(torch.nn.Module):
        """One ``SAGEConv`` followed by optional activation and dropout."""

        def __init__(
            self,
            input_channels: int,
            output_channels: int,
            aggr: str,
            activation_name: _typing.Optional[str] = ...,
            dropout_probability: _typing.Optional[float] = ...,
        ):
            super().__init__()
            self._convolution: SAGEConv = SAGEConv(
                input_channels, output_channels, aggregator_type=aggr
            )
            # Anything that is not a string (Ellipsis, None, ...) disables
            # the activation.
            self._activation_name: _typing.Optional[str] = (
                activation_name if isinstance(activation_name, str) else None
            )
            if isinstance(dropout_probability, float):
                # Clamp into the valid probability range [0, 1].
                if dropout_probability < 0:
                    dropout_probability = 0
                if dropout_probability > 1:
                    dropout_probability = 1
                self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout(
                    dropout_probability
                )
            else:
                self._dropout: _typing.Optional[torch.nn.Dropout] = None

        def forward(
            self,
            data,
            enable_activation: bool = True,
            x: _typing.Optional[torch.Tensor] = None,
        ) -> torch.Tensor:
            """Apply the layer on graph ``data``.

            ``x`` is the feature matrix to convolve; when omitted the features
            are read from ``data.ndata['x']``. Passing ``x`` explicitly lets
            stacked layers consume the previous layer's output.
            """
            if x is None:
                x = data.ndata["x"]
            x = self._convolution(data, x)
            if self._activation_name is not None and enable_activation:
                x = activate_func(x, self._activation_name)
            if self._dropout is not None:
                x = self._dropout(x)
            return x

    def __init__(
        self,
        num_features: int,
        num_classes: int,
        hidden_features: _typing.Sequence[int],
        activation_name: str,
        layers_dropout: _typing.Union[
            _typing.Optional[float], _typing.Sequence[_typing.Optional[float]]
        ] = None,
        aggr: str = "mean",
    ):
        super().__init__()
        if not type(num_features) == type(num_classes) == int:
            raise TypeError("num_features and num_classes must be int")
        if not isinstance(hidden_features, _typing.Sequence):
            raise TypeError("hidden_features must be a sequence of int")
        for hidden_feature in hidden_features:
            if type(hidden_feature) != int:
                raise TypeError("hidden_features must contain only int")
            elif hidden_feature <= 0:
                raise ValueError("hidden_features must contain positive values")

        if isinstance(layers_dropout, _typing.Sequence):
            if len(layers_dropout) != (len(hidden_features) + 1):
                raise TypeError(
                    "layers_dropout must have len(hidden_features) + 1 entries"
                )
            for d in layers_dropout:
                if d is not None and not isinstance(d, float):
                    raise TypeError("layers_dropout entries must be float or None")
            _layers_dropout: _typing.Sequence[_typing.Optional[float]] = layers_dropout
        elif layers_dropout is None or isinstance(layers_dropout, float):
            # One shared value for the hidden layers, none for the output layer.
            _layers_dropout: _typing.Sequence[_typing.Optional[float]] = [
                layers_dropout for _ in range(len(hidden_features))
            ] + [None]
        else:
            raise TypeError("layers_dropout must be float, None or a sequence")

        if not type(activation_name) == type(aggr) == str:
            raise TypeError("activation_name and aggr must be str")
        if aggr not in self._VALID_AGGREGATORS:
            # PyG-style names such as "add"/"max" are rejected by DGL's
            # SAGEConv, so degrade to the default instead of failing later.
            LOGGER.warning(
                "Unsupported aggregator type %r for DGL SAGEConv, using 'mean'", aggr
            )
            aggr = "mean"

        if len(hidden_features) == 0:
            # NOTE(review): the single layer keeps its activation here while
            # the output layer of deeper models has none -- confirm intended.
            self.__sequential_encoding_layers: torch.nn.ModuleList = (
                torch.nn.ModuleList(
                    [
                        self._SAGELayer(
                            num_features,
                            num_classes,
                            aggr,
                            activation_name,
                            _layers_dropout[0],
                        )
                    ]
                )
            )
        else:
            self.__sequential_encoding_layers: torch.nn.ModuleList = (
                torch.nn.ModuleList(
                    [
                        self._SAGELayer(
                            num_features,
                            hidden_features[0],
                            aggr,
                            activation_name,
                            _layers_dropout[0],
                        )
                    ]
                )
            )
            for i in range(len(hidden_features)):
                if i + 1 < len(hidden_features):
                    self.__sequential_encoding_layers.append(
                        self._SAGELayer(
                            hidden_features[i],
                            hidden_features[i + 1],
                            aggr,
                            activation_name,
                            _layers_dropout[i + 1],
                        )
                    )
                else:
                    # Output layer: no activation. The dropout is passed by
                    # keyword so it cannot land in the activation slot.
                    self.__sequential_encoding_layers.append(
                        self._SAGELayer(
                            hidden_features[i],
                            num_classes,
                            aggr,
                            dropout_probability=_layers_dropout[i + 1],
                        )
                    )

    @property
    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
        """The ordered list of layers making up the encoder."""
        return self.__sequential_encoding_layers

    def cls_encode(self, data) -> torch.Tensor:
        """Run every layer on graph ``data``, starting from ``data.ndata['x']``."""
        x: torch.Tensor = data.ndata["x"]
        for layer in self.__sequential_encoding_layers:
            # Each layer convolves on the same graph but consumes the
            # previous layer's output as features.
            x = layer(data, x=x)
        return x

    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
        """Turn encoder outputs into log-probabilities over dimension 1."""
        return torch.nn.functional.log_softmax(x, dim=1)

    def lp_encode(self, data):
        """Encode nodes for link prediction with all layers but the last.

        The final encoding layer (second to last overall) is applied without
        activation. Indexing ``[-2]`` requires at least two layers.
        """
        layers = self.__sequential_encoding_layers
        x: torch.Tensor = data.ndata["x"]
        for i in range(len(layers) - 2):
            x = layers[i](data, x=x)
        x = layers[-2](data, enable_activation=False, x=x)
        return x

    def lp_decode(self, z, pos_edge_index, neg_edge_index):
        """Score candidate edges by the dot product of their endpoint embeddings.

        Positive and negative edge indices are concatenated along the last
        dimension; one logit is returned per edge, positives first.
        """
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def lp_decode_all(self, z):
        """Return the index pairs ``(i, j)`` whose embedding dot product is positive."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()


@register_model("sage")
class AutoSAGE(BaseModel):
    r"""
    AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on
    Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper. The layer is

    .. math::

        \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot
        \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j

    Parameters
    ----------
    num_features: `int`.
        The dimension of features. Stored as 0 when omitted.

    num_classes: `int`.
        The number of classes. Stored as 0 when omitted.

    device: `torch.device` or `str`
        The device where model will be running on. Defaults to ``"cpu"``.

    init: `bool`.
        If True(False), the model will (not) be initialized.

    args: Other parameters (accepted but not used by this constructor).
    """

    def __init__(
        self, num_features=None, num_classes=None, device=None, init=False, **args
    ):

        super(AutoSAGE, self).__init__()

        self.num_features = num_features if num_features is not None else 0
        self.num_classes = int(num_classes) if num_classes is not None else 0
        self.device = device if device is not None else "cpu"
        # NOTE(review): set to True regardless of the ``init`` argument --
        # confirm this is what BaseModel expects.
        self.init = True

        self.params = {
            "features_num": self.num_features,
            "num_class": self.num_classes,
        }
        # Hyper-parameter search space, one entry per tunable parameter.
        self.space = [
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [128, 128, 128],
                "scalingType": "LOG",
                # The list is cut to ``num_layers - 1`` entries.
                "cutPara": ("num_layers",),
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
            {
                "parameterName": "agg",
                "type": "CATEGORICAL",
                # Only aggregator types that DGL's SAGEConv implements; the
                # PyG names "add" and "max" are not valid for this backend.
                "feasiblePoints": ["mean", "gcn", "pool", "lstm"],
            },
        ]

        # Defaults used until a search overrides them.
        self.hyperparams = {
            "num_layers": 3,
            "hidden": [64, 32],
            "dropout": 0.5,
            "act": "relu",
            "agg": "mean",
        }

        self.initialized = False
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the GraphSAGE network once and move it to ``self.device``.

        Subsequent calls are no-ops.
        """
        if self.initialized:
            return
        self.initialized = True
        self.model = GraphSAGE(
            self.num_features,
            self.num_classes,
            self.hyperparams.get("hidden"),
            self.hyperparams.get("act", "relu"),
            self.hyperparams.get("dropout", None),
            self.hyperparams.get("agg", "mean"),
        ).to(self.device)

+ 1
- 1
test/model_nlf/nclf_dgl.py View File

@@ -7,7 +7,7 @@ from tqdm import tqdm
sys.path.append("../../")
print(os.getcwd())
os.environ["AUTOGL_BACKEND"] = "dgl"
#os.environ["AUTOGL_BACKEND"] = "pyg"
# os.environ["AUTOGL_BACKEND"] = "pyg"
from autogl.backend import DependentBackend
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset, GINDataset


Loading…
Cancel
Save