Browse Source

add glf example (dgl)

tags/v0.3.1
SwiftieH Frozenmad 4 years ago
parent
commit
526e1edddb
13 changed files with 2722 additions and 0 deletions
  1. +23
    -0
      autogl/module/model/dgl/__init__.py
  2. +28
    -0
      autogl/module/model/dgl/_model_registry.py
  3. +413
    -0
      autogl/module/model/dgl/base.py
  4. +85
    -0
      autogl/module/model/dgl/dataloader_gin.py
  5. +223
    -0
      autogl/module/model/dgl/gat.py
  6. +408
    -0
      autogl/module/model/dgl/gcn.py
  7. +232
    -0
      autogl/module/model/dgl/gin.py
  8. +171
    -0
      autogl/module/model/dgl/gin_dgl.py
  9. +81
    -0
      autogl/module/model/dgl/ginparser.py
  10. +407
    -0
      autogl/module/model/dgl/graph_saint.py
  11. +306
    -0
      autogl/module/model/dgl/graphsage.py
  12. +169
    -0
      autogl/module/model/dgl/topkpool.py
  13. +176
    -0
      test/model_glf/gclf_dgl.py

+ 23
- 0
autogl/module/model/dgl/__init__.py View File

@@ -1 +1,24 @@
from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model
from .base import BaseModel
from .topkpool import AutoTopkpool

# from .graph_sage import AutoSAGE
from .graphsage import AutoSAGE
from .graph_saint import GraphSAINTAggregationModel
from .gcn import AutoGCN
from .gat import AutoGAT
from .gin import AutoGIN
from .gin_dgl import GIN

# Names exported by ``from <this package> import *``.
__all__ = [
    "ModelUniversalRegistry",
    "register_model",
    "BaseModel",
    "AutoTopkpool",
    "AutoSAGE",
    "GraphSAINTAggregationModel",
    "AutoGCN",
    "AutoGAT",
    "AutoGIN",
    "GIN",
]

+ 28
- 0
autogl/module/model/dgl/_model_registry.py View File

@@ -0,0 +1,28 @@
import typing as _typing
from .base import BaseModel

# Registry mapping a model name to its class; entries are added by the
# decorator returned from ``register_model``.
MODEL_DICT: _typing.Dict[str, _typing.Type[BaseModel]] = {}


def register_model(name):
    """
    Build a class decorator that registers a model class under ``name``.

    Parameters
    ----------
    name:
        Key under which the decorated class is stored in ``MODEL_DICT``.

    Returns
    -------
    A decorator that stores the class in ``MODEL_DICT`` and returns the
    class unchanged.

    Raises
    ------
    ValueError
        Raised by the decorator when ``name`` is already registered or when
        the decorated class does not extend ``BaseModel``.
    """

    def register_model_cls(cls):
        # The messages used to say "trainer"; this registry holds models.
        if name in MODEL_DICT:
            raise ValueError("Cannot register duplicate model ({})".format(name))
        if not issubclass(cls, BaseModel):
            raise ValueError(
                "Model ({}: {}) must extend BaseModel".format(name, cls.__name__)
            )
        MODEL_DICT[name] = cls
        return cls

    return register_model_cls


class ModelUniversalRegistry:
    """Lookup facade over the module-level ``MODEL_DICT`` registry."""

    @classmethod
    def get_model(cls, name: str) -> _typing.Type[BaseModel]:
        """
        Return the model class registered under ``name``.

        Raises
        ------
        TypeError
            If ``name`` is not a string.
        KeyError
            If no model has been registered under ``name``.
        """
        if not isinstance(name, str):
            raise TypeError(
                "model name must be a str, got {}".format(type(name).__name__)
            )
        try:
            return MODEL_DICT[name]
        except KeyError:
            # Re-raise with a message that names the missing entry.
            raise KeyError("Model ({}) is not registered".format(name)) from None

+ 413
- 0
autogl/module/model/dgl/base.py View File

@@ -0,0 +1,413 @@
"""
auto graph model
a list of models with their hyper parameters
NOTE: neural architecture search (NAS) maybe included here
"""
import copy
import logging
import typing as _typing
import torch
import torch.nn.functional as F
from copy import deepcopy

# Module-level logger (named "BaseModel") used by the base classes below.
base_approach_logger: logging.Logger = logging.getLogger("BaseModel")


def activate_func(x, func):
    """
    Apply the activation function named ``func`` to ``x``.

    ``"tanh"`` maps to ``torch.tanh``. Any other name that exists as an
    attribute of ``torch.nn.functional`` is looked up there and called on
    ``x``. The empty string means "no activation" and returns ``x`` as is.

    Raises
    ------
    TypeError
        If ``func`` is none of the above.
    """
    if func == "tanh":
        return torch.tanh(x)

    # A private sentinel distinguishes "attribute missing" from any real value.
    absent = object()
    functional_op = getattr(F, func, absent)
    if functional_op is not absent:
        return functional_op(x)

    if func == "":
        return x

    raise TypeError("PyTorch does not support activation function {}".format(func))


class BaseModel:
    """
    Legacy base class for the auto models in this package.

    The methods below read and write attributes that this class itself never
    creates (``hyperparams``, ``space``, ``params``, ``num_features``,
    ``num_classes``, ``device`` and optionally ``model``); they have to be
    assigned elsewhere before the corresponding method is used.
    """

    def __init__(self, init=False, *args, **kwargs):
        # All arguments are accepted but ignored at this level.
        super().__init__()

    # ------------------------------------------------------------------
    # hyper parameters
    # ------------------------------------------------------------------
    def get_hyper_parameter(self):
        """Return a deep copy of ``self.hyperparams``."""
        snapshot = deepcopy(self.hyperparams)
        return snapshot

    @property
    def hyper_parameter_space(self):
        """The hyper-parameter search space, stored in ``self.space``."""
        return self.space

    @hyper_parameter_space.setter
    def hyper_parameter_space(self, space):
        self.space = space

    def from_hyper_parameter(self, hp):
        """
        Create and initialize a new instance of the same class.

        The new instance receives this instance's feature/class counts and
        device, its own hyper parameters updated with ``hp``, and its
        ``params`` updated with this instance's ``params``.
        """
        model_cls = self.__class__
        clone = model_cls(
            num_features=self.num_features,
            num_classes=self.num_classes,
            device=self.device,
            init=False,
        )
        clone.hyperparams.update(hp)
        clone.params.update(self.params)
        clone.initialize()
        return clone

    # ------------------------------------------------------------------
    # life cycle hooks (no-ops here)
    # ------------------------------------------------------------------
    def initialize(self):
        """Hook for building the underlying model; does nothing here."""
        pass

    def forward(self):
        """Hook for the forward pass; does nothing here."""
        pass

    def to(self, device):
        """
        Remember ``device`` and move the wrapped model (if any) onto it.

        ``device`` is stored only when it is a ``str`` or ``torch.device``.
        Returns ``self``.
        """
        if isinstance(device, (str, torch.device)):
            self.device = device
        model_is_module = (
            hasattr(self, "model")
            and self.model is not None
            and isinstance(self.model, torch.nn.Module)
        )
        if model_is_module:
            self.model.to(self.device)
        return self

    # ------------------------------------------------------------------
    # dataset dependent sizes
    # ------------------------------------------------------------------
    def get_num_classes(self):
        """Return ``self.num_classes``."""
        return self.num_classes

    def set_num_classes(self, num_classes):
        """Store ``num_classes`` and mirror it into ``params["num_class"]``."""
        self.num_classes = num_classes
        self.params["num_class"] = num_classes

    def get_num_features(self):
        """Return ``self.num_features``."""
        return self.num_features

    def set_num_features(self, num_features):
        """Store ``num_features`` and mirror it into ``params["features_num"]``."""
        self.num_features = num_features
        self.params["features_num"] = self.num_features

    def set_num_graph_features(self, num_graph_features):
        """
        Store ``num_graph_features`` and mirror it into ``params``.

        Asserts that the instance already has a ``num_graph_features``
        attribute.
        """
        assert hasattr(
            self, "num_graph_features"
        ), "Cannot set graph features for tasks other than graph classification"
        self.num_graph_features = num_graph_features
        self.params["num_graph_features"] = num_graph_features


class _BaseBaseModel:
# todo: after renaming the experimental base class _BaseModel to BaseModel,
# rename this class to _BaseModel
"""
The base class for class BaseModel,
designed to implement some basic functionality of BaseModel.
-- Designed by ZiXin Sun
"""

@classmethod
def __formulate_device(
cls, device: _typing.Union[str, torch.device] = ...
) -> torch.device:
if type(device) == torch.device or (
type(device) == str and device.strip().lower() != "auto"
):
return torch.device(device)
elif torch.cuda.is_available() and torch.cuda.device_count() > 0:
return torch.device("cuda")
else:
return torch.device("cpu")

@property
def device(self) -> torch.device:
return self.__device

@device.setter
def device(self, __device: _typing.Union[str, torch.device, None]):
self.__device: torch.device = self.__formulate_device(__device)

@property
def model(self) -> _typing.Optional[torch.nn.Module]:
if self._model is None:
base_approach_logger.debug(
"property of model NOT initialized before accessing"
)
return self._model

@model.setter
def model(self, _model: torch.nn.Module) -> None:
if not isinstance(_model, torch.nn.Module):
raise TypeError(
"the property of model MUST be an instance of " "torch.nn.Module"
)
self._model = _model

def _initialize(self):
raise NotImplementedError

def initialize(self) -> bool:
"""
Initialize the model in case that the model has NOT been initialized
:return: whether self._initialize() method called
"""
if not self.__is_initialized:
self._initialize()
self.__is_initialized = True
return True
return False

# def to(self, *args, **kwargs):
# """
# Due to the signature of to() method in class BaseApproach
# is inconsistent with the signature of the method
# in the base class torch.nn.Module,
# this intermediate overridden method is necessary to
# walk around (bypass) the inspection for
# signature of overriding method.
# :param args: positional arguments list
# :param kwargs: keyword arguments dict
# :return: self
# """
# return super(_BaseBaseModel, self).to(*args, **kwargs)

def forward(self, *args, **kwargs):
if self.model is not None and isinstance(self.model, torch.nn.Module):
return self.model(*args, **kwargs)
else:
raise NotImplementedError

def __init__(
self,
model: _typing.Optional[torch.nn.Module] = None,
initialize: bool = False,
device: _typing.Union[str, torch.device] = ...,
):
if type(initialize) != bool:
raise TypeError
super(_BaseBaseModel, self).__init__()
self.__device: torch.device = self.__formulate_device(device)
self._model: _typing.Optional[torch.nn.Module] = model
self.__is_initialized: bool = False
if initialize:
self.initialize()


class _BaseModel(_BaseBaseModel, BaseModel):
    """
    The upcoming root base class for Model, i.e. BaseModel.
    Adds storage for the hyper parameters and their search space
    on top of the device/model handling of ``_BaseBaseModel``.
    -- Designed by ZiXin Sun
    """

    # todo: Deprecate and remove the legacy class "BaseModel",
    #       then rename this class to "BaseModel",
    #       correspondingly, this class will no longer extend
    #       the legacy class "BaseModel" after the removal.

    def __init__(
        self,
        model: _typing.Optional[torch.nn.Module] = None,
        initialize: bool = False,
        hyper_parameter_space: _typing.Sequence[_typing.Any] = ...,
        hyper_parameter: _typing.Dict[str, _typing.Any] = ...,
        device: _typing.Union[str, torch.device] = ...,
    ):
        """
        :param model: the wrapped module, may be None
        :param initialize: must be exactly a ``bool``; forwarded to the parent
        :param hyper_parameter_space: used when it is a sequence, else ``[]``
        :param hyper_parameter: used when it is a ``dict``, else ``{}``
        :param device: forwarded to the parent constructor
        """
        if type(initialize) != bool:
            raise TypeError
        super(_BaseModel, self).__init__(model, initialize, device)

        space_provided: bool = hyper_parameter_space != Ellipsis and isinstance(
            hyper_parameter_space, _typing.Sequence
        )
        self.__hyper_parameter_space: _typing.Sequence[
            _typing.Dict[str, _typing.Any]
        ] = (hyper_parameter_space if space_provided else [])

        values_provided: bool = hyper_parameter != Ellipsis and isinstance(
            hyper_parameter, dict
        )
        self.__hyper_parameter: _typing.Dict[str, _typing.Any] = (
            hyper_parameter if values_provided else {}
        )

    def _initialize(self):
        raise NotImplementedError

    def from_hyper_parameter(self, hyper_parameter: _typing.Dict[str, _typing.Any]):
        raise NotImplementedError

    def to(self, device: torch.device):
        """Store ``device``, move the wrapped module to it, then defer to the parent ``to``."""
        self.device = device
        if self.model is not None:
            if isinstance(self.model, torch.nn.Module):
                self.model.to(self.device)
        return super().to(device)

    @property
    def space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]:
        # todo: deprecate and remove in future major version
        return self.__hyper_parameter_space

    @property
    def hyper_parameter_space(self):
        return self.__hyper_parameter_space

    @hyper_parameter_space.setter
    def hyper_parameter_space(
        self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]]
    ):
        self.__hyper_parameter_space = space

    @property
    def hyper_parameter(self) -> _typing.Dict[str, _typing.Any]:
        return self.__hyper_parameter

    @hyper_parameter.setter
    def hyper_parameter(self, _hyper_parameter: _typing.Dict[str, _typing.Any]):
        if isinstance(_hyper_parameter, dict):
            self.__hyper_parameter = _hyper_parameter
            return
        raise TypeError

    def get_hyper_parameter(self) -> _typing.Dict[str, _typing.Any]:
        """
        todo: consider deprecating this trivial getter method in the future
        :return: copied hyper parameter
        """
        duplicate = copy.deepcopy(self.__hyper_parameter)
        return duplicate


class ClassificationModel(_BaseModel):
    """
    Base class for classification models.

    On top of ``_BaseModel`` it stores three non-negative integer sizes:
    the number of input features, of classes and of graph-level features.
    Values that are not plain ``int`` or are not positive are stored as 0
    by the constructor and the ``set_*`` methods; the property setters
    reject such values instead.
    """

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        num_graph_features: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        hyper_parameter_space: _typing.Sequence[_typing.Any] = ...,
        hyper_parameter: _typing.Dict[str, _typing.Any] = ...,
        init: bool = False,
        **kwargs
    ):
        """
        :param num_features: number of input features
        :param num_classes: number of classes
        :param num_graph_features: number of graph-level features
        :param device: forwarded to the parent constructor
        :param hyper_parameter_space: forwarded to the parent constructor
        :param hyper_parameter: forwarded to the parent constructor
        :param init: whether to call ``initialize()``; must be exactly a ``bool``
        :param kwargs: forwarded to the parent constructor, except that an
                       ``initialize`` entry is discarded in favour of ``init``
        :raise TypeError: when ``init`` is not a ``bool``
        """
        # The parent applies this same check to its ``initialize`` flag; it is
        # repeated here because initialization is now triggered below.
        if type(init) != bool:
            raise TypeError
        kwargs.pop("initialize", None)
        # Initialization is deferred: triggering it inside the parent
        # constructor would run ``_initialize()`` before the sizes below exist.
        super(ClassificationModel, self).__init__(
            initialize=False,
            hyper_parameter_space=hyper_parameter_space,
            hyper_parameter=hyper_parameter,
            device=device,
            **kwargs
        )
        self.__num_classes: int = self.__sanitize_count(num_classes)
        self.__num_features: int = self.__sanitize_count(num_features)
        self.__num_graph_features: int = self.__sanitize_count(num_graph_features)
        if init:
            self.initialize()

    @staticmethod
    def __sanitize_count(value) -> int:
        """Return ``value`` when it is a positive plain ``int``, otherwise 0."""
        if type(value) == int and value > 0:
            return value
        return 0

    def _initialize(self):
        raise NotImplementedError

    def from_hyper_parameter(
        self, hyper_parameter: _typing.Dict[str, _typing.Any]
    ) -> "ClassificationModel":
        """
        Create and initialize a new model of the same class whose hyper
        parameters are this model's, overridden by ``hyper_parameter``.

        This model's own hyper parameters are left untouched.
        """
        new_model: ClassificationModel = self.__class__(
            num_features=self.num_features,
            num_classes=self.num_classes,
            device=self.device,
            init=False,
        )
        # Work on a copy: updating ``self.hyper_parameter`` in place would
        # alter this model and make both models share one dict.
        merged_hyper_parameter = self.get_hyper_parameter()
        merged_hyper_parameter.update(hyper_parameter)
        new_model.hyper_parameter = merged_hyper_parameter
        new_model.initialize()
        return new_model

    def __repr__(self) -> str:
        # yaml is imported lazily so it is only needed when repr() is used.
        import yaml

        return yaml.dump(self.hyper_parameter)

    @property
    def num_classes(self) -> int:
        return self.__num_classes

    @num_classes.setter
    def num_classes(self, __num_classes: int):
        if type(__num_classes) != int:
            raise TypeError
        if not __num_classes > 0:
            raise ValueError
        self.__num_classes = __num_classes

    @property
    def num_features(self) -> int:
        return self.__num_features

    @num_features.setter
    def num_features(self, __num_features: int):
        if type(__num_features) != int:
            raise TypeError
        if not __num_features > 0:
            raise ValueError
        self.__num_features = __num_features

    def get_num_classes(self) -> int:
        # todo: consider replacing with property with getter and setter
        return self.__num_classes

    def set_num_classes(self, num_classes: int) -> None:
        # todo: consider replacing with property with getter and setter
        if type(num_classes) != int:
            raise TypeError
        self.__num_classes = self.__sanitize_count(num_classes)

    def get_num_features(self) -> int:
        # todo: consider replacing with property with getter and setter
        return self.__num_features

    def set_num_features(self, num_features: int):
        # todo: consider replacing with property with getter and setter
        if type(num_features) != int:
            raise TypeError
        self.__num_features = self.__sanitize_count(num_features)

    def set_num_graph_features(self, num_graph_features: int):
        # todo: consider replacing with property with getter and setter
        if type(num_graph_features) != int:
            raise TypeError
        self.__num_graph_features = self.__sanitize_count(num_graph_features)


class _ClassificationModel(torch.nn.Module):
def __init__(self):
super(_ClassificationModel, self).__init__()

def cls_encode(self, data) -> torch.Tensor:
raise NotImplementedError

def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
raise NotImplementedError

def cls_forward(self, data) -> torch.Tensor:
return self.cls_decode(self.cls_encode(data))


class ClassificationSupportedSequentialModel(_ClassificationModel):
    """
    Classification module skeleton that additionally exposes its encoding
    layers as a ``torch.nn.ModuleList``; every member must be supplied by
    sub-classes.
    """

    def __init__(self):
        super().__init__()

    @property
    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
        """The encoding layers; not implemented here."""
        raise NotImplementedError

    def cls_encode(self, data) -> torch.Tensor:
        """Encode ``data``; not implemented here."""
        raise NotImplementedError

    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
        """Decode the encoded representation ``x``; not implemented here."""
        raise NotImplementedError

+ 85
- 0
autogl/module/model/dgl/dataloader_gin.py View File

@@ -0,0 +1,85 @@
"""
PyTorch compatible dataloader
"""


import math
import numpy as np
import torch
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import StratifiedKFold
import dgl
from dgl.dataloading import GraphDataLoader


class GINDataLoader():
    """
    Split a graph dataset into a training part and a validation part and
    wrap each part in a ``GraphDataLoader``.

    Parameters
    ----------
    dataset:
        Iterable of ``(graph, label)`` pairs; it is iterated once here to
        collect the labels.
    batch_size:
        Batch size handed to both loaders.
    device:
        ``torch.device`` or device string; ``pin_memory`` is enabled when
        its type contains ``'cuda'``.
    collate_fn:
        Collate function handed to both loaders.
    seed:
        Random seed used by the split.
    shuffle:
        Whether the stratified 10-fold split shuffles before splitting.
    split_name:
        ``'fold10'`` for a stratified 10-fold split or ``'rand'`` for a
        random split.
    fold_idx:
        Which of the 10 folds is used for validation (``'fold10'`` only).
    split_ratio:
        Fraction of samples used for training (``'rand'`` only).

    Raises
    ------
    NotImplementedError
        If ``split_name`` is neither ``'fold10'`` nor ``'rand'``.
    """

    def __init__(self,
                 dataset,
                 batch_size,
                 device,
                 collate_fn=None,
                 seed=0,
                 shuffle=True,
                 split_name='fold10',
                 fold_idx=0,
                 split_ratio=0.7):

        self.shuffle = shuffle
        self.seed = seed
        # Accept plain strings such as "cuda:0" as well as torch.device.
        device_type = device.type if hasattr(device, 'type') else str(device)
        self.kwargs = {'pin_memory': True} if 'cuda' in device_type else {}

        labels = [l for _, l in dataset]

        if split_name == 'fold10':
            train_idx, valid_idx = self._split_fold10(
                labels, fold_idx, seed, shuffle)
        elif split_name == 'rand':
            train_idx, valid_idx = self._split_rand(
                labels, split_ratio, seed, shuffle)
        else:
            raise NotImplementedError(
                "unsupported split_name: {!r}".format(split_name))

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)

        self.train_loader = GraphDataLoader(
            dataset, sampler=train_sampler,
            batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)
        self.valid_loader = GraphDataLoader(
            dataset, sampler=valid_sampler,
            batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)

    def train_valid_loader(self):
        """Return ``(train_loader, valid_loader)``."""
        return self.train_loader, self.valid_loader

    def _split_fold10(self, labels, fold_idx=0, seed=0, shuffle=True):
        """Stratified 10-fold split; fold ``fold_idx`` becomes the validation set."""
        # The message used to be ``print(...)``, i.e. None.
        assert 0 <= fold_idx < 10, "fold_idx must be from 0 to 9."

        # scikit-learn only accepts a random_state when shuffling is enabled.
        skf = StratifiedKFold(
            n_splits=10, shuffle=shuffle,
            random_state=seed if shuffle else None)
        # split(x, y): only the labels matter for stratification.
        idx_list = list(skf.split(np.zeros(len(labels)), labels))
        train_idx, valid_idx = idx_list[fold_idx]

        print(
            "train_set : test_set = %d : %d"
            % (len(train_idx), len(valid_idx)))

        return train_idx, valid_idx

    def _split_rand(self, labels, split_ratio=0.7, seed=0, shuffle=True):
        """
        Random split: the first ``split_ratio`` share of a seeded permutation
        is used for training, the remainder for validation.

        NOTE(review): ``shuffle`` is accepted but not consulted; the indices
        are always shuffled, and NumPy's global random state is re-seeded.
        """
        num_entries = len(labels)
        indices = list(range(num_entries))
        np.random.seed(seed)
        np.random.shuffle(indices)
        split = int(math.floor(split_ratio * num_entries))
        train_idx, valid_idx = indices[:split], indices[split:]

        print(
            "train_set : test_set = %d : %d"
            % (len(train_idx), len(valid_idx)))

        return train_idx, valid_idx


+ 223
- 0
autogl/module/model/dgl/gat.py View File

@@ -0,0 +1,223 @@
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from . import register_model
from .base import BaseModel, activate_func
from ....utils import get_logger

# Module-level logger obtained from the project helper under the name "GATModel".
LOGGER = get_logger("GATModel")


def set_default(args, d):
    """
    Fill ``args`` in place with the entries of ``d`` whose keys it lacks.

    Existing entries of ``args`` are never overwritten. Returns ``args``.
    """
    for key in d:
        if key in args:
            continue
        args[key] = d[key]
    return args


class GAT(torch.nn.Module):
    """
    Stack of ``GATConv`` layers configured from a dict.

    Parameters
    ----------
    args: ``dict``
        Must contain ``features_num``, ``num_class``, ``num_layers``,
        ``hidden`` (list of hidden sizes), ``heads``, ``dropout`` and
        ``act`` (activation name understood by ``activate_func``).

    Raises
    ------
    Exception
        If any of the required keys is missing from ``args``.
    """

    def __init__(self, args):
        super(GAT, self).__init__()
        self.args = args

        # Validate before reading any key, so that a missing "num_layers"
        # is reported by the message below rather than by a bare KeyError.
        required_keys = (
            "features_num",
            "num_class",
            "num_layers",
            "hidden",
            "heads",
            "dropout",
            "act",
        )
        missing_keys = [key for key in required_keys if key not in self.args]
        if missing_keys:
            raise Exception("Missing keys: %s." % ",".join(missing_keys))

        self.num_layer = int(self.args["num_layers"])
        if self.num_layer != len(self.args["hidden"]) + 1:
            LOGGER.warning(
                "Warning: layer size does not match the length of hidden units"
            )

        heads = self.args["heads"]
        dropout = self.args["dropout"]
        hidden = self.args["hidden"]

        self.convs = torch.nn.ModuleList()
        # Input layer.
        self.convs.append(
            GATConv(self.args["features_num"], hidden[0], heads=heads, dropout=dropout)
        )
        # Each multi-head layer is built without ``concat=False``, so the next
        # layer is sized to take hidden * heads input channels.
        last_dim = hidden[0] * heads
        # Intermediate layers.
        for i in range(self.num_layer - 2):
            self.convs.append(
                GATConv(last_dim, hidden[i + 1], heads=heads, dropout=dropout)
            )
            last_dim = hidden[i + 1] * heads
        # Output layer: a single, non-concatenated head producing class scores.
        self.convs.append(
            GATConv(
                last_dim,
                self.args["num_class"],
                heads=1,
                concat=False,
                dropout=dropout,
            )
        )

    def forward(self, data):
        """
        Return per-row log-probabilities (``log_softmax`` over dim 1).

        ``data`` must provide ``x`` and ``edge_index``; ``edge_weight`` is
        optional and defaults to ``None``.
        """
        # Previously a missing attribute was only printed and then surfaced
        # as an unbound local; fail directly with the AttributeError instead.
        x = data.x
        edge_index = data.edge_index
        edge_weight = getattr(data, "edge_weight", None)

        for i in range(self.num_layer):
            x = F.dropout(x, p=self.args["dropout"], training=self.training)
            # NOTE(review): the meaning of GATConv's third positional
            # argument depends on the installed torch_geometric version;
            # confirm that it is intended to receive edge_weight.
            x = self.convs[i](x, edge_index, edge_weight)
            if i != self.num_layer - 1:
                x = activate_func(x, self.args["act"])

        return F.log_softmax(x, dim=1)

    def lp_encode(self, data):
        """
        Encode nodes for link prediction using all layers but the last,
        over ``data.train_pos_edge_index``; no activation follows the final
        layer that is used.
        """
        x = data.x
        for i in range(self.num_layer - 1):
            x = self.convs[i](x, data.train_pos_edge_index)
            if i != self.num_layer - 2:
                x = activate_func(x, self.args["act"])
        return x

    def lp_decode(self, z, pos_edge_index, neg_edge_index):
        """Score each positive and negative edge by the dot product of its endpoints."""
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def lp_decode_all(self, z):
        """Return the index pairs whose dot-product score is positive, as a 2 x N tensor."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()


@register_model("gat")
class AutoGAT(BaseModel):
    r"""
    AutoGAT. The model used in this automodel is GAT, i.e., the graph attentional network from the `"Graph Attention Networks"
    <https://arxiv.org/abs/1710.10903>`_ paper. The layer is

    .. math::
        \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} +
        \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}

    where the attention coefficients :math:`\alpha_{i,j}` are computed as

    .. math::
        \alpha_{i,j} =
        \frac{
        \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top}
        [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j]
        \right)\right)}
        {\sum_{k \in \mathcal{N}(i) \cup \{ i \}}
        \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top}
        [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k]
        \right)\right)}.

    Parameters
    ----------
    num_features: `int`.
        The dimension of features. Defaults to 0 when omitted.

    num_classes: `int`.
        The number of classes. Defaults to 0 when omitted.

    device: `torch.device` or `str`
        The device where model will be running on. Defaults to ``"cpu"``.

    init: `bool`.
        If True(False), the model will (not) be initialized.

    args: Other keyword arguments; accepted but not used here.
    """

    def __init__(
        self, num_features=None, num_classes=None, device=None, init=False, **args
    ):
        super().__init__()
        self.num_features = 0 if num_features is None else num_features
        self.num_classes = 0 if num_classes is None else int(num_classes)
        self.device = "cpu" if device is None else device
        self.init = True

        # Dataset dependent sizes handed to the network on construction.
        self.params = dict(
            features_num=self.num_features,
            num_class=self.num_classes,
        )

        # Search space, one entry per hyper parameter.
        layers_spec = {
            "parameterName": "num_layers",
            "type": "DISCRETE",
            "feasiblePoints": "2,3,4",
        }
        hidden_spec = {
            "parameterName": "hidden",
            "type": "NUMERICAL_LIST",
            "numericalType": "INTEGER",
            "length": 3,
            "minValue": [8, 8, 8],
            "maxValue": [64, 64, 64],
            "scalingType": "LOG",
            "cutPara": ("num_layers",),
            "cutFunc": lambda x: x[0] - 1,
        }
        dropout_spec = {
            "parameterName": "dropout",
            "type": "DOUBLE",
            "maxValue": 0.8,
            "minValue": 0.2,
            "scalingType": "LINEAR",
        }
        heads_spec = {
            "parameterName": "heads",
            "type": "DISCRETE",
            "feasiblePoints": "2,4,8,16",
        }
        activation_spec = {
            "parameterName": "act",
            "type": "CATEGORICAL",
            "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
        }
        self.space = [
            layers_spec,
            hidden_spec,
            dropout_spec,
            heads_spec,
            activation_spec,
        ]

        # Default hyper parameters.
        self.hyperparams = dict(
            num_layers=2,
            hidden=[32],
            heads=4,
            dropout=0.2,
            act="leaky_relu",
        )

        self.initialized = False
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the GAT network once and move it to ``self.device``."""
        if not self.initialized:
            self.initialized = True
            network_args = dict(self.params)
            network_args.update(self.hyperparams)
            self.model = GAT(network_args).to(self.device)

+ 408
- 0
autogl/module/model/dgl/gcn.py View File

@@ -0,0 +1,408 @@
import torch
import torch.nn.functional
import typing as _typing

from torch_geometric.nn.conv import GCNConv
import autogl.data
from . import register_model
from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel
from ....utils import get_logger

# Module-level logger obtained from the project helper under the name "GCNModel".
LOGGER = get_logger("GCNModel")


class GCN(ClassificationSupportedSequentialModel):
    """
    Stack of ``GCNConv`` based layers for node classification, with extra
    entry points for link prediction.

    Parameters
    ----------
    num_features:
        Input feature dimension.
    num_classes:
        Output dimension of the last layer.
    hidden_features:
        Output sizes of the hidden layers; may be empty.
    activation_name:
        Activation applied after every layer except the last one.
    dropout:
        ``None`` (no dropout), one number applied after every layer except
        the last one, or a sequence with one entry (number or ``None``) per
        layer. Numbers are clamped into ``[0, 1]``.
    add_self_loops, normalize:
        Forwarded to every ``GCNConv``.

    Raises
    ------
    TypeError
        If ``dropout`` has an unsupported type, a sequence of the wrong
        length, or a sequence with an item that is neither a number nor
        ``None``.
    """

    class _GCNLayer(torch.nn.Module):
        """One ``GCNConv`` followed by an optional activation and optional dropout."""

        def __init__(
            self,
            input_channels: int,
            output_channels: int,
            add_self_loops: bool = True,
            normalize: bool = True,
            activation_name: _typing.Optional[str] = ...,
            dropout_probability: _typing.Optional[float] = ...,
        ):
            super().__init__()
            self._convolution: GCNConv = GCNConv(
                input_channels,
                output_channels,
                add_self_loops=bool(add_self_loops),
                normalize=bool(normalize),
            )
            # Anything that is not a string (None, Ellipsis, ...) disables
            # the activation.
            self._activation_name: _typing.Optional[str] = (
                activation_name if isinstance(activation_name, str) else None
            )
            # Integers such as 0 are valid probabilities too; bool is excluded
            # because it is only accidentally a number.
            is_number = isinstance(
                dropout_probability, (int, float)
            ) and not isinstance(dropout_probability, bool)
            if is_number:
                clamped = min(max(float(dropout_probability), 0.0), 1.0)
                self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout(
                    clamped
                )
            else:
                self._dropout: _typing.Optional[torch.nn.Dropout] = None

        def forward(self, data, enable_activation: bool = True) -> torch.Tensor:
            """
            Apply the layer to ``data.x`` over ``data.edge_index``.

            ``data.edge_weight`` is used only when it is a tensor whose
            length matches the number of columns of ``edge_index``;
            otherwise it is ignored.

            :raise TypeError: when ``x`` or ``edge_index`` is not exactly a
                              ``torch.Tensor``
            """
            x: torch.Tensor = getattr(data, "x")
            edge_index: torch.LongTensor = getattr(data, "edge_index")
            edge_weight: _typing.Optional[torch.Tensor] = getattr(
                data, "edge_weight", None
            )
            if not type(x) == type(edge_index) == torch.Tensor:
                raise TypeError("both x and edge_index must be torch.Tensor")
            if edge_weight is not None and (
                type(edge_weight) != torch.Tensor
                or edge_index.size() != (2, edge_weight.size(0))
            ):
                edge_weight = None

            x = self._convolution(x, edge_index, edge_weight)
            if self._activation_name is not None and enable_activation:
                x = activate_func(x, self._activation_name)
            if self._dropout is not None:
                x = self._dropout(x)
            return x

    def __init__(
        self,
        num_features: int,
        num_classes: int,
        hidden_features: _typing.Sequence[int],
        activation_name: str,
        dropout: _typing.Union[
            _typing.Optional[float], _typing.Sequence[_typing.Optional[float]]
        ] = None,
        add_self_loops: bool = True,
        normalize: bool = True,
    ):
        def _is_rate(value) -> bool:
            # Accept int as well as float; an integer 0 used to be rejected.
            return isinstance(value, (int, float)) and not isinstance(value, bool)

        num_hidden: int = len(hidden_features)
        if isinstance(dropout, _typing.Sequence):
            if len(dropout) != num_hidden + 1:
                raise TypeError(
                    "When the dropout argument is a sequence, "
                    "The sequence length must equal to the number of layers to construct."
                )
            for _dropout in dropout:
                if _dropout is not None and not _is_rate(_dropout):
                    raise TypeError(
                        "When the dropout argument is a sequence, "
                        "every item in the sequence must be float or None"
                    )
            dropout_list: _typing.Sequence[_typing.Optional[float]] = [
                None if _dropout is None else float(_dropout) for _dropout in dropout
            ]
        elif _is_rate(dropout):
            rate: float = min(max(float(dropout), 0.0), 1.0)
            # A scalar rate applies to every layer except the output layer.
            dropout_list: _typing.Sequence[_typing.Optional[float]] = [
                rate for _ in range(num_hidden)
            ] + [None]
        elif dropout is None or dropout is Ellipsis:
            dropout_list: _typing.Sequence[_typing.Optional[float]] = [
                None for _ in range(num_hidden + 1)
            ]
        else:
            raise TypeError(
                "The provided dropout argument must be a float number or None or "
                "a sequence in which each item is either a float Number or None."
            )
        super().__init__()

        layers: torch.nn.ModuleList = torch.nn.ModuleList()
        # Hidden layers: num_features -> hidden[0] -> ... -> hidden[-1].
        dimensions = [num_features] + list(hidden_features)
        for layer_index in range(num_hidden):
            layers.append(
                self._GCNLayer(
                    dimensions[layer_index],
                    dimensions[layer_index + 1],
                    add_self_loops,
                    normalize,
                    activation_name,
                    dropout_list[layer_index],
                )
            )
        # Output layer: no activation. The dropout is passed by keyword; it
        # used to be passed positionally and landed in the activation_name
        # slot, which silently discarded it.
        layers.append(
            self._GCNLayer(
                dimensions[-1],
                num_classes,
                add_self_loops,
                normalize,
                dropout_probability=dropout_list[-1],
            )
        )
        self.__sequential_encoding_layers: torch.nn.ModuleList = layers

    @property
    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
        return self.__sequential_encoding_layers

    def __extract_edge_indexes_and_weights(
        self, data
    ) -> _typing.Union[
        _typing.Sequence[
            _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]
        ],
        _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]],
    ]:
        """
        Extract the graph structure to use for each layer.

        When ``data.edge_indexes`` is a sequence of int64 tensors with one
        entry per layer, a list of ``(edge_index, edge_weight)`` tuples is
        returned (weights taken from ``data.edge_weights`` when that is a
        sequence of matching length). Otherwise a single
        ``(edge_index, edge_weight)`` tuple built from ``data.edge_index``
        and ``data.edge_weight`` is returned.
        """

        def __compose_edge_index_and_weight(
            _edge_index: torch.LongTensor,
            _edge_weight: _typing.Optional[torch.Tensor] = None,
        ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]:
            if type(_edge_index) != torch.Tensor or _edge_index.dtype != torch.int64:
                raise TypeError
            # A weight that is not a tensor or does not match the number of
            # edges is dropped rather than rejected.
            if _edge_weight is not None and (
                type(_edge_weight) != torch.Tensor
                or _edge_index.size() != (2, _edge_weight.size(0))
            ):
                _edge_weight = None
            return _edge_index, _edge_weight

        def __shared_structure():
            return __compose_edge_index_and_weight(
                getattr(data, "edge_index"), getattr(data, "edge_weight", None)
            )

        num_layers: int = len(self.__sequential_encoding_layers)
        edge_indexes = getattr(data, "edge_indexes", None)
        if not (
            isinstance(edge_indexes, _typing.Sequence)
            and len(edge_indexes) == num_layers
        ):
            return __shared_structure()
        for __edge_index in edge_indexes:
            if type(__edge_index) != torch.Tensor or __edge_index.dtype != torch.int64:
                return __shared_structure()

        edge_weights = getattr(data, "edge_weights", None)
        if isinstance(edge_weights, _typing.Sequence) and len(edge_weights) == num_layers:
            return [
                __compose_edge_index_and_weight(_edge_index, _edge_weight)
                for _edge_index, _edge_weight in zip(edge_indexes, edge_weights)
            ]
        return [
            __compose_edge_index_and_weight(__edge_index)
            for __edge_index in edge_indexes
        ]

    def cls_encode(self, data) -> torch.Tensor:
        """Run ``data.x`` through every layer, using per-layer or shared edges."""
        edge_indexes_and_weights: _typing.Union[
            _typing.Sequence[
                _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]
            ],
            _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]],
        ] = self.__extract_edge_indexes_and_weights(data)

        x: torch.Tensor = getattr(data, "x")
        if (not isinstance(edge_indexes_and_weights, tuple)) and isinstance(
            edge_indexes_and_weights[0], tuple
        ):
            # edge_indexes_and_weights is a sequence of (edge_index, edge_weight)
            assert len(edge_indexes_and_weights) == len(
                self.__sequential_encoding_layers
            )
            for _edge_index_and_weight, gcn in zip(
                edge_indexes_and_weights, self.__sequential_encoding_layers
            ):
                _temp_data = autogl.data.Data(x=x, edge_index=_edge_index_and_weight[0])
                _temp_data.edge_weight = _edge_index_and_weight[1]
                x = gcn(_temp_data)
            return x

        # edge_indexes_and_weights is a single (edge_index, edge_weight)
        for gcn in self.__sequential_encoding_layers:
            _temp_data = autogl.data.Data(x=x, edge_index=edge_indexes_and_weights[0])
            _temp_data.edge_weight = edge_indexes_and_weights[1]
            x = gcn(_temp_data)
        return x

    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
        """Turn the encoded scores into log-probabilities over dim 1."""
        return torch.nn.functional.log_softmax(x, dim=1)

    def lp_encode(self, data):
        """
        Encode nodes for link prediction using all layers but the last;
        the final layer that is used runs without its activation.

        NOTE(review): this reads ``data.edge_index`` and builds ``Data``
        positionally — confirm both against the link-prediction caller.
        """
        x: torch.Tensor = getattr(data, "x")
        for i in range(len(self.__sequential_encoding_layers) - 2):
            x = self.__sequential_encoding_layers[i](
                autogl.data.Data(x, getattr(data, "edge_index"))
            )
        x = self.__sequential_encoding_layers[-2](
            autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False
        )
        return x

    def lp_decode(self, z, pos_edge_index, neg_edge_index):
        """Score each positive and negative edge by the dot product of its endpoints."""
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def lp_decode_all(self, z):
        """Return the index pairs whose dot-product score is positive, as a 2 x N tensor."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()


@register_model("gcn")
class AutoGCN(BaseModel):
    r"""
    AutoGCN.
    The model used in this automodel is GCN, i.e., the graph convolutional network from the
    `"Semi-supervised Classification with Graph Convolutional
    Networks" <https://arxiv.org/abs/1609.02907>`_ paper. The layer is

    .. math::

        \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
        \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

    where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
    adjacency matrix with inserted self-loops and
    :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

    Parameters
    ----------
    num_features: ``int``
        The dimension of features. Defaults to 0 when omitted.

    num_classes: ``int``
        The number of classes. Defaults to 0 when omitted.

    device: ``torch.device`` or ``str``
        The device where model will be running on. Defaults to ``"cpu"``.

    init: `bool`.
        If True(False), the model will (not) be initialized.
    """

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        init: bool = False,
        **kwargs
    ) -> None:
        super().__init__()

        def _or_default(value, default):
            # ``...`` (and None) mean "not provided"; storing the placeholder
            # itself would later be handed to torch, e.g. ``.to(Ellipsis)``.
            return default if value is None or value is Ellipsis else value

        self.num_features = _or_default(num_features, 0)
        self.num_classes = _or_default(num_classes, 0)
        self.device = _or_default(device, "cpu")

        self.params = {
            "features_num": self.num_features,
            "num_class": self.num_classes,
        }
        self.space = [
            {
                "parameterName": "add_self_loops",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "normalize",
                "type": "CATEGORICAL",
                "feasiblePoints": [1],
            },
            {
                "parameterName": "num_layers",
                "type": "DISCRETE",
                "feasiblePoints": "2,3,4",
            },
            {
                "parameterName": "hidden",
                "type": "NUMERICAL_LIST",
                "numericalType": "INTEGER",
                "length": 3,
                "minValue": [8, 8, 8],
                "maxValue": [128, 128, 128],
                "scalingType": "LOG",
                "cutPara": ("num_layers",),
                "cutFunc": lambda x: x[0] - 1,
            },
            {
                "parameterName": "dropout",
                "type": "DOUBLE",
                "maxValue": 0.8,
                "minValue": 0.2,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            },
        ]

        # Initial point of the hyper-parameter search. The dropout is written
        # as a float because GCN type-checks its dropout argument and the
        # integer 0 did not pass that check.
        self.hyperparams = {
            "num_layers": 3,
            "hidden": [128, 64],
            "dropout": 0.0,
            "act": "relu",
        }

        self.initialized = False
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the GCN network once and move it to ``self.device``."""
        if self.initialized:
            return
        self.initialized = True
        self.model = GCN(
            self.num_features,
            self.num_classes,
            self.hyperparams.get("hidden"),
            self.hyperparams.get("act"),
            self.hyperparams.get("dropout", None),
            bool(self.hyperparams.get("add_self_loops", True)),
            bool(self.hyperparams.get("normalize", True)),
        ).to(self.device)

+ 232
- 0
autogl/module/model/dgl/gin.py View File

@@ -0,0 +1,232 @@
import torch
import torch.nn.functional as F
from torch.nn import Linear, ReLU, Sequential, LeakyReLU, Tanh, ELU
from torch_geometric.nn import GINConv, global_add_pool
from torch.nn import BatchNorm1d
from . import register_model
from .base import BaseModel, activate_func
from copy import deepcopy
from ....utils import get_logger

# Module-level logger obtained from the project helper under the name "GINModel".
LOGGER = get_logger("GINModel")


def set_default(args, d):
    """
    Copy into ``args`` every entry of ``d`` whose key ``args`` does not
    have yet, leaving existing entries untouched. Returns ``args``.
    """
    absent_keys = [key for key in d if key not in args]
    for key in absent_keys:
        args[key] = d[key]
    return args


class GIN(torch.nn.Module):
    """Graph Isomorphism Network used for graph-level classification.

    The network stacks ``num_layers - 2`` ``GINConv`` layers (each followed
    by an activation and a ``BatchNorm1d``), sums node embeddings per graph
    with ``global_add_pool``, optionally concatenates graph-level features
    and finishes with two linear layers.

    Parameters
    ----------
    args: dict
        Must contain the keys ``features_num``, ``num_class``,
        ``num_graph_features``, ``num_layers``, ``hidden``, ``dropout``,
        ``act``, ``mlp_layers`` and ``eps``. ``eps`` is compared against the
        string ``"True"`` to decide whether epsilon is trainable.

    Raises
    ------
    Exception
        If any required key is missing from ``args``.
    AssertionError
        If ``num_layers`` is smaller than 3.
    """

    def __init__(self, args):
        super(GIN, self).__init__()
        self.args = args

        # Validate the configuration *before* reading any key, so that a
        # missing "num_layers" yields the descriptive error below instead of
        # a bare KeyError. Sorting keeps the message deterministic.
        required_keys = {
            "features_num",
            "num_class",
            "num_graph_features",
            "num_layers",
            "hidden",
            "dropout",
            "act",
            "mlp_layers",
            "eps",
        }
        missing_keys = sorted(required_keys - set(self.args.keys()))
        if len(missing_keys) > 0:
            raise Exception("Missing keys: %s." % ",".join(missing_keys))

        self.num_layer = int(self.args["num_layers"])
        assert self.num_layer > 2, "Number of layers in GIN should not less than 3"

        hidden = self.args["hidden"]
        if not self.num_layer == len(hidden) + 1:
            # Logger.warn is deprecated (and removed in Python 3.13).
            LOGGER.warning(
                "Warning: layer size does not match the length of hidden units"
            )
        self.num_graph_features = self.args["num_graph_features"]

        # Unknown activation names fall back to ReLU.
        if self.args["act"] == "leaky_relu":
            act = LeakyReLU()
        elif self.args["act"] == "relu":
            act = ReLU()
        elif self.args["act"] == "elu":
            act = ELU()
        elif self.args["act"] == "tanh":
            act = Tanh()
        else:
            act = ReLU()

        train_eps = self.args["eps"] == "True"

        def build_mlp(in_dim, out_dim):
            # One input projection followed by (mlp_layers - 1) pairs of
            # activation + square linear layer.
            layers = [Linear(in_dim, out_dim)]
            for _ in range(self.args["mlp_layers"] - 1):
                layers.append(act)
                layers.append(Linear(out_dim, out_dim))
            return Sequential(*layers)

        self.convs = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()

        # First convolution maps the raw node features to hidden[0].
        self.convs.append(
            GINConv(build_mlp(self.args["features_num"], hidden[0]), train_eps=train_eps)
        )
        self.bns.append(BatchNorm1d(hidden[0]))

        # Remaining convolutions map hidden[i] -> hidden[i + 1].
        for i in range(self.num_layer - 3):
            self.convs.append(
                GINConv(build_mlp(hidden[i], hidden[i + 1]), train_eps=train_eps)
            )
            self.bns.append(BatchNorm1d(hidden[i + 1]))

        # The classifier head consumes the pooled embedding plus the optional
        # graph-level features.
        self.fc1 = Linear(
            hidden[self.num_layer - 3] + self.num_graph_features,
            hidden[self.num_layer - 2],
        )
        self.fc2 = Linear(hidden[self.num_layer - 2], self.args["num_class"])

    def forward(self, data):
        """Return per-graph log-probabilities for the batch ``data``.

        Reads ``data.x``, ``data.edge_index`` and ``data.batch``; ``data.gf``
        is read only when ``num_graph_features`` is positive.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        if self.num_graph_features > 0:
            graph_feature = data.gf

        for i in range(self.num_layer - 2):
            x = self.convs[i](x, edge_index)
            x = activate_func(x, self.args["act"])
            x = self.bns[i](x)

        x = global_add_pool(x, batch)
        if self.num_graph_features > 0:
            x = torch.cat([x, graph_feature], dim=-1)
        x = self.fc1(x)
        x = activate_func(x, self.args["act"])
        x = F.dropout(x, p=self.args["dropout"], training=self.training)

        x = self.fc2(x)

        return F.log_softmax(x, dim=1)


@register_model("gin")
class AutoGIN(BaseModel):
    r"""
    Auto-model wrapper around GIN, the graph isomorphism network from the
    `"How Powerful are Graph Neural Networks?" <https://arxiv.org/abs/1810.00826>`_
    paper. The layer is

    .. math::
        \mathbf{x}^{\prime}_i = h_{\mathbf{\Theta}} \left( (1 + \epsilon) \cdot
        \mathbf{x}_i + \sum_{j \in \mathcal{N}(i)} \mathbf{x}_j \right)

    or, in matrix form,

    .. math::
        \mathbf{X}^{\prime} = h_{\mathbf{\Theta}} \left( \left( \mathbf{A} +
        (1 + \epsilon) \cdot \mathbf{I} \right) \cdot \mathbf{X} \right),

    where :math:`h_{\mathbf{\Theta}}` denotes a neural network, *i.e.* an MLP.

    Parameters
    ----------
    num_features: `int`.
        The dimension of features (0 when omitted).

    num_classes: `int`.
        The number of classes (0 when omitted).

    device: `torch.device` or `str`
        The device where model will be running on (``"cpu"`` when omitted).

    init: `bool`.
        When exactly ``True`` the underlying network is built immediately.

    num_graph_features: `int`.
        The number of graph-level features (0 when omitted).
    """

    def __init__(
        self,
        num_features=None,
        num_classes=None,
        device=None,
        init=False,
        num_graph_features=None,
        **args
    ):

        super(AutoGIN, self).__init__()
        self.num_features = 0 if num_features is None else num_features
        self.num_classes = 0 if num_classes is None else int(num_classes)
        self.num_graph_features = (
            0 if num_graph_features is None else int(num_graph_features)
        )
        self.device = "cpu" if device is None else device
        self.init = True

        # Fixed (non-searched) constructor arguments of the network.
        self.params = dict(
            features_num=self.num_features,
            num_class=self.num_classes,
            num_graph_features=self.num_graph_features,
        )

        # Hyper-parameter search space, one entry per tunable parameter.
        layer_count = {
            "parameterName": "num_layers",
            "type": "DISCRETE",
            "feasiblePoints": "4,5,6",
        }
        hidden_sizes = {
            "parameterName": "hidden",
            "type": "NUMERICAL_LIST",
            "numericalType": "INTEGER",
            "length": 5,
            "minValue": [8, 8, 8, 8, 8],
            "maxValue": [64, 64, 64, 64, 64],
            "scalingType": "LOG",
            "cutPara": ("num_layers",),
            "cutFunc": lambda x: x[0] - 1,
        }
        dropout_rate = {
            "parameterName": "dropout",
            "type": "DOUBLE",
            "maxValue": 0.9,
            "minValue": 0.1,
            "scalingType": "LINEAR",
        }
        activation = {
            "parameterName": "act",
            "type": "CATEGORICAL",
            "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
        }
        trainable_eps = {
            "parameterName": "eps",
            "type": "CATEGORICAL",
            "feasiblePoints": ["True", "False"],
        }
        mlp_depth = {
            "parameterName": "mlp_layers",
            "type": "DISCRETE",
            "feasiblePoints": "2,3,4",
        }
        self.space = [
            layer_count,
            hidden_sizes,
            dropout_rate,
            activation,
            trainable_eps,
            mlp_depth,
        ]

        # Default hyper-parameters used until a search overrides them.
        self.hyperparams = dict(
            num_layers=3,
            hidden=[64, 32],
            dropout=0.5,
            act="relu",
            eps="True",
            mlp_layers=2,
        )

        self.initialized = False
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the GIN network once from ``params`` merged with ``hyperparams``."""
        if self.initialized:
            return
        self.initialized = True
        merged = {**self.params, **self.hyperparams}
        self.model = GIN(merged).to(self.device)

+ 171
- 0
autogl/module/model/dgl/gin_dgl.py View File

@@ -0,0 +1,171 @@
"""
How Powerful are Graph Neural Networks
https://arxiv.org/abs/1810.00826
https://openreview.net/forum?id=ryGs6iA5Km
Author's implementation: https://github.com/weihua916/powerful-gnns
"""


import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch.conv import GINConv
from dgl.nn.pytorch.glob import SumPooling, AvgPooling, MaxPooling


class ApplyNodeFunc(nn.Module):
    """Node update function: MLP, then batch normalization, then ReLU.

    The wrapped ``mlp`` must expose an ``output_dim`` attribute, which sizes
    the batch-normalization layer.
    """

    def __init__(self, mlp):
        super().__init__()
        self.mlp = mlp
        self.bn = nn.BatchNorm1d(mlp.output_dim)

    def forward(self, h):
        """Return ``relu(bn(mlp(h)))``."""
        transformed = self.mlp(h)
        normalized = self.bn(transformed)
        return F.relu(normalized)


class MLP(nn.Module):
    """Multi-layer perceptron whose last layer is linear (no activation)."""

    def __init__(self, num_layers, input_dim, hidden_dim, output_dim):
        """Construct the linear layers of the MLP.

        Parameters
        ----------
        num_layers: int
            The number of linear layers; 1 gives a plain linear model
        input_dim: int
            The dimensionality of input features
        hidden_dim: int
            The dimensionality of hidden units at ALL layers
        output_dim: int
            The dimensionality of the output of the last linear layer

        Raises
        ------
        ValueError
            If ``num_layers`` is smaller than 1.
        """
        super().__init__()
        self.linear_or_not = num_layers == 1  # True means single linear layer
        self.num_layers = num_layers
        self.output_dim = output_dim

        if num_layers < 1:
            raise ValueError("number of layers should be positive!")

        if self.linear_or_not:
            self.linear = nn.Linear(input_dim, output_dim)
            return

        # Layer widths: input -> hidden (num_layers - 1 times) -> output.
        widths = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
        self.linears = torch.nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]
        )
        # One batch norm after every linear layer except the last.
        self.batch_norms = torch.nn.ModuleList(
            [nn.BatchNorm1d((hidden_dim)) for _ in range(num_layers - 1)]
        )

    def forward(self, x):
        """Apply the MLP to ``x`` and return the output of the last linear layer."""
        if self.linear_or_not:
            return self.linear(x)

        h = x
        # zip stops at the shorter list, so the last linear layer is left out
        # of the loop and applied without normalization or activation.
        for linear, norm in zip(self.linears, self.batch_norms):
            h = F.relu(norm(linear(h)))
        return self.linears[-1](h)


class GIN(nn.Module):
    """GIN model producing one score vector per graph."""

    def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
                 output_dim, final_dropout, learn_eps, graph_pooling_type,
                 neighbor_pooling_type):
        """Set up the GIN layers, the per-layer read-outs and the pooling.

        Parameters
        ----------
        num_layers: int
            The number of layers in the network, counting the input layer;
            ``num_layers - 1`` GIN convolutions are created
        num_mlp_layers: int
            The number of linear layers in each MLP
        input_dim: int
            The dimensionality of input features
        hidden_dim: int
            The dimensionality of hidden units at ALL layers
        output_dim: int
            The number of classes for prediction
        final_dropout: float
            dropout ratio applied to every per-layer prediction
        learn_eps: boolean
            Passed to ``GINConv``; if True, epsilon is learned
        graph_pooling_type: str
            how to aggregate all nodes of a graph ('sum', 'mean' or 'max')
        neighbor_pooling_type: str
            how to aggregate neighbors, passed to ``GINConv``

        Raises
        ------
        NotImplementedError
            If ``graph_pooling_type`` is not one of the three supported names.
        """
        super().__init__()
        self.num_layers = num_layers
        self.learn_eps = learn_eps

        self.ginlayers = torch.nn.ModuleList()
        self.batch_norms = torch.nn.ModuleList()

        for layer in range(self.num_layers - 1):
            # Only the first convolution sees the raw input width.
            in_dim = input_dim if layer == 0 else hidden_dim
            mlp = MLP(num_mlp_layers, in_dim, hidden_dim, hidden_dim)
            conv = GINConv(ApplyNodeFunc(mlp), neighbor_pooling_type, 0, self.learn_eps)
            self.ginlayers.append(conv)
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # One linear read-out per hidden representation (input included);
        # each maps a pooled representation to prediction scores.
        self.linears_prediction = torch.nn.ModuleList()
        for layer in range(num_layers):
            in_dim = input_dim if layer == 0 else hidden_dim
            self.linears_prediction.append(nn.Linear(in_dim, output_dim))

        self.drop = nn.Dropout(final_dropout)

        if graph_pooling_type == 'sum':
            self.pool = SumPooling()
        elif graph_pooling_type == 'mean':
            self.pool = AvgPooling()
        elif graph_pooling_type == 'max':
            self.pool = MaxPooling()
        else:
            raise NotImplementedError

    def forward(self, g, h):
        """Return the sum over layers of the dropped-out pooled predictions."""
        # Hidden representation at each layer, starting with the input.
        hidden_rep = [h]
        for conv, norm in zip(self.ginlayers, self.batch_norms):
            h = F.relu(norm(conv(g, h)))
            hidden_rep.append(h)

        score_over_layer = 0
        # Pool all nodes of every graph at every layer and accumulate scores.
        for readout, representation in zip(self.linears_prediction, hidden_rep):
            pooled_h = self.pool(g, representation)
            score_over_layer += self.drop(readout(pooled_h))

        return score_over_layer

+ 81
- 0
autogl/module/model/dgl/ginparser.py View File

@@ -0,0 +1,81 @@
"""Parser for arguments

Put all arguments in one file and group similar arguments
"""
import argparse


class Parser():
    """Command-line argument parser for the GIN example.

    Constructing an instance registers every option and immediately parses
    the process command line; the result is stored in ``self.args``.
    """

    def __init__(self, description):
        '''
        Create the argument parser and parse the command line.
        '''
        self.parser = argparse.ArgumentParser(description=description)
        self.args = None
        self._parse()

    def _parse(self):
        """Register all options, then parse them into ``self.args``."""
        add = self.parser.add_argument

        # dataset
        add('--dataset', type=str, default="MUTAG",
            choices=['MUTAG', 'COLLAB', 'IMDBBINARY', 'IMDBMULTI'],
            help='name of dataset (default: MUTAG)')
        add('--batch_size', type=int, default=32,
            help='batch size for training and validation (default: 32)')
        add('--fold_idx', type=int, default=0,
            help='the index(<10) of fold in 10-fold validation.')
        add('--filename', type=str, default="",
            help='output file')

        # device
        add('--disable-cuda', action='store_true',
            help='Disable CUDA')
        add('--device', type=int, default=0,
            help='which gpu device to use (default: 0)')

        # net
        add('--num_layers', type=int, default=5,
            help='number of layers (default: 5)')
        add('--num_mlp_layers', type=int, default=2,
            help='number of MLP layers(default: 2). 1 means linear model.')
        add('--hidden_dim', type=int, default=64,
            help='number of hidden units (default: 64)')

        # graph
        pooling_choices = ["sum", "mean", "max"]
        add('--graph_pooling_type', type=str,
            default="sum", choices=list(pooling_choices),
            help='type of graph pooling: sum, mean or max')
        add('--neighbor_pooling_type', type=str,
            default="sum", choices=list(pooling_choices),
            help='type of neighboring pooling: sum, mean or max')
        add('--learn_eps', action="store_true",
            help='learn the epsilon weighting')

        # learning
        add('--seed', type=int, default=0,
            help='random seed (default: 0)')
        add('--epochs', type=int, default=350,
            help='number of epochs to train (default: 350)')
        add('--lr', type=float, default=0.01,
            help='learning rate (default: 0.01)')
        add('--final_dropout', type=float, default=0.5,
            help='final layer dropout (default: 0.5)')

        # done
        self.args = self.parser.parse_args()

+ 407
- 0
autogl/module/model/dgl/graph_saint.py View File

@@ -0,0 +1,407 @@
import typing as _typing
import torch.nn.functional
from torch_geometric.nn.conv import MessagePassing
from torch_sparse import SparseTensor, matmul

from . import register_model
from .base import ClassificationModel, ClassificationSupportedSequentialModel


class _GraphSAINTAggregationLayers:
    """Namespace grouping the layer modules used by the GraphSAINT aggregation model."""

    class MultiOrderAggregationLayer(torch.nn.Module):
        """Layer combining an order-0 transform with an optional order-1 aggregation.

        With ``_order == 1`` the outputs of both aggregators are concatenated
        along dimension 1; with ``_order == 0`` only the order-0 output is
        produced. An optional dropout is applied to the result.
        """

        class Order0Aggregator(torch.nn.Module):
            """Linear transform of the input, then optional activation and batch norm.

            No neighbor information is used: the edge arguments of ``forward``
            are accepted but ignored.
            """

            def __init__(
                self,
                input_dimension: int,
                output_dimension: int,
                bias: bool = True,
                activation: _typing.Optional[str] = "ReLU",
                batch_norm: bool = True,
            ):
                """Validate the arguments and create the sub-modules.

                Raises ``TypeError`` when the dimensions are not exactly ``int``
                or ``bias``/``batch_norm`` are not exactly ``bool``, and
                ``ValueError`` when a dimension is not positive.
                """
                super().__init__()
                if not type(input_dimension) == type(output_dimension) == int:
                    raise TypeError
                if not (input_dimension > 0 and output_dimension > 0):
                    raise ValueError
                if not type(bias) == bool:
                    raise TypeError
                self.__linear_transform = torch.nn.Linear(
                    input_dimension, output_dimension, bias
                )
                self.__linear_transform.reset_parameters()
                # Resolve the activation by name: "relu"/"elu" are matched
                # case-insensitively, any other string is looked up verbatim in
                # torch.nn.functional, and everything else means identity.
                if type(activation) == str:
                    if activation.lower() == "ReLU".lower():
                        self.__activation = torch.nn.functional.relu
                    elif activation.lower() == "elu":
                        self.__activation = torch.nn.functional.elu
                    elif hasattr(torch.nn.functional, activation) and callable(
                        getattr(torch.nn.functional, activation)
                    ):
                        self.__activation = getattr(torch.nn.functional, activation)
                    else:
                        self.__activation = lambda x: x
                else:
                    self.__activation = lambda x: x
                if type(batch_norm) != bool:
                    raise TypeError
                else:
                    # 1e-8 is BatchNorm1d's second positional argument (eps).
                    self.__optional_batch_normalization: _typing.Optional[
                        torch.nn.BatchNorm1d
                    ] = (
                        torch.nn.BatchNorm1d(output_dimension, 1e-8)
                        if batch_norm
                        else None
                    )

            def forward(
                self,
                x: _typing.Union[
                    torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]
                ],
                _edge_index: torch.Tensor,
                _edge_weight: _typing.Optional[torch.Tensor] = None,
                _size: _typing.Optional[_typing.Tuple[int, int]] = None,
            ) -> torch.Tensor:
                """Return ``batch_norm(activation(linear(x)))``; the edge arguments are unused."""
                __output: torch.Tensor = self.__linear_transform(x)
                if self.__activation is not None and callable(self.__activation):
                    __output: torch.Tensor = self.__activation(__output)
                if self.__optional_batch_normalization is not None and isinstance(
                    self.__optional_batch_normalization, torch.nn.BatchNorm1d
                ):
                    __output: torch.Tensor = self.__optional_batch_normalization(
                        __output
                    )
                return __output

        class Order1Aggregator(MessagePassing):
            """Sum-aggregates neighbor features, then applies linear, activation and batch norm."""

            def __init__(
                self,
                input_dimension: int,
                output_dimension: int,
                bias: bool = True,
                activation: _typing.Optional[str] = "ReLU",
                batch_norm: bool = True,
            ):
                """Validate the arguments and create the sub-modules.

                The checks and the activation lookup are the same as in
                ``Order0Aggregator``; message passing uses ``aggr="add"``.
                """
                super().__init__(aggr="add")
                if not type(input_dimension) == type(output_dimension) == int:
                    raise TypeError
                if not (input_dimension > 0 and output_dimension > 0):
                    raise ValueError
                if not type(bias) == bool:
                    raise TypeError
                self.__linear_transform = torch.nn.Linear(
                    input_dimension, output_dimension, bias
                )
                self.__linear_transform.reset_parameters()
                # Same name-based activation lookup as Order0Aggregator;
                # unknown or non-string values mean identity.
                if type(activation) == str:
                    if activation.lower() == "ReLU".lower():
                        self.__activation = torch.nn.functional.relu
                    elif activation.lower() == "elu":
                        self.__activation = torch.nn.functional.elu
                    elif hasattr(torch.nn.functional, activation) and callable(
                        getattr(torch.nn.functional, activation)
                    ):
                        self.__activation = getattr(torch.nn.functional, activation)
                    else:
                        self.__activation = lambda x: x
                else:
                    self.__activation = lambda x: x
                if type(batch_norm) != bool:
                    raise TypeError
                else:
                    # 1e-8 is BatchNorm1d's second positional argument (eps).
                    self.__optional_batch_normalization: _typing.Optional[
                        torch.nn.BatchNorm1d
                    ] = (
                        torch.nn.BatchNorm1d(output_dimension, 1e-8)
                        if batch_norm
                        else None
                    )

            def forward(
                self,
                x: _typing.Union[
                    torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]
                ],
                _edge_index: torch.Tensor,
                _edge_weight: _typing.Optional[torch.Tensor] = None,
                _size: _typing.Optional[_typing.Tuple[int, int]] = None,
            ) -> torch.Tensor:
                """Propagate ``x`` over the edges, then transform the aggregated result."""

                # A plain tensor is used for both elements of the pair handed
                # to propagate().
                if type(x) == torch.Tensor:
                    x: _typing.Tuple[torch.Tensor, torch.Tensor] = (x, x)

                __output = self.propagate(
                    _edge_index, x=x, edge_weight=_edge_weight, size=_size
                )
                __output: torch.Tensor = self.__linear_transform(__output)
                if self.__activation is not None and callable(self.__activation):
                    __output: torch.Tensor = self.__activation(__output)
                if self.__optional_batch_normalization is not None and isinstance(
                    self.__optional_batch_normalization, torch.nn.BatchNorm1d
                ):
                    __output: torch.Tensor = self.__optional_batch_normalization(
                        __output
                    )
                return __output

            def message(
                self, x_j: torch.Tensor, edge_weight: _typing.Optional[torch.Tensor]
            ) -> torch.Tensor:
                """Return ``x_j``, scaled row-wise by ``edge_weight`` when it is given."""
                return x_j if edge_weight is None else edge_weight.view(-1, 1) * x_j

            def message_and_aggregate(
                self,
                adj_t: SparseTensor,
                x: _typing.Union[
                    torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]
                ],
            ) -> torch.Tensor:
                """Sparse-matrix form: multiply ``adj_t`` with ``x[0]`` using ``self.aggr``."""
                return matmul(adj_t, x[0], reduce=self.aggr)

        @property
        def integral_output_dimension(self) -> int:
            """Total output width: ``(order + 1)`` times the per-order output dimension."""
            return (self._order + 1) * self._each_order_output_dimension

        def __init__(
            self,
            _input_dimension: int,
            _each_order_output_dimension: int,
            _order: int,
            bias: bool = True,
            activation: _typing.Optional[str] = "ReLU",
            batch_norm: bool = True,
            _dropout: _typing.Optional[float] = ...,
        ):
            """Create the order-0 aggregator and, for ``_order == 1``, the order-1 one.

            Raises ``TypeError`` for non-``int`` dimensions/order or non-``bool``
            ``bias``, and ``ValueError`` for non-positive dimensions or an order
            other than 0 or 1. Dropout is only created when ``_dropout`` is
            exactly a ``float``; the default ``...`` therefore means no dropout.
            """
            super().__init__()
            if not (
                type(_input_dimension) == type(_order) == int
                and type(_each_order_output_dimension) == int
            ):
                raise TypeError
            if _input_dimension <= 0 or _each_order_output_dimension <= 0:
                raise ValueError
            if _order not in (0, 1):
                raise ValueError("Unsupported order number")
            self._input_dimension: int = _input_dimension
            self._each_order_output_dimension: int = _each_order_output_dimension
            self._order: int = _order
            if type(bias) != bool:
                raise TypeError
            self.__order0_transform = self.Order0Aggregator(
                self._input_dimension,
                self._each_order_output_dimension,
                bias,
                activation,
                batch_norm,
            )
            if _order == 1:
                self.__order1_transform = self.Order1Aggregator(
                    self._input_dimension,
                    self._each_order_output_dimension,
                    bias,
                    activation,
                    batch_norm,
                )
            else:
                self.__order1_transform = None
            if _dropout is not None and type(_dropout) == float:
                # Clamp the probability into [0, 1].
                if _dropout < 0:
                    _dropout = 0
                if _dropout > 1:
                    _dropout = 1
                self.__optional_dropout: _typing.Optional[
                    torch.nn.Dropout
                ] = torch.nn.Dropout(_dropout)
            else:
                self.__optional_dropout: _typing.Optional[torch.nn.Dropout] = None

        def _forward(
            self,
            x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]],
            edge_index: torch.Tensor,
            edge_weight: _typing.Optional[torch.Tensor] = None,
            size: _typing.Optional[_typing.Tuple[int, int]] = None,
        ) -> torch.Tensor:
            """Run the aggregator(s) on explicit tensors and apply the optional dropout."""
            if self.__order1_transform is not None and isinstance(
                self.__order1_transform, self.Order1Aggregator
            ):
                # Order-0 output first, order-1 output second, joined on dim 1.
                __output: torch.Tensor = torch.cat(
                    [
                        self.__order0_transform(x, edge_index, edge_weight, size),
                        self.__order1_transform(x, edge_index, edge_weight, size),
                    ],
                    dim=1,
                )
            else:
                __output: torch.Tensor = self.__order0_transform(
                    x, edge_index, edge_weight, size
                )
            if self.__optional_dropout is not None and isinstance(
                self.__optional_dropout, torch.nn.Dropout
            ):
                __output: torch.Tensor = self.__optional_dropout(__output)
            return __output

        def forward(self, data) -> torch.Tensor:
            """Read ``x``, ``edge_index`` and optional ``edge_weight`` from ``data`` and run the layer.

            Raises ``TypeError`` when any of them is not exactly a ``torch.Tensor``
            (``edge_weight`` may also be absent or ``None``).
            """
            x: torch.Tensor = getattr(data, "x")
            if type(x) != torch.Tensor:
                raise TypeError
            edge_index: torch.LongTensor = getattr(data, "edge_index")
            if type(edge_index) != torch.Tensor:
                raise TypeError
            edge_weight: _typing.Optional[torch.Tensor] = getattr(
                data, "edge_weight", None
            )
            if edge_weight is not None and type(edge_weight) != torch.Tensor:
                raise TypeError
            return self._forward(x, edge_index, edge_weight)

    class WrappedDropout(torch.nn.Module):
        """Dropout that accepts either a tensor or an object carrying a tensor ``x``."""

        def __init__(self, dropout_module: torch.nn.Dropout):
            super().__init__()
            self.__dropout_module: torch.nn.Dropout = dropout_module

        def forward(self, tenser_or_data) -> torch.Tensor:
            """Apply dropout to the tensor itself or to its ``x`` attribute.

            Raises ``TypeError`` when neither form provides a ``torch.Tensor``.
            """
            if type(tenser_or_data) == torch.Tensor:
                return self.__dropout_module(tenser_or_data)
            elif (
                hasattr(tenser_or_data, "x")
                and type(getattr(tenser_or_data, "x")) == torch.Tensor
            ):
                return self.__dropout_module(getattr(tenser_or_data, "x"))
            else:
                raise TypeError


class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialModel):
    """Sequential GraphSAINT model built from multi-order aggregation layers.

    The encoder is an input dropout followed by one
    ``MultiOrderAggregationLayer`` per entry of ``_layers_order_list``; the
    decoder optionally L2-normalizes the embedding and applies a linear
    classifier with ``log_softmax``.

    Parameters
    ----------
    num_features: int
        Input feature dimension.
    num_classes: int
        Output dimension of the final linear classifier.
    _output_dimension_for_each_order: int
        Per-order output width of every aggregation layer (must be positive).
    _layers_order_list: sequence of int
        Aggregation order of each layer.
    _pre_dropout: float
        Dropout probability applied to the input, clamped into [0, 1].
    _layers_dropout: float or sequence of float
        Dropout of the aggregation layers. A single float (clamped into
        [0, 1]) is used for every layer; a sequence must have one entry per
        layer.
    activation, bias, batch_norm:
        Forwarded to every aggregation layer.
    normalize: bool
        Whether ``cls_decode`` L2-normalizes its input first.

    Raises
    ------
    TypeError
        For a non-``int`` output dimension, a non-``float`` ``_pre_dropout``
        or a ``_layers_dropout`` that is neither a sequence nor a ``float``.
    ValueError
        For a non-positive output dimension or a dropout sequence whose
        length differs from ``_layers_order_list``.
    """

    def __init__(
        self,
        num_features: int,
        num_classes: int,
        _output_dimension_for_each_order: int,
        _layers_order_list: _typing.Sequence[int],
        _pre_dropout: float,
        _layers_dropout: _typing.Union[float, _typing.Sequence[float]],
        activation: _typing.Optional[str] = "ReLU",
        bias: bool = True,
        batch_norm: bool = True,
        normalize: bool = True,
    ):
        super(GraphSAINTMultiOrderAggregationModel, self).__init__()
        if type(_output_dimension_for_each_order) != int:
            raise TypeError
        if not _output_dimension_for_each_order > 0:
            raise ValueError
        self._layers_order_list: _typing.Sequence[int] = _layers_order_list

        if isinstance(_layers_dropout, _typing.Sequence):
            if len(_layers_dropout) != len(_layers_order_list):
                raise ValueError
            self._layers_dropout: _typing.Sequence[float] = _layers_dropout
        elif type(_layers_dropout) == float:
            # Clamp with float bounds: the aggregation layer only enables
            # dropout for values whose type is exactly float.
            _clamped_dropout: float = min(max(_layers_dropout, 0.0), 1.0)
            self._layers_dropout: _typing.Sequence[float] = [
                _clamped_dropout for _ in _layers_order_list
            ]
        else:
            raise TypeError

        if type(_pre_dropout) != float:
            raise TypeError
        _pre_dropout = min(max(_pre_dropout, 0.0), 1.0)

        # NOTE: the per-layer values are always read from the normalized
        # ``self._layers_dropout``. Indexing the raw ``_layers_dropout``
        # argument fails with TypeError whenever a single float is given.
        self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList(
            (
                _GraphSAINTAggregationLayers.WrappedDropout(
                    torch.nn.Dropout(_pre_dropout)
                ),
                _GraphSAINTAggregationLayers.MultiOrderAggregationLayer(
                    num_features,
                    _output_dimension_for_each_order,
                    _layers_order_list[0],
                    bias,
                    activation,
                    batch_norm,
                    self._layers_dropout[0],
                ),
            )
        )
        for _layer_index in range(1, len(_layers_order_list)):
            # Each layer consumes the full (concatenated) output of the
            # previous aggregation layer.
            self.__sequential_encoding_layers.append(
                _GraphSAINTAggregationLayers.MultiOrderAggregationLayer(
                    self.__sequential_encoding_layers[-1].integral_output_dimension,
                    _output_dimension_for_each_order,
                    _layers_order_list[_layer_index],
                    bias,
                    activation,
                    batch_norm,
                    self._layers_dropout[_layer_index],
                )
            )
        self.__apply_normalize: bool = normalize
        self.__linear_transform: torch.nn.Linear = torch.nn.Linear(
            self.__sequential_encoding_layers[-1].integral_output_dimension,
            num_classes,
            bias,
        )
        self.__linear_transform.reset_parameters()

    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
        """Map embeddings ``x`` to log-probabilities, L2-normalizing first if enabled."""
        if self.__apply_normalize:
            x: torch.Tensor = torch.nn.functional.normalize(x, p=2, dim=1)
        return torch.nn.functional.log_softmax(self.__linear_transform(x), dim=1)

    def cls_encode(self, data) -> torch.Tensor:
        """Run every encoding layer on ``data`` and return the final ``data.x``.

        ``data.x`` is overwritten in place after each layer. Raises
        ``TypeError`` when ``x``, ``edge_index`` or a non-``None``
        ``edge_weight`` is not exactly a ``torch.Tensor``.
        """
        if type(getattr(data, "x")) != torch.Tensor:
            raise TypeError
        if type(getattr(data, "edge_index")) != torch.Tensor:
            raise TypeError
        if (
            getattr(data, "edge_weight", None) is not None
            and type(getattr(data, "edge_weight")) != torch.Tensor
        ):
            raise TypeError
        for encoding_layer in self.__sequential_encoding_layers:
            setattr(data, "x", encoding_layer(data))
        return getattr(data, "x")

    @property
    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
        """The ordered encoding layers (input dropout first)."""
        return self.__sequential_encoding_layers


@register_model("GraphSAINTAggregationModel")
class GraphSAINTAggregationModel(ClassificationModel):
    """Registered auto-model wrapper around ``GraphSAINTMultiOrderAggregationModel``."""

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        init: bool = False,
        **kwargs
    ):
        """Forward all arguments unchanged to the ``ClassificationModel`` constructor."""
        super(GraphSAINTAggregationModel, self).__init__(
            num_features, num_classes, device=device, init=init, **kwargs
        )
        # todo: Initialize with default hyper parameter space and hyper parameter

    def _initialize(self):
        """Build the network from ``self.hyper_parameter`` and move it to ``self.device``."""
        settings = self.hyper_parameter
        network = GraphSAINTMultiOrderAggregationModel(
            self.num_features,
            self.num_classes,
            settings.get("output_dimension_for_each_order"),
            settings.get("layers_order_list"),
            settings.get("pre_dropout"),
            settings.get("layers_dropout"),
            settings.get("activation", "ReLU"),
            bool(settings.get("bias", True)),
            bool(settings.get("batch_norm", True)),
            bool(settings.get("normalize", True)),
        )
        self.model = network.to(self.device)

+ 306
- 0
autogl/module/model/dgl/graphsage.py View File

@@ -0,0 +1,306 @@
import torch
import typing as _typing

from torch_geometric.nn.conv import SAGEConv
import torch.nn.functional
import autogl.data
from . import register_model
from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel
from ....utils import get_logger

LOGGER = get_logger("SAGEModel")


class GraphSAGE(ClassificationSupportedSequentialModel):
    """GraphSAGE network made of stacked ``SAGEConv`` layers.

    Parameters
    ----------
    num_features: int
        Input feature dimension.
    num_classes: int
        Output dimension of the last layer.
    hidden_features: sequence of int
        Width of each hidden layer (all positive); may be empty.
    activation_name: str
        Name handed to ``activate_func`` after every layer except the output
        layer of a network that has hidden layers.
    layers_dropout: float, None or sequence
        A single value is used for every hidden layer (the output layer gets
        no dropout). A sequence must hold ``len(hidden_features) + 1``
        entries, one per layer including the output layer.
    aggr: str
        ``"add"``, ``"max"`` or ``"mean"``; anything else falls back to
        ``"mean"``.

    Raises
    ------
    TypeError
        On wrongly typed arguments or a dropout sequence of the wrong length.
    ValueError
        On a non-positive hidden width.
    """

    class _SAGELayer(torch.nn.Module):
        """One ``SAGEConv`` followed by optional activation and optional dropout."""

        def __init__(
            self,
            input_channels: int,
            output_channels: int,
            aggr: str,
            activation_name: _typing.Optional[str] = ...,
            dropout_probability: _typing.Optional[float] = ...,
        ):
            super().__init__()
            self._convolution: SAGEConv = SAGEConv(
                input_channels, output_channels, aggr=aggr
            )
            # Only an actual string enables the activation; ``...`` and None
            # both mean "no activation".
            self._activation_name: _typing.Optional[str] = (
                activation_name if type(activation_name) == str else None
            )
            # Only an actual float enables dropout; it is clamped into [0, 1].
            if type(dropout_probability) == float:
                self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout(
                    min(max(dropout_probability, 0.0), 1.0)
                )
            else:
                self._dropout: _typing.Optional[torch.nn.Dropout] = None

        def forward(self, data, enable_activation: bool = True) -> torch.Tensor:
            """Apply the layer to ``data.x`` over ``data.edge_index``.

            Raises ``TypeError`` when either attribute is not exactly a
            ``torch.Tensor``. ``enable_activation=False`` skips the activation.
            """
            x: torch.Tensor = getattr(data, "x")
            edge_index: torch.Tensor = getattr(data, "edge_index")
            if type(x) != torch.Tensor or type(edge_index) != torch.Tensor:
                raise TypeError

            x: torch.Tensor = self._convolution.forward(x, edge_index)
            if self._activation_name is not None and enable_activation:
                x: torch.Tensor = activate_func(x, self._activation_name)
            if self._dropout is not None:
                x: torch.Tensor = self._dropout.forward(x)
            return x

    def __init__(
        self,
        num_features: int,
        num_classes: int,
        hidden_features: _typing.Sequence[int],
        activation_name: str,
        layers_dropout: _typing.Union[
            _typing.Optional[float], _typing.Sequence[_typing.Optional[float]]
        ] = None,
        aggr: str = "mean",
    ):
        super().__init__()
        if not type(num_features) == type(num_classes) == int:
            raise TypeError
        if not isinstance(hidden_features, _typing.Sequence):
            raise TypeError
        for hidden_feature in hidden_features:
            if type(hidden_feature) != int:
                raise TypeError
            elif hidden_feature <= 0:
                raise ValueError
        if isinstance(layers_dropout, _typing.Sequence):
            if len(layers_dropout) != (len(hidden_features) + 1):
                raise TypeError
            for d in layers_dropout:
                if d is not None and type(d) != float:
                    raise TypeError
            _layers_dropout: _typing.Sequence[_typing.Optional[float]] = layers_dropout
        elif layers_dropout is None or type(layers_dropout) == float:
            # A scalar applies to the hidden layers only; the output layer
            # gets no dropout.
            _layers_dropout: _typing.Sequence[_typing.Optional[float]] = [
                layers_dropout for _ in range(len(hidden_features))
            ] + [None]
        else:
            raise TypeError
        if not type(activation_name) == type(aggr) == str:
            raise TypeError
        if aggr not in ("add", "max", "mean"):
            aggr = "mean"

        # Layer k maps widths[k] -> widths[k + 1] and uses _layers_dropout[k].
        widths = [num_features] + list(hidden_features) + [num_classes]
        has_hidden_layers = len(hidden_features) > 0
        output_index = len(widths) - 2
        layers = []
        for layer_index in range(len(widths) - 1):
            # With hidden layers present, the output layer has no activation.
            # Its dropout is passed by keyword so that it cannot end up in
            # the ``activation_name`` slot and be silently discarded.
            is_output_layer = has_hidden_layers and layer_index == output_index
            layers.append(
                self._SAGELayer(
                    widths[layer_index],
                    widths[layer_index + 1],
                    aggr,
                    activation_name=None if is_output_layer else activation_name,
                    dropout_probability=_layers_dropout[layer_index],
                )
            )
        self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList(
            layers
        )

    @property
    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
        """The ordered ``_SAGELayer`` modules."""
        return self.__sequential_encoding_layers

    def cls_encode(self, data) -> torch.Tensor:
        """Encode ``data.x`` through all layers.

        When ``data.edge_indexes`` is a sequence with one entry per layer,
        layer ``i`` uses ``edge_indexes[i]`` (each must be exactly a
        ``torch.Tensor``, otherwise ``TypeError``); otherwise every layer
        uses ``data.edge_index``.
        """
        layerwise_edges = getattr(data, "edge_indexes", None)
        if (
            hasattr(data, "edge_indexes")
            and isinstance(layerwise_edges, _typing.Sequence)
            and len(layerwise_edges) == len(self.__sequential_encoding_layers)
        ):
            for layer_edge_index in layerwise_edges:
                if type(layer_edge_index) != torch.Tensor:
                    raise TypeError
            # Layer-wise encode
            x: torch.Tensor = getattr(data, "x")
            for i, layer_edge_index in enumerate(layerwise_edges):
                x: torch.Tensor = self.__sequential_encoding_layers[i](
                    autogl.data.Data(x=x, edge_index=layer_edge_index)
                )
            return x
        else:
            x: torch.Tensor = getattr(data, "x")
            for layer in self.__sequential_encoding_layers:
                x = layer(autogl.data.Data(x, getattr(data, "edge_index")))
            return x

    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``log_softmax`` of ``x`` along dimension 1."""
        return torch.nn.functional.log_softmax(x, dim=1)

    def lp_encode(self, data):
        """Encode for link prediction using all layers except the last.

        The second-to-last layer is applied without activation.
        """
        x: torch.Tensor = getattr(data, "x")
        for i in range(len(self.__sequential_encoding_layers) - 2):
            x = self.__sequential_encoding_layers[i](
                autogl.data.Data(x, getattr(data, "edge_index"))
            )
        x = self.__sequential_encoding_layers[-2](
            autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False
        )
        return x

    def lp_decode(self, z, pos_edge_index, neg_edge_index):
        """Return inner-product logits for the positive edges followed by the negative ones."""
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def lp_decode_all(self, z):
        """Return the index pairs whose inner product ``z @ z.t()`` is positive."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()


@register_model("sage")
class AutoSAGE(BaseModel):
    r"""
    Auto-model wrapper around GraphSAGE, from the `"Inductive Representation Learning on
    Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper. The layer is

    .. math::

        \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot
        \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j

    Parameters
    ----------
    num_features: `int`.
        The dimension of features (0 when omitted).

    num_classes: `int`.
        The number of classes (0 when omitted).

    device: `torch.device` or `str`
        The device where model will be running on (``"cpu"`` when omitted).

    init: `bool`.
        When exactly ``True`` the underlying network is built immediately.

    """

    def __init__(
        self, num_features=None, num_classes=None, device=None, init=False, **args
    ):

        super(AutoSAGE, self).__init__()

        self.num_features = 0 if num_features is None else num_features
        self.num_classes = 0 if num_classes is None else int(num_classes)
        self.device = "cpu" if device is None else device
        self.init = True

        # Fixed (non-searched) constructor arguments.
        self.params = dict(
            features_num=self.num_features,
            num_class=self.num_classes,
        )

        # Hyper-parameter search space, one entry per tunable parameter.
        layer_count = {
            "parameterName": "num_layers",
            "type": "DISCRETE",
            "feasiblePoints": "2,3,4",
        }
        hidden_sizes = {
            "parameterName": "hidden",
            "type": "NUMERICAL_LIST",
            "numericalType": "INTEGER",
            "length": 3,
            "minValue": [8, 8, 8],
            "maxValue": [128, 128, 128],
            "scalingType": "LOG",
            "cutPara": ("num_layers",),
            "cutFunc": lambda x: x[0] - 1,
        }
        dropout_rate = {
            "parameterName": "dropout",
            "type": "DOUBLE",
            "maxValue": 0.8,
            "minValue": 0.2,
            "scalingType": "LINEAR",
        }
        activation = {
            "parameterName": "act",
            "type": "CATEGORICAL",
            "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
        }
        aggregation = {
            "parameterName": "agg",
            "type": "CATEGORICAL",
            "feasiblePoints": ["mean", "add", "max"],
        }
        self.space = [
            layer_count,
            hidden_sizes,
            dropout_rate,
            activation,
            aggregation,
        ]

        # Default hyper-parameters used until a search overrides them.
        self.hyperparams = dict(
            num_layers=3,
            hidden=[64, 32],
            dropout=0.5,
            act="relu",
            agg="mean",
        )

        self.initialized = False
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the GraphSAGE network once from the current hyper-parameters."""
        if self.initialized:
            return
        self.initialized = True
        settings = self.hyperparams
        network = GraphSAGE(
            self.num_features,
            self.num_classes,
            settings.get("hidden"),
            settings.get("act", "relu"),
            settings.get("dropout", None),
            settings.get("agg", "mean"),
        )
        self.model = network.to(self.device)

+ 169
- 0
autogl/module/model/dgl/topkpool.py View File

@@ -0,0 +1,169 @@
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from . import register_model
from .base import BaseModel, activate_func
from ....utils import get_logger

LOGGER = get_logger("TopkModel")


def set_default(args, d):
    """Fill ``args`` in place with the entries of ``d`` it does not have yet.

    Keys already present in ``args`` keep their current value. The same
    ``args`` object is returned.
    """
    for key, fallback in d.items():
        if key in args:
            continue
        args[key] = fallback
    return args


class Topkpool(torch.nn.Module):
    """Graph classifier with three GraphConv + TopKPooling stages.

    After every stage the per-graph max and mean read-outs are concatenated;
    the three read-outs are summed, optionally extended with graph-level
    features and classified by three linear layers.

    Parameters
    ----------
    args: dict
        Must contain ``features_num``, ``num_class``, ``num_graph_features``,
        ``ratio``, ``dropout`` and ``act``; an ``Exception`` listing the
        missing keys is raised otherwise.
    """

    def __init__(self, args):
        super(Topkpool, self).__init__()
        self.args = args

        required = [
            "features_num",
            "num_class",
            "num_graph_features",
            "ratio",
            "dropout",
            "act",
        ]
        missing_keys = list(set(required) - set(self.args.keys()))
        if len(missing_keys) > 0:
            raise Exception("Missing keys: %s." % ",".join(missing_keys))

        self.num_features = self.args["features_num"]
        self.num_classes = self.args["num_class"]
        self.ratio = self.args["ratio"]
        self.dropout = self.args["dropout"]
        self.num_graph_features = self.args["num_graph_features"]

        # Three convolution/pooling stages, all 128 wide.
        self.conv1 = GraphConv(self.num_features, 128)
        self.pool1 = TopKPooling(128, ratio=self.ratio)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=self.ratio)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=self.ratio)

        # 256 = max read-out (128) concatenated with mean read-out (128).
        self.lin1 = torch.nn.Linear(256 + self.num_graph_features, 128)
        self.lin2 = torch.nn.Linear(128, 64)
        self.lin3 = torch.nn.Linear(64, self.num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for the batch ``data``.

        Reads ``data.x``, ``data.edge_index`` and ``data.batch``; ``data.gf``
        is read only when ``num_graph_features`` is positive.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch
        if self.num_graph_features > 0:
            graph_feature = data.gf

        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        readouts = []
        for conv, pool in stages:
            x = F.relu(conv(x, edge_index))
            x, edge_index, _, batch, _, _ = pool(x, edge_index, None, batch)
            readouts.append(torch.cat([gmp(x, batch), gap(x, batch)], dim=1))

        x = readouts[0] + readouts[1] + readouts[2]
        if self.num_graph_features > 0:
            x = torch.cat([x, graph_feature], dim=-1)

        activation = self.args["act"]
        x = activate_func(self.lin1(x), activation)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = activate_func(self.lin2(x), activation)
        return F.log_softmax(self.lin3(x), dim=-1)


@register_model("topkpool")
class AutoTopkpool(BaseModel):
    r"""
    Auto-model wrapper around ``Topkpool``. The wrapped network follows
    https://arxiv.org/abs/1905.05178 and https://arxiv.org/abs/1905.02850.

    Parameters
    ----------
    num_features: `int`.
        The dimension of features. ``None`` is stored as 0.

    num_classes: `int`.
        The number of classes. ``None`` is stored as 0.

    device: `torch.device` or `str`
        The device where model will be running on. Defaults to ``"cpu"``.

    init: `bool`.
        If exactly ``True``, the network is built inside the constructor.

    num_graph_features: `int`.
        The number of graph-level features. ``None`` is stored as 0.

    **args
        Additional keyword arguments are accepted and ignored.
    """

    def __init__(
        self,
        num_features=None,
        num_classes=None,
        device=None,
        init=False,
        num_graph_features=None,
        **args
    ):
        super(AutoTopkpool, self).__init__()
        message = "topkpool __init__ get params num_graph_features {}"
        LOGGER.debug(message.format(num_graph_features))

        self.num_features = 0 if num_features is None else num_features
        self.num_classes = 0 if num_classes is None else int(num_classes)
        self.num_graph_features = (
            0 if num_graph_features is None else int(num_graph_features)
        )
        self.device = "cpu" if device is None else device
        # NOTE(review): set to True regardless of the `init` argument;
        # confirm whether this is intended.
        self.init = True

        # Fixed (non-searchable) constructor arguments of the network.
        self.params = dict(
            features_num=self.num_features,
            num_class=self.num_classes,
            num_graph_features=self.num_graph_features,
        )

        # Search space: two linear ranges over [0.1, 0.9] plus the activation.
        search_space = [
            {
                "parameterName": name,
                "type": "DOUBLE",
                "maxValue": 0.9,
                "minValue": 0.1,
                "scalingType": "LINEAR",
            }
            for name in ("ratio", "dropout")
        ]
        search_space.append(
            {
                "parameterName": "act",
                "type": "CATEGORICAL",
                "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"],
            }
        )
        self.space = search_space

        # Default hyper-parameters.
        self.hyperparams = dict(ratio=0.8, dropout=0.5, act="relu")

        self.initialized = False
        if init is True:
            self.initialize()

    def initialize(self):
        """Build the ``Topkpool`` network once and move it to ``self.device``."""
        if not self.initialized:
            self.initialized = True
            LOGGER.debug(
                "topkpool initialize with parameters {}".format(self.params)
            )
            # Hyper-parameters override fixed parameters on key collisions.
            merged = dict(self.params)
            merged.update(self.hyperparams)
            self.model = Topkpool(merged).to(self.device)

+ 176
- 0
test/model_glf/gclf_dgl.py View File

@@ -0,0 +1,176 @@
# Script preamble: standard imports interleaved with environment setup.
# The statement order matters, so imports are intentionally not regrouped.
import os
import sys
import logging
logging.basicConfig(level=logging.INFO)
from tqdm import tqdm

# Extend the import path two directories up
# (presumably so `autogl` resolves from the repository root - TODO confirm).
sys.path.append("../../")
print(os.getcwd())
# Select the DGL backend; this is set before any `autogl` module is imported.
os.environ["AUTOGL_BACKEND"] = "dgl"
#os.environ["AUTOGL_BACKEND"] = "pyg"
from autogl.backend import DependentBackend
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset, GINDataset
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from autogl.module.model.ginparser import Parser
from autogl.module.model.dataloader_gin import GINDataLoader
from autogl.module.model import GIN

from pdb import set_trace
import numpy as np
from autogl.solver.utils import set_seed
# Fix the seed once at import time; main() seeds torch and numpy again
# from args.seed.
set_seed(202106)


def train(args, net, trainloader, optimizer, criterion, epoch):
    """Run one training pass over ``trainloader`` and return the mean batch loss.

    ``net`` is put in training mode, every batch is moved to
    ``args.device``, and one optimizer step is taken per batch. A tqdm
    progress bar labelled with ``epoch`` is written to stdout.
    """
    net.train()

    total_iters = len(trainloader)
    loss_sum = 0
    # setup the offset to avoid the overlap with mouse cursor
    bar = tqdm(range(total_iters), unit='batch', position=2, file=sys.stdout)
    caption = 'epoch-{}'.format(epoch)

    for _, batch in zip(bar, trainloader):
        graphs, labels = batch
        # move the batch to the target device before the forward pass
        labels = labels.to(args.device)
        graphs = graphs.to(args.device)
        feat = graphs.ndata.pop('attr')

        loss = criterion(net(graphs, feat), labels)
        loss_sum += loss.item()

        # backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # report
        bar.set_description(caption)
    bar.close()

    # average of the per-batch losses
    return loss_sum / total_iters


def eval_net(args, net, dataloader, criterion):
    """Evaluate ``net`` on ``dataloader`` and return ``(loss, accuracy)``.

    Parameters
    ----------
    args:
        Object exposing ``device``; every batch is moved there.
    net:
        Model called as ``net(graphs, feat)``.
    dataloader:
        Iterable of ``(graphs, labels)`` batches; node features are read
        from ``graphs.ndata['attr']``.
    criterion:
        Loss returning the batch mean, which is re-weighted by batch size.

    Returns
    -------
    tuple
        Sample-weighted average loss and the fraction of correct predictions.

    Raises
    ------
    ZeroDivisionError
        If ``dataloader`` yields no samples.

    Notes
    -----
    ``net`` is always switched back to training mode on exit, even when
    evaluation raises.
    """
    net.eval()

    total = 0
    total_loss = 0
    total_correct = 0

    try:
        # Evaluation needs no gradients: skip autograd bookkeeping to save
        # memory and time.
        with torch.no_grad():
            for graphs, labels in dataloader:
                graphs = graphs.to(args.device)
                labels = labels.to(args.device)
                feat = graphs.ndata.pop('attr')
                total += len(labels)
                outputs = net(graphs, feat)
                _, predicted = torch.max(outputs, 1)

                total_correct += (predicted == labels).sum().item()
                loss = criterion(outputs, labels)
                # crossentropy(reduce=True) for default
                total_loss += loss.item() * len(labels)

        loss, acc = 1.0*total_loss / total, 1.0*total_correct / total
    finally:
        net.train()

    return loss, acc


def main(args):
    """Train and validate a GIN graph classifier as configured by ``args``.

    Seeds torch and numpy, picks the device, builds the dataset, loaders,
    model, optimizer and scheduler, then runs ``args.epochs`` epochs.
    After each epoch the train/validation metrics are shown on progress
    bars and, when ``args.filename`` is not empty, appended to that file.
    ``args.device`` is overwritten with a ``torch.device``.
    """
    # set up seeds, args.seed supported
    torch.manual_seed(seed=args.seed)
    np.random.seed(seed=args.seed)

    cuda_allowed = not args.disable_cuda
    is_cuda = cuda_allowed and torch.cuda.is_available()

    if not is_cuda:
        args.device = torch.device("cpu")
    else:
        args.device = torch.device("cuda:{}".format(args.device))
        torch.cuda.manual_seed_all(seed=args.seed)

    dataset = GINDataset(args.dataset, not args.learn_eps)

    loader_factory = GINDataLoader(
        dataset,
        batch_size=args.batch_size,
        device=args.device,
        seed=args.seed,
        shuffle=True,
        split_name='fold10',
        fold_idx=args.fold_idx,
    )
    # or split_name='rand', split_ratio=0.7
    trainloader, validloader = loader_factory.train_valid_loader()

    model = GIN(
        args.num_layers,
        args.num_mlp_layers,
        dataset.dim_nfeats,
        args.hidden_dim,
        dataset.gclasses,
        args.final_dropout,
        args.learn_eps,
        args.graph_pooling_type,
        args.neighbor_pooling_type,
    ).to(args.device)

    criterion = nn.CrossEntropyLoss()  # default reduce is true
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    # it's not cost-effective to handle the cursor and init 0
    # https://stackoverflow.com/a/23121189
    bar_options = dict(unit="epoch", ncols=0, file=sys.stdout)
    tbar = tqdm(range(args.epochs), position=3, **bar_options)
    vbar = tqdm(range(args.epochs), position=4, **bar_options)
    lrbar = tqdm(range(args.epochs), position=5, **bar_options)

    train_template = 'train set - average loss: {:.4f}, accuracy: {:.0f}%'
    valid_template = 'valid set - average loss: {:.4f}, accuracy: {:.0f}%'

    for epoch, _, _ in zip(tbar, vbar, lrbar):

        train(args, model, trainloader, optimizer, criterion, epoch)
        scheduler.step()

        train_loss, train_acc = eval_net(args, model, trainloader, criterion)
        tbar.set_description(train_template.format(train_loss, 100. * train_acc))

        valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
        vbar.set_description(valid_template.format(valid_loss, 100. * valid_acc))

        if args.filename != "":
            # one configuration line and one metrics line per epoch
            with open(args.filename, 'a') as f:
                f.write('%s %s %s %s\n' % (
                    args.dataset,
                    args.learn_eps,
                    args.neighbor_pooling_type,
                    args.graph_pooling_type,
                ))
                f.write("%f %f %f %f\n" % (
                    train_loss,
                    train_acc,
                    valid_loss,
                    valid_acc,
                ))

        eps_values = [layer.eps.data.item() for layer in model.ginlayers]
        lrbar.set_description(
            "Learning eps with learn_eps={}: {}".format(args.learn_eps, eps_values))

    for progress in (tbar, vbar, lrbar):
        progress.close()


if __name__ == '__main__':
    # Parse the command-line options, echo them, then run the experiment.
    args = Parser(description='GIN').args
    print('show all arguments configuration...', args, sep='\n')

    main(args)


Loading…
Cancel
Save