diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 9ccdfce..415e9bd 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -166,7 +166,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): self.__num_graphs_per_epoch: int = num_graphs_per_epoch " Set sampled_budget " - sampled_budget: int = kwargs.get("sampled_budget") + sampled_budget: int = kwargs.get("sampled_budget", 1e4) # todo: This is a version caused by current unreasonable initialization process # todo: Refactor the framework for trainer to fix in future version # if type(sampled_budget) != int: @@ -197,11 +197,16 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): __cpu_count: _typing.Optional[int] = os.cpu_count() return __cpu_count if __cpu_count else 0 - self.__training_sampler_num_workers: int = kwargs.get( - "training_sampler_num_workers", _cpu_count() - ) - if not 0 <= self.__training_sampler_num_workers <= _cpu_count(): - self.__training_sampler_num_workers: int = _cpu_count() + # self.__training_sampler_num_workers: int = kwargs.get( + # "training_sampler_num_workers", _cpu_count() + # ) + + # if not 0 <= self.__training_sampler_num_workers <= _cpu_count(): + # self.__training_sampler_num_workers: int = _cpu_count() + + # force to be 0 to be compatible with current pyg solution. 
+ self.__training_sampler_num_workers: int = 0 + super(NodeClassificationGraphSAINTTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) diff --git a/docs/docfile/tutorial/t_model.rst b/docs/docfile/tutorial/t_model.rst index b1ee81e..e46a94c 100644 --- a/docs/docfile/tutorial/t_model.rst +++ b/docs/docfile/tutorial/t_model.rst @@ -3,68 +3,266 @@ AutoGL Model ============ -AutoGL project uses ``model`` to define the common graph nerual networks and ``automodel`` to denote the relative class that includes some auto functions. Currently, we support the following models and automodels: +In AutoGL, we use ``model`` and ``automodel`` to define the logic of graph neural networks and make it compatible with hyper-parameter optimization. Currently we support the following models for given tasks. -* ``GCN`` and ``AutoGCN`` : graph convolutional network from https://arxiv.org/abs/1609.02907 -* ``GAT`` and ``AutoGAT`` : graph attentional network from https://arxiv.org/abs/1710.10903 -* ``GraphSAGE`` and ``AutoGraphSAGE`` : from the "Inductive Representation Learning on Large Graphs" https://arxiv.org/abs/1706.02216 ++----------------------+----------------------------+ +| Tasks | Models | ++======================+============================+ +| Node Classification | ``gcn``, ``gat``, ``sage`` | ++----------------------+----------------------------+ +| Graph Classification | ``gin``, ``topk`` | ++----------------------+----------------------------+ +| Link Prediction | ``gcn``, ``gat``, ``sage`` | ++----------------------+----------------------------+ -And we also support the following models and automodels for graph classification tasks: -* ``GIN`` and ``AutoGIN`` : graph isomorphism network from https://arxiv.org/abs/1810.00826 -* ``Topkpool`` and ``AutoTopkpool`` : graph U-Net from https://arxiv.org/abs/1905.05178, https://arxiv.org/abs/1905.02850 +Lazy Initialization +------------------- + +In the current AutoGL pipeline, some important 
hyper-parameters related to the model cannot be set before the pipeline runs (e.g. input dimensions, which can only be calculated at run time after feature engineering). Therefore, in ``automodel``, we use lazy initialization to initialize the core ``model``. When the ``automodel`` initialization method ``__init__()`` is called with argument ``init`` set to ``False``, only (part of) the hyper-parameters will be set. The ``automodel`` will have its core ``model`` only after ``initialize()`` is explicitly called, which will be done automatically in ``solver`` and ``from_hyper_parameter()``, after all the hyper-parameters are set properly. Define your own model and automodel ----------------------------------- -If you want to add your own model and automodel for some task, the only thing you should do is add a new model where the forward function should be fulfilled and a new automodel inherited from the basemodel. +We highly recommend you to define both ``model`` and ``automodel``, although you only need your ``automodel`` to communicate with ``solver`` and ``trainer``. The ``model`` will be responsible for parameter initialization and forward logic declaration, while the ``automodel`` will be responsible for the hyper-parameter definition and organization. + +General customization +^^^^^^^^^^^^^^^^^^^^^ -For new models used in link prediction tasks, you should fulfill the lp_encode and lp_decode function. The difference between lp_encode and forward function is that there is not classification layer in lp_encode. +Let's say you want to implement a simple MLP for node classification and want to let AutoGL find the best hyper-parameters for you. You can first define the logic assuming all the hyper-parameters are given. + +.. 
code-block:: python + import torch + + # define mlp model, need to inherit from torch.nn.Module + class MyMLP(torch.nn.Module): + # assume you already get all the hyper-parameters + def __init__(self, in_channels, num_classes, layer_num, dim): + super().__init__() + if layer_num == 1: + ops = [torch.nn.Linear(in_channels, num_classes)] + else: + ops = [torch.nn.Linear(in_channels, dim)] + for i in range(layer_num - 2): + ops.append(torch.nn.Linear(dim, dim)) + ops.append(torch.nn.Linear(dim, num_classes)) + + self.core = torch.nn.Sequential(*ops) + + # this method is required + def forward(self, data): + # data: torch_geometric.data.Data + assert hasattr(data, 'x'), 'MLP only support graph data with features' + x = data.x + return torch.nn.functional.log_softmax(self.core(x)) -Firstly, you should define your model if it does not belong to the models above. -Secondly, you should define your corresponding automodel. +After you define the logic of ``model``, you can now define your ``automodel`` to manage the hyper-parameters. .. code-block:: python - # 1. define your search space to self.space of your automodel instance - [ - {'parameterName': 'num_layers', 'type': 'DISCRETE', 'feasiblePoints': '2,3,4'}, - {"parameterName": 'hidden', "type": "NUMERICAL_LIST", "numericalType": "INTEGER", "length": 3, "minValue": [8, 8, 8], "maxValue": [64, 64, 64], "scalingType": "LOG"}, - {'parameterName': 'dropout', 'type': 'DOUBLE', 'maxValue': 0.9, 'minValue': 0.1, 'scalingType': 'LINEAR'}, - {'parameterName': 'act', 'type': 'CATEGORICAL_LIST', "feasiblePoints": ['leaky_relu', 'relu', 'elu', 'tanh']}, - ] - # 2. define the default point to self.hyperparams of your automodel instance - { - 'num_layers': 2, - 'hidden': [16], - 'dropout': 0.2, - 'act': 'leaky_relu' - } - -Where ``self.space`` is a list of dictionary indicating the name, type, feasible point, min/max value and some properties of the parameter. 
``self.hyperparams`` is a dictionary indicating the hyper-parameters used in this model. - -Finally, you can use the defined model and automodel for the specific need. + from autogl.module.model import BaseModel + + # define your automodel, need to inherit from BaseModel + class MyAutoMLP(BaseModel): + def __init__(self): + # (required) make sure you call __init__ of super with init argument properly set. + # if you do not want to initialize inside __init__, please pass False. + super().__init__(init=False) + + # (required) define the search space + self.space = [ + {'parameterName': 'layer_num', 'type': 'INTEGER', 'minValue': 1, 'maxValue': 5, 'scalingType': 'LINEAR'}, + {'parameterName': 'dim', 'type': 'INTEGER', 'minValue': 64, 'maxValue': 128, 'scalingType': 'LINEAR'} + ] + + # set default hyper-parameters + self.layer_num = 2 + self.dim = 72 + + # for the hyper-parameters that are related with dataset, you can just set them to None + self.num_classes = None + self.num_features = None + + # (required) since we don't know the num_classes and num_features until we see the dataset, + # we cannot initialize the models when instantiated. the initialized will be set to False. 
+ self.initialized = False + + # (required) set the device of current auto model + self.device = torch.device('cuda') + + # (required) get current hyper-parameters of this automodel + # need to return a dictionary whose keys are the same as those in self.space + def get_hyper_parameter(self): + return { + 'layer_num': self.layer_num, + 'dim': self.dim + } + + # (required) override to interact with num_classes + def get_num_classes(self): + return self.num_classes + + # (required) override to interact with num_classes + def set_num_classes(self, n_classes): + self.num_classes = n_classes + + # (required) override to interact with num_features + def get_num_features(self): + return self.num_features + + # (required) override to interact with num_features + def set_num_features(self, n_features): + self.num_features = n_features + + # (required) instantiate the core MLP model using corresponding hyper-parameters + def initialize(self): + # (required) you need to make sure the core model is named as `self.model` + self.model = MyMLP( + in_channels = self.num_features, + num_classes = self.num_classes, + layer_num = self.layer_num, + dim = self.dim + ).to(self.device) + + self.initialized = True + + # (required) override to create a copy of model using provided hyper-parameters + def from_hyper_parameter(self, hp): + # hp is a dictionary that contains keys and values corresponding to your self.space + # in this case, it will be in form {'layer_num': XX, 'dim': XX} + + # create a new instance + ret = self.__class__() + + # set the hyper-parameters related to dataset and device + ret.num_classes = self.num_classes + ret.num_features = self.num_features + ret.device = self.device + + # set the hyper-parameters according to hp + ret.layer_num = hp['layer_num'] + ret.dim = hp['dim'] + + # initialize it before returning + ret.initialize() + + return ret + + +Then, you can use this node classification model as part of AutoNodeClassifier ``solver``. .. 
code-block :: python - # for example - import torch - from .base import BaseModel - class YourGNN(torch.nn.Module): + from autogl.solver import AutoNodeClassifier + + solver = AutoNodeClassifier(graph_models=(MyAutoMLP(),)) + + +The model for graph classification is generally the same, except that you can now also receive the ``num_graph_features`` (the dimension of the graph-level feature) through overriding ``set_num_graph_features(self, n_graph_features)`` of ``BaseModel``. Also, please remember to return graph-level logits instead of node-level one in ``forward()`` of ``model``. + +Model for link prediction +^^^^^^^^^^^^^^^^^^^^^^^^^ + +For link prediction, the definition of model is a bit different with the common forward definition. You need to implement the ``lp_encode(self, data)`` and ``lp_decode(self, x, pos_edge_index, neg_edge_index)`` to interact with ``LinkPredictionTrainer`` and ``AutoLinkPredictor``. Taking the class ``MyMLP`` defined above for example, if you want to perform link prediction: + +.. 
code-block:: python + + class MyMLPForLP(torch.nn.Module): + # num_classes is removed since it is invalid for link prediction + def __init__(self, in_channels, layer_num, dim): + super().__init__() + ops = [torch.nn.Linear(in_channels, dim)] + for i in range(layer_num - 1): + ops.append(torch.nn.Linear(dim, dim)) + + self.core = torch.nn.Sequential(*ops) + + # (required) for interaction with link prediction trainer and solver + def lp_encode(self, data): + return self.core(data.x) + + # (required) for interaction with link prediction trainer and solver + def lp_decode(self, x, pos_edge_index, neg_edge_index): + # first, get all the edge_index need calculated + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + # then, use dot-products to calculate logits, you can use whatever decode method you want + logits = (x[edge_index[0]] * x[edge_index[1]]).sum(dim=-1) + return logits + + class MyAutoMLPForLP(MyAutoMLP): + def initialize(self): + # init MyMLPForLP instead of MyMLP + self.model = MyMLPForLP( + in_channels = self.num_features, + layer_num = self.layer_num, + dim = self.dim + ).to(self.device) + + self.initialized = True + + +Model with sampling support +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Towards efficient representation learning on large-scale graph, AutoGL currently support node classification using sampling techniques including node-wise sampling, layer-wise sampling, and graph-wise sampling. See more about sampling in :ref:`trainer`. + +In order to conduct node classification using sampling technique with your custom model, further adaptation and modification are generally required. +According to the Message Passing mechanism of Graph Neural Network (GNN), numerous nodes in the multi-hop neighborhood of evaluation set or test set are potentially involved to evaluate the GNN model on large-scale graph dataset. 
+As the representations for those numerous nodes are likely to occupy large amount of computational resource, the common forwarding process is generally infeasible for model evaluation on large-scale graph. +An iterative representation learning mechanism is a practical and feasible way to evaluate **Sequential Model**, +which only consists of multiple sequential layers, with each layer taking a ``Data`` aggregate as input. The input ``Data`` has the same functionality with ``torch_geometric.data.Data``, which conventionally provides properties ``x``, ``edge_index``, and optional ``edge_weight``. +If your custom model is composed of concatenated layers, you would better make your model inherit ``ClassificationSupportedSequentialModel`` to utilize the layer-wise representation learning mechanism to efficiently conduct representation learning for your custom sequential model. + +.. code-block:: python + + import autogl + from autogl.module.model.base import ClassificationSupportedSequentialModel + + # override Linear so that it can take graph data as input + class Linear(torch.nn.Linear): def forward(self, data): - pass # Your forward function - - class YourAutoGNN(BaseModel): - def __init__(self, num_features=None, num_classes=None, device=None, init=True, **args): - """ - num_features: the number of features - num_classes: the number of classes - device: your device to run code - init: if True, the model will be initialize - """ - self.space = XXX # Define your search space - self.hyperparams = XXX # Define your hyper-parameters - self.initialized = False - if init is True: - self.initialize() + return super().forward(data.x) + + class MyMLPSampling(ClassificationSupportedSequentialModel): + def __init__(self, in_channels, num_classes, layer_num, dim): + super().__init__() + if layer_num == 1: + ops = [Linear(in_channels, num_classes)] + else: + ops = [Linear(in_channels, dim)] + for i in range(layer_num - 2): + ops.append(Linear(dim, dim)) + ops.append(Linear(dim, 
num_classes)) + + self.core = torch.nn.ModuleList(ops) + + # (required) override sequential_encoding_layers property to interact with sampling + @property + def sequential_encoding_layers(self) -> torch.nn.ModuleList: + return self.core + + # (required) define the encode logic of classification for sampling + def cls_encode(self, data): + # if you use sampling, the data will be passed in two possible ways, + # you can tell them apart using the following rules + if hasattr(data, 'edge_indexes'): + # the edge_indexes are a list of edge_index, one for each layer + edge_indexes = data.edge_indexes + edge_weights = [None] * len(self.core) if getattr(data, 'edge_weights', None) is None else data.edge_weights + else: + # the edge_index and edge_weight will stay the same as default + edge_indexes = [data.edge_index] * len(self.core) + edge_weights = [getattr(data, 'edge_weight', None)] * len(self.core) + + x = data.x + for i in range(len(self.core)): + data = autogl.data.Data(x=x, edge_index=edge_indexes[i]) + data.edge_weight = edge_weights[i] + x = self.sequential_encoding_layers[i](data) + return x + + # (required) define the decode logic of classification for sampling + def cls_decode(self, x): + return torch.nn.functional.log_softmax(x) + diff --git a/docs/docfile/tutorial/t_trainer.rst b/docs/docfile/tutorial/t_trainer.rst index 86979e5..36f9888 100644 --- a/docs/docfile/tutorial/t_trainer.rst +++ b/docs/docfile/tutorial/t_trainer.rst @@ -10,34 +10,10 @@ AutoGL project use ``trainer`` to handle the auto-training of tasks. Currently, * ``LinkPredictionTrainer`` for link prediction -Initialization --------------- +Lazy Initialization +------------------- +For a similar reason as in :ref:`model`, we also use lazy initialization for all trainers. Only (part of) the hyper-parameters will be set when ``__init__()`` is called. 
The ``trainer`` will have its core ``model`` only after ``initialize()`` is explicitly called, which will be done automatically in ``solver`` and ``duplicate_from_hyper_parameter()``, after all the hyper-parameters are set properly. -A trainer can either be initialized from its ``__init__()``. If you want to build a trainer by ``__init__()``, you need to pass the following parameters to it, namely as ``model``, ``num_features``, and ``num_classes`` and ``auto ensemble``. You can also define some parameters alternatively, including ``optimizer``, ``lr``, ``max_epoch``, ``early_stopping_round``, ``weight_decay`` and etc. - -In the ``__init__()``, you need to define the space and hyperparameter of your trainer: - -.. code-block:: python - - # 1. define your search space of trainer - self.space = [ - {'parameterName': 'max_epoch', 'type': 'INTEGER', 'maxValue': 300, 'minValue': 10, 'scalingType': 'LINEAR'}, - {'parameterName': 'early_stopping_round', 'type': 'INTEGER', 'maxValue': 30, 'minValue': 10, - 'scalingType': 'LINEAR'}, - {'parameterName': 'lr', 'type': 'DOUBLE', 'maxValue': 1e-3, 'minValue': 1e-4, 'scalingType': 'LOG'}, - {'parameterName': 'weight_decay', 'type': 'DOUBLE', 'maxValue': 5e-3, 'minValue': 5e-4, - 'scalingType': 'LOG'}, - ] - - # 2. define the initial point of hyperparameter search of your trainer - self.hyperparams = { - 'max_epoch': self.max_epoch, - 'early_stopping_round': self.early_stopping_round, - 'lr': self.lr, - 'weight_decay': self.weight_decay - } - -Where ``self.space`` is a list of dictionary indicating the name, type, and some properties of the parameter. ``self.hyperparams`` is a dictionary indicating the hyper-parameters used in this trainer. Train and Predict ----------------- @@ -48,7 +24,7 @@ We have given the training and testing functions for the tasks of node classific The evaluation function is defined in ``evaluate()``, you can use your our evaluation metrics and methods. 
Node Classification with Sampling ------------------------------------- +--------------------------------- According to various present studies, training with spatial sampling has been demonstrated as an efficient technique for representation learning on large-scale graph. We provide implementations for various representative sampling mechanisms including @@ -84,14 +60,131 @@ The sampling techniques can be utilized by adopting corresponding trainer and ``NodeClassificationNeighborSamplingTrainer``. You can either specify the corresponding name of trainer in YAML configuration file or instantiate the solver ``AutoNodeClassifier`` -with the instance of specific trainer as ``model`` argument. +with the instance of specific trainer. However, please make sure to manage some key +hyper-parameters properly inside the hyper-parameter space. Specifically: + +For ``NodeClassificationLayerDependentImportanceSamplingTrainer``, you need to set the +hyper-parameter ``sampled_node_sizes`` properly. The space of ``sampled_node_sizes`` should +be a list of the same size as your **Sequential Model**. For example, if you have a +model with layer number 4, you need to pass the hyper-parameter space properly: + +.. code-block:: python + + solver = AutoNodeClassifier( + graph_models=(A_MODEL_WITH_4_LAYERS,), + default_trainer='NodeClassificationLayerDependentImportanceSamplingTrainer', + trainer_hp_space=[ + # (required) you need to set the trainer_hp_space properly. + { + 'parameterName': 'sampled_node_sizes', + 'type': 'NUMERICAL_LIST', + "numericalType": "INTEGER", + "length": 4, # same as the layer number of your model + "minValue": [200,200,200,200], + "maxValue": [1000,1000,1000,1000], + "scalingType": "LOG" + }, + ... + ] + ) + +If the layer number of your model is a searchable hyper-parameter, you can also set the ``cutPara`` +and ``cutFunc`` properly, to make it connected with the layer-number hyper-parameter of your model. + +.. 
code-block:: python + + ''' + Suppose the layer number of your model is of the following forms: + { + 'parameterName': 'layer_number', + 'type': 'INTEGER', + 'minValue': 2, + 'maxValue': 4, + 'scalingType': 'LOG' + } + ''' + + solver = AutoNodeClassifier( + graph_models=(A_MODEL_WITH_DYNAMIC_LAYERS,), + default_trainer='NodeClassificationLayerDependentImportanceSamplingTrainer', + trainer_hp_space=[ + # (required) you need to set the trainer_hp_space properly. + { + 'parameterName': 'sampled_node_sizes', + 'type': 'NUMERICAL_LIST', + "numericalType": "INTEGER", + "length": 4, # max length + "cutPara": ("layer_number", ), # link with layer_number + "cutFunc": lambda x:x[0], # link with layer_number + "minValue": [200,200,200,200], + "maxValue": [1000,1000,1000,1000], + "scalingType": "LOG" + }, + ... + ] + ) + + +Similarly, if you want to use ``NodeClassificationNeighborSamplingTrainer``, you need to +make sure setting the hyper-parameter ``sampling_sizes`` the same length as the layer number +of your model. For example: + +.. code-block:: python + + ''' + Suppose the layer number of your model is of the following forms: + { + 'parameterName': 'layer_number', + 'type': 'INTEGER', + 'minValue': 2, + 'maxValue': 4, + 'scalingType': 'LOG' + } + ''' + + solver = AutoNodeClassifier( + graph_models=(A_MODEL_WITH_DYNAMIC_LAYERS,), + default_trainer='NodeClassificationNeighborSamplingTrainer', + trainer_hp_space=[ + # (required) you need to set the trainer_hp_space properly. + { + 'parameterName': 'sampling_sizes', + 'type': 'NUMERICAL_LIST', + "numericalType": "INTEGER", + "length": 4, # max length + "cutPara": ("layer_number", ), # link with layer_number + "cutFunc": lambda x:x[0], # link with layer_number + "minValue": [20,20,20,20], + "maxValue": [100,100,100,100], + "scalingType": "LOG" + }, + ... + ] + ) -A brief example is demonstrated as follows: + +You can also pass a trainer inside model list directly. A brief example is demonstrated as follows: .. 
code-block:: python ladies_sampling_trainer = NodeClassificationLayerDependentImportanceSamplingTrainer( - model='gcn', num_features=dataset.num_features, num_classes=dataset.num_classes, - ... + model='gcn', num_features=dataset.num_features, num_classes=dataset.num_classes, ... ) + + ladies_sampling_trainer.hyper_parameter_space = [ + # (required) you need to set the trainer_hp_space properly. + { + 'parameterName': 'sampled_node_sizes', + 'type': 'NUMERICAL_LIST', + "numericalType": "INTEGER", + "length": 4, # max length + "cutPara": ("num_layers", ), # link with layer_number + "cutFunc": lambda x:x[0], # link with layer_number + "minValue": [200,200,200,200], + "maxValue": [1000,1000,1000,1000], + "scalingType": "LOG" + }, + ... + ] + AutoNodeClassifier(graph_models=(ladies_sampling_trainer,), ...)