| @@ -143,7 +143,8 @@ disable=print-statement, | |||
| too-many-arguments, | |||
| too-many-branches, | |||
| too-many-statements, | |||
| too-many-locals | |||
| too-many-locals, | |||
| relative-beyond-top-level | |||
| # Enable the message, report, category or checker with the given id(s). You can | |||
| # either give multiple identifier separated by comma (,) or put this option | |||
| @@ -1,21 +1,202 @@ | |||
| MIT License | |||
| Copyright (c) 2020 THUMNLab aglteam | |||
| Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| of this software and associated documentation files (the "Software"), to deal | |||
| in the Software without restriction, including without limitation the rights | |||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| copies of the Software, and to permit persons to whom the Software is | |||
| furnished to do so, subject to the following conditions: | |||
| The above copyright notice and this permission notice shall be included in all | |||
| copies or substantial portions of the Software. | |||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| SOFTWARE. | |||
| Apache License | |||
| Version 2.0, January 2004 | |||
| http://www.apache.org/licenses/ | |||
| TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | |||
| 1. Definitions. | |||
| "License" shall mean the terms and conditions for use, reproduction, | |||
| and distribution as defined by Sections 1 through 9 of this document. | |||
| "Licensor" shall mean the copyright owner or entity authorized by | |||
| the copyright owner that is granting the License. | |||
| "Legal Entity" shall mean the union of the acting entity and all | |||
| other entities that control, are controlled by, or are under common | |||
| control with that entity. For the purposes of this definition, | |||
| "control" means (i) the power, direct or indirect, to cause the | |||
| direction or management of such entity, whether by contract or | |||
| otherwise, or (ii) ownership of fifty percent (50%) or more of the | |||
| outstanding shares, or (iii) beneficial ownership of such entity. | |||
| "You" (or "Your") shall mean an individual or Legal Entity | |||
| exercising permissions granted by this License. | |||
| "Source" form shall mean the preferred form for making modifications, | |||
| including but not limited to software source code, documentation | |||
| source, and configuration files. | |||
| "Object" form shall mean any form resulting from mechanical | |||
| transformation or translation of a Source form, including but | |||
| not limited to compiled object code, generated documentation, | |||
| and conversions to other media types. | |||
| "Work" shall mean the work of authorship, whether in Source or | |||
| Object form, made available under the License, as indicated by a | |||
| copyright notice that is included in or attached to the work | |||
| (an example is provided in the Appendix below). | |||
| "Derivative Works" shall mean any work, whether in Source or Object | |||
| form, that is based on (or derived from) the Work and for which the | |||
| editorial revisions, annotations, elaborations, or other modifications | |||
| represent, as a whole, an original work of authorship. For the purposes | |||
| of this License, Derivative Works shall not include works that remain | |||
| separable from, or merely link (or bind by name) to the interfaces of, | |||
| the Work and Derivative Works thereof. | |||
| "Contribution" shall mean any work of authorship, including | |||
| the original version of the Work and any modifications or additions | |||
| to that Work or Derivative Works thereof, that is intentionally | |||
| submitted to Licensor for inclusion in the Work by the copyright owner | |||
| or by an individual or Legal Entity authorized to submit on behalf of | |||
| the copyright owner. For the purposes of this definition, "submitted" | |||
| means any form of electronic, verbal, or written communication sent | |||
| to the Licensor or its representatives, including but not limited to | |||
| communication on electronic mailing lists, source code control systems, | |||
| and issue tracking systems that are managed by, or on behalf of, the | |||
| Licensor for the purpose of discussing and improving the Work, but | |||
| excluding communication that is conspicuously marked or otherwise | |||
| designated in writing by the copyright owner as "Not a Contribution." | |||
| "Contributor" shall mean Licensor and any individual or Legal Entity | |||
| on behalf of whom a Contribution has been received by Licensor and | |||
| subsequently incorporated within the Work. | |||
| 2. Grant of Copyright License. Subject to the terms and conditions of | |||
| this License, each Contributor hereby grants to You a perpetual, | |||
| worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |||
| copyright license to reproduce, prepare Derivative Works of, | |||
| publicly display, publicly perform, sublicense, and distribute the | |||
| Work and such Derivative Works in Source or Object form. | |||
| 3. Grant of Patent License. Subject to the terms and conditions of | |||
| this License, each Contributor hereby grants to You a perpetual, | |||
| worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |||
| (except as stated in this section) patent license to make, have made, | |||
| use, offer to sell, sell, import, and otherwise transfer the Work, | |||
| where such license applies only to those patent claims licensable | |||
| by such Contributor that are necessarily infringed by their | |||
| Contribution(s) alone or by combination of their Contribution(s) | |||
| with the Work to which such Contribution(s) was submitted. If You | |||
| institute patent litigation against any entity (including a | |||
| cross-claim or counterclaim in a lawsuit) alleging that the Work | |||
| or a Contribution incorporated within the Work constitutes direct | |||
| or contributory patent infringement, then any patent licenses | |||
| granted to You under this License for that Work shall terminate | |||
| as of the date such litigation is filed. | |||
| 4. Redistribution. You may reproduce and distribute copies of the | |||
| Work or Derivative Works thereof in any medium, with or without | |||
| modifications, and in Source or Object form, provided that You | |||
| meet the following conditions: | |||
| (a) You must give any other recipients of the Work or | |||
| Derivative Works a copy of this License; and | |||
| (b) You must cause any modified files to carry prominent notices | |||
| stating that You changed the files; and | |||
| (c) You must retain, in the Source form of any Derivative Works | |||
| that You distribute, all copyright, patent, trademark, and | |||
| attribution notices from the Source form of the Work, | |||
| excluding those notices that do not pertain to any part of | |||
| the Derivative Works; and | |||
| (d) If the Work includes a "NOTICE" text file as part of its | |||
| distribution, then any Derivative Works that You distribute must | |||
| include a readable copy of the attribution notices contained | |||
| within such NOTICE file, excluding those notices that do not | |||
| pertain to any part of the Derivative Works, in at least one | |||
| of the following places: within a NOTICE text file distributed | |||
| as part of the Derivative Works; within the Source form or | |||
| documentation, if provided along with the Derivative Works; or, | |||
| within a display generated by the Derivative Works, if and | |||
| wherever such third-party notices normally appear. The contents | |||
| of the NOTICE file are for informational purposes only and | |||
| do not modify the License. You may add Your own attribution | |||
| notices within Derivative Works that You distribute, alongside | |||
| or as an addendum to the NOTICE text from the Work, provided | |||
| that such additional attribution notices cannot be construed | |||
| as modifying the License. | |||
| You may add Your own copyright statement to Your modifications and | |||
| may provide additional or different license terms and conditions | |||
| for use, reproduction, or distribution of Your modifications, or | |||
| for any such Derivative Works as a whole, provided Your use, | |||
| reproduction, and distribution of the Work otherwise complies with | |||
| the conditions stated in this License. | |||
| 5. Submission of Contributions. Unless You explicitly state otherwise, | |||
| any Contribution intentionally submitted for inclusion in the Work | |||
| by You to the Licensor shall be under the terms and conditions of | |||
| this License, without any additional terms or conditions. | |||
| Notwithstanding the above, nothing herein shall supersede or modify | |||
| the terms of any separate license agreement you may have executed | |||
| with Licensor regarding such Contributions. | |||
| 6. Trademarks. This License does not grant permission to use the trade | |||
| names, trademarks, service marks, or product names of the Licensor, | |||
| except as required for reasonable and customary use in describing the | |||
| origin of the Work and reproducing the content of the NOTICE file. | |||
| 7. Disclaimer of Warranty. Unless required by applicable law or | |||
| agreed to in writing, Licensor provides the Work (and each | |||
| Contributor provides its Contributions) on an "AS IS" BASIS, | |||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| implied, including, without limitation, any warranties or conditions | |||
| of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | |||
| PARTICULAR PURPOSE. You are solely responsible for determining the | |||
| appropriateness of using or redistributing the Work and assume any | |||
| risks associated with Your exercise of permissions under this License. | |||
| 8. Limitation of Liability. In no event and under no legal theory, | |||
| whether in tort (including negligence), contract, or otherwise, | |||
| unless required by applicable law (such as deliberate and grossly | |||
| negligent acts) or agreed to in writing, shall any Contributor be | |||
| liable to You for damages, including any direct, indirect, special, | |||
| incidental, or consequential damages of any character arising as a | |||
| result of this License or out of the use or inability to use the | |||
| Work (including but not limited to damages for loss of goodwill, | |||
| work stoppage, computer failure or malfunction, or any and all | |||
| other commercial damages or losses), even if such Contributor | |||
| has been advised of the possibility of such damages. | |||
| 9. Accepting Warranty or Additional Liability. While redistributing | |||
| the Work or Derivative Works thereof, You may choose to offer, | |||
| and charge a fee for, acceptance of support, warranty, indemnity, | |||
| or other liability obligations and/or rights consistent with this | |||
| License. However, in accepting such obligations, You may act only | |||
| on Your own behalf and on Your sole responsibility, not on behalf | |||
| of any other Contributor, and only if You agree to indemnify, | |||
| defend, and hold each Contributor harmless for any liability | |||
| incurred by, or claims asserted against, such Contributor by reason | |||
| of your accepting any such warranty or additional liability. | |||
| END OF TERMS AND CONDITIONS | |||
| APPENDIX: How to apply the Apache License to your work. | |||
| To apply the Apache License to your work, attach the following | |||
| boilerplate notice, with the fields enclosed by brackets "[]" | |||
| replaced with your own identifying information. (Don't include | |||
| the brackets!) The text should be enclosed in the appropriate | |||
| comment syntax for the file format. We also recommend that a | |||
| file or class name and description of purpose be included on the | |||
| same "printed page" as the copyright notice for easier | |||
| identification within third-party archives. | |||
| Copyright [yyyy] [name of copyright owner] | |||
| Licensed under the Apache License, Version 2.0 (the "License"); | |||
| you may not use this file except in compliance with the License. | |||
| You may obtain a copy of the License at | |||
| http://www.apache.org/licenses/LICENSE-2.0 | |||
| Unless required by applicable law or agreed to in writing, software | |||
| distributed under the License is distributed on an "AS IS" BASIS, | |||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| See the License for the specific language governing permissions and | |||
| limitations under the License. | |||
| @@ -11,7 +11,8 @@ Feel free to open <a href="https://github.com/THUMNLab/AutoGL/issues">issues</a> | |||
| ## News! | |||
| - 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://openreview.net/forum?id=0yHwpLeInDn) are accepted in _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper following methods [here](#Cite). | |||
| - 2021.07.11 New version! v0.2.0-pre is here! In new version, AutoGL support neural architecture search to customize the architectures for the given datasets and tasks. AutoGL also support sampling now to perform tasks on large datasets, including node-wise sampling, layer-wise sampling and sub-graph sampling. Link prediction task is now also supported! Learn more in our [tutorial](https://autogl.readthedocs.io/en/latest/index.html). | |||
| - 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://arxiv.org/abs/2104.04987) are accepted in _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper following methods [here](#Cite). | |||
| ## Introduction | |||
| @@ -23,7 +24,7 @@ The workflow below shows the overall framework of AutoGL. | |||
| AutoGL uses `datasets` to maintain dataset for graph-based machine learning, which is based on Dataset in PyTorch Geometric with some support added to corporate with the auto solver framework. | |||
| Different graph-based machine learning tasks are solved by different `AutoGL solvers`, which make use of four main modules to automatically solve given tasks, namely `auto feature engineer`, `auto model`, `hyperparameter optimization`, and `auto ensemble`. | |||
| Different graph-based machine learning tasks are solved by different `AutoGL solvers`, which make use of five main modules to automatically solve given tasks, namely `auto feature engineer`, `neural architecture search`, `auto model`, `hyperparameter optimization`, and `auto ensemble`. | |||
| Currently, the following algorithms are supported in AutoGL: | |||
| @@ -33,20 +34,34 @@ Currently, the following algorithms are supported in AutoGL: | |||
| <tr valign="top"> | |||
| <td>Feature Engineer</td> | |||
| <td>Model</td> | |||
| <td>NAS</td> | |||
| <td>HPO</td> | |||
| <td>Ensemble</td> | |||
| </tr> | |||
| <tr valign="top"> | |||
| <!--<td><b>Generators</b><br>graphlet <br> eigen <br> pagerank <br> PYGLocalDegreeProfile <br> PYGNormalizeFeatures <br> PYGOneHotDegree <br> onehot <br> <br><b>Selectors</b><br> SeFilterConstant<br> gbdt <br> <br><b>Subgraph</b><br> NxLargeCliqueSize<br> NxAverageClusteringApproximate<br> NxDegreeAssortativityCoefficient<br> NxDegreePearsonCorrelationCoefficient<br> NxHasBridge <br>NxGraphCliqueNumber<br> NxGraphNumberOfCliques<br> NxTransitivity<br> NxAverageClustering<br> NxIsConnected<br> NxNumberConnectedComponents<br> NxIsDistanceRegular<br> NxLocalEfficiency<br> NxGlobalEfficiency<br> NxIsEulerian </td>--> | |||
| <td><b>Generators</b><br>graphlet <br> eigen <br> <a href="https://autogl.readthedocs.io/en/latest/docfile/tutorial/t_fe.html">more ...</a><br><br><b>Selectors</b><br> SeFilterConstant<br> gbdt <br> <br><b>Subgraph</b><br> netlsd<br> NxAverageClustering<br> <a href="https://autogl.readthedocs.io/en/latest/docfile/tutorial/t_fe.html">more ...</a></td> | |||
| <td><b>Generators</b><br>graphlet <br> eigen <br> <a href="https://autogl.readthedocs.io/en/latest/docfile/tutorial/t_fe.html">more ...</a><br><br><b>Selectors</b><br> SeFilterConstant<br> gbdt <br> <br><b>Graph</b><br> netlsd<br> NxAverageClustering<br> <a href="https://autogl.readthedocs.io/en/latest/docfile/tutorial/t_fe.html">more ...</a></td> | |||
| <td><b>Node Classification</b><br> GCN <br> GAT <br> GraphSAGE <br><br><b>Graph Classification</b><br> GIN <br> TopKPool </td> | |||
| <td> | |||
| <b>Algorithms</b><br> | |||
| Random<br> | |||
| RL<br> | |||
| <a href='#'>more ...</a><br><br> | |||
| <b>Spaces</b><br> | |||
| SinglePath<br> | |||
| GraphNas<br> | |||
| <a href='#'>more ...</a><br><br> | |||
| <b>Estimators</b><br> | |||
| Oneshot<br> | |||
| Scratch<br> | |||
| </td> | |||
| <td> Grid <br> Random <br> Anneal <br> Bayes <br> CAMES <br> MOCAMES <br> Quasi random <br> TPE <br> AutoNE </td> | |||
| <td> Voting <br> Stacking </td> | |||
| </tr> | |||
| </tbody> | |||
| </table> | |||
| This toolkit also serves as a platform for users to implement and test their own autoML or graph-based machine learning models. | |||
| This toolkit also serves as a framework for users to implement and test their own autoML or graph-based machine learning models. | |||
| ## Installation | |||
| @@ -56,11 +71,11 @@ Please make sure you meet the following requirements before installing AutoGL. | |||
| 1. Python >= 3.6.0 | |||
| 2. PyTorch (>=1.5.1) | |||
| 2. PyTorch (>=1.6.0) | |||
| see <https://pytorch.org/> for installation. | |||
| 3. PyTorch Geometric | |||
| 3. PyTorch Geometric (>=1.7.0) | |||
| see <https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html> for installation. | |||
| @@ -71,7 +86,7 @@ Please make sure you meet the following requirements before installing AutoGL. | |||
| Run the following command to install this package through `pip`. | |||
| ``` | |||
| pip install auto-graph-learning | |||
| pip install autogl | |||
| ``` | |||
| #### Install from source | |||
| @@ -111,7 +126,7 @@ The documentation will be automatically generated under `docs/_build/html` | |||
| ## Cite | |||
| You can cite [our paper](https://openreview.net/forum?id=0yHwpLeInDn) as follows if you use this code in your own work: | |||
| You can cite [our paper](https://arxiv.org/abs/2104.04987) as follows if you use this code in your own work: | |||
| ``` | |||
| @inproceedings{ | |||
| guan2021autogl, | |||
| @@ -1 +1,18 @@ | |||
| __version__ = "0.1.1" | |||
| from . import ( | |||
| data, | |||
| datasets, | |||
| module, | |||
| solver, | |||
| utils, | |||
| ) | |||
| from .module import ( | |||
| ensemble, | |||
| feature, | |||
| hpo, | |||
| model, | |||
| nas, | |||
| train, | |||
| ) | |||
| __version__ = "0.2.0-pre" | |||
| @@ -46,6 +46,7 @@ def register_dataset(name): | |||
| return register_dataset_cls | |||
| from .pyg import ( | |||
| AmazonComputersDataset, | |||
| AmazonPhotoDataset, | |||
| @@ -91,14 +92,16 @@ from .han_data import HANDataset, ACM_HANDataset, DBLP_HANDataset, IMDB_HANDatas | |||
| from .matlab_matrix import ( | |||
| MatlabMatrix, | |||
| BlogcatalogDataset, | |||
| FlickrDataset, | |||
| WikipediaDataset, | |||
| PPIDataset, | |||
| ) | |||
| from .modelnet import ( | |||
| ModelNet10, ModelNet40, | |||
| ModelNet10Train, ModelNet10Test, | |||
| ModelNet40Train, ModelNet40Test | |||
| ModelNet10, | |||
| ModelNet40, | |||
| ModelNet10Train, | |||
| ModelNet10Test, | |||
| ModelNet40Train, | |||
| ModelNet40Test, | |||
| ) | |||
| from .utils import ( | |||
| get_label_number, | |||
| @@ -110,6 +113,7 @@ from .utils import ( | |||
| graph_get_split, | |||
| ) | |||
| def build_dataset(args, path="~/.cache-autogl/"): | |||
| path = osp.join(path, "data", args.dataset) | |||
| path = os.path.expanduser(path) | |||
| @@ -120,9 +124,9 @@ def build_dataset_from_name(dataset_name, path="~/.cache-autogl/"): | |||
| path = osp.join(path, "data", dataset_name) | |||
| path = os.path.expanduser(path) | |||
| dataset = DATASET_DICT[dataset_name](path) | |||
| if 'ogbn' in dataset_name: | |||
| #dataset.data, dataset.slices = dataset.collate([dataset.data]) | |||
| #dataset.data.num_nodes = dataset.data.num_nodes[0] | |||
| if "ogbn" in dataset_name: | |||
| # dataset.data, dataset.slices = dataset.collate([dataset.data]) | |||
| # dataset.data.num_nodes = dataset.data.num_nodes[0] | |||
| if dataset.data.y.shape[-1] == 1: | |||
| dataset.data.y = torch.squeeze(dataset.data.y) | |||
| return dataset | |||
| @@ -132,10 +136,6 @@ __all__ = [ | |||
| "register_dataset", | |||
| "build_dataset", | |||
| "build_dataset_from_name", | |||
| "GatneDataset", | |||
| "GTNDataset", | |||
| "HANDataset", | |||
| "MatlabMatrix", | |||
| "get_label_number", | |||
| "random_splits_mask", | |||
| "random_splits_mask_class", | |||
| @@ -143,4 +143,61 @@ __all__ = [ | |||
| "graph_set_fold_id", | |||
| "graph_random_splits", | |||
| "graph_get_split", | |||
| "AmazonComputersDataset", | |||
| "AmazonPhotoDataset", | |||
| "CoauthorPhysicsDataset", | |||
| "CoauthorCSDataset", | |||
| "CoraDataset", | |||
| "CiteSeerDataset", | |||
| "PubMedDataset", | |||
| "RedditDataset", | |||
| "MUTAGDataset", | |||
| "IMDBBinaryDataset", | |||
| "IMDBMultiDataset", | |||
| "CollabDataset", | |||
| "ProteinsDataset", | |||
| "REDDITBinary", | |||
| "REDDITMulti5K", | |||
| "REDDITMulti12K", | |||
| "PTCMRDataset", | |||
| "NCI1Dataset", | |||
| "ENZYMES", | |||
| "QM9Dataset", | |||
| "OGBNproductsDataset", | |||
| "OGBNproteinsDataset", | |||
| "OGBNarxivDataset", | |||
| "OGBNpapers100MDataset", | |||
| "OGBNmagDataset", | |||
| "OGBGmolhivDataset", | |||
| "OGBGmolpcbaDataset", | |||
| "OGBGppaDataset", | |||
| "OGBGcodeDataset", | |||
| "OGBLppaDataset", | |||
| "OGBLcollabDataset", | |||
| "OGBLddiDataset", | |||
| "OGBLcitationDataset", | |||
| "OGBLwikikgDataset", | |||
| "OGBLbiokgDataset", | |||
| "GatneDataset", | |||
| "AmazonDataset", | |||
| "TwitterDataset", | |||
| "YouTubeDataset", | |||
| "GTNDataset", | |||
| "ACM_GTNDataset", | |||
| "DBLP_GTNDataset", | |||
| "IMDB_GTNDataset", | |||
| "HANDataset", | |||
| "ACM_HANDataset", | |||
| "DBLP_HANDataset", | |||
| "IMDB_HANDataset", | |||
| "MatlabMatrix", | |||
| "BlogcatalogDataset", | |||
| "WikipediaDataset", | |||
| "PPIDataset", | |||
| "ModelNet10", | |||
| "ModelNet40", | |||
| "ModelNet10Train", | |||
| "ModelNet10Test", | |||
| "ModelNet40Train", | |||
| "ModelNet40Test", | |||
| ] | |||
| @@ -69,13 +69,13 @@ class BlogcatalogDataset(MatlabMatrix): | |||
| super(BlogcatalogDataset, self).__init__(path, filename, url) | |||
| @register_dataset("flickr") | |||
| class FlickrDataset(MatlabMatrix): | |||
| def __init__(self, path): | |||
| dataset, filename = "flickr", "flickr" | |||
| url = "http://leitang.net/code/social-dimension/data/" | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| super(FlickrDataset, self).__init__(path, filename, url) | |||
| # @register_dataset("flickr") | |||
| # class FlickrDataset(MatlabMatrix): | |||
| # def __init__(self, path): | |||
| # dataset, filename = "flickr", "flickr" | |||
| # url = "http://leitang.net/code/social-dimension/data/" | |||
| # # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| # super(FlickrDataset, self).__init__(path, filename, url) | |||
| @register_dataset("wikipedia") | |||
| @@ -21,42 +21,50 @@ class ModelNet40(ModelNet): | |||
| @register_dataset("ModelNet10Train") | |||
| class ModelNet10Train(ModelNet): | |||
| def __init__(self, path: str): | |||
| super(ModelNet10Train, self).__init__(path, '10', train=True) | |||
| super(ModelNet10Train, self).__init__(path, "10", train=True) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(ModelNet10Train, self).get(idx) | |||
| @register_dataset("ModelNet10Test") | |||
| class ModelNet10Test(ModelNet): | |||
| def __init__(self, path: str): | |||
| super(ModelNet10Test, self).__init__(path, '10', train=False) | |||
| super(ModelNet10Test, self).__init__(path, "10", train=False) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(ModelNet10Test, self).get(idx) | |||
| @register_dataset("ModelNet40Train") | |||
| class ModelNet40Train(ModelNet): | |||
| def __init__(self, path: str): | |||
| super(ModelNet40Train, self).__init__(path, '40', train=True) | |||
| super(ModelNet40Train, self).__init__(path, "40", train=True) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(ModelNet40Train, self).get(idx) | |||
| @register_dataset("ModelNet40Test") | |||
| class ModelNet40Test(ModelNet): | |||
| def __init__(self, path: str): | |||
| super(ModelNet40Test, self).__init__(path, '40', train=False) | |||
| super(ModelNet40Test, self).__init__(path, "40", train=False) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(ModelNet40Test, self).get(idx) | |||
| @@ -30,15 +30,17 @@ class OGBNproductsDataset(PygNodePropPredDataset): | |||
| split_idx = self.get_idx_split() | |||
| datalist = [] | |||
| for d in self: | |||
| setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0])) | |||
| setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0])) | |||
| datalist.append(d) | |||
| self.data, self.slices = self.collate(datalist) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBNproductsDataset, self).get(idx) | |||
| @@ -49,7 +51,9 @@ class OGBNproteinsDataset(PygNodePropPredDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| PygNodePropPredDataset(name=dataset, root=path) | |||
| super(OGBNproteinsDataset, self).__init__(dataset, path) | |||
| dataset_t = PygNodePropPredDataset(name=dataset, root=path, transform=T.ToSparseTensor()) | |||
| dataset_t = PygNodePropPredDataset( | |||
| name=dataset, root=path, transform=T.ToSparseTensor() | |||
| ) | |||
| # Move edge features to node features. | |||
| self.data.x = dataset_t[0].adj_t.mean(dim=1) | |||
| @@ -61,15 +65,17 @@ class OGBNproteinsDataset(PygNodePropPredDataset): | |||
| split_idx = self.get_idx_split() | |||
| datalist = [] | |||
| for d in self: | |||
| setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0])) | |||
| setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0])) | |||
| datalist.append(d) | |||
| self.data, self.slices = self.collate(datalist) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBNproteinsDataset, self).get(idx) | |||
| @@ -86,15 +92,17 @@ class OGBNarxivDataset(PygNodePropPredDataset): | |||
| datalist = [] | |||
| for d in self: | |||
| setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0])) | |||
| setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0])) | |||
| datalist.append(d) | |||
| self.data, self.slices = self.collate(datalist) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBNarxivDataset, self).get(idx) | |||
| @@ -110,15 +118,17 @@ class OGBNpapers100MDataset(PygNodePropPredDataset): | |||
| split_idx = self.get_idx_split() | |||
| datalist = [] | |||
| for d in self: | |||
| setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0])) | |||
| setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0])) | |||
| datalist.append(d) | |||
| self.data, self.slices = self.collate(datalist) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBNpapers100MDataset, self).get(idx) | |||
| @@ -134,9 +144,10 @@ class OGBNmagDataset(PygNodePropPredDataset): | |||
| rel_data = self[0] | |||
| # We are only interested in paper <-> paper relations. | |||
| self.data = Data( | |||
| x=rel_data.x_dict['paper'], | |||
| edge_index=rel_data.edge_index_dict[('paper', 'cites', 'paper')], | |||
| y=rel_data.y_dict['paper']) | |||
| x=rel_data.x_dict["paper"], | |||
| edge_index=rel_data.edge_index_dict[("paper", "cites", "paper")], | |||
| y=rel_data.y_dict["paper"], | |||
| ) | |||
| # self.data = T.ToSparseTensor()(data) | |||
| # self[0].adj_t = self[0].adj_t.to_symmetric() | |||
| @@ -147,15 +158,17 @@ class OGBNmagDataset(PygNodePropPredDataset): | |||
| datalist = [] | |||
| for d in self: | |||
| setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0])) | |||
| setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0])) | |||
| setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0])) | |||
| setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0])) | |||
| datalist.append(d) | |||
| self.data, self.slices = self.collate(datalist) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBNmagDataset, self).get(idx) | |||
| @@ -171,10 +184,12 @@ class OGBGmolhivDataset(PygGraphPropPredDataset): | |||
| super(OGBGmolhivDataset, self).__init__(dataset, path) | |||
| setattr(OGBGmolhivDataset, "metric", "ROC-AUC") | |||
| setattr(OGBGmolhivDataset, "loss", "binary_cross_entropy_with_logits") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBGmolhivDataset, self).get(idx) | |||
| @@ -187,10 +202,12 @@ class OGBGmolpcbaDataset(PygGraphPropPredDataset): | |||
| super(OGBGmolpcbaDataset, self).__init__(dataset, path) | |||
| setattr(OGBGmolpcbaDataset, "metric", "AP") | |||
| setattr(OGBGmolpcbaDataset, "loss", "binary_cross_entropy_with_logits") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBGmolpcbaDataset, self).get(idx) | |||
| @@ -203,10 +220,12 @@ class OGBGppaDataset(PygGraphPropPredDataset): | |||
| super(OGBGppaDataset, self).__init__(dataset, path) | |||
| setattr(OGBGppaDataset, "metric", "Accuracy") | |||
| setattr(OGBGppaDataset, "loss", "cross_entropy") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBGppaDataset, self).get(idx) | |||
| @@ -219,10 +238,12 @@ class OGBGcodeDataset(PygGraphPropPredDataset): | |||
| super(OGBGcodeDataset, self).__init__(dataset, path) | |||
| setattr(OGBGcodeDataset, "metric", "F1 score") | |||
| setattr(OGBGcodeDataset, "loss", "cross_entropy") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBGcodeDataset, self).get(idx) | |||
| @@ -238,10 +259,12 @@ class OGBLppaDataset(PygLinkPropPredDataset): | |||
| super(OGBLppaDataset, self).__init__(dataset, path) | |||
| setattr(OGBLppaDataset, "metric", "Hits@100") | |||
| setattr(OGBLppaDataset, "loss", "pos_neg_loss") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBLppaDataset, self).get(idx) | |||
| @@ -254,10 +277,12 @@ class OGBLcollabDataset(PygLinkPropPredDataset): | |||
| super(OGBLcollabDataset, self).__init__(dataset, path) | |||
| setattr(OGBLcollabDataset, "metric", "Hits@50") | |||
| setattr(OGBLcollabDataset, "loss", "pos_neg_loss") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBLcollabDataset, self).get(idx) | |||
| @@ -270,10 +295,12 @@ class OGBLddiDataset(PygLinkPropPredDataset): | |||
| super(OGBLddiDataset, self).__init__(dataset, path) | |||
| setattr(OGBLddiDataset, "metric", "Hits@20") | |||
| setattr(OGBLddiDataset, "loss", "pos_neg_loss") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBLddiDataset, self).get(idx) | |||
| @@ -286,10 +313,12 @@ class OGBLcitationDataset(PygLinkPropPredDataset): | |||
| super(OGBLcitationDataset, self).__init__(dataset, path) | |||
| setattr(OGBLcitationDataset, "metric", "MRR") | |||
| setattr(OGBLcitationDataset, "loss", "pos_neg_loss") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBLcitationDataset, self).get(idx) | |||
| @@ -302,10 +331,12 @@ class OGBLwikikgDataset(PygLinkPropPredDataset): | |||
| super(OGBLwikikgDataset, self).__init__(dataset, path) | |||
| setattr(OGBLwikikgDataset, "metric", "MRR") | |||
| setattr(OGBLwikikgDataset, "loss", "pos_neg_loss") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBLwikikgDataset, self).get(idx) | |||
| @@ -318,8 +349,10 @@ class OGBLbiokgDataset(PygLinkPropPredDataset): | |||
| super(OGBLbiokgDataset, self).__init__(dataset, path) | |||
| setattr(OGBLbiokgDataset, "metric", "MRR") | |||
| setattr(OGBLbiokgDataset, "loss", "pos_neg_loss") | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(OGBLbiokgDataset, self).get(idx) | |||
| @@ -1,6 +1,7 @@ | |||
| import os.path as osp | |||
| import torch | |||
| # import torch_geometric.transforms as T | |||
| from torch_geometric.datasets import ( | |||
| Planetoid, | |||
| @@ -9,6 +10,7 @@ from torch_geometric.datasets import ( | |||
| QM9, | |||
| Amazon, | |||
| Coauthor, | |||
| Flickr, | |||
| ) | |||
| from torch_geometric.utils import remove_self_loops | |||
| from . import register_dataset | |||
| @@ -21,10 +23,12 @@ class AmazonComputersDataset(Amazon): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| Amazon(path, dataset) | |||
| super(AmazonComputersDataset, self).__init__(path, dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(AmazonComputersDataset, self).get(idx) | |||
| @@ -35,10 +39,12 @@ class AmazonPhotoDataset(Amazon): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| Amazon(path, dataset) | |||
| super(AmazonPhotoDataset, self).__init__(path, dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(AmazonPhotoDataset, self).get(idx) | |||
| @@ -49,10 +55,12 @@ class CoauthorPhysicsDataset(Coauthor): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| Coauthor(path, dataset) | |||
| super(CoauthorPhysicsDataset, self).__init__(path, dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(CoauthorPhysicsDataset, self).get(idx) | |||
| @@ -63,10 +71,12 @@ class CoauthorCSDataset(Coauthor): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| Coauthor(path, dataset) | |||
| super(CoauthorCSDataset, self).__init__(path, dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(CoauthorCSDataset, self).get(idx) | |||
| @@ -77,10 +87,12 @@ class CoraDataset(Planetoid): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| Planetoid(path, dataset) | |||
| super(CoraDataset, self).__init__(path, dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(CoraDataset, self).get(idx) | |||
| @@ -91,10 +103,12 @@ class CiteSeerDataset(Planetoid): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| Planetoid(path, dataset) | |||
| super(CiteSeerDataset, self).__init__(path, dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(CiteSeerDataset, self).get(idx) | |||
| @@ -105,10 +119,12 @@ class PubMedDataset(Planetoid): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| Planetoid(path, dataset) | |||
| super(PubMedDataset, self).__init__(path, dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(PubMedDataset, self).get(idx) | |||
| @@ -119,13 +135,29 @@ class RedditDataset(Reddit): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| Reddit(path) | |||
| super(RedditDataset, self).__init__(path) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(RedditDataset, self).get(idx) | |||
| @register_dataset("flickr") | |||
| class FlickrDataset(Flickr): | |||
| def __init__(self, path): | |||
| Flickr(path) | |||
| super(FlickrDataset, self).__init__(path) | |||
| def get(self, idx): | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(FlickrDataset, self).get(idx) | |||
| @register_dataset("mutag") | |||
| class MUTAGDataset(TUDataset): | |||
| def __init__(self, path): | |||
| @@ -135,8 +167,10 @@ class MUTAGDataset(TUDataset): | |||
| super(MUTAGDataset, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(MUTAGDataset, self).get(idx) | |||
| @@ -147,10 +181,12 @@ class IMDBBinaryDataset(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(IMDBBinaryDataset, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(IMDBBinaryDataset, self).get(idx) | |||
| @@ -161,10 +197,12 @@ class IMDBMultiDataset(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(IMDBMultiDataset, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(IMDBMultiDataset, self).get(idx) | |||
| @@ -175,10 +213,12 @@ class CollabDataset(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(CollabDataset, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(CollabDataset, self).get(idx) | |||
| @@ -189,10 +229,12 @@ class ProteinsDataset(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(ProteinsDataset, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(ProteinsDataset, self).get(idx) | |||
| @@ -203,10 +245,12 @@ class REDDITBinary(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(REDDITBinary, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(REDDITBinary, self).get(idx) | |||
| @@ -217,10 +261,12 @@ class REDDITMulti5K(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(REDDITMulti5K, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(REDDITMulti5K, self).get(idx) | |||
| @@ -231,10 +277,12 @@ class REDDITMulti12K(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(REDDITMulti12K, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(REDDITMulti12K, self).get(idx) | |||
| @@ -247,8 +295,10 @@ class PTCMRDataset(TUDataset): | |||
| super(PTCMRDataset, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(PTCMRDataset, self).get(idx) | |||
| @@ -259,10 +309,12 @@ class NCI1Dataset(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(NCI1Dataset, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(NCI1Dataset, self).get(idx) | |||
| @@ -273,10 +325,12 @@ class NCI109Dataset(TUDataset): | |||
| # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) | |||
| TUDataset(path, name=dataset) | |||
| super(NCI109Dataset, self).__init__(path, name=dataset) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(NCI109Dataset, self).get(idx) | |||
| @@ -298,10 +352,12 @@ class ENZYMES(TUDataset): | |||
| return data | |||
| else: | |||
| return self.index_select(idx) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(ENZYMES, self).get(idx) | |||
| @@ -342,8 +398,10 @@ class QM9Dataset(QM9): | |||
| if not osp.exists(path): | |||
| QM9(path) | |||
| super(QM9Dataset, self).__init__(path) | |||
| def get(self, idx): | |||
| if hasattr(self, '__data_list__'): | |||
| delattr(self, '__data_list__') | |||
| if hasattr(self, "__data_list__"): | |||
| delattr(self, "__data_list__") | |||
| if hasattr(self, "_data_list"): | |||
| delattr(self, "_data_list") | |||
| return super(QM9Dataset, self).get(idx) | |||
| @@ -1,7 +1,18 @@ | |||
| from pdb import set_trace | |||
| import torch | |||
| import numpy as np | |||
| from torch_geometric.data import DataLoader | |||
| from sklearn.model_selection import StratifiedKFold | |||
| from torch_geometric.utils import train_test_split_edges | |||
| from sklearn.model_selection import StratifiedKFold, KFold | |||
| def split_edges(dataset, train_ratio, val_ratio): | |||
| datas = [data for data in dataset] | |||
| for i in range(len(datas)): | |||
| datas[i] = train_test_split_edges( | |||
| datas[i], val_ratio, 1 - train_ratio - val_ratio | |||
| ) | |||
| dataset.data, dataset.slices = dataset.collate(datas) | |||
| def get_label_number(dataset): | |||
| @@ -37,32 +48,35 @@ def random_splits_mask(dataset, train_ratio=0.2, val_ratio=0.4, seed=None): | |||
| assert ( | |||
| train_ratio + val_ratio <= 1 | |||
| ), "the sum of train_ratio and val_ratio is larger than 1" | |||
| data = dataset[0] | |||
| r_s = torch.get_rng_state() | |||
| if torch.cuda.is_available(): | |||
| r_s_cuda = torch.cuda.get_rng_state() | |||
| if seed is not None: | |||
| torch.manual_seed(seed) | |||
| _dataset = [d for d in dataset] | |||
| for data in _dataset: | |||
| r_s = torch.get_rng_state() | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.manual_seed(seed) | |||
| perm = torch.randperm(data.num_nodes) | |||
| train_index = perm[: int(data.num_nodes * train_ratio)] | |||
| val_index = perm[ | |||
| int(data.num_nodes * train_ratio) : int( | |||
| data.num_nodes * (train_ratio + val_ratio) | |||
| ) | |||
| ] | |||
| test_index = perm[int(data.num_nodes * (train_ratio + val_ratio)) :] | |||
| data.train_mask = index_to_mask(train_index, size=data.num_nodes) | |||
| data.val_mask = index_to_mask(val_index, size=data.num_nodes) | |||
| data.test_mask = index_to_mask(test_index, size=data.num_nodes) | |||
| r_s_cuda = torch.cuda.get_rng_state() | |||
| if seed is not None: | |||
| torch.manual_seed(seed) | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.manual_seed(seed) | |||
| perm = torch.randperm(data.num_nodes) | |||
| train_index = perm[: int(data.num_nodes * train_ratio)] | |||
| val_index = perm[ | |||
| int(data.num_nodes * train_ratio) : int( | |||
| data.num_nodes * (train_ratio + val_ratio) | |||
| ) | |||
| ] | |||
| test_index = perm[int(data.num_nodes * (train_ratio + val_ratio)) :] | |||
| data.train_mask = index_to_mask(train_index, size=data.num_nodes) | |||
| data.val_mask = index_to_mask(val_index, size=data.num_nodes) | |||
| data.test_mask = index_to_mask(test_index, size=data.num_nodes) | |||
| torch.set_rng_state(r_s) | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.set_rng_state(r_s_cuda) | |||
| torch.set_rng_state(r_s) | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.set_rng_state(r_s_cuda) | |||
| dataset.data, dataset.slices = dataset.collate([d for d in dataset]) | |||
| dataset.data, dataset.slices = dataset.collate(_dataset) | |||
| if hasattr(dataset, "__data_list__"): | |||
| delattr(dataset, "__data_list__") | |||
| # while type(dataset.data.num_nodes) == list: | |||
| # dataset.data.num_nodes = dataset.data.num_nodes[0] | |||
| # dataset.data.num_nodes = dataset.data.num_nodes[0] | |||
| @@ -77,11 +91,11 @@ def random_splits_mask_class( | |||
| num_test=None, | |||
| seed=None, | |||
| ): | |||
| r"""If the data has masks for train/val/test, return the splits with specific number of samples from every class for training as suggested in Pitfalls of graph neural network evaluation [1]_ for semi-supervised learning. | |||
| r"""If the data has masks for train/val/test, return the splits with specific number of samples from every class for training as suggested in Pitfalls of graph neural network evaluation [#]_ for semi-supervised learning. | |||
| References | |||
| ---------- | |||
| .. [1] Shchur, O., Mumme, M., Bojchevski, A., & Günnemann, S. (2018). | |||
| .. [#] Shchur, O., Mumme, M., Bojchevski, A., & Günnemann, S. (2018). | |||
| Pitfalls of graph neural network evaluation. | |||
| arXiv preprint arXiv:1811.05868. | |||
| @@ -160,14 +174,24 @@ def random_splits_mask_class( | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.set_rng_state(r_s_cuda) | |||
| dataset.data, dataset.slices = dataset.collate([d for d in dataset]) | |||
| datalist = [] | |||
| for d in dataset: | |||
| setattr(d, "train_mask", data.train_mask) | |||
| setattr(d, "val_mask", data.val_mask) | |||
| setattr(d, "test_mask", data.test_mask) | |||
| datalist.append(d) | |||
| dataset.data, dataset.slices = dataset.collate(datalist) | |||
| if hasattr(dataset, "__data_list__"): | |||
| delattr(dataset, "__data_list__") | |||
| # while type(dataset.data.num_nodes) == list: | |||
| # dataset.data.num_nodes = dataset.data.num_nodes[0] | |||
| # dataset.data.num_nodes = dataset.data.num_nodes[0] | |||
| return dataset | |||
| def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42): | |||
| def graph_cross_validation( | |||
| dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False | |||
| ): | |||
| r"""Cross validation for graph classification data, returning one fold with specific idx in autogl.datasets or pyg.Dataloader(default) | |||
| Parameters | |||
| @@ -184,7 +208,12 @@ def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42): | |||
| random_seed : int | |||
| random_state for sklearn.model_selection.StratifiedKFold | |||
| """ | |||
| skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) | |||
| if stratify: | |||
| skf = StratifiedKFold( | |||
| n_splits=n_splits, shuffle=shuffle, random_state=random_seed | |||
| ) | |||
| else: | |||
| skf = KFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) | |||
| idx_list = [] | |||
| # BUG: from pytorch_geometric, not sure whether it is a bug. The dataset.data will return | |||
| @@ -303,7 +332,9 @@ def graph_random_splits(dataset, train_ratio=0.2, val_ratio=0.4, seed=None): | |||
| return dataset | |||
| def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128): | |||
| def graph_get_split( | |||
| dataset, mask="train", is_loader=True, batch_size=128, num_workers=0 | |||
| ): | |||
| r"""Get train/test dataset/dataloader after cross validation. | |||
| Parameters | |||
| @@ -325,7 +356,11 @@ def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128): | |||
| dataset, "%s_split" % (mask) | |||
| ), "Given dataset do not have %s split" % (mask) | |||
| if is_loader: | |||
| return DataLoader(getattr(dataset, "%s_split" % (mask)), batch_size=batch_size) | |||
| return DataLoader( | |||
| getattr(dataset, "%s_split" % (mask)), | |||
| batch_size=batch_size, | |||
| num_workers=num_workers, | |||
| ) | |||
| else: | |||
| return getattr(dataset, "%s_split" % (mask)) | |||
| @@ -1,3 +1,5 @@ | |||
| from . import feature, model, train, hpo, nas, ensemble | |||
| from .ensemble import * | |||
| from .feature import * | |||
| from .hpo import * | |||
| @@ -16,9 +16,11 @@ def register_ensembler(name): | |||
| return register_ensembler_cls | |||
| from .voting import Voting | |||
| from .stacking import Stacking | |||
| def build_ensembler_from_name(name: str) -> BaseEnsembler: | |||
| """ | |||
| Parameters | |||
| @@ -100,7 +100,7 @@ class Stacking(BaseEnsembler): | |||
| torch.tensor(predictions).transpose(0, 1).flatten(start_dim=1).numpy() | |||
| ) | |||
| meta_Y = np.array(label) | |||
| config = {} | |||
| model = GradientBoostingClassifier(**config) | |||
| model.fit(meta_X, meta_Y) | |||
| @@ -85,7 +85,7 @@ class Voting(BaseEnsembler): | |||
| weights = weights / np.sum(weights) | |||
| return np.average(predictions, axis=0, weights=weights) | |||
| def _specify_weights(self, predictions, label, feval): | |||
| ensemble_prediction = [] | |||
| combinations = [] | |||
| @@ -1,6 +1,4 @@ | |||
| import importlib | |||
| import os | |||
| from .base import BaseFeatureAtom | |||
| from .base import BaseFeature | |||
| from .base import BaseFeatureEngineer | |||
| FEATURE_DICT = {} | |||
| @@ -13,7 +11,7 @@ def register_feature(name): | |||
| "Cannot register duplicate feature engineer ({})".format(name) | |||
| ) | |||
| # if not issubclass(cls, BaseFeatureEngineer): | |||
| if not issubclass(cls, BaseFeatureAtom): | |||
| if not issubclass(cls, BaseFeature): | |||
| raise ValueError( | |||
| "Trainer ({}: {}) must extend BaseFeatureEngineer".format( | |||
| name, cls.__name__ | |||
| @@ -24,16 +22,82 @@ def register_feature(name): | |||
| return register_feature_cls | |||
| from .auto_feature import AutoFeatureEngineer | |||
| from .base import BaseFeatureEngineer | |||
| from .generators import BaseGenerator | |||
| from .selectors import BaseSelector | |||
| from .generators import ( | |||
| BaseGenerator, | |||
| GeGraphlet, | |||
| GeEigen, | |||
| GePageRank, | |||
| register_pyg, | |||
| pygfunc, | |||
| PYGGenerator, | |||
| PYGLocalDegreeProfile, | |||
| PYGNormalizeFeatures, | |||
| PYGOneHotDegree, | |||
| ) | |||
| from .selectors import BaseSelector, SeFilterConstant, SeGBDT | |||
| from .subgraph import BaseSubgraph | |||
| from .graph import ( | |||
| BaseGraph, | |||
| SgNetLSD, | |||
| register_nx, | |||
| NxGraph, | |||
| nxfunc, | |||
| NxLargeCliqueSize, | |||
| NxAverageClusteringApproximate, | |||
| NxDegreeAssortativityCoefficient, | |||
| NxDegreePearsonCorrelationCoefficient, | |||
| NxHasBridge, | |||
| NxGraphCliqueNumber, | |||
| NxGraphNumberOfCliques, | |||
| NxTransitivity, | |||
| NxAverageClustering, | |||
| NxIsConnected, | |||
| NxNumberConnectedComponents, | |||
| NxIsDistanceRegular, | |||
| NxLocalEfficiency, | |||
| NxGlobalEfficiency, | |||
| NxIsEulerian, | |||
| ) | |||
| __all__ = [ | |||
| "BaseFeatureEngineer", | |||
| "AutoFeatureEngineer", | |||
| "BaseFeatureAtom", | |||
| "BaseFeature", | |||
| "BaseGenerator", | |||
| "GeGraphlet", | |||
| "GeEigen", | |||
| "GePageRank", | |||
| "register_pyg", | |||
| "pygfunc", | |||
| "PYGGenerator", | |||
| "PYGLocalDegreeProfile", | |||
| "PYGNormalizeFeatures", | |||
| "PYGOneHotDegree", | |||
| "BaseSelector", | |||
| "SeFilterConstant", | |||
| "SeGBDT", | |||
| "BaseGraph", | |||
| "SgNetLSD", | |||
| "register_nx", | |||
| "NxGraph", | |||
| "nxfunc", | |||
| "NxLargeCliqueSize", | |||
| "NxAverageClusteringApproximate", | |||
| "NxDegreeAssortativityCoefficient", | |||
| "NxDegreePearsonCorrelationCoefficient", | |||
| "NxHasBridge", | |||
| "NxGraphCliqueNumber", | |||
| "NxGraphNumberOfCliques", | |||
| "NxTransitivity", | |||
| "NxAverageClustering", | |||
| "NxIsConnected", | |||
| "NxNumberConnectedComponents", | |||
| "NxIsDistanceRegular", | |||
| "NxLocalEfficiency", | |||
| "NxGlobalEfficiency", | |||
| "NxIsEulerian", | |||
| ] | |||
| @@ -6,11 +6,12 @@ from tqdm import tqdm | |||
| from tabulate import tabulate | |||
| import time | |||
| from .base import BaseFeatureAtom, BaseFeatureEngineer | |||
| from .base import BaseFeature, BaseFeatureEngineer | |||
| from .selectors import SeGBDT | |||
| from . import register_feature | |||
| from ...utils import get_logger | |||
| import torch | |||
| LOGGER = get_logger("Feature") | |||
| @@ -28,10 +29,15 @@ class Onlyconst(BaseFeatureEngineer): | |||
| r"""it is a dummy feature engineer , which directly returns identical data""" | |||
| def __init__(self, *args, **kwargs): | |||
| super(Onlyconst, self).__init__(multigraph=True, *args, **kwargs) | |||
| super(Onlyconst, self).__init__( | |||
| data_t="tensor", multigraph=True, *args, **kwargs | |||
| ) | |||
| def _transform(self, data): | |||
| data.x = np.ones((data.x.shape[0], 1)) | |||
| if "x" in data: | |||
| data.x = torch.ones((data.x.shape[0], 1)) | |||
| else: | |||
| data.x = torch.ones((torch.unique(data.edge_index).shape[0], 1)) | |||
| return data | |||
| @@ -113,37 +119,30 @@ class Timer: | |||
| @register_feature("deepgl") | |||
| class AutoFeatureEngineer(BaseFeatureEngineer): | |||
| r""" | |||
| Notes | |||
| ----- | |||
| An implementation of auto feature engineering method Deepgl [1]_ ,which iteratively generates features by aggregating neighbour features | |||
| An implementation of auto feature engineering method Deepgl [#]_ ,which iteratively generates features by aggregating neighbour features | |||
| and select a fixed number of features to automatically add important graph-aware features. | |||
| References | |||
| ---------- | |||
| .. [1] Rossi, R. A., Zhou, R., & Ahmed, N. K. (2020). | |||
| .. [#] Rossi, R. A., Zhou, R., & Ahmed, N. K. (2020). | |||
| Deep Inductive Graph Representation Learning. | |||
| IEEE Transactions on Knowledge and Data Engineering, 32(3), 438–452. | |||
| https://doi.org/10.1109/TKDE.2018.2878247 | |||
| Parameters | |||
| ---------- | |||
| fixlen : int | |||
| fixed number of features for every epoch. The final number of features added will be | |||
| ``fixlen`` \times ``max_epoch``, 200 \times 5 in default. | |||
| max_epoch : int | |||
| number of epochs in total process. | |||
| timebudget : int | |||
| timebudget(seconds) for the feature engineering process, None for no time budget . Note that | |||
| this time budget is a soft budget ,which is obtained by rough time estimation through previous iterations and | |||
| may finally exceed the actual timebudget | |||
| y_sel_func : Callable | |||
| feature selector function object for selection at each iteration ,lightgbm in default. Note that in original paper, | |||
| connected components of feature graph is used , and you may implement it by yourself if you want. | |||
| verbosity : int | |||
| hide any infomation except error and fatal if ``verbosity`` < 1 | |||
| """ | |||
| @@ -10,8 +10,8 @@ from ...utils import get_logger | |||
| LOGGER = get_logger("Feature") | |||
| class BaseFeatureAtom: | |||
| r"""Any feature funcion object should inherit BaseFeatureAtom, | |||
| class BaseFeature: | |||
| r"""Any feature funcion object should inherit BaseFeature, | |||
| which provides basic transformations and composing operation for feature | |||
| engineering. Basic transformations include data type adjusting(tensor or numpy), | |||
| complementing necessary attributes for future transform. Any subclass needs | |||
| @@ -22,19 +22,15 @@ class BaseFeatureAtom: | |||
| Parameters | |||
| ---------- | |||
| pipe : list | |||
| stores pipeline of ``BaseFeatureAtom``. | |||
| stores pipeline of ``BaseFeature``. | |||
| data_t: str | |||
| represents the data type needed for this transform, where 'tensor' accounts for ``torch.Tensor``, | |||
| 'np' for ``numpy.array`` and 'nx' for ``networkx``. When ``data_t`` values 'nx', then a ``networkx.DiGraph`` will | |||
| be added to data as data.G . | |||
| multigraph : bool | |||
| determine whether it supports dataset with multiple graphs | |||
| subgraph : bool | |||
| determine whether it extracts subgraph features. | |||
| """ | |||
| def __init__(self, pipe=None, data_t="tensor", multigraph=True, subgraph=False): | |||
| @@ -50,7 +46,7 @@ class BaseFeatureAtom: | |||
| r"""enable and operation to support feature engineering pipeline syntax like | |||
| SeFilterConstant()&GeEigen()&... | |||
| """ | |||
| return BaseFeatureAtom(self._pipe + o._pipe) | |||
| return BaseFeature(self._pipe + o._pipe) | |||
| def _rebuild(self, dataset, datalist): | |||
| dataset.__indices__ = None | |||
| @@ -69,6 +65,12 @@ class BaseFeatureAtom: | |||
| if not hasattr(data, "G") or data.G is None: | |||
| data.G = to_networkx(data, to_undirected=True) | |||
| def _adjust_to_tensor(self, data): | |||
| if self._data_t == "tensor": | |||
| pass | |||
| else: | |||
| data_np2tensor(data) | |||
| def _preprocess(self, data): | |||
| pass | |||
| @@ -98,23 +100,17 @@ class BaseFeatureAtom: | |||
| if not self._check_dataset(dataset): | |||
| return | |||
| dataset = copy.deepcopy(dataset) | |||
| for p in self._pipe: | |||
| _dataset = [x for x in dataset] | |||
| if p._subgraph: | |||
| with torch.no_grad(): | |||
| for p in self._pipe: | |||
| _dataset = [x for x in dataset] | |||
| for i, datai in enumerate(_dataset): | |||
| p._adjust_t(datai) | |||
| p._preprocess(datai) | |||
| p._fit_transform(datai) | |||
| p._postprocess(datai) | |||
| p._adjust_to_tensor(datai) | |||
| _dataset[i] = datai | |||
| else: | |||
| data = dataset.data | |||
| p._adjust_t(data) | |||
| p._preprocess(data) | |||
| data = p._fit_transform(data) | |||
| p._postprocess(data) | |||
| dataset = self._rebuild(dataset, _dataset) | |||
| dataset = self._rebuild(dataset, _dataset) | |||
| def transform(self, dataset, inplace=True): | |||
| r"""transform dataset inplace or not w.r.t bool argument ``inplace``""" | |||
| @@ -122,22 +118,17 @@ class BaseFeatureAtom: | |||
| return dataset | |||
| if not inplace: | |||
| dataset = copy.deepcopy(dataset) | |||
| for p in self._pipe: | |||
| self._dataset = _dataset = [x for x in dataset] | |||
| if p._subgraph: | |||
| with torch.no_grad(): | |||
| for p in self._pipe: | |||
| self._dataset = _dataset = [x for x in dataset] | |||
| for i, datai in enumerate(_dataset): | |||
| p._adjust_t(datai) | |||
| p._preprocess(datai) | |||
| datai = p._transform(datai) | |||
| p._postprocess(datai) | |||
| _dataset[i] = datai | |||
| else: | |||
| data = dataset.data | |||
| p._adjust_t(data) | |||
| p._preprocess(data) | |||
| data = p._transform(data) | |||
| p._postprocess(data) | |||
| dataset = self._rebuild(dataset, _dataset) | |||
| p._adjust_to_tensor(datai) | |||
| _dataset[i] = datai | |||
| dataset = self._rebuild(dataset, _dataset) | |||
| dataset.data = data_np2tensor(dataset.data) | |||
| return dataset | |||
| @@ -148,14 +139,14 @@ class BaseFeatureAtom: | |||
| @staticmethod | |||
| def compose(trans_list): | |||
| r"""put a list of ``BaseFeatureAtom`` into feature engineering pipeline""" | |||
| res = BaseFeatureAtom() | |||
| r"""put a list of ``BaseFeature`` into feature engineering pipeline""" | |||
| res = BaseFeature() | |||
| for tran in trans_list: | |||
| res = res & tran | |||
| return res | |||
| class BaseFeatureEngineer(BaseFeatureAtom): | |||
| class BaseFeatureEngineer(BaseFeature): | |||
| def __init__(self, data_t="np", multigraph=False, *args, **kwargs): | |||
| super(BaseFeatureEngineer, self).__init__( | |||
| data_t=data_t, multigraph=multigraph, *args, **kwargs | |||
| @@ -164,7 +155,7 @@ class BaseFeatureEngineer(BaseFeatureAtom): | |||
| self.kwargs = kwargs | |||
| class TransformWrapper(BaseFeatureAtom): | |||
| class TransformWrapper(BaseFeature): | |||
| def __init__(self, cls, *args, **kwargs): | |||
| super(TransformWrapper, self).__init__(data_t="tensor", *args, **kwargs) | |||
| self._cls = cls | |||
| @@ -2,6 +2,24 @@ from .base import BaseGenerator | |||
| from .graphlet import GeGraphlet | |||
| from .eigen import GeEigen | |||
| from .page_rank import GePageRank | |||
| from .pyg import * | |||
| from .pyg import ( | |||
| register_pyg, | |||
| PYGGenerator, | |||
| pygfunc, | |||
| PYGLocalDegreeProfile, | |||
| PYGNormalizeFeatures, | |||
| PYGOneHotDegree, | |||
| ) | |||
| __all__ = ["BaseGenerator", "GeGraphlet", "GeEigen", "GePageRank"] | |||
| __all__ = [ | |||
| "BaseGenerator", | |||
| "GeGraphlet", | |||
| "GeEigen", | |||
| "GePageRank", | |||
| "register_pyg", | |||
| "pygfunc", | |||
| "PYGGenerator", | |||
| "PYGLocalDegreeProfile", | |||
| "PYGNormalizeFeatures", | |||
| "PYGOneHotDegree", | |||
| ] | |||
| @@ -1,11 +1,13 @@ | |||
| import numpy as np | |||
| from .. import register_feature | |||
| from ..base import BaseFeatureAtom | |||
| from ..base import BaseFeature | |||
| class BaseGenerator(BaseFeatureAtom): | |||
| def __init__(self, data_t="np", multigraph=True,**kwargs): | |||
| super(BaseGenerator, self).__init__(data_t=data_t, multigraph=multigraph,**kwargs) | |||
| class BaseGenerator(BaseFeature): | |||
| def __init__(self, data_t="np", multigraph=True, **kwargs): | |||
| super(BaseGenerator, self).__init__( | |||
| data_t=data_t, multigraph=multigraph, **kwargs | |||
| ) | |||
| @register_feature("onehot") | |||
| @@ -50,11 +50,11 @@ class GeEigen(BaseGenerator): | |||
| Notes | |||
| ----- | |||
| An implementation of [1]_ | |||
| An implementation of [#]_ | |||
| References | |||
| ---------- | |||
| .. [1] Ziwei Zhang, Peng Cui, Jian Pei, Xin Wang, Wenwu Zhu: | |||
| .. [#] Ziwei Zhang, Peng Cui, Jian Pei, Xin Wang, Wenwu Zhu: | |||
| Eigen-GNN: A Graph Structure Preserving Plug-in for GNNs. CoRR abs/2006.04330 (2020) | |||
| https://arxiv.org/abs/2006.04330 | |||
| @@ -272,11 +272,11 @@ class Graphlet: | |||
| @register_feature("graphlet") | |||
| class GeGraphlet(BaseGenerator): | |||
| r"""generate local graphlet numbers as features. The implementation refers to [1]_ . | |||
| r"""generate local graphlet numbers as features. The implementation refers to [#]_ . | |||
| References | |||
| ---------- | |||
| .. [1] Ahmed, N. K., Willke, T. L., & Rossi, R. A. (2016). | |||
| .. [#] Ahmed, N. K., Willke, T. L., & Rossi, R. A. (2016). | |||
| Estimation of local subgraph counts. Proceedings - 2016 IEEE International Conference on Big Data, Big Data 2016, 586–595. | |||
| https://doi.org/10.1109/BigData.2016.7840651 | |||
| @@ -29,19 +29,18 @@ class PYGGenerator(BaseGenerator): | |||
| def _transform(self, data): | |||
| dsc = self.extract(data) | |||
| data.x = torch.cat([data.x, dsc], dim=1) | |||
| # data.x = torch.cat([data.x, dsc], dim=1) | |||
| data.x = dsc | |||
| return data | |||
| def pygfunc(func): | |||
| r"""A decorator for pyg transforms. You may want to use it to quickly wrap a feature transform function object. | |||
| Examples | |||
| -------- | |||
| @register_pyg | |||
| @pygfunc(local_degree_profile) | |||
| class PYGLocalDegreeProfile(local_degree_profile):pass | |||
| """ | |||
| def decorator_func(cls): | |||
| @@ -74,15 +73,17 @@ class PYGNormalizeFeatures(PYGGenerator): | |||
| @register_pyg | |||
| @pygfunc(OneHotDegree) | |||
| class PYGOneHotDegree(PYGGenerator): | |||
| def __init__(self, max_degree=0): | |||
| def __init__(self, max_degree=1000): | |||
| super(PYGOneHotDegree, self).__init__(max_degree=max_degree) | |||
| """ | |||
| def _transform(self, data): | |||
| idx, x = data.edge_index[0], data.x | |||
| deg = degree(idx, data.num_nodes, dtype=torch.long) | |||
| self._kwargs["max_degree"] = np.max( | |||
| [self._kwargs["max_degree"], torch.max(deg).numpy()] | |||
| ) | |||
| #idx, x = data.edge_index[0], data.x | |||
| #deg = degree(idx, data.num_nodes, dtype=torch.long) | |||
| #self._kwargs["max_degree"] = np.min( | |||
| # [self._kwargs["max_degree"], torch.max(deg).numpy()] | |||
| #) | |||
| dsc = self.extract(data) | |||
| data.x = torch.cat([data.x, dsc], dim=1) | |||
| return data | |||
| """ | |||
| @@ -0,0 +1,45 @@ | |||
| from .netlsd import SgNetLSD | |||
| from .base import BaseGraph | |||
| from .nx import ( | |||
| register_nx, | |||
| NxGraph, | |||
| nxfunc, | |||
| NxLargeCliqueSize, | |||
| NxAverageClusteringApproximate, | |||
| NxDegreeAssortativityCoefficient, | |||
| NxDegreePearsonCorrelationCoefficient, | |||
| NxHasBridge, | |||
| NxGraphCliqueNumber, | |||
| NxGraphNumberOfCliques, | |||
| NxTransitivity, | |||
| NxAverageClustering, | |||
| NxIsConnected, | |||
| NxNumberConnectedComponents, | |||
| NxIsDistanceRegular, | |||
| NxLocalEfficiency, | |||
| NxGlobalEfficiency, | |||
| NxIsEulerian, | |||
| ) | |||
| __all__ = [ | |||
| "SgNetLSD", | |||
| "BaseGraph", | |||
| "register_nx", | |||
| "NxGraph", | |||
| "nxfunc", | |||
| "NxLargeCliqueSize", | |||
| "NxAverageClusteringApproximate", | |||
| "NxDegreeAssortativityCoefficient", | |||
| "NxDegreePearsonCorrelationCoefficient", | |||
| "NxHasBridge", | |||
| "NxGraphCliqueNumber", | |||
| "NxGraphNumberOfCliques", | |||
| "NxTransitivity", | |||
| "NxAverageClustering", | |||
| "NxIsConnected", | |||
| "NxNumberConnectedComponents", | |||
| "NxIsDistanceRegular", | |||
| "NxLocalEfficiency", | |||
| "NxGlobalEfficiency", | |||
| "NxIsEulerian", | |||
| ] | |||
| @@ -1,12 +1,14 @@ | |||
| from ..base import BaseFeatureAtom | |||
| from ..base import BaseFeature | |||
| import numpy as np | |||
| import torch | |||
| from .. import register_feature | |||
| class BaseSubgraph(BaseFeatureAtom): | |||
| def __init__(self, data_t="np", multigraph=True,**kwargs): | |||
| super(BaseSubgraph, self).__init__( | |||
| data_t=data_t, multigraph=multigraph, subgraph=True,**kwargs | |||
| @register_feature("graph") | |||
| class BaseGraph(BaseFeature): | |||
| def __init__(self, data_t="np", multigraph=True, **kwargs): | |||
| super(BaseGraph, self).__init__( | |||
| data_t=data_t, multigraph=multigraph, subgraph=True, **kwargs | |||
| ) | |||
| def _preprocess(self, data): | |||
| @@ -1,22 +1,20 @@ | |||
| import netlsd | |||
| from .base import BaseSubgraph | |||
| from .base import BaseGraph | |||
| import numpy as np | |||
| import torch | |||
| from .. import register_feature | |||
| @register_feature("netlsd") | |||
| class SgNetLSD(BaseSubgraph): | |||
| class SgNetLSD(BaseGraph): | |||
| r""" | |||
| Notes | |||
| ----- | |||
| a subgraph feature generation method. This is a simple wrapper of NetLSD [1]_. | |||
| a graph feature generation method. This is a simple wrapper of NetLSD [#]_. | |||
| References | |||
| ---------- | |||
| .. [1] A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein, and E. Müller, “NetLSD: Hearing the shape of a graph,” | |||
| .. [#] A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein, and E. Müller, “NetLSD: Hearing the shape of a graph,” | |||
| Proc. ACM SIGKDD Int. Conf. Knowl. Discov. Data Min., pp. 2347–2356, 2018. | |||
| """ | |||
| def __init__(self, *args, **kwargs): | |||
| @@ -14,7 +14,7 @@ from networkx.algorithms.assortativity import degree_assortativity_coefficient | |||
| from networkx.algorithms.approximation.clustering_coefficient import average_clustering | |||
| from networkx.algorithms.approximation.clique import large_clique_size | |||
| import netlsd | |||
| from .base import BaseSubgraph | |||
| from .base import BaseGraph | |||
| import numpy as np | |||
| import torch | |||
| from functools import wraps | |||
| @@ -30,9 +30,9 @@ def register_nx(cls): | |||
| @register_nx | |||
| class NxSubgraph(BaseSubgraph): | |||
| class NxGraph(BaseGraph): | |||
| def __init__(self, *args, **kwargs): | |||
| super(NxSubgraph, self).__init__(data_t="nx") | |||
| super(NxGraph, self).__init__(data_t="nx") | |||
| self._args = args | |||
| self._kwargs = kwargs | |||
| @@ -47,13 +47,13 @@ class NxSubgraph(BaseSubgraph): | |||
| def nxfunc(func): | |||
| r"""A decorator for networkx subgraph transforms. You may want to use it to quickly wrap a nx subgraph feature function object. | |||
| r"""A decorator for networkx Graph transforms. You may want to use it to quickly wrap a nx Graph feature function object. | |||
| Examples | |||
| -------- | |||
| @register_nx | |||
| @nxfunc(large_clique_size) | |||
| class NxLargeCliqueSize(NxSubgraph):pass | |||
| class NxLargeCliqueSize(NxGraph):pass | |||
| """ | |||
| @@ -66,117 +66,117 @@ def nxfunc(func): | |||
| @register_nx | |||
| @nxfunc(large_clique_size) | |||
| class NxLargeCliqueSize(NxSubgraph): | |||
| class NxLargeCliqueSize(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(average_clustering) | |||
| class NxAverageClusteringApproximate(NxSubgraph): | |||
| class NxAverageClusteringApproximate(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(degree_assortativity_coefficient) | |||
| class NxDegreeAssortativityCoefficient(NxSubgraph): | |||
| class NxDegreeAssortativityCoefficient(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(degree_pearson_correlation_coefficient) | |||
| class NxDegreePearsonCorrelationCoefficient(NxSubgraph): | |||
| class NxDegreePearsonCorrelationCoefficient(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(has_bridges) | |||
| class NxHasBridge(NxSubgraph): | |||
| class NxHasBridge(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(graph_clique_number) | |||
| class NxGraphCliqueNumber(NxSubgraph): | |||
| class NxGraphCliqueNumber(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(graph_number_of_cliques) | |||
| class NxGraphNumberOfCliques(NxSubgraph): | |||
| class NxGraphNumberOfCliques(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(transitivity) | |||
| class NxTransitivity(NxSubgraph): | |||
| class NxTransitivity(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(average_clustering) | |||
| class NxAverageClustering(NxSubgraph): | |||
| class NxAverageClustering(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(is_connected) | |||
| class NxIsConnected(NxSubgraph): | |||
| class NxIsConnected(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(number_connected_components) | |||
| class NxNumberConnectedComponents(NxSubgraph): | |||
| class NxNumberConnectedComponents(NxGraph): | |||
| pass | |||
| # from networkx.algorithms.components import is_attracting_component | |||
| # @register_nx | |||
| # @nxfunc(is_attracting_component) | |||
| # class NxIsAttractingComponent(NxSubgraph):pass | |||
| # class NxIsAttractingComponent(NxGraph):pass | |||
| # from networkx.algorithms.components import number_attracting_components | |||
| # @register_nx | |||
| # @nxfunc(number_attracting_components) | |||
| # class NxNumberAttractingComponents(NxSubgraph):pass | |||
| # class NxNumberAttractingComponents(NxGraph):pass | |||
| # from networkx.algorithms.connectivity.connectivity import average_node_connectivity | |||
| # @register_nx | |||
| # @nxfunc(average_node_connectivity) | |||
| # class NxAverageNodeConnectivity(NxSubgraph):pass | |||
| # class NxAverageNodeConnectivity(NxGraph):pass | |||
| # from networkx.algorithms.distance_measures import diameter | |||
| # @register_nx | |||
| # @nxfunc(diameter) | |||
| # class NxDiameter(NxSubgraph):pass | |||
| # class NxDiameter(NxGraph):pass | |||
| # from networkx.algorithms.distance_measures import radius | |||
| # @register_nx | |||
| # @nxfunc(radius) | |||
| # class NxRadius(NxSubgraph):pass | |||
| # class NxRadius(NxGraph):pass | |||
| @register_nx | |||
| @nxfunc(is_distance_regular) | |||
| class NxIsDistanceRegular(NxSubgraph): | |||
| class NxIsDistanceRegular(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(local_efficiency) | |||
| class NxLocalEfficiency(NxSubgraph): | |||
| class NxLocalEfficiency(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(global_efficiency) | |||
| class NxGlobalEfficiency(NxSubgraph): | |||
| class NxGlobalEfficiency(NxGraph): | |||
| pass | |||
| @register_nx | |||
| @nxfunc(is_eulerian) | |||
| class NxIsEulerian(NxSubgraph): | |||
| class NxIsEulerian(NxGraph): | |||
| pass | |||
| @@ -1,5 +1,3 @@ | |||
| import importlib | |||
| import os | |||
| from .base import BaseSelector | |||
| from .se_filter_constant import SeFilterConstant | |||
| from .se_gbdt import SeGBDT | |||
| @@ -1,10 +1,12 @@ | |||
| from ..base import BaseFeatureAtom | |||
| from ..base import BaseFeature | |||
| import numpy as np | |||
| class BaseSelector(BaseFeatureAtom): | |||
| def __init__(self, data_t="np", multigraph=False,**kwargs): | |||
| super(BaseSelector, self).__init__(data_t=data_t, multigraph=multigraph,**kwargs) | |||
| class BaseSelector(BaseFeature): | |||
| def __init__(self, data_t="np", multigraph=False, **kwargs): | |||
| super(BaseSelector, self).__init__( | |||
| data_t=data_t, multigraph=multigraph, **kwargs | |||
| ) | |||
| self._sel = None | |||
| def _transform(self, data): | |||
| @@ -1,4 +0,0 @@ | |||
| from .netlsd import SgNetLSD | |||
| from .base import BaseSubgraph | |||
| __all__ = ["SgNetLSD", "BaseSubgraph"] | |||
| @@ -1 +0,0 @@ | |||
| import numpy as np | |||
| @@ -5,6 +5,7 @@ HPO Module for tuning hyper parameters | |||
| import time | |||
| import json | |||
| import math | |||
| from tqdm import trange | |||
| from .suggestion.models import Study | |||
| from .base import BaseHPOptimizer, TimeTooLimitedError | |||
| from .suggestion.algorithm.random_search import RandomSearchAlgorithm | |||
| @@ -43,7 +44,9 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): | |||
| self.xs = [] | |||
| self.best_id = None | |||
| self.best_trainer = None | |||
| space = trainer.hyper_parameter_space | |||
| space = ( | |||
| trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space | |||
| ) | |||
| current_config = self._encode_para(space) | |||
| for i in range(slaves): | |||
| @@ -129,7 +132,9 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): | |||
| self.feval_name = trainer.get_feval(return_major=True).get_eval_name() | |||
| self.is_higher_better = trainer.get_feval(return_major=True).is_higher_better() | |||
| space = trainer.hyper_parameter_space | |||
| space = ( | |||
| trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space | |||
| ) | |||
| current_space = self._encode_para(space) | |||
| self._setUp(current_space) | |||
| @@ -146,7 +151,8 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): | |||
| best_id = None | |||
| best_trainer = None | |||
| for i in range(self.max_evals): | |||
| print("HPO Search Phase:\n") | |||
| for i in trange(self.max_evals): | |||
| if time.time() - start_time > time_limit: | |||
| self.logger.info("Time out of limit, Epoch: {}".format(str(i))) | |||
| break | |||
| @@ -3,27 +3,26 @@ HPO Module for tuning hyper parameters | |||
| """ | |||
| import time | |||
| import json | |||
| import math | |||
| import numpy as np | |||
| from tqdm import trange | |||
| from . import register_hpo | |||
| from .suggestion.models import Study | |||
| from .base import BaseHPOptimizer, TimeTooLimitedError | |||
| from .autone_file import utils | |||
| from torch_geometric.data import GraphSAINTRandomWalkSampler | |||
| from ..feature.subgraph.nx import NxSubgraph, NxLargeCliqueSize | |||
| from ..feature.subgraph import nx, SgNetLSD | |||
| from ..feature.graph import SgNetLSD | |||
| from torch_geometric.data import InMemoryDataset | |||
| from torch_geometric.data import InMemoryDataset | |||
| class _MyDataset(InMemoryDataset): | |||
| def __init__(self, datalist) -> None: | |||
| super().__init__() | |||
| self.data, self.slices = self.collate(datalist) | |||
| @register_hpo("autone") | |||
| class AutoNE(BaseHPOptimizer): | |||
| """ | |||
| @@ -59,7 +58,9 @@ class AutoNE(BaseHPOptimizer): | |||
| """ | |||
| self.feval_name = trainer.get_feval(return_major=True).get_eval_name() | |||
| self.is_higher_better = trainer.get_feval(return_major=True).is_higher_better() | |||
| space = trainer.hyper_parameter_space + trainer.model.hyper_parameter_space | |||
| space = ( | |||
| trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space | |||
| ) | |||
| current_space = self._encode_para(space) | |||
| def sample_subgraph(whole_data): | |||
| @@ -73,17 +74,17 @@ class AutoNE(BaseHPOptimizer): | |||
| ) | |||
| results = [] | |||
| for data in loader: | |||
| in_dataset= _MyDataset([data]) | |||
| in_dataset = _MyDataset([data]) | |||
| results.append(in_dataset) | |||
| return results | |||
| func = SgNetLSD() | |||
| def get_wne(graph): | |||
| graph=func.fit_transform(graph) | |||
| # transform = nx.NxSubgraph.compose(map(lambda x: x(), nx.NX_EXTRACTORS)) | |||
| graph = func.fit_transform(graph) | |||
| # transform = nx.NxGraph.compose(map(lambda x: x(), nx.NX_EXTRACTORS)) | |||
| # print(type(graph)) | |||
| #gf = transform.fit_transform(graph).data.gf | |||
| # gf = transform.fit_transform(graph).data.gf | |||
| gf = graph.data.gf | |||
| fin = list(gf[0]) + list(map(lambda x: float(x), gf[1:])) | |||
| return fin | |||
| @@ -111,7 +112,8 @@ class AutoNE(BaseHPOptimizer): | |||
| K = utils.K(len(params.type_)) | |||
| gp = utils.GaussianProcessRegressor(K) | |||
| sample_graphs = sample_subgraph(dataset) | |||
| for t in range(sampled_number): | |||
| print("Sample Phase:\n") | |||
| for t in trange(sampled_number): | |||
| b_t = time.time() | |||
| i = t | |||
| subgraph = sample_graphs[t] | |||
| @@ -129,7 +131,8 @@ class AutoNE(BaseHPOptimizer): | |||
| best_trainer = None | |||
| best_para = None | |||
| wne = get_wne(dataset) | |||
| for t in range(s): | |||
| print("HPO Search Phase:\n") | |||
| for t in trange(s): | |||
| if time.time() - start_time > time_limit: | |||
| self.logger.info("Time out of limit, Epoch: {}".format(str(i))) | |||
| break | |||
| @@ -30,7 +30,9 @@ class BaseHPOptimizer: | |||
| raise WrongDependedParameterError("The depended parameter does not exist.") | |||
| for para in config: | |||
| if para["type"] == "NUMERICAL_LIST" and para.get("cutPara", None): | |||
| if para["type"] in ("NUMERICAL_LIST", "CATEGORICAL_LIST") and para.get( | |||
| "cutPara", None | |||
| ): | |||
| self._depend_map[para["parameterName"]] = para | |||
| if type(para["cutPara"]) == str: | |||
| get_depended_para(para["cutPara"]) | |||
| @@ -76,6 +78,18 @@ class BaseHPOptimizer: | |||
| new_para["maxValue"] = y | |||
| new_para["scalingType"] = para["scalingType"] | |||
| fin.append(new_para) | |||
| elif para["type"] == "CATEGORICAL_LIST": | |||
| self._list_map[para["parameterName"]] = para["length"] | |||
| category = para["feasiblePoints"] | |||
| self._category_map[para["parameterName"]] = category | |||
| cur_points = ",".join(map(lambda _x: str(_x), range(len(category)))) | |||
| for i in range(para["length"]): | |||
| new_para = dict() | |||
| new_para["parameterName"] = para["parameterName"] + "_" + str(i) | |||
| new_para["type"] = "DISCRETE" | |||
| new_para["feasiblePoints"] = cur_points | |||
| fin.append(new_para) | |||
| elif para["type"] == "FIXED": | |||
| self._fix_map[para["parameterName"]] = para["value"] | |||
| else: | |||
| @@ -92,6 +106,8 @@ class BaseHPOptimizer: | |||
| for i in range(self._list_map[pname]): | |||
| val.append(config[pname + "_" + str(i)]) | |||
| del config[pname + "_" + str(i)] | |||
| if pname in self._category_map: | |||
| val = [self._category_map[pname][i] for i in val] | |||
| fin[pname] = val | |||
| # deal other para | |||
| for pname in config: | |||
| @@ -123,10 +139,10 @@ class BaseHPOptimizer: | |||
| "maxValue": 0.9, | |||
| "scalingType": "LINEAR" | |||
| }]""" | |||
| config = self._decompose_list_fixed_para(config) | |||
| self._category_map = {} | |||
| self._discrete_map = {} | |||
| self._numerical_map = {} | |||
| config = self._decompose_list_fixed_para(config) | |||
| current_config = [] | |||
| for para in config: | |||
| @@ -1 +1 @@ | |||
| # Files in this folder are reproduced from https://github.com/tobegit3hub/advisor with some changes. | |||
| # Files in this folder are reproduced from https://github.com/tobegit3hub/advisor with some changes. | |||
| @@ -1,34 +1,21 @@ | |||
| import importlib | |||
| import os | |||
| MODEL_DICT = {} | |||
| def register_model(name): | |||
| def register_model_cls(cls): | |||
| if name in MODEL_DICT: | |||
| raise ValueError("Cannot register duplicate trainer ({})".format(name)) | |||
| if not issubclass(cls, BaseModel): | |||
| raise ValueError( | |||
| "Trainer ({}: {}) must extend BaseModel".format(name, cls.__name__) | |||
| ) | |||
| MODEL_DICT[name] = cls | |||
| return cls | |||
| return register_model_cls | |||
| from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model | |||
| from .base import BaseModel | |||
| from .topkpool import AutoTopkpool | |||
| # from .graph_sage import AutoSAGE | |||
| from .graphsage import AutoSAGE | |||
| from .graph_saint import GraphSAINTAggregationModel | |||
| from .gcn import AutoGCN | |||
| from .gat import AutoGAT | |||
| from .gin import AutoGIN | |||
| __all__ = [ | |||
| "ModelUniversalRegistry", | |||
| "register_model", | |||
| "BaseModel", | |||
| "AutoTopkpool", | |||
| "AutoSAGE", | |||
| "GraphSAINTAggregationModel", | |||
| "AutoGCN", | |||
| "AutoGAT", | |||
| "AutoGIN", | |||
| @@ -0,0 +1,28 @@ | |||
| import typing as _typing | |||
| from .base import BaseModel | |||
| MODEL_DICT: _typing.Dict[str, _typing.Type[BaseModel]] = {} | |||
| def register_model(name): | |||
| def register_model_cls(cls): | |||
| if name in MODEL_DICT: | |||
| raise ValueError("Cannot register duplicate trainer ({})".format(name)) | |||
| if not issubclass(cls, BaseModel): | |||
| raise ValueError( | |||
| "Trainer ({}: {}) must extend BaseModel".format(name, cls.__name__) | |||
| ) | |||
| MODEL_DICT[name] = cls | |||
| return cls | |||
| return register_model_cls | |||
| class ModelUniversalRegistry: | |||
| @classmethod | |||
| def get_model(cls, name: str) -> _typing.Type[BaseModel]: | |||
| if type(name) != str: | |||
| raise TypeError | |||
| if name not in MODEL_DICT: | |||
| raise KeyError | |||
| return MODEL_DICT.get(name) | |||
| @@ -3,11 +3,15 @@ auto graph model | |||
| a list of models with their hyper parameters | |||
| NOTE: neural architecture search (NAS) maybe included here | |||
| """ | |||
| import copy | |||
| import logging | |||
| import typing as _typing | |||
| import torch | |||
| import torch.nn.functional as F | |||
| from copy import deepcopy | |||
| base_approach_logger: logging.Logger = logging.getLogger("BaseModel") | |||
| def activate_func(x, func): | |||
| if func == "tanh": | |||
| @@ -22,7 +26,7 @@ def activate_func(x, func): | |||
| return x | |||
| class BaseModel(torch.nn.Module): | |||
| class BaseModel: | |||
| def __init__(self, init=False, *args, **kwargs): | |||
| super(BaseModel, self).__init__() | |||
| @@ -43,6 +47,17 @@ class BaseModel(torch.nn.Module): | |||
| def forward(self): | |||
| pass | |||
| def to(self, device): | |||
| if isinstance(device, (str, torch.device)): | |||
| self.device = device | |||
| if ( | |||
| hasattr(self, "model") | |||
| and self.model is not None | |||
| and isinstance(self.model, torch.nn.Module) | |||
| ): | |||
| self.model.to(self.device) | |||
| return self | |||
| def from_hyper_parameter(self, hp): | |||
| ret_self = self.__class__( | |||
| num_features=self.num_features, | |||
| @@ -75,3 +90,324 @@ class BaseModel(torch.nn.Module): | |||
| ), "Cannot set graph features for tasks other than graph classification" | |||
| self.num_graph_features = num_graph_features | |||
| self.params["num_graph_features"] = num_graph_features | |||
| class _BaseBaseModel: | |||
| # todo: after renaming the experimental base class _BaseModel to BaseModel, | |||
| # rename this class to _BaseModel | |||
| """ | |||
| The base class for class BaseModel, | |||
| designed to implement some basic functionality of BaseModel. | |||
| -- Designed by ZiXin Sun | |||
| """ | |||
| @classmethod | |||
| def __formulate_device( | |||
| cls, device: _typing.Union[str, torch.device] = ... | |||
| ) -> torch.device: | |||
| if type(device) == torch.device or ( | |||
| type(device) == str and device.strip().lower() != "auto" | |||
| ): | |||
| return torch.device(device) | |||
| elif torch.cuda.is_available() and torch.cuda.device_count() > 0: | |||
| return torch.device("cuda") | |||
| else: | |||
| return torch.device("cpu") | |||
| @property | |||
| def device(self) -> torch.device: | |||
| return self.__device | |||
| @device.setter | |||
| def device(self, __device: _typing.Union[str, torch.device, None]): | |||
| self.__device: torch.device = self.__formulate_device(__device) | |||
| @property | |||
| def model(self) -> _typing.Optional[torch.nn.Module]: | |||
| if self._model is None: | |||
| base_approach_logger.debug( | |||
| "property of model NOT initialized before accessing" | |||
| ) | |||
| return self._model | |||
| @model.setter | |||
| def model(self, _model: torch.nn.Module) -> None: | |||
| if not isinstance(_model, torch.nn.Module): | |||
| raise TypeError( | |||
| "the property of model MUST be an instance of " "torch.nn.Module" | |||
| ) | |||
| self._model = _model | |||
| def _initialize(self): | |||
| raise NotImplementedError | |||
| def initialize(self) -> bool: | |||
| """ | |||
| Initialize the model in case that the model has NOT been initialized | |||
| :return: whether self._initialize() method called | |||
| """ | |||
| if not self.__is_initialized: | |||
| self._initialize() | |||
| self.__is_initialized = True | |||
| return True | |||
| return False | |||
| # def to(self, *args, **kwargs): | |||
| # """ | |||
| # Due to the signature of to() method in class BaseApproach | |||
| # is inconsistent with the signature of the method | |||
| # in the base class torch.nn.Module, | |||
| # this intermediate overridden method is necessary to | |||
| # walk around (bypass) the inspection for | |||
| # signature of overriding method. | |||
| # :param args: positional arguments list | |||
| # :param kwargs: keyword arguments dict | |||
| # :return: self | |||
| # """ | |||
| # return super(_BaseBaseModel, self).to(*args, **kwargs) | |||
| def forward(self, *args, **kwargs): | |||
| if self.model is not None and isinstance(self.model, torch.nn.Module): | |||
| return self.model(*args, **kwargs) | |||
| else: | |||
| raise NotImplementedError | |||
| def __init__( | |||
| self, | |||
| model: _typing.Optional[torch.nn.Module] = None, | |||
| initialize: bool = False, | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| ): | |||
| if type(initialize) != bool: | |||
| raise TypeError | |||
| super(_BaseBaseModel, self).__init__() | |||
| self.__device: torch.device = self.__formulate_device(device) | |||
| self._model: _typing.Optional[torch.nn.Module] = model | |||
| self.__is_initialized: bool = False | |||
| if initialize: | |||
| self.initialize() | |||
| class _BaseModel(_BaseBaseModel, BaseModel): | |||
| """ | |||
| The upcoming root base class for Model, i.e. BaseModel | |||
| -- Designed by ZiXin Sun | |||
| """ | |||
| # todo: Deprecate and remove the legacy class "BaseModel", | |||
| # then rename this class to "BaseModel", | |||
| # correspondingly, this class will no longer extend | |||
| # the legacy class "BaseModel" after the removal. | |||
| def _initialize(self): | |||
| raise NotImplementedError | |||
| def to(self, device: torch.device): | |||
| self.device = device | |||
| if self.model is not None and isinstance(self.model, torch.nn.Module): | |||
| self.model.to(self.device) | |||
| return super().to(device) | |||
| @property | |||
| def space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: | |||
| # todo: deprecate and remove in future major version | |||
| return self.__hyper_parameter_space | |||
| @property | |||
| def hyper_parameter_space(self): | |||
| return self.__hyper_parameter_space | |||
| @hyper_parameter_space.setter | |||
| def hyper_parameter_space( | |||
| self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]] | |||
| ): | |||
| self.__hyper_parameter_space = space | |||
| @property | |||
| def hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: | |||
| return self.__hyper_parameter | |||
| @hyper_parameter.setter | |||
| def hyper_parameter(self, _hyper_parameter: _typing.Dict[str, _typing.Any]): | |||
| if not isinstance(_hyper_parameter, dict): | |||
| raise TypeError | |||
| self.__hyper_parameter = _hyper_parameter | |||
| def get_hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: | |||
| """ | |||
| todo: consider deprecating this trivial getter method in the future | |||
| :return: copied hyper parameter | |||
| """ | |||
| return copy.deepcopy(self.__hyper_parameter) | |||
| def __init__( | |||
| self, | |||
| model: _typing.Optional[torch.nn.Module] = None, | |||
| initialize: bool = False, | |||
| hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., | |||
| hyper_parameter: _typing.Dict[str, _typing.Any] = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| ): | |||
| if type(initialize) != bool: | |||
| raise TypeError | |||
| super(_BaseModel, self).__init__(model, initialize, device) | |||
| if hyper_parameter_space != Ellipsis and isinstance( | |||
| hyper_parameter_space, _typing.Sequence | |||
| ): | |||
| self.__hyper_parameter_space: _typing.Sequence[ | |||
| _typing.Dict[str, _typing.Any] | |||
| ] = hyper_parameter_space | |||
| else: | |||
| self.__hyper_parameter_space: _typing.Sequence[ | |||
| _typing.Dict[str, _typing.Any] | |||
| ] = [] | |||
| if hyper_parameter != Ellipsis and isinstance(hyper_parameter, dict): | |||
| self.__hyper_parameter: _typing.Dict[str, _typing.Any] = hyper_parameter | |||
| else: | |||
| self.__hyper_parameter: _typing.Dict[str, _typing.Any] = {} | |||
| def from_hyper_parameter(self, hyper_parameter: _typing.Dict[str, _typing.Any]): | |||
| raise NotImplementedError | |||
| class ClassificationModel(_BaseModel): | |||
| def _initialize(self): | |||
| raise NotImplementedError | |||
| def from_hyper_parameter( | |||
| self, hyper_parameter: _typing.Dict[str, _typing.Any] | |||
| ) -> "ClassificationModel": | |||
| new_model: ClassificationModel = self.__class__( | |||
| num_features=self.num_features, | |||
| num_classes=self.num_classes, | |||
| device=self.device, | |||
| init=False, | |||
| ) | |||
| _hyper_parameter = self.hyper_parameter | |||
| _hyper_parameter.update(hyper_parameter) | |||
| new_model.hyper_parameter = _hyper_parameter | |||
| new_model.initialize() | |||
| return new_model | |||
| def __init__( | |||
| self, | |||
| num_features: int = ..., | |||
| num_classes: int = ..., | |||
| num_graph_features: int = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., | |||
| hyper_parameter: _typing.Dict[str, _typing.Any] = ..., | |||
| init: bool = False, | |||
| **kwargs | |||
| ): | |||
| if "initialize" in kwargs: | |||
| del kwargs["initialize"] | |||
| super(ClassificationModel, self).__init__( | |||
| initialize=init, | |||
| hyper_parameter_space=hyper_parameter_space, | |||
| hyper_parameter=hyper_parameter, | |||
| device=device, | |||
| **kwargs | |||
| ) | |||
| if num_classes != Ellipsis and type(num_classes) == int: | |||
| self.__num_classes: int = num_classes if num_classes > 0 else 0 | |||
| else: | |||
| self.__num_classes: int = 0 | |||
| if num_features != Ellipsis and type(num_features) == int: | |||
| self.__num_features: int = num_features if num_features > 0 else 0 | |||
| else: | |||
| self.__num_features: int = 0 | |||
| if num_graph_features != Ellipsis and type(num_graph_features) == int: | |||
| if num_graph_features > 0: | |||
| self.__num_graph_features: int = num_graph_features | |||
| else: | |||
| self.__num_graph_features: int = 0 | |||
| else: | |||
| self.__num_graph_features: int = 0 | |||
| def __repr__(self) -> str: | |||
| import yaml | |||
| return yaml.dump(self.hyper_parameter) | |||
| @property | |||
| def num_classes(self) -> int: | |||
| return self.__num_classes | |||
| @num_classes.setter | |||
| def num_classes(self, __num_classes: int): | |||
| if type(__num_classes) != int: | |||
| raise TypeError | |||
| if not __num_classes > 0: | |||
| raise ValueError | |||
| self.__num_classes = __num_classes if __num_classes > 0 else 0 | |||
| @property | |||
| def num_features(self) -> int: | |||
| return self.__num_features | |||
| @num_features.setter | |||
| def num_features(self, __num_features: int): | |||
| if type(__num_features) != int: | |||
| raise TypeError | |||
| if not __num_features > 0: | |||
| raise ValueError | |||
| self.__num_features = __num_features if __num_features > 0 else 0 | |||
| def get_num_classes(self) -> int: | |||
| # todo: consider replacing with property with getter and setter | |||
| return self.__num_classes | |||
| def set_num_classes(self, num_classes: int) -> None: | |||
| # todo: consider replacing with property with getter and setter | |||
| if type(num_classes) != int: | |||
| raise TypeError | |||
| self.__num_classes = num_classes if num_classes > 0 else 0 | |||
| def get_num_features(self) -> int: | |||
| # todo: consider replacing with property with getter and setter | |||
| return self.__num_features | |||
| def set_num_features(self, num_features: int): | |||
| # todo: consider replacing with property with getter and setter | |||
| if type(num_features) != int: | |||
| raise TypeError | |||
| self.__num_features = num_features if num_features > 0 else 0 | |||
| def set_num_graph_features(self, num_graph_features: int): | |||
| # todo: consider replacing with property with getter and setter | |||
| if type(num_graph_features) != int: | |||
| raise TypeError | |||
| else: | |||
| if num_graph_features > 0: | |||
| self.__num_graph_features = num_graph_features | |||
| else: | |||
| self.__num_graph_features = 0 | |||
| class _ClassificationModel(torch.nn.Module): | |||
| def __init__(self): | |||
| super(_ClassificationModel, self).__init__() | |||
| def cls_encode(self, data) -> torch.Tensor: | |||
| raise NotImplementedError | |||
| def cls_decode(self, x: torch.Tensor) -> torch.Tensor: | |||
| raise NotImplementedError | |||
| def cls_forward(self, data) -> torch.Tensor: | |||
| return self.cls_decode(self.cls_encode(data)) | |||
| class ClassificationSupportedSequentialModel(_ClassificationModel): | |||
| def __init__(self): | |||
| super(ClassificationSupportedSequentialModel, self).__init__() | |||
| @property | |||
| def sequential_encoding_layers(self) -> torch.nn.ModuleList: | |||
| raise NotImplementedError | |||
| def cls_encode(self, data) -> torch.Tensor: | |||
| raise NotImplementedError | |||
| def cls_decode(self, x: torch.Tensor) -> torch.Tensor: | |||
| raise NotImplementedError | |||
| @@ -21,9 +21,22 @@ class GAT(torch.nn.Module): | |||
| self.args = args | |||
| self.num_layer = int(self.args["num_layers"]) | |||
| missing_keys = list(set(["features_num", "num_class", "num_layers", "hidden", "heads", "dropout", "act"]) - set(self.args.keys())) | |||
| missing_keys = list( | |||
| set( | |||
| [ | |||
| "features_num", | |||
| "num_class", | |||
| "num_layers", | |||
| "hidden", | |||
| "heads", | |||
| "dropout", | |||
| "act", | |||
| ] | |||
| ) | |||
| - set(self.args.keys()) | |||
| ) | |||
| if len(missing_keys) > 0: | |||
| raise Exception("Missing keys: %s." % ','.join(missing_keys)) | |||
| raise Exception("Missing keys: %s." % ",".join(missing_keys)) | |||
| if not self.num_layer == len(self.args["hidden"]) + 1: | |||
| LOGGER.warn("Warning: layer size does not match the length of hidden units") | |||
| @@ -82,6 +95,24 @@ class GAT(torch.nn.Module): | |||
| return F.log_softmax(x, dim=1) | |||
| def lp_encode(self, data): | |||
| x = data.x | |||
| for i in range(self.num_layer - 1): | |||
| x = self.convs[i](x, data.train_pos_edge_index) | |||
| if i != self.num_layer - 2: | |||
| x = activate_func(x, self.args["act"]) | |||
| # x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
| return x | |||
| def lp_decode(self, z, pos_edge_index, neg_edge_index): | |||
| edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) | |||
| logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) | |||
| return logits | |||
| def lp_decode_all(self, z): | |||
| prob_adj = z @ z.t() | |||
| return (prob_adj > 0).nonzero(as_tuple=False).t() | |||
| @register_model("gat") | |||
| class AutoGAT(BaseModel): | |||
| @@ -1,66 +1,287 @@ | |||
| import torch | |||
| import torch.nn.functional as F | |||
| from torch_geometric.nn import GCNConv | |||
| import torch.nn.functional | |||
| import typing as _typing | |||
| from torch_geometric.nn.conv import GCNConv | |||
| import autogl.data | |||
| from . import register_model | |||
| from .base import BaseModel, activate_func | |||
| from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger("GCNModel") | |||
| def set_default(args, d): | |||
| for k, v in d.items(): | |||
| if k not in args: | |||
| args[k] = v | |||
| return args | |||
| class GCN(ClassificationSupportedSequentialModel): | |||
| class _GCNLayer(torch.nn.Module): | |||
| def __init__( | |||
| self, | |||
| input_channels: int, | |||
| output_channels: int, | |||
| add_self_loops: bool = True, | |||
| normalize: bool = True, | |||
| activation_name: _typing.Optional[str] = ..., | |||
| dropout_probability: _typing.Optional[float] = ..., | |||
| ): | |||
| super().__init__() | |||
| self._convolution: GCNConv = GCNConv( | |||
| input_channels, | |||
| output_channels, | |||
| add_self_loops=bool(add_self_loops), | |||
| normalize=bool(normalize), | |||
| ) | |||
| if ( | |||
| activation_name is not Ellipsis | |||
| and activation_name is not None | |||
| and type(activation_name) == str | |||
| ): | |||
| self._activation_name: _typing.Optional[str] = activation_name | |||
| else: | |||
| self._activation_name: _typing.Optional[str] = None | |||
| if ( | |||
| dropout_probability is not Ellipsis | |||
| and dropout_probability is not None | |||
| and type(dropout_probability) == float | |||
| ): | |||
| if dropout_probability < 0: | |||
| dropout_probability = 0 | |||
| if dropout_probability > 1: | |||
| dropout_probability = 1 | |||
| self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout( | |||
| dropout_probability | |||
| ) | |||
| else: | |||
| self._dropout: _typing.Optional[torch.nn.Dropout] = None | |||
| def forward(self, data, enable_activation: bool = True) -> torch.Tensor: | |||
| x: torch.Tensor = getattr(data, "x") | |||
| edge_index: torch.LongTensor = getattr(data, "edge_index") | |||
| edge_weight: _typing.Optional[torch.Tensor] = getattr( | |||
| data, "edge_weight", None | |||
| ) | |||
| """ Validate the arguments """ | |||
| if not type(x) == type(edge_index) == torch.Tensor: | |||
| raise TypeError | |||
| if edge_weight is not None and ( | |||
| type(edge_weight) != torch.Tensor | |||
| or edge_index.size() != (2, edge_weight.size(0)) | |||
| ): | |||
| edge_weight: _typing.Optional[torch.Tensor] = None | |||
| x: torch.Tensor = self._convolution.forward(x, edge_index, edge_weight) | |||
| if self._activation_name is not None and enable_activation: | |||
| x: torch.Tensor = activate_func(x, self._activation_name) | |||
| if self._dropout is not None: | |||
| x: torch.Tensor = self._dropout.forward(x) | |||
| return x | |||
| def __init__( | |||
| self, | |||
| num_features: int, | |||
| num_classes: int, | |||
| hidden_features: _typing.Sequence[int], | |||
| activation_name: str, | |||
| dropout: _typing.Union[ | |||
| _typing.Optional[float], _typing.Sequence[_typing.Optional[float]] | |||
| ] = None, | |||
| add_self_loops: bool = True, | |||
| normalize: bool = True, | |||
| ): | |||
| if isinstance(dropout, _typing.Sequence): | |||
| if len(dropout) != len(hidden_features) + 1: | |||
| raise TypeError( | |||
| "When the dropout argument is a sequence, " | |||
| "The sequence length must equal to the number of layers to construct." | |||
| ) | |||
| for _dropout in dropout: | |||
| if _dropout is not None and type(_dropout) != float: | |||
| raise TypeError( | |||
| "When the dropout argument is a sequence, " | |||
| "every item in the sequence must be float or None" | |||
| ) | |||
| dropout_list: _typing.Sequence[_typing.Optional[float]] = dropout | |||
| elif type(dropout) == float: | |||
| if dropout < 0: | |||
| dropout = 0 | |||
| if dropout > 1: | |||
| dropout = 1 | |||
| dropout_list: _typing.Sequence[_typing.Optional[float]] = [ | |||
| dropout for _ in range(len(hidden_features)) | |||
| ] + [None] | |||
| elif dropout in (None, Ellipsis, ...): | |||
| dropout_list: _typing.Sequence[_typing.Optional[float]] = [ | |||
| None for _ in range(len(hidden_features) + 1) | |||
| ] | |||
| else: | |||
| raise TypeError( | |||
| "The provided dropout argument must be a float number or None or " | |||
| "a sequence in which each item is either a float Number or None." | |||
| ) | |||
| super().__init__() | |||
| if len(hidden_features) == 0: | |||
| self.__sequential_encoding_layers: torch.nn.ModuleList = ( | |||
| torch.nn.ModuleList( | |||
| ( | |||
| self._GCNLayer( | |||
| num_features, | |||
| num_classes, | |||
| add_self_loops, | |||
| normalize, | |||
| dropout_probability=dropout_list[0], | |||
| ), | |||
| ) | |||
| ) | |||
| ) | |||
| else: | |||
| self.__sequential_encoding_layers: torch.nn.ModuleList = ( | |||
| torch.nn.ModuleList() | |||
| ) | |||
| self.__sequential_encoding_layers.append( | |||
| self._GCNLayer( | |||
| num_features, | |||
| hidden_features[0], | |||
| add_self_loops, | |||
| normalize, | |||
| activation_name, | |||
| dropout_list[0], | |||
| ) | |||
| ) | |||
| for hidden_feature_index in range(len(hidden_features)): | |||
| if hidden_feature_index + 1 < len(hidden_features): | |||
| self.__sequential_encoding_layers.append( | |||
| self._GCNLayer( | |||
| hidden_features[hidden_feature_index], | |||
| hidden_features[hidden_feature_index + 1], | |||
| add_self_loops, | |||
| normalize, | |||
| activation_name, | |||
| dropout_list[hidden_feature_index + 1], | |||
| ) | |||
| ) | |||
| else: | |||
| self.__sequential_encoding_layers.append( | |||
| self._GCNLayer( | |||
| hidden_features[hidden_feature_index], | |||
| num_classes, | |||
| add_self_loops, | |||
| normalize, | |||
| dropout_list[-1], | |||
| ) | |||
| ) | |||
| @property | |||
| def sequential_encoding_layers(self) -> torch.nn.ModuleList: | |||
| return self.__sequential_encoding_layers | |||
| def __extract_edge_indexes_and_weights( | |||
| self, data | |||
| ) -> _typing.Union[ | |||
| _typing.Sequence[ | |||
| _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] | |||
| ], | |||
| _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]], | |||
| ]: | |||
| def __compose_edge_index_and_weight( | |||
| _edge_index: torch.LongTensor, | |||
| _edge_weight: _typing.Optional[torch.Tensor] = None, | |||
| ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: | |||
| if type(_edge_index) != torch.Tensor or _edge_index.dtype != torch.int64: | |||
| raise TypeError | |||
| if _edge_weight is not None and ( | |||
| type(_edge_weight) != torch.Tensor | |||
| or _edge_index.size() != (2, _edge_weight.size(0)) | |||
| ): | |||
| _edge_weight: _typing.Optional[torch.Tensor] = None | |||
| return _edge_index, _edge_weight | |||
| if not ( | |||
| hasattr(data, "edge_indexes") | |||
| and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) | |||
| and len(getattr(data, "edge_indexes")) | |||
| == len(self.__sequential_encoding_layers) | |||
| ): | |||
| return __compose_edge_index_and_weight( | |||
| getattr(data, "edge_index"), getattr(data, "edge_weight", None) | |||
| ) | |||
| for __edge_index in getattr(data, "edge_indexes"): | |||
| if type(__edge_index) != torch.Tensor or __edge_index.dtype != torch.int64: | |||
| return __compose_edge_index_and_weight( | |||
| getattr(data, "edge_index"), getattr(data, "edge_weight", None) | |||
| ) | |||
| if ( | |||
| hasattr(data, "edge_weights") | |||
| and isinstance(getattr(data, "edge_weights"), _typing.Sequence) | |||
| and len(getattr(data, "edge_weights")) | |||
| == len(self.__sequential_encoding_layers) | |||
| ): | |||
| return [ | |||
| __compose_edge_index_and_weight(_edge_index, _edge_weight) | |||
| for _edge_index, _edge_weight in zip( | |||
| getattr(data, "edge_indexes"), getattr(data, "edge_weights") | |||
| ) | |||
| ] | |||
| else: | |||
| return [ | |||
| __compose_edge_index_and_weight(__edge_index) | |||
| for __edge_index in getattr(data, "edge_indexes") | |||
| ] | |||
| class GCN(torch.nn.Module): | |||
| def __init__(self, args): | |||
| super(GCN, self).__init__() | |||
| self.args = args | |||
| self.num_layer = int(self.args["num_layers"]) | |||
| def cls_encode(self, data) -> torch.Tensor: | |||
| edge_indexes_and_weights: _typing.Union[ | |||
| _typing.Sequence[ | |||
| _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] | |||
| ], | |||
| _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]], | |||
| ] = self.__extract_edge_indexes_and_weights(data) | |||
| missing_keys = list(set(["features_num", "num_class", "num_layers", "hidden", "dropout", "act"]) - set(self.args.keys())) | |||
| if len(missing_keys) > 0: | |||
| raise Exception("Missing keys: %s." % ','.join(missing_keys)) | |||
| if (not isinstance(edge_indexes_and_weights, tuple)) and isinstance( | |||
| edge_indexes_and_weights[0], tuple | |||
| ): | |||
| """ edge_indexes_and_weights is sequence of (edge_index, edge_weight) """ | |||
| assert len(edge_indexes_and_weights) == len( | |||
| self.__sequential_encoding_layers | |||
| ) | |||
| x: torch.Tensor = getattr(data, "x") | |||
| for _edge_index_and_weight, gcn in zip( | |||
| edge_indexes_and_weights, self.__sequential_encoding_layers | |||
| ): | |||
| _temp_data = autogl.data.Data(x=x, edge_index=_edge_index_and_weight[0]) | |||
| _temp_data.edge_weight = _edge_index_and_weight[1] | |||
| x = gcn(_temp_data) | |||
| return x | |||
| else: | |||
| """ edge_indexes_and_weights is (edge_index, edge_weight) """ | |||
| x = getattr(data, "x") | |||
| for gcn in self.__sequential_encoding_layers: | |||
| _temp_data = autogl.data.Data( | |||
| x=x, edge_index=edge_indexes_and_weights[0] | |||
| ) | |||
| _temp_data.edge_weight = edge_indexes_and_weights[1] | |||
| x = gcn(_temp_data) | |||
| return x | |||
| if not self.num_layer == len(self.args["hidden"]) + 1: | |||
| LOGGER.warn("Warning: layer size does not match the length of hidden units") | |||
| def cls_decode(self, x: torch.Tensor) -> torch.Tensor: | |||
| return torch.nn.functional.log_softmax(x, dim=1) | |||
| self.convs = torch.nn.ModuleList() | |||
| self.convs.append(GCNConv(self.args["features_num"], self.args["hidden"][0])) | |||
| for i in range(self.num_layer - 2): | |||
| self.convs.append( | |||
| GCNConv(self.args["hidden"][i], self.args["hidden"][i + 1]) | |||
| def lp_encode(self, data): | |||
| x: torch.Tensor = getattr(data, "x") | |||
| for i in range(len(self.__sequential_encoding_layers) - 2): | |||
| x = self.__sequential_encoding_layers[i]( | |||
| autogl.data.Data(x, getattr(data, "edge_index")) | |||
| ) | |||
| self.convs.append( | |||
| GCNConv(self.args["hidden"][self.num_layer - 2], self.args["num_class"]) | |||
| x = self.__sequential_encoding_layers[-2]( | |||
| autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False | |||
| ) | |||
| return x | |||
| def lp_decode(self, z, pos_edge_index, neg_edge_index): | |||
| edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) | |||
| logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) | |||
| return logits | |||
| def forward(self, data): | |||
| try: | |||
| x = data.x | |||
| except: | |||
| print("no x") | |||
| pass | |||
| try: | |||
| edge_index = data.edge_index | |||
| except: | |||
| print("no index") | |||
| pass | |||
| try: | |||
| edge_weight = data.edge_weight | |||
| except: | |||
| edge_weight = None | |||
| pass | |||
| for i in range(self.num_layer): | |||
| x = self.convs[i](x, edge_index, edge_weight) | |||
| if i != self.num_layer - 1: | |||
| x = activate_func(x, self.args["act"]) | |||
| x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
| return F.log_softmax(x, dim=1) | |||
| def lp_decode_all(self, z): | |||
| prob_adj = z @ z.t() | |||
| return (prob_adj > 0).nonzero(as_tuple=False).t() | |||
| @register_model("gcn") | |||
| @@ -96,21 +317,33 @@ class AutoGCN(BaseModel): | |||
| """ | |||
| def __init__( | |||
| self, num_features=None, num_classes=None, device=None, init=False, **args | |||
| ): | |||
| super(AutoGCN, self).__init__() | |||
| self.num_features = num_features if num_features is not None else 0 | |||
| self.num_classes = int(num_classes) if num_classes is not None else 0 | |||
| self.device = device if device is not None else "cpu" | |||
| self.init = True | |||
| self, | |||
| num_features: int = ..., | |||
| num_classes: int = ..., | |||
| device: _typing.Union[str, torch.device] = ..., | |||
| init: bool = False, | |||
| **kwargs | |||
| ) -> None: | |||
| super().__init__() | |||
| self.num_features = num_features | |||
| self.num_classes = num_classes | |||
| self.device = device | |||
| self.params = { | |||
| "features_num": self.num_features, | |||
| "num_class": self.num_classes, | |||
| } | |||
| self.space = [ | |||
| { | |||
| "parameterName": "add_self_loops", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": [1], | |||
| }, | |||
| { | |||
| "parameterName": "normalize", | |||
| "type": "CATEGORICAL", | |||
| "feasiblePoints": [1], | |||
| }, | |||
| { | |||
| "parameterName": "num_layers", | |||
| "type": "DISCRETE", | |||
| @@ -142,11 +375,18 @@ class AutoGCN(BaseModel): | |||
| ] | |||
| # initial point of hp search | |||
| # self.hyperparams = { | |||
| # "num_layers": 2, | |||
| # "hidden": [16], | |||
| # "dropout": 0.2, | |||
| # "act": "leaky_relu", | |||
| # } | |||
| self.hyperparams = { | |||
| "num_layers": 2, | |||
| "hidden": [16], | |||
| "dropout": 0.2, | |||
| "act": "leaky_relu", | |||
| "num_layers": 3, | |||
| "hidden": [128, 64], | |||
| "dropout": 0, | |||
| "act": "relu", | |||
| } | |||
| self.initialized = False | |||
| @@ -154,8 +394,15 @@ class AutoGCN(BaseModel): | |||
| self.initialize() | |||
| def initialize(self): | |||
| # """Initialize model.""" | |||
| if self.initialized: | |||
| return | |||
| self.initialized = True | |||
| self.model = GCN({**self.params, **self.hyperparams}).to(self.device) | |||
| self.model = GCN( | |||
| self.num_features, | |||
| self.num_classes, | |||
| self.hyperparams.get("hidden"), | |||
| self.hyperparams.get("act"), | |||
| self.hyperparams.get("dropout", None), | |||
| bool(self.hyperparams.get("add_self_loops", True)), | |||
| bool(self.hyperparams.get("normalize", True)), | |||
| ).to(self.device) | |||
| @@ -25,14 +25,27 @@ class GIN(torch.nn.Module): | |||
| self.num_layer = int(self.args["num_layers"]) | |||
| assert self.num_layer > 2, "Number of layers in GIN should not less than 3" | |||
| missing_keys = list(set(["features_num", "num_class", "num_graph_features", | |||
| "num_layers", "hidden", "dropout", "act", | |||
| "mlp_layers", "eps"]) - set(self.args.keys())) | |||
| missing_keys = list( | |||
| set( | |||
| [ | |||
| "features_num", | |||
| "num_class", | |||
| "num_graph_features", | |||
| "num_layers", | |||
| "hidden", | |||
| "dropout", | |||
| "act", | |||
| "mlp_layers", | |||
| "eps", | |||
| ] | |||
| ) | |||
| - set(self.args.keys()) | |||
| ) | |||
| if len(missing_keys) > 0: | |||
| raise Exception("Missing keys: %s." % ','.join(missing_keys)) | |||
| if not self.num_layer == len(self.args['hidden']) + 1: | |||
| LOGGER.warn('Warning: layer size does not match the length of hidden units') | |||
| self.num_graph_features = self.args['num_graph_features'] | |||
| raise Exception("Missing keys: %s." % ",".join(missing_keys)) | |||
| if not self.num_layer == len(self.args["hidden"]) + 1: | |||
| LOGGER.warn("Warning: layer size does not match the length of hidden units") | |||
| self.num_graph_features = self.args["num_graph_features"] | |||
| if self.args["act"] == "leaky_relu": | |||
| act = LeakyReLU() | |||
| @@ -80,7 +93,8 @@ class GIN(torch.nn.Module): | |||
| def forward(self, data): | |||
| x, edge_index, batch = data.x, data.edge_index, data.batch | |||
| graph_feature = data.gf | |||
| if self.num_graph_features > 0: | |||
| graph_feature = data.gf | |||
| for i in range(self.num_layer - 2): | |||
| x = self.convs[i](x, edge_index) | |||
| @@ -88,7 +102,8 @@ class GIN(torch.nn.Module): | |||
| x = self.bns[i](x) | |||
| x = global_add_pool(x, batch) | |||
| x = torch.cat([x, graph_feature], dim=-1) | |||
| if self.num_graph_features > 0: | |||
| x = torch.cat([x, graph_feature], dim=-1) | |||
| x = self.fc1(x) | |||
| x = activate_func(x, self.args["act"]) | |||
| x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
| @@ -0,0 +1,407 @@ | |||
| import typing as _typing | |||
| import torch.nn.functional | |||
| from torch_geometric.nn.conv import MessagePassing | |||
| from torch_sparse import SparseTensor, matmul | |||
| from . import register_model | |||
| from .base import ClassificationModel, ClassificationSupportedSequentialModel | |||
| class _GraphSAINTAggregationLayers: | |||
| class MultiOrderAggregationLayer(torch.nn.Module): | |||
| class Order0Aggregator(torch.nn.Module): | |||
| def __init__( | |||
| self, | |||
| input_dimension: int, | |||
| output_dimension: int, | |||
| bias: bool = True, | |||
| activation: _typing.Optional[str] = "ReLU", | |||
| batch_norm: bool = True, | |||
| ): | |||
| super().__init__() | |||
| if not type(input_dimension) == type(output_dimension) == int: | |||
| raise TypeError | |||
| if not (input_dimension > 0 and output_dimension > 0): | |||
| raise ValueError | |||
| if not type(bias) == bool: | |||
| raise TypeError | |||
| self.__linear_transform = torch.nn.Linear( | |||
| input_dimension, output_dimension, bias | |||
| ) | |||
| self.__linear_transform.reset_parameters() | |||
| if type(activation) == str: | |||
| if activation.lower() == "ReLU".lower(): | |||
| self.__activation = torch.nn.functional.relu | |||
| elif activation.lower() == "elu": | |||
| self.__activation = torch.nn.functional.elu | |||
| elif hasattr(torch.nn.functional, activation) and callable( | |||
| getattr(torch.nn.functional, activation) | |||
| ): | |||
| self.__activation = getattr(torch.nn.functional, activation) | |||
| else: | |||
| self.__activation = lambda x: x | |||
| else: | |||
| self.__activation = lambda x: x | |||
| if type(batch_norm) != bool: | |||
| raise TypeError | |||
| else: | |||
| self.__optional_batch_normalization: _typing.Optional[ | |||
| torch.nn.BatchNorm1d | |||
| ] = ( | |||
| torch.nn.BatchNorm1d(output_dimension, 1e-8) | |||
| if batch_norm | |||
| else None | |||
| ) | |||
| def forward( | |||
| self, | |||
| x: _typing.Union[ | |||
| torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor] | |||
| ], | |||
| _edge_index: torch.Tensor, | |||
| _edge_weight: _typing.Optional[torch.Tensor] = None, | |||
| _size: _typing.Optional[_typing.Tuple[int, int]] = None, | |||
| ) -> torch.Tensor: | |||
| __output: torch.Tensor = self.__linear_transform(x) | |||
| if self.__activation is not None and callable(self.__activation): | |||
| __output: torch.Tensor = self.__activation(__output) | |||
| if self.__optional_batch_normalization is not None and isinstance( | |||
| self.__optional_batch_normalization, torch.nn.BatchNorm1d | |||
| ): | |||
| __output: torch.Tensor = self.__optional_batch_normalization( | |||
| __output | |||
| ) | |||
| return __output | |||
| class Order1Aggregator(MessagePassing): | |||
| def __init__( | |||
| self, | |||
| input_dimension: int, | |||
| output_dimension: int, | |||
| bias: bool = True, | |||
| activation: _typing.Optional[str] = "ReLU", | |||
| batch_norm: bool = True, | |||
| ): | |||
| super().__init__(aggr="add") | |||
| if not type(input_dimension) == type(output_dimension) == int: | |||
| raise TypeError | |||
| if not (input_dimension > 0 and output_dimension > 0): | |||
| raise ValueError | |||
| if not type(bias) == bool: | |||
| raise TypeError | |||
| self.__linear_transform = torch.nn.Linear( | |||
| input_dimension, output_dimension, bias | |||
| ) | |||
| self.__linear_transform.reset_parameters() | |||
| if type(activation) == str: | |||
| if activation.lower() == "ReLU".lower(): | |||
| self.__activation = torch.nn.functional.relu | |||
| elif activation.lower() == "elu": | |||
| self.__activation = torch.nn.functional.elu | |||
| elif hasattr(torch.nn.functional, activation) and callable( | |||
| getattr(torch.nn.functional, activation) | |||
| ): | |||
| self.__activation = getattr(torch.nn.functional, activation) | |||
| else: | |||
| self.__activation = lambda x: x | |||
| else: | |||
| self.__activation = lambda x: x | |||
| if type(batch_norm) != bool: | |||
| raise TypeError | |||
| else: | |||
| self.__optional_batch_normalization: _typing.Optional[ | |||
| torch.nn.BatchNorm1d | |||
| ] = ( | |||
| torch.nn.BatchNorm1d(output_dimension, 1e-8) | |||
| if batch_norm | |||
| else None | |||
| ) | |||
| def forward( | |||
| self, | |||
| x: _typing.Union[ | |||
| torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor] | |||
| ], | |||
| _edge_index: torch.Tensor, | |||
| _edge_weight: _typing.Optional[torch.Tensor] = None, | |||
| _size: _typing.Optional[_typing.Tuple[int, int]] = None, | |||
| ) -> torch.Tensor: | |||
| if type(x) == torch.Tensor: | |||
| x: _typing.Tuple[torch.Tensor, torch.Tensor] = (x, x) | |||
| __output = self.propagate( | |||
| _edge_index, x=x, edge_weight=_edge_weight, size=_size | |||
| ) | |||
| __output: torch.Tensor = self.__linear_transform(__output) | |||
| if self.__activation is not None and callable(self.__activation): | |||
| __output: torch.Tensor = self.__activation(__output) | |||
| if self.__optional_batch_normalization is not None and isinstance( | |||
| self.__optional_batch_normalization, torch.nn.BatchNorm1d | |||
| ): | |||
| __output: torch.Tensor = self.__optional_batch_normalization( | |||
| __output | |||
| ) | |||
| return __output | |||
def message(
    self, x_j: torch.Tensor, edge_weight: _typing.Optional[torch.Tensor]
) -> torch.Tensor:
    """Scale each source-node feature row by its edge weight, when one is given."""
    if edge_weight is None:
        return x_j
    return edge_weight.view(-1, 1) * x_j
def message_and_aggregate(
    self,
    adj_t: SparseTensor,
    x: _typing.Union[
        torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]
    ],
) -> torch.Tensor:
    # Fused message + aggregation for the sparse-adjacency fast path:
    # sparse-dense matmul of adj_t with the source features x[0], reduced
    # with this module's configured aggregation (self.aggr).
    return matmul(adj_t, x[0], reduce=self.aggr)
@property
def integral_output_dimension(self) -> int:
    # Total output width: one slice of _each_order_output_dimension for each
    # order in 0.._order (orders are concatenated feature-wise downstream).
    return (self._order + 1) * self._each_order_output_dimension
def __init__(
    self,
    _input_dimension: int,
    _each_order_output_dimension: int,
    _order: int,
    bias: bool = True,
    activation: _typing.Optional[str] = "ReLU",
    batch_norm: bool = True,
    _dropout: _typing.Optional[float] = ...,
):
    """Build a multi-order aggregation layer.

    An order-0 aggregator is always created; an order-1 aggregator is added
    when ``_order == 1``. An optional dropout (probability clamped to [0, 1])
    is applied after aggregation.
    """
    super().__init__()
    # Deliberately strict type checks (bool is rejected for the int arguments).
    if not (
        type(_input_dimension) == type(_order) == int
        and type(_each_order_output_dimension) == int
    ):
        raise TypeError
    if _input_dimension <= 0 or _each_order_output_dimension <= 0:
        raise ValueError
    if _order not in (0, 1):
        raise ValueError("Unsupported order number")
    self._input_dimension: int = _input_dimension
    self._each_order_output_dimension: int = _each_order_output_dimension
    self._order: int = _order
    if type(bias) != bool:
        raise TypeError
    self.__order0_transform = self.Order0Aggregator(
        self._input_dimension,
        self._each_order_output_dimension,
        bias,
        activation,
        batch_norm,
    )
    self.__order1_transform = (
        self.Order1Aggregator(
            self._input_dimension,
            self._each_order_output_dimension,
            bias,
            activation,
            batch_norm,
        )
        if _order == 1
        else None
    )
    if _dropout is not None and type(_dropout) == float:
        # Clamp an out-of-range probability instead of raising.
        clamped_probability = min(max(_dropout, 0), 1)
        self.__optional_dropout: _typing.Optional[
            torch.nn.Dropout
        ] = torch.nn.Dropout(clamped_probability)
    else:
        # Ellipsis / None / non-float all mean "no dropout".
        self.__optional_dropout: _typing.Optional[torch.nn.Dropout] = None
def _forward(
    self,
    x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]],
    edge_index: torch.Tensor,
    edge_weight: _typing.Optional[torch.Tensor] = None,
    size: _typing.Optional[_typing.Tuple[int, int]] = None,
) -> torch.Tensor:
    """Run the order-0 (and, when present, order-1) aggregators, concatenate
    their outputs feature-wise, and apply the optional dropout."""
    result: torch.Tensor = self.__order0_transform(x, edge_index, edge_weight, size)
    # isinstance(None, ...) is False, so the None case is covered implicitly.
    if isinstance(self.__order1_transform, self.Order1Aggregator):
        order1_result = self.__order1_transform(x, edge_index, edge_weight, size)
        result = torch.cat([result, order1_result], dim=1)
    if isinstance(self.__optional_dropout, torch.nn.Dropout):
        result = self.__optional_dropout(result)
    return result
def forward(self, data) -> torch.Tensor:
    """Validate the tensor fields of ``data`` and delegate to :meth:`_forward`.

    Raises ``TypeError`` when ``x``, ``edge_index``, or a present
    ``edge_weight`` is not a ``torch.Tensor``.
    """
    node_features = getattr(data, "x")
    if type(node_features) != torch.Tensor:
        raise TypeError
    connectivity = getattr(data, "edge_index")
    if type(connectivity) != torch.Tensor:
        raise TypeError
    # edge_weight is optional; absence is treated the same as None.
    weights = getattr(data, "edge_weight", None)
    if weights is not None and type(weights) != torch.Tensor:
        raise TypeError
    return self._forward(node_features, connectivity, weights)
class WrappedDropout(torch.nn.Module):
    """Adapter that lets a ``torch.nn.Dropout`` accept either a raw tensor or
    a data object carrying its tensor in an ``x`` attribute."""

    def __init__(self, dropout_module: torch.nn.Dropout):
        super().__init__()
        self.__dropout_module: torch.nn.Dropout = dropout_module

    def forward(self, tenser_or_data) -> torch.Tensor:
        """Apply dropout to the tensor itself or to its ``x`` attribute."""
        if type(tenser_or_data) == torch.Tensor:
            return self.__dropout_module(tenser_or_data)
        candidate = getattr(tenser_or_data, "x", None)
        if type(candidate) == torch.Tensor:
            return self.__dropout_module(candidate)
        raise TypeError
class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialModel):
    """Sequential multi-order aggregation encoder/decoder for GraphSAINT.

    The encoder is a pre-dropout followed by a chain of
    ``MultiOrderAggregationLayer`` modules; the decoder is an optional L2
    normalization, a linear projection to ``num_classes``, and log-softmax.

    Parameters
    ----------
    num_features : int
        Input feature dimension.
    num_classes : int
        Number of target classes.
    _output_dimension_for_each_order : int
        Output width contributed by each aggregation order of every layer.
    _layers_order_list : Sequence[int]
        Aggregation order for each encoding layer.
    _pre_dropout : float
        Dropout probability applied before the first aggregation layer
        (clamped to [0, 1]).
    _layers_dropout : float or Sequence[float]
        Per-layer dropout probabilities; a single float is broadcast to all
        layers (clamped to [0, 1]).
    activation : Optional[str]
        Activation name forwarded to each aggregation layer.
    bias, batch_norm, normalize : bool
        Layer options; ``normalize`` L2-normalizes embeddings before decoding.
    """

    def __init__(
        self,
        num_features: int,
        num_classes: int,
        _output_dimension_for_each_order: int,
        _layers_order_list: _typing.Sequence[int],
        _pre_dropout: float,
        _layers_dropout: _typing.Union[float, _typing.Sequence[float]],
        activation: _typing.Optional[str] = "ReLU",
        bias: bool = True,
        batch_norm: bool = True,
        normalize: bool = True,
    ):
        super(GraphSAINTMultiOrderAggregationModel, self).__init__()
        if type(_output_dimension_for_each_order) != int:
            raise TypeError
        if not _output_dimension_for_each_order > 0:
            raise ValueError
        self._layers_order_list: _typing.Sequence[int] = _layers_order_list
        if isinstance(_layers_dropout, _typing.Sequence):
            if len(_layers_dropout) != len(_layers_order_list):
                raise ValueError
            else:
                self._layers_dropout: _typing.Sequence[float] = _layers_dropout
        elif type(_layers_dropout) == float:
            # Broadcast a scalar dropout (clamped to [0, 1]) to every layer.
            if _layers_dropout < 0:
                _layers_dropout = 0
            if _layers_dropout > 1:
                _layers_dropout = 1
            self._layers_dropout: _typing.Sequence[float] = [
                _layers_dropout for _ in _layers_order_list
            ]
        else:
            raise TypeError
        if type(_pre_dropout) != float:
            raise TypeError
        else:
            if _pre_dropout < 0:
                _pre_dropout = 0
            if _pre_dropout > 1:
                _pre_dropout = 1
        self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList(
            (
                _GraphSAINTAggregationLayers.WrappedDropout(
                    torch.nn.Dropout(_pre_dropout)
                ),
                _GraphSAINTAggregationLayers.MultiOrderAggregationLayer(
                    num_features,
                    _output_dimension_for_each_order,
                    _layers_order_list[0],
                    bias,
                    activation,
                    batch_norm,
                    # Bugfix: index the normalized per-layer sequence instead of
                    # the raw argument, which may still be a bare float here.
                    self._layers_dropout[0],
                ),
            )
        )
        for _layer_index in range(1, len(_layers_order_list)):
            self.__sequential_encoding_layers.append(
                _GraphSAINTAggregationLayers.MultiOrderAggregationLayer(
                    self.__sequential_encoding_layers[-1].integral_output_dimension,
                    _output_dimension_for_each_order,
                    _layers_order_list[_layer_index],
                    bias,
                    activation,
                    batch_norm,
                    # Bugfix: same as above -- use the normalized sequence.
                    self._layers_dropout[_layer_index],
                )
            )
        self.__apply_normalize: bool = normalize
        self.__linear_transform: torch.nn.Linear = torch.nn.Linear(
            self.__sequential_encoding_layers[-1].integral_output_dimension,
            num_classes,
            bias,
        )
        self.__linear_transform.reset_parameters()

    def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
        """Optionally L2-normalize, project to class logits, then log-softmax."""
        if self.__apply_normalize:
            x: torch.Tensor = torch.nn.functional.normalize(x, p=2, dim=1)
        return torch.nn.functional.log_softmax(self.__linear_transform(x), dim=1)

    def cls_encode(self, data) -> torch.Tensor:
        """Run every encoding layer in sequence, updating ``data.x`` in place,
        and return the final node embeddings.

        Raises ``TypeError`` when ``x``, ``edge_index``, or a present
        ``edge_weight`` is not a ``torch.Tensor``.
        """
        if type(getattr(data, "x")) != torch.Tensor:
            raise TypeError
        if type(getattr(data, "edge_index")) != torch.Tensor:
            raise TypeError
        if (
            getattr(data, "edge_weight", None) is not None
            and type(getattr(data, "edge_weight")) != torch.Tensor
        ):
            raise TypeError
        for encoding_layer in self.__sequential_encoding_layers:
            setattr(data, "x", encoding_layer(data))
        return getattr(data, "x")

    @property
    def sequential_encoding_layers(self) -> torch.nn.ModuleList:
        # Exposed for trainers that need layer-wise access to the encoder.
        return self.__sequential_encoding_layers
@register_model("GraphSAINTAggregationModel")
class GraphSAINTAggregationModel(ClassificationModel):
    """AutoGL model wrapper that builds a ``GraphSAINTMultiOrderAggregationModel``
    from the hyper-parameters held by the ``ClassificationModel`` base class."""

    def __init__(
        self,
        num_features: int = ...,
        num_classes: int = ...,
        device: _typing.Union[str, torch.device] = ...,
        init: bool = False,
        **kwargs
    ):
        # All construction is deferred to the base class; the concrete model
        # is instantiated lazily in _initialize().
        super(GraphSAINTAggregationModel, self).__init__(
            num_features, num_classes, device=device, init=init, **kwargs
        )

    # todo: Initialize with default hyper parameter space and hyper parameter
    def _initialize(self):
        """ Initialize model """
        # Required hyper-parameters: output_dimension_for_each_order,
        # layers_order_list, pre_dropout, layers_dropout.
        # Optional (with defaults): activation, bias, batch_norm, normalize.
        self.model = GraphSAINTMultiOrderAggregationModel(
            self.num_features,
            self.num_classes,
            self.hyper_parameter.get("output_dimension_for_each_order"),
            self.hyper_parameter.get("layers_order_list"),
            self.hyper_parameter.get("pre_dropout"),
            self.hyper_parameter.get("layers_dropout"),
            self.hyper_parameter.get("activation", "ReLU"),
            bool(self.hyper_parameter.get("bias", True)),
            bool(self.hyper_parameter.get("batch_norm", True)),
            bool(self.hyper_parameter.get("normalize", True)),
        ).to(self.device)
| @@ -1,163 +1,205 @@ | |||
| import torch | |||
| from . import register_model | |||
| from .base import BaseModel, activate_func | |||
| from typing import Union, Tuple | |||
| from torch_geometric.typing import OptPairTensor, Adj, Size | |||
| import typing as _typing | |||
| from torch import Tensor | |||
| from torch.nn import Linear | |||
| import torch.nn.functional as F | |||
| from torch_sparse import SparseTensor, matmul | |||
| from torch_geometric.nn.conv import MessagePassing | |||
| from torch_geometric.nn.conv import SAGEConv | |||
| import torch.nn.functional | |||
| import autogl.data | |||
| from . import register_model | |||
| from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger("SAGEModel") | |||
| class SAGEConv(MessagePassing): | |||
| r"""Modified from SAGEConv in Pytorch Geometric <https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/nn/conv/sage_conv.py> | |||
| The GraphSAGE operator from the `"Inductive Representation Learning on | |||
| Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper | |||
| .. math:: | |||
| \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot | |||
| \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j | |||
| Args: | |||
| in_channels (int or tuple): Size of each input sample. A tuple | |||
| corresponds to the sizes of source and target dimensionalities. | |||
| out_channels (int): Size of each output sample. | |||
| normalize (bool, optional): If set to :obj:`True`, output features | |||
| will be :math:`\ell_2`-normalized, *i.e.*, | |||
| :math:`\frac{\mathbf{x}^{\prime}_i} | |||
| {\| \mathbf{x}^{\prime}_i \|_2}`. | |||
| (default: :obj:`False`) | |||
| bias (bool, optional): If set to :obj:`False`, the layer will not learn | |||
| an additive bias. (default: :obj:`True`) | |||
| **kwargs (optional): Additional arguments of | |||
| :class:`torch_geometric.nn.conv.MessagePassing`. | |||
| """ | |||
| class GraphSAGE(ClassificationSupportedSequentialModel): | |||
class _SAGELayer(torch.nn.Module):
    """One GraphSAGE step: SAGE convolution, optional activation, optional dropout."""

    def __init__(
        self,
        input_channels: int,
        output_channels: int,
        aggr: str,
        activation_name: _typing.Optional[str] = ...,
        dropout_probability: _typing.Optional[float] = ...,
    ):
        super().__init__()
        self._convolution: SAGEConv = SAGEConv(
            input_channels, output_channels, aggr=aggr
        )
        # Ellipsis / None / non-str all disable the activation.
        has_activation = (
            activation_name is not Ellipsis
            and activation_name is not None
            and type(activation_name) == str
        )
        self._activation_name: _typing.Optional[str] = (
            activation_name if has_activation else None
        )
        if (
            dropout_probability is not Ellipsis
            and dropout_probability is not None
            and type(dropout_probability) == float
        ):
            # Clamp out-of-range probabilities instead of raising.
            probability = min(max(dropout_probability, 0), 1)
            self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout(
                probability
            )
        else:
            self._dropout: _typing.Optional[torch.nn.Dropout] = None

    def forward(self, data, enable_activation: bool = True) -> torch.Tensor:
        """Convolve ``data.x`` over ``data.edge_index``; ``enable_activation``
        lets callers skip the activation (e.g. for embedding output)."""
        features: torch.Tensor = getattr(data, "x")
        edges: torch.Tensor = getattr(data, "edge_index")
        if type(features) != torch.Tensor or type(edges) != torch.Tensor:
            raise TypeError
        out: torch.Tensor = self._convolution.forward(features, edges)
        if self._activation_name is not None and enable_activation:
            out = activate_func(out, self._activation_name)
        if self._dropout is not None:
            out = self._dropout.forward(out)
        return out
| def __init__( | |||
| self, | |||
| in_channels: Union[int, Tuple[int, int]], | |||
| out_channels: int, | |||
| normalize: bool = False, | |||
| bias: bool = True, | |||
| num_features: int, | |||
| num_classes: int, | |||
| hidden_features: _typing.Sequence[int], | |||
| activation_name: str, | |||
| layers_dropout: _typing.Union[ | |||
| _typing.Optional[float], _typing.Sequence[_typing.Optional[float]] | |||
| ] = None, | |||
| aggr: str = "mean", | |||
| **kwargs | |||
| ): | |||
| super(SAGEConv, self).__init__(aggr=aggr, **kwargs) | |||
| self.in_channels = in_channels | |||
| self.out_channels = out_channels | |||
| self.normalize = normalize | |||
| if isinstance(in_channels, int): | |||
| in_channels = (in_channels, in_channels) | |||
| self.lin_l = Linear(in_channels[0], out_channels, bias=bias) | |||
| self.lin_r = Linear(in_channels[1], out_channels, bias=False) | |||
| self.reset_parameters() | |||
| def reset_parameters(self): | |||
| self.lin_l.reset_parameters() | |||
| self.lin_r.reset_parameters() | |||
| def forward( | |||
| self, x: Union[Tensor, OptPairTensor], edge_index: Adj, size: Size = None | |||
| ) -> Tensor: | |||
| """""" | |||
| if isinstance(x, Tensor): | |||
| x: OptPairTensor = (x, x) | |||
| # propagate_type: (x: OptPairTensor) | |||
| out = self.propagate(edge_index, x=x, size=size) | |||
| out = self.lin_l(out) | |||
| x_r = x[1] | |||
| if x_r is not None: | |||
| out += self.lin_r(x_r) | |||
| if self.normalize: | |||
| out = F.normalize(out, p=2.0, dim=-1) | |||
| return out | |||
| def message(self, x_j: Tensor) -> Tensor: | |||
| return x_j | |||
| def message_and_aggregate(self, adj_t: SparseTensor, x: OptPairTensor) -> Tensor: | |||
| adj_t = adj_t.set_value(None, layout=None) | |||
| return matmul(adj_t, x[0], reduce=self.aggr) | |||
| def __repr__(self): | |||
| return "{}({}, {})".format( | |||
| self.__class__.__name__, self.in_channels, self.out_channels | |||
| ) | |||
def set_default(args, d):
    """Return ``args`` after filling in, from ``d``, every key it lacks.

    Existing entries in ``args`` are never overwritten; ``args`` is mutated
    in place and also returned for convenience.
    """
    for key, default_value in d.items():
        if key not in args:
            args[key] = default_value
    return args
| class GraphSAGE(torch.nn.Module): | |||
| def __init__(self, args): | |||
| super(GraphSAGE, self).__init__() | |||
| self.args = args | |||
| agg = self.args["agg"] | |||
| self.num_layer = int(self.args["num_layers"]) | |||
| if not self.num_layer == len(self.args["hidden"]) + 1: | |||
| LOGGER.warn("Warning: layer size does not match the length of hidden units") | |||
| missing_keys = list(set(["features_num", "num_class", "num_layers", | |||
| "hidden", "dropout", "act", "agg"]) - set(self.args.keys())) | |||
| if len(missing_keys) > 0: | |||
| raise Exception("Missing keys: %s." % ','.join(missing_keys)) | |||
| self.convs = torch.nn.ModuleList() | |||
| self.convs.append( | |||
| SAGEConv(self.args["features_num"], self.args["hidden"][0], aggr=agg) | |||
| ) | |||
| for i in range(self.num_layer - 2): | |||
| self.convs.append( | |||
| SAGEConv(self.args["hidden"][i], self.args["hidden"][i + 1], aggr=agg) | |||
| super().__init__() | |||
| if not type(num_features) == type(num_classes) == int: | |||
| raise TypeError | |||
| if not isinstance(hidden_features, _typing.Sequence): | |||
| raise TypeError | |||
| for hidden_feature in hidden_features: | |||
| if type(hidden_feature) != int: | |||
| raise TypeError | |||
| elif hidden_feature <= 0: | |||
| raise ValueError | |||
| if isinstance(layers_dropout, _typing.Sequence): | |||
| if len(layers_dropout) != (len(hidden_features) + 1): | |||
| raise TypeError | |||
| for d in layers_dropout: | |||
| if d is not None and type(d) != float: | |||
| raise TypeError | |||
| _layers_dropout: _typing.Sequence[_typing.Optional[float]] = layers_dropout | |||
| elif layers_dropout is None or type(layers_dropout) == float: | |||
| _layers_dropout: _typing.Sequence[_typing.Optional[float]] = [ | |||
| layers_dropout for _ in range(len(hidden_features)) | |||
| ] + [None] | |||
| else: | |||
| raise TypeError | |||
| if not type(activation_name) == type(aggr) == str: | |||
| raise TypeError | |||
| if aggr not in ("add", "max", "mean"): | |||
| aggr = "mean" | |||
| if len(hidden_features) == 0: | |||
| self.__sequential_encoding_layers: torch.nn.ModuleList = ( | |||
| torch.nn.ModuleList( | |||
| [ | |||
| self._SAGELayer( | |||
| num_features, | |||
| num_classes, | |||
| aggr, | |||
| activation_name, | |||
| _layers_dropout[0], | |||
| ) | |||
| ] | |||
| ) | |||
| ) | |||
| else: | |||
| self.__sequential_encoding_layers: torch.nn.ModuleList = ( | |||
| torch.nn.ModuleList( | |||
| [ | |||
| self._SAGELayer( | |||
| num_features, | |||
| hidden_features[0], | |||
| aggr, | |||
| activation_name, | |||
| _layers_dropout[0], | |||
| ) | |||
| ] | |||
| ) | |||
| ) | |||
| self.convs.append( | |||
| SAGEConv( | |||
| self.args["hidden"][self.num_layer - 2], | |||
| self.args["num_class"], | |||
| aggr=agg, | |||
| for i in range(len(hidden_features)): | |||
| if i + 1 < len(hidden_features): | |||
| self.__sequential_encoding_layers.append( | |||
| self._SAGELayer( | |||
| hidden_features[i], | |||
| hidden_features[i + 1], | |||
| aggr, | |||
| activation_name, | |||
| _layers_dropout[i + 1], | |||
| ) | |||
| ) | |||
| else: | |||
| self.__sequential_encoding_layers.append( | |||
| self._SAGELayer( | |||
| hidden_features[i], | |||
| num_classes, | |||
| aggr, | |||
| _layers_dropout[i + 1], | |||
| ) | |||
| ) | |||
@property
def sequential_encoding_layers(self) -> torch.nn.ModuleList:
    # Read-only access to the encoder stack, applied in order during encoding.
    return self.__sequential_encoding_layers
def cls_encode(self, data) -> torch.Tensor:
    """Encode node features, optionally with one edge index per layer.

    When ``data.edge_indexes`` is a sequence holding exactly one edge index
    per encoding layer (layer-wise sampling), layer ``i`` runs on
    ``edge_indexes[i]``; otherwise every layer reuses ``data.edge_index``.
    """
    layer_count = len(self.__sequential_encoding_layers)
    edge_indexes = getattr(data, "edge_indexes", None)
    if isinstance(edge_indexes, _typing.Sequence) and len(edge_indexes) == layer_count:
        for current_edge_index in edge_indexes:
            if type(current_edge_index) != torch.Tensor:
                raise TypeError
        """ Layer-wise encode """
        x: torch.Tensor = getattr(data, "x")
        for layer, current_edge_index in zip(
            self.__sequential_encoding_layers, edge_indexes
        ):
            x = layer(autogl.data.Data(x=x, edge_index=current_edge_index))
        return x
    else:
        x: torch.Tensor = getattr(data, "x")
        for layer in self.__sequential_encoding_layers:
            x = layer(autogl.data.Data(x, getattr(data, "edge_index")))
        return x
def cls_decode(self, x: torch.Tensor) -> torch.Tensor:
    """Convert per-node scores to log-probabilities over classes."""
    log_probabilities = torch.nn.functional.log_softmax(x, dim=1)
    return log_probabilities
def lp_encode(self, data):
    """Encode for link prediction: run all layers except the final
    (classification) one, disabling the activation on the last retained
    layer so raw embeddings are returned."""
    x: torch.Tensor = getattr(data, "x")
    # Every layer up to (but excluding) the last two keeps its activation.
    for i in range(len(self.__sequential_encoding_layers) - 2):
        x = self.__sequential_encoding_layers[i](
            autogl.data.Data(x, getattr(data, "edge_index"))
        )
    # Penultimate layer runs without activation; the final layer is skipped.
    x = self.__sequential_encoding_layers[-2](
        autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False
    )
    return x
| def forward(self, data): | |||
| try: | |||
| x = data.x | |||
| except: | |||
| print("no x") | |||
| pass | |||
| try: | |||
| edge_index = data.edge_index | |||
| except: | |||
| print("no index") | |||
| pass | |||
| try: | |||
| edge_weight = data.edge_weight | |||
| except: | |||
| edge_weight = None | |||
| pass | |||
| for i in range(self.num_layer): | |||
| x = self.convs[i](x, edge_index, edge_weight) | |||
| if i != self.num_layer - 1: | |||
| x = activate_func(x, self.args["act"]) | |||
| x = F.dropout(x, p=self.args["dropout"], training=self.training) | |||
def lp_decode(self, z, pos_edge_index, neg_edge_index):
    """Score candidate edges by the dot product of their endpoint embeddings.

    Positive and negative candidate edges are concatenated, so the returned
    logits are ordered: positives first, then negatives.
    """
    candidate_edges = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
    source_embeddings = z[candidate_edges[0]]
    target_embeddings = z[candidate_edges[1]]
    return (source_embeddings * target_embeddings).sum(dim=-1)
| return F.log_softmax(x, dim=1) | |||
def lp_decode_all(self, z):
    """Return, as an edge index, every node pair whose embedding dot product
    is positive."""
    scores = torch.matmul(z, z.t())
    positive_pairs = scores > 0
    return positive_pairs.nonzero(as_tuple=False).t()
| @register_model("sage") | |||
| @@ -251,8 +293,14 @@ class AutoSAGE(BaseModel): | |||
| self.initialize() | |||
| def initialize(self): | |||
| # """Initialize model.""" | |||
| if self.initialized: | |||
| return | |||
| self.initialized = True | |||
| self.model = GraphSAGE({**self.params, **self.hyperparams}).to(self.device) | |||
| self.model = GraphSAGE( | |||
| self.num_features, | |||
| self.num_classes, | |||
| self.hyperparams.get("hidden"), | |||
| self.hyperparams.get("act", "relu"), | |||
| self.hyperparams.get("dropout", None), | |||
| self.hyperparams.get("agg", "mean"), | |||
| ).to(self.device) | |||
| @@ -21,10 +21,21 @@ class Topkpool(torch.nn.Module): | |||
| super(Topkpool, self).__init__() | |||
| self.args = args | |||
| missing_keys = list(set(["features_num", "num_class", "num_graph_features", | |||
| "ratio", "dropout", "act"]) - set(self.args.keys())) | |||
| missing_keys = list( | |||
| set( | |||
| [ | |||
| "features_num", | |||
| "num_class", | |||
| "num_graph_features", | |||
| "ratio", | |||
| "dropout", | |||
| "act", | |||
| ] | |||
| ) | |||
| - set(self.args.keys()) | |||
| ) | |||
| if len(missing_keys) > 0: | |||
| raise Exception("Missing keys: %s." % ','.join(missing_keys)) | |||
| raise Exception("Missing keys: %s." % ",".join(missing_keys)) | |||
| self.num_features = self.args["features_num"] | |||
| self.num_classes = self.args["num_class"] | |||
| @@ -45,7 +56,8 @@ class Topkpool(torch.nn.Module): | |||
| def forward(self, data): | |||
| x, edge_index, batch = data.x, data.edge_index, data.batch | |||
| graph_feature = data.gf | |||
| if self.num_graph_features > 0: | |||
| graph_feature = data.gf | |||
| x = F.relu(self.conv1(x, edge_index)) | |||
| x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch) | |||
| @@ -60,7 +72,8 @@ class Topkpool(torch.nn.Module): | |||
| x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1) | |||
| x = x1 + x2 + x3 | |||
| x = torch.cat([x, graph_feature], dim=-1) | |||
| if self.num_graph_features > 0: | |||
| x = torch.cat([x, graph_feature], dim=-1) | |||
| x = self.lin1(x) | |||
| x = activate_func(x, self.args["act"]) | |||
| x = F.dropout(x, p=self.dropout, training=self.training) | |||
| @@ -0,0 +1,5 @@ | |||
| from . import algorithm, estimator, space | |||
| from .algorithm import NAS_ALGO_DICT | |||
| from .estimator import NAS_ESTIMATOR_DICT | |||
| from .space import NAS_SPACE_DICT | |||
| @@ -0,0 +1,55 @@ | |||
| """ | |||
| NAS algorithms | |||
| """ | |||
| import importlib | |||
| import os | |||
| from .base import BaseNAS | |||
| NAS_ALGO_DICT = {} | |||
def register_nas_algo(name):
    """Class decorator that registers a ``BaseNAS`` subclass under ``name``
    in ``NAS_ALGO_DICT``.

    Raises ``ValueError`` when the name is already registered or the
    decorated class does not extend ``BaseNAS``.
    """

    def _register(cls):
        if name in NAS_ALGO_DICT:
            raise ValueError(
                "Cannot register duplicate NAS algorithm ({})".format(name)
            )
        if not issubclass(cls, BaseNAS):
            raise ValueError(
                "Model ({}: {}) must extend NAS algorithm".format(name, cls.__name__)
            )
        NAS_ALGO_DICT[name] = cls
        return cls

    return _register
| from .darts import Darts | |||
| from .enas import Enas | |||
| from .random_search import RandomSearch | |||
| from .rl import RL, GraphNasRL | |||
def build_nas_algo_from_name(name: str) -> BaseNAS:
    """
    Parameters
    ----------
    name: ``str``
        the name of nas algorithm.

    Returns
    -------
    BaseNAS:
        the NAS algorithm built using default parameters

    Raises
    ------
    AssertionError
        If an invalid name is passed in
    """
    # Fixed copy-pasted assertion message that wrongly referred to the HPO module.
    assert name in NAS_ALGO_DICT, "NAS algorithm module does not have name " + name
    return NAS_ALGO_DICT[name]()
| __all__ = ["BaseNAS", "Darts", "Enas", "RandomSearch", "RL", "GraphNasRL"] | |||
| @@ -0,0 +1,52 @@ | |||
| """ | |||
| Base class for algorithm | |||
| """ | |||
| from ...model import BaseModel | |||
| import torch | |||
| from abc import abstractmethod | |||
| from ....utils import get_device | |||
class BaseNAS:
    """
    Base NAS algorithm class

    Parameters
    ----------
    device: str or torch.device
        The device of the whole process
    """

    def __init__(self, device="auto") -> None:
        # "auto" is resolved by get_device (presumably cuda-if-available;
        # TODO confirm against utils.get_device).
        self.device = get_device(device)

    def to(self, device):
        """
        Change the device of the whole NAS search process

        Parameters
        ----------
        device: str or torch.device
        """
        self.device = get_device(device)

    @abstractmethod
    def search(self, space, dataset, estimator) -> BaseModel:
        """
        The search process of NAS.

        Parameters
        ----------
        space : autogl.module.nas.space.BaseSpace
            The search space. Constructed following nni.
        dataset : autogl.datasets
            Dataset to perform search on.
        estimator : autogl.module.nas.estimator.BaseEstimator
            The estimator to compute loss & metrics.

        Returns
        -------
        model: autogl.module.model.BaseModel
            The searched model.
        """
        raise NotImplementedError()
| @@ -0,0 +1,136 @@ | |||
| # Modified from NNI | |||
| import logging | |||
| import torch | |||
| import torch.optim | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from . import register_nas_algo | |||
| from .base import BaseNAS | |||
| from ..estimator.base import BaseEstimator | |||
| from ..space import BaseSpace | |||
| from ..utils import replace_layer_choice, replace_input_choice | |||
| from ...model.base import BaseModel | |||
| from nni.retiarii.oneshot.pytorch.darts import DartsLayerChoice, DartsInputChoice | |||
| _logger = logging.getLogger(__name__) | |||
@register_nas_algo("darts")
class Darts(BaseNAS):
    """
    DARTS trainer.

    Parameters
    ----------
    num_epochs : int
        Number of epochs planned for training.
    workers : int
        Workers for data loading.
    gradient_clip : float
        Gradient clipping. Set to 0 to disable. Default: 5.
    model_lr : float
        Learning rate to optimize the model.
    model_wd : float
        Weight decay to optimize the model.
    arch_lr : float
        Learning rate to optimize the architecture.
    arch_wd : float
        Weight decay to optimize the architecture.
    device : str or torch.device
        The device of the whole process
    """

    def __init__(
        self,
        num_epochs=5,
        workers=4,
        gradient_clip=5.0,
        model_lr=1e-3,
        model_wd=5e-4,
        arch_lr=3e-4,
        arch_wd=1e-3,
        device="cuda",
    ):
        super().__init__(device=device)
        self.num_epochs = num_epochs
        self.workers = workers
        self.gradient_clip = gradient_clip
        # Both model weights and architecture parameters are optimized with Adam.
        self.model_optimizer = torch.optim.Adam
        self.arch_optimizer = torch.optim.Adam
        self.model_lr = model_lr
        self.model_wd = model_wd
        self.arch_lr = arch_lr
        self.arch_wd = arch_wd

    def search(self, space: BaseSpace, dataset, estimator):
        """Run bi-level DARTS optimization over ``space`` and return the
        model parsed from the exported architecture selection."""
        model_optim = self.model_optimizer(
            space.parameters(), self.model_lr, weight_decay=self.model_wd
        )
        # Replace NNI layer/input choices with differentiable DARTS choices.
        nas_modules = []
        replace_layer_choice(space, DartsLayerChoice, nas_modules)
        replace_input_choice(space, DartsInputChoice, nas_modules)
        space = space.to(self.device)
        # Share architecture parameters (alpha) across choices with the same label.
        ctrl_params = {}
        for _, m in nas_modules:
            if m.name in ctrl_params:
                assert (
                    m.alpha.size() == ctrl_params[m.name].size()
                ), "Size of parameters with the same label should be same."
                m.alpha = ctrl_params[m.name]
            else:
                ctrl_params[m.name] = m.alpha
        arch_optim = self.arch_optimizer(
            list(ctrl_params.values()), self.arch_lr, weight_decay=self.arch_wd
        )
        for epoch in range(self.num_epochs):
            self._train_one_epoch(
                epoch, space, dataset, estimator, model_optim, arch_optim
            )
        selection = self.export(nas_modules)
        return space.parse_model(selection, self.device)

    def _train_one_epoch(
        self,
        epoch,
        model: BaseSpace,
        dataset,
        estimator,
        model_optim: torch.optim.Optimizer,
        arch_optim: torch.optim.Optimizer,
    ):
        """One epoch of alternating architecture / weight updates."""
        model.train()
        # phase 1. architecture step (on the validation split)
        arch_optim.zero_grad()
        # only no unroll here
        _, loss = self._infer(model, dataset, estimator, "val")
        loss.backward()
        arch_optim.step()
        # phase 2: child network step (on the training split)
        model_optim.zero_grad()
        metric, loss = self._infer(model, dataset, estimator, "train")
        loss.backward()
        # gradient clipping
        if self.gradient_clip > 0:
            nn.utils.clip_grad_norm_(model.parameters(), self.gradient_clip)
        model_optim.step()

    def _infer(self, model: BaseSpace, dataset, estimator: BaseEstimator, mask="train"):
        """Compute (metric, loss) for ``model`` on the given dataset split."""
        metric, loss = estimator.infer(model, dataset, mask=mask)
        return metric, loss

    @torch.no_grad()
    def export(self, nas_modules) -> dict:
        """Export the first recorded architecture choice for each module name."""
        result = dict()
        for name, module in nas_modules:
            if name not in result:
                result[name] = module.export()
        return result
| @@ -0,0 +1,224 @@ | |||
| # codes in this file are reproduced from https://github.com/microsoft/nni with some changes. | |||
| import copy | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from . import register_nas_algo | |||
| from .base import BaseNAS | |||
| from ..space import BaseSpace | |||
| from ..utils import ( | |||
| AverageMeterGroup, | |||
| replace_layer_choice, | |||
| replace_input_choice, | |||
| get_module_order, | |||
| sort_replaced_module, | |||
| ) | |||
| from tqdm import tqdm, trange | |||
| from .rl import ( | |||
| PathSamplingLayerChoice, | |||
| PathSamplingInputChoice, | |||
| ReinforceField, | |||
| ReinforceController, | |||
| ) | |||
| from ....utils import get_logger | |||
| LOGGER = get_logger("ENAS") | |||
@register_nas_algo("enas")
class Enas(BaseNAS):
    """
    ENAS trainer.

    Parameters
    ----------
    num_epochs : int
        Number of epochs planned for training.
    n_warmup : int
        Number of epochs for training super network.
    log_frequency : int
        Step count per logging.
    grad_clip : float
        Gradient clipping. Set to 0 to disable. Default: 5.
    entropy_weight : float
        Weight of sample entropy loss.
    skip_weight : float
        Weight of skip penalty loss.
    baseline_decay : float
        Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``.
    ctrl_lr : float
        Learning rate for RL controller.
    ctrl_steps_aggregate : int
        Number of steps that will be aggregated into one mini-batch for RL controller.
    ctrl_kwargs : dict
        Optional kwargs that will be passed to :class:`ReinforceController`.
    model_lr : float
        Learning rate for super network.
    model_wd : float
        Weight decay for super network.
    disable_progress : boolean
        Control whether show the progress bar.
    device : str or torch.device
        The device of the whole process, e.g. "cuda", torch.device("cpu")
    """

    def __init__(
        self,
        num_epochs=5,
        n_warmup=100,
        log_frequency=None,
        grad_clip=5.0,
        entropy_weight=0.0001,
        skip_weight=0.8,
        baseline_decay=0.999,
        ctrl_lr=0.00035,
        ctrl_steps_aggregate=20,
        ctrl_kwargs=None,
        model_lr=5e-3,
        model_wd=5e-4,
        disable_progress=True,
        device="cuda",
    ):
        super().__init__(device)
        self.device = device
        self.num_epochs = num_epochs
        self.log_frequency = log_frequency
        self.entropy_weight = entropy_weight
        self.skip_weight = skip_weight
        self.baseline_decay = baseline_decay
        # moving-average baseline used to reduce variance of the REINFORCE gradient
        self.baseline = 0.0
        self.ctrl_steps_aggregate = ctrl_steps_aggregate
        self.grad_clip = grad_clip
        self.ctrl_kwargs = ctrl_kwargs
        self.ctrl_lr = ctrl_lr
        self.n_warmup = n_warmup
        self.model_lr = model_lr
        self.model_wd = model_wd
        self.disable_progress = disable_progress

    def search(self, space: BaseSpace, dset, estimator):
        """Run ENAS: warm up the supernet, alternate supernet/controller training,
        then export the best architecture and parse it into a concrete model."""
        self.model = space
        self.dataset = dset  # .to(self.device)
        self.estimator = estimator
        # replace choice modules with path-sampling versions, in module order
        self.nas_modules = []
        k2o = get_module_order(self.model)
        replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules)
        replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules)
        self.nas_modules = sort_replaced_module(k2o, self.nas_modules)
        # to device
        self.model = self.model.to(self.device)
        self.model_optim = torch.optim.Adam(
            self.model.parameters(), lr=self.model_lr, weight_decay=self.model_wd
        )
        # one ReinforceField per choice point for the RL controller
        self.nas_fields = [
            ReinforceField(
                name,
                len(module),
                isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1,
            )
            for name, module in self.nas_modules
        ]
        self.controller = ReinforceController(
            self.nas_fields, **(self.ctrl_kwargs or {})
        )
        self.ctrl_optim = torch.optim.Adam(
            self.controller.parameters(), lr=self.ctrl_lr
        )
        # warm up supernet
        with tqdm(range(self.n_warmup), disable=self.disable_progress) as bar:
            for i in bar:
                acc, l1 = self._train_model(i)
                with torch.no_grad():
                    val_acc, val_loss = self._infer("val")
                bar.set_postfix(loss=l1, acc=acc, val_acc=val_acc, val_loss=val_loss)
        # train; keep l1/l2 defined even if the very first epoch raises
        l1 = l2 = None
        with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar:
            for i in bar:
                try:
                    # BUGFIX: _train_model returns (metric, loss); unpack instead of
                    # binding the whole tuple to the "loss" postfix value
                    _, l1 = self._train_model(i)
                    l2 = self._train_controller(i)
                except Exception as e:
                    # best-effort: one failing sampled architecture should not abort
                    # the whole search; log the failure and current samples instead
                    LOGGER.warning("epoch %d failed: %s", i, e)
                    for name, module in self.nas_modules:
                        LOGGER.warning("%s sampled %s", name, module.sampled)
                bar.set_postfix(loss_model=l1, reward_controller=l2)
        selection = self.export()
        return space.parse_model(selection, self.device)

    def _train_model(self, epoch):
        """One supernet step: sample one path, forward/backward on the train mask,
        clip gradients and step the optimizer. Returns ``(metric, loss_value)``."""
        self.model.train()
        self.controller.eval()
        self.model_optim.zero_grad()
        self._resample()
        metric, loss = self._infer()
        loss.backward()
        if self.grad_clip > 0:
            nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)
        self.model_optim.step()
        return metric, loss.item()

    def _train_controller(self, epoch):
        """One controller epoch: aggregate ``ctrl_steps_aggregate`` REINFORCE steps
        into a single optimizer update. Returns the average raw reward."""
        self.model.eval()
        self.controller.train()
        self.ctrl_optim.zero_grad()
        rewards = []
        for ctrl_step in range(self.ctrl_steps_aggregate):
            self._resample()
            with torch.no_grad():
                metric, loss = self._infer(mask="val")
            reward = metric
            rewards.append(reward)
            if self.entropy_weight:
                # encourage exploration via the controller's sampling entropy
                reward += self.entropy_weight * self.controller.sample_entropy.item()
            self.baseline = self.baseline * self.baseline_decay + reward * (
                1 - self.baseline_decay
            )
            loss = self.controller.sample_log_prob * (reward - self.baseline)
            if self.skip_weight:
                loss += self.skip_weight * self.controller.sample_skip_penalty
            loss /= self.ctrl_steps_aggregate
            loss.backward()
            if (ctrl_step + 1) % self.ctrl_steps_aggregate == 0:
                if self.grad_clip > 0:
                    nn.utils.clip_grad_norm_(
                        self.controller.parameters(), self.grad_clip
                    )
                self.ctrl_optim.step()
                self.ctrl_optim.zero_grad()
            if self.log_frequency is not None and ctrl_step % self.log_frequency == 0:
                # BUGFIX: the format string had a trailing %s with no matching
                # argument, which makes the logging call fail to format
                LOGGER.info(
                    "RL Epoch [%d/%d] Step [%d/%d]",
                    epoch + 1,
                    self.num_epochs,
                    ctrl_step + 1,
                    self.ctrl_steps_aggregate,
                )
        return sum(rewards) / len(rewards)

    def _resample(self):
        """Ask the controller for a fresh sample and push it into every choice module."""
        result = self.controller.resample()
        for name, module in self.nas_modules:
            module.sampled = result[name]

    def export(self):
        """Sample one architecture from the trained controller (no gradients)."""
        self.controller.eval()
        with torch.no_grad():
            return self.controller.resample()

    def _infer(self, mask="train"):
        """Evaluate the current supernet sample; returns (primary metric, loss)."""
        metric, loss = self.estimator.infer(self.model, self.dataset, mask=mask)
        return metric[0], loss
| @@ -0,0 +1,85 @@ | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from . import register_nas_algo | |||
| from .base import BaseNAS | |||
| from ..space import BaseSpace | |||
| from ..utils import ( | |||
| AverageMeterGroup, | |||
| replace_layer_choice, | |||
| replace_input_choice, | |||
| get_module_order, | |||
| sort_replaced_module, | |||
| ) | |||
| from tqdm import tqdm | |||
| from .rl import PathSamplingLayerChoice, PathSamplingInputChoice | |||
| import numpy as np | |||
| from ....utils import get_logger | |||
| LOGGER = get_logger("random_search_NAS") | |||
@register_nas_algo("random")
class RandomSearch(BaseNAS):
    """
    Uniformly random architecture search

    Parameters
    ----------
    device : str or torch.device
        The device of the whole process, e.g. "cuda", torch.device("cpu")
    num_epochs : int
        Number of epochs planned for training.
    disable_progress : boolean
        Control whether show the progress bar.
    """

    def __init__(self, device="cuda", num_epochs=400, disable_progress=False):
        super().__init__(device)
        self.num_epochs = num_epochs
        self.disable_progress = disable_progress

    def search(self, space: BaseSpace, dset, estimator):
        """Sample ``num_epochs`` architectures uniformly at random, evaluate each
        unseen one on the validation mask, and return the best parsed model."""
        self.estimator = estimator
        self.dataset = dset
        self.space = space
        self.nas_modules = []
        k2o = get_module_order(self.space)
        replace_layer_choice(self.space, PathSamplingLayerChoice, self.nas_modules)
        replace_input_choice(self.space, PathSamplingInputChoice, self.nas_modules)
        self.nas_modules = sort_replaced_module(k2o, self.nas_modules)
        # number of candidate operations for every choice point
        selection_range = {}
        for k, v in self.nas_modules:
            selection_range[k] = len(v)
        self.selection_dict = selection_range
        # space_size=np.prod(list(selection_range.values()))
        arch_perfs = []
        # maps a selection tuple to its validation metric so repeated samples
        # are not re-evaluated
        cache = {}
        with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar:
            for i in bar:
                selection = self.sample()
                vec = tuple(list(selection.values()))
                if vec not in cache:
                    self.arch = space.parse_model(selection, self.device)
                    metric, loss = self._infer(mask="val")
                    arch_perfs.append([metric, selection])
                    cache[vec] = metric
                # BUGFIX: read the metric from the cache so the progress bar shows
                # the current sample's accuracy even when it was a cache hit
                # (previously a stale metric from an earlier iteration was shown)
                bar.set_postfix(acc=cache[vec], max_acc=max(cache.values()))
        selection = arch_perfs[np.argmax([x[0] for x in arch_perfs])][1]
        arch = space.parse_model(selection, self.device)
        return arch

    def sample(self):
        """Uniformly sample one operation index for every choice point."""
        selection = {}
        for k, v in self.selection_dict.items():
            selection[k] = np.random.choice(range(v))
        return selection

    def _infer(self, mask="train"):
        """Evaluate the currently parsed architecture; returns (primary metric, loss)."""
        metric, loss = self.estimator.infer(self.arch._model, self.dataset, mask=mask)
        return metric[0], loss
| @@ -0,0 +1,670 @@ | |||
| # codes in this file are reproduced from https://github.com/microsoft/nni with some changes. | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from . import register_nas_algo | |||
| from .base import BaseNAS | |||
| from ..space import BaseSpace | |||
| from ..utils import ( | |||
| AverageMeterGroup, | |||
| replace_layer_choice, | |||
| replace_input_choice, | |||
| get_module_order, | |||
| sort_replaced_module, | |||
| ) | |||
| from nni.nas.pytorch.fixed import apply_fixed_architecture | |||
| from tqdm import tqdm | |||
| from datetime import datetime | |||
| import numpy as np | |||
| from ....utils import get_logger | |||
| LOGGER = get_logger("random_search_NAS") | |||
| def _get_mask(sampled, total): | |||
| multihot = [ | |||
| i == sampled or (isinstance(sampled, list) and i in sampled) | |||
| for i in range(total) | |||
| ] | |||
| return torch.tensor(multihot, dtype=torch.bool) # pylint: disable=not-callable | |||
class PathSamplingLayerChoice(nn.Module):
    """
    Mixed module, in which fprop is decided by exactly one or multiple (sampled) module.
    If multiple module is selected, the result will be sumed and returned.

    Attributes
    ----------
    sampled : int or list of int
        Sampled module indices.
    mask : tensor
        A multi-hot bool 1D-tensor representing the sampled mask.
    """

    def __init__(self, layer_choice):
        super(PathSamplingLayerChoice, self).__init__()
        self.op_names = []
        # register each candidate op under its original name so parameters and
        # state dicts keep their layer-choice naming
        for op_name, op_module in layer_choice.named_children():
            self.add_module(op_name, op_module)
            self.op_names.append(op_name)
        assert self.op_names, "There has to be at least one op to choose from."
        # sampled can be either a list of indices or an index
        self.sampled = None

    def forward(self, *args, **kwargs):
        assert (
            self.sampled is not None
        ), "At least one path needs to be sampled before fprop."
        if isinstance(self.sampled, list):
            branch_outputs = [
                getattr(self, self.op_names[idx])(*args, **kwargs)
                for idx in self.sampled
            ]  # pylint: disable=not-an-iterable
            return sum(branch_outputs)
        chosen_op = getattr(self, self.op_names[self.sampled])  # pylint: disable=invalid-sequence-index
        return chosen_op(*args, **kwargs)

    def __len__(self):
        return len(self.op_names)

    @property
    def mask(self):
        return _get_mask(self.sampled, len(self))
class PathSamplingInputChoice(nn.Module):
    """
    Mixed input. Take a list of tensor as input, select some of them and return the sum.

    Attributes
    ----------
    sampled : int or list of int
        Sampled module indices.
    mask : tensor
        A multi-hot bool 1D-tensor representing the sampled mask.
    """

    def __init__(self, input_choice):
        super(PathSamplingInputChoice, self).__init__()
        # mirror the original input choice's arity
        self.n_candidates = input_choice.n_candidates
        self.n_chosen = input_choice.n_chosen
        self.sampled = None

    def forward(self, input_tensors):
        if not isinstance(self.sampled, list):
            return input_tensors[self.sampled]
        selected = [input_tensors[t] for t in self.sampled]  # pylint: disable=not-an-iterable
        return sum(selected)

    def __len__(self):
        return self.n_candidates

    @property
    def mask(self):
        return _get_mask(self.sampled, len(self))

    def __repr__(self):
        return f"PathSamplingInputChoice(n_candidates={self.n_candidates}, chosen={self.sampled})"
| class StackedLSTMCell(nn.Module): | |||
| def __init__(self, layers, size, bias): | |||
| super().__init__() | |||
| self.lstm_num_layers = layers | |||
| self.lstm_modules = nn.ModuleList( | |||
| [nn.LSTMCell(size, size, bias=bias) for _ in range(self.lstm_num_layers)] | |||
| ) | |||
| def forward(self, inputs, hidden): | |||
| prev_h, prev_c = hidden | |||
| next_h, next_c = [], [] | |||
| for i, m in enumerate(self.lstm_modules): | |||
| curr_h, curr_c = m(inputs, (prev_h[i], prev_c[i])) | |||
| next_c.append(curr_c) | |||
| next_h.append(curr_h) | |||
| # current implementation only supports batch size equals 1, | |||
| # but the algorithm does not necessarily have this limitation | |||
| inputs = curr_h[-1].view(1, -1) | |||
| return next_h, next_c | |||
class ReinforceField:
    """
    A field with ``name``, with ``total`` choices. ``choose_one`` is true if one and only one is meant to be
    selected. Otherwise, any number of choices can be chosen.
    """

    def __init__(self, name, total, choose_one):
        self.name, self.total, self.choose_one = name, total, choose_one

    def __repr__(self):
        return f"ReinforceField(name={self.name}, total={self.total}, choose_one={self.choose_one})"
class ReinforceController(nn.Module):
    """
    A controller that mutates the graph with RL.

    Parameters
    ----------
    fields : list of ReinforceField
        List of fields to choose.
    lstm_size : int
        Controller LSTM hidden units.
    lstm_num_layers : int
        Number of layers for stacked LSTM.
    tanh_constant : float
        Logits will be equal to ``tanh_constant * tanh(logits)``. Don't use ``tanh`` if this value is ``None``.
    skip_target : float
        Target probability that skipconnect will appear.
    temperature : float
        Temperature constant that divides the logits.
    entropy_reduction : str
        Can be one of ``sum`` and ``mean``. How the entropy of multi-input-choice is reduced.
    """

    def __init__(
        self,
        fields,
        lstm_size=64,
        lstm_num_layers=1,
        tanh_constant=1.5,
        skip_target=0.4,
        temperature=None,
        entropy_reduction="sum",
    ):
        super(ReinforceController, self).__init__()
        self.fields = fields
        self.lstm_size = lstm_size
        self.lstm_num_layers = lstm_num_layers
        self.tanh_constant = tanh_constant
        self.temperature = temperature
        self.skip_target = skip_target
        self.lstm = StackedLSTMCell(self.lstm_num_layers, self.lstm_size, False)
        # NOTE(review): the attention layers below are carried over from the NNI
        # implementation; _sample_single here does not use them.
        self.attn_anchor = nn.Linear(self.lstm_size, self.lstm_size, bias=False)
        self.attn_query = nn.Linear(self.lstm_size, self.lstm_size, bias=False)
        self.v_attn = nn.Linear(self.lstm_size, 1, bias=False)
        # learnable start-of-sequence embedding fed into the LSTM on reset
        self.g_emb = nn.Parameter(torch.randn(1, self.lstm_size) * 0.1)
        self.skip_targets = nn.Parameter(
            torch.tensor(
                [1.0 - self.skip_target, self.skip_target]
            ),  # pylint: disable=not-callable
            requires_grad=False,
        )
        assert entropy_reduction in [
            "sum",
            "mean",
        ], "Entropy reduction must be one of sum and mean."
        self.entropy_reduction = torch.sum if entropy_reduction == "sum" else torch.mean
        self.cross_entropy_loss = nn.CrossEntropyLoss(reduction="none")
        # one decoder head and one embedding table per field
        self.soft = nn.ModuleDict(
            {
                field.name: nn.Linear(self.lstm_size, field.total, bias=False)
                for field in fields
            }
        )
        self.embedding = nn.ModuleDict(
            {field.name: nn.Embedding(field.total, self.lstm_size) for field in fields}
        )

    def resample(self):
        """Sample one complete architecture: a dict mapping field name to the
        sampled index (or list of indices for multi-choice fields). Also
        accumulates ``sample_log_prob``, ``sample_entropy`` and
        ``sample_skip_penalty`` for the REINFORCE update."""
        self._initialize()
        result = dict()
        for field in self.fields:
            result[field.name] = self._sample_single(field)
        return result

    def _initialize(self):
        """Reset LSTM state and the per-sample accumulators before a new sample."""
        self._inputs = self.g_emb.data
        self._c = [
            torch.zeros(
                (1, self.lstm_size),
                dtype=self._inputs.dtype,
                device=self._inputs.device,
            )
            for _ in range(self.lstm_num_layers)
        ]
        self._h = [
            torch.zeros(
                (1, self.lstm_size),
                dtype=self._inputs.dtype,
                device=self._inputs.device,
            )
            for _ in range(self.lstm_num_layers)
        ]
        self.sample_log_prob = 0
        self.sample_entropy = 0
        self.sample_skip_penalty = 0

    def _lstm_next_step(self):
        # advance the stacked LSTM one step on the current input
        self._h, self._c = self.lstm(self._inputs, (self._h, self._c))

    def _sample_single(self, field):
        """Sample one field. ``choose_one`` fields draw a single index from a
        softmax over logits; otherwise each candidate is an independent on/off
        choice with a KL penalty towards ``skip_target``."""
        self._lstm_next_step()
        logit = self.soft[field.name](self._h[-1])
        if self.temperature is not None:
            logit /= self.temperature
        if self.tanh_constant is not None:
            logit = self.tanh_constant * torch.tanh(logit)
        if field.choose_one:
            sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1)
            log_prob = self.cross_entropy_loss(logit, sampled)
            self._inputs = self.embedding[field.name](sampled)
        else:
            logit = logit.view(-1, 1)
            logit = torch.cat(
                [-logit, logit], 1
            )  # pylint: disable=invalid-unary-operand-type
            sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1)
            skip_prob = torch.sigmoid(logit)
            kl = torch.sum(skip_prob * torch.log(skip_prob / self.skip_targets))
            self.sample_skip_penalty += kl
            log_prob = self.cross_entropy_loss(logit, sampled)
            sampled = sampled.nonzero().view(-1)
            if sampled.sum().item():
                self._inputs = (
                    torch.sum(self.embedding[field.name](sampled.view(-1)), 0)
                    / (1.0 + torch.sum(sampled))
                ).unsqueeze(0)
            else:
                self._inputs = torch.zeros(
                    1, self.lstm_size, device=self.embedding[field.name].weight.device
                )
            # BUGFIX: move to CPU before .numpy() — a CUDA tensor cannot be
            # converted to a numpy array directly
            sampled = sampled.detach().cpu().numpy().tolist()
        self.sample_log_prob += self.entropy_reduction(log_prob)
        entropy = (
            log_prob * torch.exp(-log_prob)
        ).detach()  # pylint: disable=invalid-unary-operand-type
        self.sample_entropy += self.entropy_reduction(entropy)
        if len(sampled) == 1:
            sampled = sampled[0]
        return sampled
@register_nas_algo("rl")
class RL(BaseNAS):
    """
    RL in GraphNas.

    Parameters
    ----------
    num_epochs : int
        Number of epochs planned for training.
    device : torch.device
        ``torch.device("cpu")`` or ``torch.device("cuda")``.
    log_frequency : int
        Step count per logging.
    grad_clip : float
        Gradient clipping. Set to 0 to disable. Default: 5.
    entropy_weight : float
        Weight of sample entropy loss.
    skip_weight : float
        Weight of skip penalty loss.
    baseline_decay : float
        Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``.
    ctrl_lr : float
        Learning rate for RL controller.
    ctrl_steps_aggregate : int
        Number of steps that will be aggregated into one mini-batch for RL controller.
    ctrl_kwargs : dict
        Optional kwargs that will be passed to :class:`ReinforceController`.
    n_warmup : int
        Number of epochs for training super network. Kept for interface
        compatibility; this trainer does not train a super network.
    model_lr : float
        Learning rate for super network. Kept for interface compatibility; unused here.
    model_wd : float
        Weight decay for super network. Kept for interface compatibility; unused here.
    disable_progress : boolean
        Control whether show the progress bar.
    """

    def __init__(
        self,
        num_epochs=5,
        device="cuda",
        log_frequency=None,
        grad_clip=5.0,
        entropy_weight=0.0001,
        skip_weight=0.8,
        baseline_decay=0.999,
        ctrl_lr=0.00035,
        ctrl_steps_aggregate=20,
        ctrl_kwargs=None,
        n_warmup=100,
        model_lr=5e-3,
        model_wd=5e-4,
        disable_progress=True,
    ):
        super().__init__(device)
        self.device = device
        self.num_epochs = num_epochs
        self.log_frequency = log_frequency
        self.entropy_weight = entropy_weight
        self.skip_weight = skip_weight
        self.baseline_decay = baseline_decay
        # moving-average baseline for the REINFORCE gradient
        self.baseline = 0.0
        self.ctrl_steps_aggregate = ctrl_steps_aggregate
        self.grad_clip = grad_clip
        self.ctrl_kwargs = ctrl_kwargs
        self.ctrl_lr = ctrl_lr
        self.n_warmup = n_warmup
        self.model_lr = model_lr
        self.model_wd = model_wd
        self.disable_progress = disable_progress

    def search(self, space: BaseSpace, dset, estimator):
        """Train the RL controller only (each sampled architecture is evaluated
        from scratch via the estimator) and return the best parsed model."""
        self.model = space
        self.dataset = dset  # .to(self.device)
        self.estimator = estimator
        # replace choice modules with path-sampling versions, in module order
        self.nas_modules = []
        k2o = get_module_order(self.model)
        replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules)
        replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules)
        self.nas_modules = sort_replaced_module(k2o, self.nas_modules)
        # to device
        self.model = self.model.to(self.device)
        # one ReinforceField per choice point
        self.nas_fields = [
            ReinforceField(
                name,
                len(module),
                isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1,
            )
            for name, module in self.nas_modules
        ]
        self.controller = ReinforceController(
            self.nas_fields, **(self.ctrl_kwargs or {})
        )
        self.ctrl_optim = torch.optim.Adam(
            self.controller.parameters(), lr=self.ctrl_lr
        )
        # train
        with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar:
            for i in bar:
                l2 = self._train_controller(i)
                bar.set_postfix(reward_controller=l2)
        selection = self.export()
        arch = space.parse_model(selection, self.device)
        return arch

    def _train_controller(self, epoch):
        """One controller epoch: aggregate ``ctrl_steps_aggregate`` REINFORCE
        steps into one optimizer update. Returns the average raw reward."""
        self.model.eval()
        self.controller.train()
        self.ctrl_optim.zero_grad()
        rewards = []
        with tqdm(
            range(self.ctrl_steps_aggregate), disable=self.disable_progress
        ) as bar:
            for ctrl_step in bar:
                self._resample()
                metric, loss = self._infer(mask="val")
                bar.set_postfix(acc=metric, loss=loss.item())
                LOGGER.info(f"{self.arch}\n{self.selection}\n{metric},{loss}")
                reward = metric
                rewards.append(reward)
                if self.entropy_weight:
                    # encourage exploration via the sampling entropy
                    reward += (
                        self.entropy_weight * self.controller.sample_entropy.item()
                    )
                self.baseline = self.baseline * self.baseline_decay + reward * (
                    1 - self.baseline_decay
                )
                loss = self.controller.sample_log_prob * (reward - self.baseline)
                if self.skip_weight:
                    loss += self.skip_weight * self.controller.sample_skip_penalty
                loss /= self.ctrl_steps_aggregate
                loss.backward()
                if (ctrl_step + 1) % self.ctrl_steps_aggregate == 0:
                    if self.grad_clip > 0:
                        nn.utils.clip_grad_norm_(
                            self.controller.parameters(), self.grad_clip
                        )
                    self.ctrl_optim.step()
                    self.ctrl_optim.zero_grad()
                if (
                    self.log_frequency is not None
                    and ctrl_step % self.log_frequency == 0
                ):
                    # BUGFIX: the format string had a trailing %s with no
                    # matching argument, which makes the logging call fail
                    LOGGER.info(
                        "RL Epoch [%d/%d] Step [%d/%d]",
                        epoch + 1,
                        self.num_epochs,
                        ctrl_step + 1,
                        self.ctrl_steps_aggregate,
                    )
        return sum(rewards) / len(rewards)

    def _resample(self):
        """Sample a new architecture from the controller and parse it into a model."""
        result = self.controller.resample()
        self.arch = self.model.parse_model(result, device=self.device)
        self.selection = result

    def export(self):
        """Sample one architecture from the trained controller (no gradients)."""
        self.controller.eval()
        with torch.no_grad():
            return self.controller.resample()

    def _infer(self, mask="train"):
        """Evaluate the currently parsed architecture; returns (primary metric, loss)."""
        metric, loss = self.estimator.infer(self.arch._model, self.dataset, mask=mask)
        return metric[0], loss
@register_nas_algo("graphnas")
class GraphNasRL(BaseNAS):
    """
    RL in GraphNas.

    Parameters
    ----------
    device : torch.device
        ``torch.device("cpu")`` or ``torch.device("cuda")``.
    num_epochs : int
        Number of epochs planned for training.
    log_frequency : int
        Step count per logging.
    grad_clip : float
        Gradient clipping. Set to 0 to disable. Default: 5.
    entropy_weight : float
        Weight of sample entropy loss.
    skip_weight : float
        Weight of skip penalty loss.
    baseline_decay : float
        Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``.
    ctrl_lr : float
        Learning rate for RL controller.
    ctrl_steps_aggregate : int
        Number of steps that will be aggregated into one mini-batch for RL controller.
    ctrl_kwargs : dict
        Optional kwargs that will be passed to :class:`ReinforceController`.
    n_warmup : int
        Number of epochs for training super network. Kept for interface
        compatibility; this trainer does not train a super network.
    model_lr : float
        Learning rate for super network. Kept for interface compatibility; unused here.
    model_wd : float
        Weight decay for super network. Kept for interface compatibility; unused here.
    topk : int
        Number of architectures kept in training process.
    disable_progress : boolean
        Control whether show the progress bar.
    """

    def __init__(
        self,
        device="cuda",
        num_epochs=10,
        log_frequency=None,
        grad_clip=5.0,
        entropy_weight=0.0001,
        skip_weight=0,
        baseline_decay=0.95,
        ctrl_lr=0.00035,
        ctrl_steps_aggregate=100,
        ctrl_kwargs=None,
        n_warmup=100,
        model_lr=5e-3,
        model_wd=5e-4,
        topk=5,
        disable_progress=True,
    ):
        super().__init__(device)
        self.device = device
        self.num_epochs = num_epochs
        self.log_frequency = log_frequency
        self.entropy_weight = entropy_weight
        self.skip_weight = skip_weight
        self.baseline_decay = baseline_decay
        self.ctrl_steps_aggregate = ctrl_steps_aggregate
        self.grad_clip = grad_clip
        self.ctrl_kwargs = ctrl_kwargs
        self.ctrl_lr = ctrl_lr
        self.n_warmup = n_warmup
        self.model_lr = model_lr
        self.model_wd = model_wd
        # min-heap-like list of [-metric, selection] pairs; keeps the topk best
        self.hist = []
        self.topk = topk
        self.disable_progress = disable_progress

    def search(self, space: BaseSpace, dset, estimator):
        """Train the controller, keep the ``topk`` best sampled selections, then
        re-evaluate those candidates and return the best parsed model."""
        self.model = space
        self.dataset = dset  # .to(self.device)
        self.estimator = estimator
        # BUGFIX: reset the history so a second search() call does not mix
        # selections kept from a previous run
        self.hist = []
        # replace choice modules with path-sampling versions, in module order
        self.nas_modules = []
        k2o = get_module_order(self.model)
        replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules)
        replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules)
        self.nas_modules = sort_replaced_module(k2o, self.nas_modules)
        # to device
        self.model = self.model.to(self.device)
        # one ReinforceField per choice point
        self.nas_fields = [
            ReinforceField(
                name,
                len(module),
                isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1,
            )
            for name, module in self.nas_modules
        ]
        self.controller = ReinforceController(
            self.nas_fields,
            lstm_size=100,
            temperature=5.0,
            tanh_constant=2.5,
            **(self.ctrl_kwargs or {}),
        )
        self.ctrl_optim = torch.optim.Adam(
            self.controller.parameters(), lr=self.ctrl_lr
        )
        # train
        with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar:
            for i in bar:
                l2 = self._train_controller(i)
                bar.set_postfix(reward_controller=l2)
        # evaluate the retained topk candidates and pick the best
        selections = [x[1] for x in self.hist]
        selection = self._choose_best(selections)
        arch = space.parse_model(selection, self.device)
        return arch

    def _choose_best(self, selections, n_evals=20):
        """Re-evaluate every candidate selection ``n_evals`` times (fresh
        parameter init each time) and return the one with the best mean metric.

        graphnas uses the top 5 models, evaluates each 20 times and keeps the best.
        """
        results = []
        for selection in selections:
            accs = []
            for i in tqdm(range(n_evals), disable=self.disable_progress):
                self.arch = self.model.parse_model(selection, device=self.device)
                metric, loss = self._infer(mask="val")
                accs.append(metric)
            result = np.mean(accs)
            LOGGER.info(
                "selection {} \n acc {:.4f} +- {:.4f}".format(
                    selection, np.mean(accs), np.std(accs) / np.sqrt(n_evals)
                )
            )
            results.append(result)
        best_selection = selections[np.argmax(results)]
        return best_selection

    def _train_controller(self, epoch):
        """One controller epoch of plain REINFORCE (one optimizer step per
        sample, exponential-moving-average baseline). Returns the average reward."""
        self.model.eval()
        self.controller.train()
        self.ctrl_optim.zero_grad()
        rewards = []
        baseline = None
        # diff: graph nas train 100 and derive 100 for every epoch(10 epochs), we just train 100(20 epochs). totol num of samples are same (2000)
        with tqdm(
            range(self.ctrl_steps_aggregate), disable=self.disable_progress
        ) as bar:
            for ctrl_step in bar:
                self._resample()
                metric, loss = self._infer(mask="val")
                LOGGER.debug(f"{self.arch}\n{self.selection}\n{metric},{loss}")
                # diff: not do reward shaping as in graphnas code
                reward = metric
                # keep only the topk best selections (stored as negated metric
                # so that list sort ascending puts the best first)
                self.hist.append([-metric, self.selection])
                if len(self.hist) > self.topk:
                    self.hist.sort(key=lambda x: x[0])
                    self.hist.pop()
                rewards.append(reward)
                if self.entropy_weight:
                    reward += (
                        self.entropy_weight * self.controller.sample_entropy.item()
                    )
                if not baseline:
                    baseline = reward
                else:
                    baseline = baseline * self.baseline_decay + reward * (
                        1 - self.baseline_decay
                    )
                loss = self.controller.sample_log_prob * (reward - baseline)
                self.ctrl_optim.zero_grad()
                loss.backward()
                self.ctrl_optim.step()
                bar.set_postfix(acc=metric, max_acc=max(rewards))
        return sum(rewards) / len(rewards)

    def _resample(self):
        """Sample a new architecture from the controller and parse it into a model."""
        result = self.controller.resample()
        self.arch = self.model.parse_model(result, device=self.device)
        self.selection = result

    def export(self):
        """Sample one architecture from the trained controller (no gradients)."""
        self.controller.eval()
        with torch.no_grad():
            return self.controller.resample()

    def _infer(self, mask="train"):
        """Evaluate the currently parsed architecture; returns (primary metric, loss)."""
        metric, loss = self.estimator.infer(self.arch._model, self.dataset, mask=mask)
        return metric[0], loss
| @@ -0,0 +1,49 @@ | |||
| import importlib | |||
| import os | |||
| from .base import BaseEstimator | |||
| NAS_ESTIMATOR_DICT = {} | |||
def register_nas_estimator(name):
    """Decorator factory that registers a NAS estimator class under ``name``.

    The decorated class must subclass ``BaseEstimator``; registering the same
    name twice raises ``ValueError``.
    """

    def register_nas_estimator_cls(cls):
        already_registered = name in NAS_ESTIMATOR_DICT
        if already_registered:
            raise ValueError(
                "Cannot register duplicate NAS estimator ({})".format(name)
            )
        if not issubclass(cls, BaseEstimator):
            raise ValueError(
                "Model ({}: {}) must extend NAS estimator".format(name, cls.__name__)
            )
        NAS_ESTIMATOR_DICT[name] = cls
        return cls

    return register_nas_estimator_cls
| from .one_shot import OneShotEstimator | |||
| from .train_scratch import TrainEstimator | |||
def build_nas_estimator_from_name(name: str) -> BaseEstimator:
    """
    Build a registered NAS estimator with default parameters.

    Parameters
    ----------
    name: ``str``
        the name of nas estimator.

    Returns
    -------
    BaseEstimator:
        the NAS estimator built using default parameters

    Raises
    ------
    AssertionError
        If an invalid name is passed in
    """
    # Fix: the failure message previously referred to the HPO module,
    # which is misleading when a NAS estimator name is invalid.
    assert name in NAS_ESTIMATOR_DICT, "NAS estimator module does not have name " + name
    return NAS_ESTIMATOR_DICT[name]()
| __all__ = ["BaseEstimator", "OneShotEstimator", "TrainEstimator"] | |||
| @@ -0,0 +1,62 @@ | |||
| """ | |||
| Base estimator of NAS | |||
| """ | |||
| from abc import abstractmethod | |||
| from ..space import BaseSpace | |||
| from typing import Tuple | |||
| from ...train.evaluation import Evaluation, Acc | |||
| import torch.nn.functional as F | |||
| import torch | |||
class BaseEstimator:
    """
    The estimator of NAS model: evaluates architectures sampled from a search
    space and reports (metrics, loss) to the search algorithm.

    Parameters
    ----------
    loss_f: str
        Name of the default loss function (an attribute of
        ``torch.nn.functional``) used for evaluation.
    evaluation: list of autogl.module.train.evaluation.Evaluation, optional
        Default evaluation metrics. ``None`` (the default) is interpreted as
        ``[Acc()]``.
    """

    def __init__(self, loss_f: str = "nll_loss", evaluation=None):
        # Build the default metric list per instance: a literal list default
        # would be shared between every estimator (mutable-default pitfall).
        if evaluation is None:
            evaluation = [Acc()]
        self.loss_f = loss_f
        self.evaluation = evaluation

    def setLossFunction(self, loss_f: str):
        """Set the loss function by its ``torch.nn.functional`` name."""
        self.loss_f = loss_f

    def setEvaluation(self, evaluation):
        """Set the list of evaluation metrics."""
        self.evaluation = evaluation

    @abstractmethod
    def infer(
        self, model: BaseSpace, dataset, mask="train"
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Calculate the loss and metrics of given model on given dataset using
        specified masks.

        Parameters
        ----------
        model: autogl.module.nas.space.BaseSpace
            The model in space.
        dataset: autogl.dataset
            The dataset to perform infer
        mask: str
            The mask to evaluate on dataset

        Returns
        -------
        metrics: list of float
            the metrics on given datasets.
        loss: torch.Tensor
            the loss on given datasets. Note that loss should be differentiable.
        """
        raise NotImplementedError()
| @@ -0,0 +1,27 @@ | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from . import register_nas_estimator | |||
| from ..space import BaseSpace | |||
| from .base import BaseEstimator | |||
@register_nas_estimator("oneshot")
class OneShotEstimator(BaseEstimator):
    """
    One-shot estimator.

    Evaluates an architecture by running the (weight-sharing) space model
    directly — no re-training of the sampled architecture.
    """

    def infer(self, model: BaseSpace, dataset, mask="train"):
        """Run ``model`` on ``dataset[0]`` restricted to ``{mask}_mask``;
        return (metrics, loss) with loss kept differentiable."""
        # Evaluate on whatever device the model currently lives on.
        device = next(model.parameters()).device
        data = dataset[0].to(device)
        node_mask = getattr(data, f"{mask}_mask")
        pred = model(data)[node_mask]
        label = data.y[node_mask]
        loss = getattr(F, self.loss_f)(pred, label)
        # Metrics are computed from detached class probabilities on CPU.
        probs = F.softmax(pred, dim=1).detach().cpu().numpy()
        label = label.cpu()
        metrics = [eva.evaluate(probs, label) for eva in self.evaluation]
        return metrics, loss
| @@ -0,0 +1,51 @@ | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from . import register_nas_estimator | |||
| from ..space import BaseSpace | |||
| from .base import BaseEstimator | |||
| from .one_shot import OneShotEstimator | |||
| import torch | |||
| from autogl.module.train import NodeClassificationFullTrainer, Acc | |||
@register_nas_estimator("scratch")
class TrainEstimator(BaseEstimator):
    """
    An estimator which trains the sampled architecture from scratch before
    evaluating it (slower but closer to the final deployment accuracy).

    Parameters
    ----------
    loss_f: str
        Name of the loss function used for training and evaluation.
    evaluation: list of Evaluation, optional
        Evaluation metrics; ``None`` (the default) means ``[Acc()]``.
    """

    def __init__(self, loss_f="nll_loss", evaluation=None):
        # Avoid the shared mutable default argument `[Acc()]`.
        if evaluation is None:
            evaluation = [Acc()]
        super().__init__(loss_f, evaluation)
        # Final evaluation is delegated to a one-shot pass over the trained model.
        self.estimator = OneShotEstimator(self.loss_f, self.evaluation)

    def infer(self, model: BaseSpace, dataset, mask="train"):
        """Train ``model`` from scratch on ``dataset`` and report (metrics, loss)."""
        boxmodel = model.wrap()
        self.trainer = NodeClassificationFullTrainer(
            model=boxmodel,
            optimizer=torch.optim.Adam,
            lr=0.005,
            max_epoch=300,
            early_stopping_round=30,
            weight_decay=5e-4,
            device="auto",
            init=False,
            feval=self.evaluation,
            loss=self.loss_f,
            lr_scheduler_type=None,
        )
        try:
            self.trainer.train(dataset)
            with torch.no_grad():
                return self.estimator.infer(boxmodel.model, dataset, mask)
        except RuntimeError as e:
            # Treat CUDA failures (typically OOM for oversized architectures)
            # as the worst possible score instead of aborting the search.
            if "cuda" in str(e).lower():
                INF = 100
                fin = [-INF if eva.is_higher_better else INF for eva in self.evaluation]
                return fin, 0
            raise
| @@ -0,0 +1,53 @@ | |||
| import importlib | |||
| import os | |||
| from .base import BaseSpace | |||
| NAS_SPACE_DICT = {} | |||
def register_nas_space(name):
    """
    Decorator factory that registers a ``BaseSpace`` subclass under ``name``
    in the module-level ``NAS_SPACE_DICT``.

    Raises
    ------
    ValueError
        If ``name`` is already registered, or the decorated class does not
        extend ``BaseSpace``.
    """

    def register_nas_space_cls(cls):
        # Refuse duplicates so later registrations cannot silently shadow
        # earlier ones.
        if name in NAS_SPACE_DICT:
            raise ValueError("Cannot register duplicate NAS space ({})".format(name))
        if not issubclass(cls, BaseSpace):
            raise ValueError(
                "Model ({}: {}) must extend NAS space".format(name, cls.__name__)
            )
        NAS_SPACE_DICT[name] = cls
        return cls

    return register_nas_space_cls
| from .graph_nas_macro import GraphNasMacroNodeClassificationSpace | |||
| from .graph_nas import GraphNasNodeClassificationSpace | |||
| from .single_path import SinglePathNodeClassificationSpace | |||
def build_nas_space_from_name(name: str) -> BaseSpace:
    """
    Build a registered NAS space with default parameters.

    Parameters
    ----------
    name: ``str``
        the name of nas space.

    Returns
    -------
    BaseSpace:
        the NAS space built using default parameters

    Raises
    ------
    AssertionError
        If an invalid name is passed in
    """
    # Fix: the failure message previously referred to the HPO module,
    # which is misleading when a NAS space name is invalid.
    assert name in NAS_SPACE_DICT, "NAS space module does not have name " + name
    return NAS_SPACE_DICT[name]()
| __all__ = [ | |||
| "BaseSpace", | |||
| "GraphNasMacroNodeClassificationSpace", | |||
| "GraphNasNodeClassificationSpace", | |||
| "SinglePathNodeClassificationSpace", | |||
| ] | |||
| @@ -0,0 +1,361 @@ | |||
| from abc import abstractmethod | |||
| import torch.nn as nn | |||
| from nni.nas.pytorch import mutables | |||
| from nni.nas.pytorch.fixed import FixedArchitecture | |||
| import json | |||
| from copy import deepcopy | |||
| import typing as _typ | |||
| import torch | |||
| from ...model import BaseModel | |||
| from ....utils import get_logger | |||
| from ...model import AutoGCN | |||
class OrderedMutable:
    """
    Mix-in carrying an explicit rank so mutables can be sorted deterministically.

    Parameters
    ----------
    order : int
        The order (rank) assigned to this mutable.
    """

    def __init__(self, order):
        # Only records the rank; subclasses combine it with nni mutables.
        self.order = order
class OrderedLayerChoice(OrderedMutable, mutables.LayerChoice):
    """An nni ``LayerChoice`` extended with an explicit ``order`` rank
    (see ``OrderedMutable``); remaining arguments mirror nni's signature."""

    def __init__(
        self, order, op_candidates, reduction="sum", return_mask=False, key=None
    ):
        # Initialize both bases explicitly; the nni base does the real work.
        OrderedMutable.__init__(self, order)
        mutables.LayerChoice.__init__(self, op_candidates, reduction, return_mask, key)
class OrderedInputChoice(OrderedMutable, mutables.InputChoice):
    """An nni ``InputChoice`` extended with an explicit ``order`` rank
    (see ``OrderedMutable``); remaining arguments mirror nni's signature."""

    def __init__(
        self,
        order,
        n_candidates=None,
        choose_from=None,
        n_chosen=None,
        reduction="sum",
        return_mask=False,
        key=None,
    ):
        # Initialize both bases explicitly; the nni base does the real work.
        OrderedMutable.__init__(self, order)
        mutables.InputChoice.__init__(
            self, n_candidates, choose_from, n_chosen, reduction, return_mask, key
        )
class StrModule(nn.Module):
    """
    Wraps an arbitrary value as an ``nn.Module`` so plain (non-module) choices
    can participate in a non-one-shot space definition; see ``map_nn``.

    Parameters
    ----------
    name : any
        The wrapped value; returned verbatim by ``forward``.
    """

    def __init__(self, name):
        super().__init__()
        self.str = name

    def forward(self, *args, **kwargs):
        # Ignore every input and hand back the wrapped value.
        return self.str

    def __repr__(self):
        return f"{self.__class__.__name__}({self.str})"
def map_nn(names):
    """
    Wrap every entry of ``names`` in a ``StrModule`` so arbitrary choices can
    be used as modules in a non-one-shot space definition.

    Parameters
    ----------
    names : list of any
        The values to wrap; each may be of any type.
    """
    return list(map(StrModule, names))
class BoxModel(BaseModel):
    """
    The box wrapping a space, can be passed to later procedure or trainer

    Parameters
    ----------
    space_model : BaseSpace
        The space which should be wrapped
    device : str or torch.device
        The device to place the model
    """

    _logger = get_logger("space model")

    def __init__(self, space_model, device=torch.device("cuda")):
        super().__init__(init=True)
        self.init = True
        # A boxed NAS model exposes no hyper-parameter space of its own.
        self.space = []
        self.hyperparams = {}
        self._model = space_model.to(device)
        # Mirror the wrapped space's dimensions so trainers can read them.
        self.num_features = self._model.input_dim
        self.num_classes = self._model.output_dim
        self.params = {"num_class": self.num_classes, "features_num": self.num_features}
        self.device = device
        # Set by `fix`; reused in `from_hyper_parameter` to re-apply the choice.
        self.selection = None

    def fix(self, selection):
        """
        To fix self._model with a selection

        Parameters
        ----------
        selection : dict
            A selection indicating the choices of mutables
        """
        self.selection = selection
        self._model.instantiate()
        # `apply_fixed_architecture` is defined later in this module.
        apply_fixed_architecture(self._model, selection, verbose=False)
        return self

    def to(self, device):
        # Keep `self.device` in sync whenever the model is moved.
        if isinstance(device, (str, torch.device)):
            self.device = device
        return super().to(device)

    def forward(self, *args, **kwargs):
        # Delegate straight to the wrapped space model.
        return self._model(*args, **kwargs)

    def from_hyper_parameter(self, hp):
        """
        receive no hp, just copy self and reset the learnable parameters.
        """
        ret_self = deepcopy(self)
        ret_self._model.instantiate()
        # Re-apply the fixed architecture if one was selected on the original.
        if ret_self.selection:
            apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False)
        ret_self.to(self.device)
        return ret_self

    @property
    def model(self):
        # Expose the underlying space model (read-only).
        return self._model
class BaseSpace(nn.Module):
    """
    Base space class of NAS module. Defining space containing all models.
    Please use mutables to define your whole space. Refer to
    `https://nni.readthedocs.io/en/stable/NAS/WriteSearchSpace.html`
    for detailed information.
    """

    def __init__(self):
        super().__init__()
        # Flipped to True by `instantiate` once concrete modules are built.
        self._initialized = False

    @abstractmethod
    def _instantiate(self):
        """
        Instantiate modules in the space
        """
        raise NotImplementedError()

    @abstractmethod
    def forward(self, *args, **kwargs):
        """
        Define the forward pass of space model
        """
        raise NotImplementedError()

    @abstractmethod
    def parse_model(self, selection: dict, device) -> BaseModel:
        """
        Export the searched model from space.

        Parameters
        ----------
        selection: Dict
            The dictionary containing all the choices of nni.
        device: str or torch.device
            The device to put model on.

        Returns
        -------
        model: autogl.module.model.BaseModel
            model to be exported.
        """
        raise NotImplementedError()

    def instantiate(self):
        """
        Instantiate the space; reset the default-key counter for the mutables.
        """
        self._default_key = 0
        if not self._initialized:
            self._initialized = True

    def _resolve_key(self, key):
        """Return ``key`` if given, otherwise allocate a fresh unique default key."""
        # Fix: use `is None` instead of `== None`; shared by both setters so
        # the key-allocation logic lives in one place.
        if key is not None:
            return key
        key = f"default_key_{self._default_key}"
        self._default_key += 1
        return key

    def setLayerChoice(
        self, order, op_candidates, reduction="sum", return_mask=False, key=None
    ):
        """
        Build an ``OrderedLayerChoice``; a unique key is generated if none is given.
        """
        return OrderedLayerChoice(
            order, op_candidates, reduction, return_mask, self._resolve_key(key)
        )

    def setInputChoice(
        self,
        order,
        n_candidates=None,
        choose_from=None,
        n_chosen=None,
        reduction="sum",
        return_mask=False,
        key=None,
    ):
        """
        Build an ``OrderedInputChoice``; a unique key is generated if none is given.
        """
        return OrderedInputChoice(
            order,
            n_candidates,
            choose_from,
            n_chosen,
            reduction,
            return_mask,
            self._resolve_key(key),
        )

    def wrap(self, device="cuda"):
        """
        Return a BoxModel which wraps self as a model.
        Used to pass to trainer.
        To use this function, the space must define `input_dim` and `output_dim`.
        """
        return BoxModel(self, device)
class FixedInputChoice(nn.Module):
    """
    Use to replace ``InputChoice`` mutables in the fix process.

    Parameters
    ----------
    mask : list
        Boolean-like mask over candidate inputs; the first truthy entry marks
        the selected input.
    """

    def __init__(self, mask):
        # Fix: initialize nn.Module state BEFORE assigning attributes; the
        # original assigned attributes first, relying on fragile
        # nn.Module.__setattr__ fallback behavior.
        super().__init__()
        self.mask_len = len(mask)
        # Remember the index of the first selected candidate.
        for i, chosen in enumerate(mask):
            if chosen:
                self.selected = i
                break

    def forward(self, optional_inputs):
        # Only answer when the candidate count matches the recorded mask size;
        # mirrors the original behavior of implicitly returning None otherwise.
        if len(optional_inputs) == self.mask_len:
            return optional_inputs[self.selected]
class CleanFixedArchitecture(FixedArchitecture):
    """
    Fixed architecture mutator that always selects a certain graph, allowing deepcopy

    Parameters
    ----------
    model : nn.Module
        A mutable network.
    fixed_arc : dict
        Preloaded architecture object.
    strict : bool
        Force everything that appears in ``fixed_arc`` to be used at least once.
    verbose : bool
        Print log messages if set to True
    """

    def __init__(self, model, fixed_arc, strict=True, verbose=True):
        super().__init__(model, fixed_arc, strict, verbose)

    def replace_all_choice(self, module=None, prefix=""):
        """
        Replace all choices with selected candidates. It's done with best effort.
        In case of weighted choices or multiple choices, if some of the choices
        are weighted with zero, delete them.
        If single choice, replace the module with a normal module.

        Parameters
        ----------
        module : nn.Module
            Module to be processed.
        prefix : str
            Module name under global namespace.
        """
        if module is None:
            # Start from the root of the mutable network.
            module = self.model
        for name, mutable in module.named_children():
            global_name = (prefix + "." if prefix else "") + name
            if isinstance(mutable, OrderedLayerChoice):
                chosen = self._fixed_arc[mutable.key]
                if sum(chosen) == 1 and max(chosen) == 1 and not mutable.return_mask:
                    # sum is one, max is one, there has to be an only one
                    # this is compatible with both integer arrays, boolean arrays and float arrays
                    setattr(module, name, mutable[chosen.index(1)])
                else:
                    # remove unused parameters
                    for ch, n in zip(chosen, mutable.names):
                        if ch == 0 and not isinstance(ch, float):
                            setattr(mutable, n, None)
            elif isinstance(mutable, OrderedInputChoice):
                chosen = self._fixed_arc[mutable.key]
                # Replace the mutable with a plain module that always picks
                # the input designated by the mask.
                setattr(module, name, FixedInputChoice(chosen))
            else:
                # Recurse into ordinary submodules.
                self.replace_all_choice(mutable, global_name)
def apply_fixed_architecture(model, fixed_arc, verbose=True):
    """
    Load architecture from ``fixed_arc`` and apply it to ``model``.

    Parameters
    ----------
    model : torch.nn.Module
        Model with mutables.
    fixed_arc : str or dict
        Path to the JSON that stores the architecture, or dict that stores the
        exported architecture.
    verbose : bool
        Print log messages if set to True

    Returns
    -------
    FixedArchitecture
        Mutator that is responsible for fixing the graph.
    """
    if isinstance(fixed_arc, str):
        with open(fixed_arc) as f:
            fixed_arc = json.load(f)

    # Bug fix: the third positional parameter of CleanFixedArchitecture is
    # `strict`, not `verbose` -- the original call silently set strict=verbose
    # and left verbose at its default. Pass verbose by keyword instead.
    architecture = CleanFixedArchitecture(model, fixed_arc, verbose=verbose)
    architecture.reset()
    # Replace mutables with the chosen candidates for the convenience of
    # parameter counting.
    architecture.replace_all_choice()
    return architecture
| @@ -0,0 +1,191 @@ | |||
| # codes in this file are reproduced from https://github.com/GraphNAS/GraphNAS with some changes. | |||
| import typing as _typ | |||
| import torch | |||
| import torch.nn.functional as F | |||
| from nni.nas.pytorch import mutables | |||
| from . import register_nas_space | |||
| from .base import BaseSpace | |||
| from ...model import BaseModel | |||
| from torch import nn | |||
| from .operation import act_map, gnn_map | |||
# Candidate GNN operators for the GraphNAS macro space (resolved via `gnn_map`).
GRAPHNAS_DEFAULT_GNN_OPS = [
    "gat_8",  # GAT with 8 heads
    "gat_6",  # GAT with 6 heads
    "gat_4",  # GAT with 4 heads
    "gat_2",  # GAT with 2 heads
    "gat_1",  # GAT with 1 heads
    "gcn",  # GCN
    "cheb",  # chebnet
    "sage",  # sage
    "arma",
    "sg",  # simplifying gcn
    "linear",  # skip connection
    "zero",  # skip connection
]

# Candidate activation functions (resolved via `act_map`).
GRAPHNAS_DEFAULT_ACT_OPS = [
    # "sigmoid", "tanh", "relu", "linear",
    # "softplus", "leaky_relu", "relu6", "elu"
    "sigmoid",
    "tanh",
    "relu",
    "linear",
    "elu",
]
class LambdaModule(nn.Module):
    """Wrap a plain callable as an ``nn.Module`` (used for activation choices)."""

    def __init__(self, lambd):
        super().__init__()
        self.lambd = lambd

    def forward(self, x):
        # Delegate directly to the wrapped callable.
        return self.lambd(x)

    def __repr__(self):
        return f"{self.__class__.__name__}({self.lambd})"
class StrModule(nn.Module):
    """Wrap an arbitrary value as an ``nn.Module`` that returns it from forward."""

    def __init__(self, lambd):
        super().__init__()
        self.str = lambd

    def forward(self, *args, **kwargs):
        # All inputs are ignored; the wrapped value is returned verbatim.
        return self.str

    def __repr__(self):
        return f"{self.__class__.__name__}({self.str})"
def act_map_nn(act):
    """Wrap the activation named ``act`` (resolved via ``act_map``) as a module."""
    return LambdaModule(act_map(act))
def map_nn(l):
    """Wrap each entry of ``l`` in a ``StrModule`` so it can act as a choice."""
    return [StrModule(x) for x in l]
@register_nas_space("graphnas")
class GraphNasNodeClassificationSpace(BaseSpace):
    """
    GraphNAS macro search space for node classification.

    Builds ``layer_number`` searchable cells: cell ``layer`` chooses one input
    among all previous node outputs (``in_{layer}``) and one GNN operator
    (``op_{layer}``). A single shared activation (``act``) and an aggregation
    mode (``concat``: "add"/"product"/"concat") combine the cell outputs.
    """

    def __init__(
        self,
        hidden_dim: _typ.Optional[int] = 64,
        layer_number: _typ.Optional[int] = 2,
        dropout: _typ.Optional[float] = 0.9,
        input_dim: _typ.Optional[int] = None,
        output_dim: _typ.Optional[int] = None,
        gnn_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = GRAPHNAS_DEFAULT_GNN_OPS,
        act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = GRAPHNAS_DEFAULT_ACT_OPS,
    ):
        super().__init__()
        self.layer_number = layer_number
        self.hidden_dim = hidden_dim
        # input_dim/output_dim may stay None here; they must be supplied by
        # `instantiate` before the space is used.
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.gnn_ops = gnn_ops
        self.act_ops = act_ops
        self.dropout = dropout

    def instantiate(
        self,
        hidden_dim: _typ.Optional[int] = None,
        layer_number: _typ.Optional[int] = None,
        dropout: _typ.Optional[float] = None,
        input_dim: _typ.Optional[int] = None,
        output_dim: _typ.Optional[int] = None,
        gnn_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None,
        act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None,
    ):
        """Build the concrete mutables; arguments override constructor values."""
        super().instantiate()
        self.dropout = dropout or self.dropout
        self.hidden_dim = hidden_dim or self.hidden_dim
        self.layer_number = layer_number or self.layer_number
        self.input_dim = input_dim or self.input_dim
        self.output_dim = output_dim or self.output_dim
        self.gnn_ops = gnn_ops or self.gnn_ops
        self.act_ops = act_ops or self.act_ops
        # Two parallel input projections feed the first two "nodes" of the DAG.
        self.preproc0 = nn.Linear(self.input_dim, self.hidden_dim)
        self.preproc1 = nn.Linear(self.input_dim, self.hidden_dim)
        node_labels = [mutables.InputChoice.NO_KEY, mutables.InputChoice.NO_KEY]
        for layer in range(2, self.layer_number + 2):
            node_labels.append(f"op_{layer}")
            # Choose which earlier node output feeds this layer.
            setattr(
                self,
                f"in_{layer}",
                self.setInputChoice(
                    layer,
                    choose_from=node_labels[:-1],
                    n_chosen=1,
                    return_mask=False,
                    key=f"in_{layer}",
                ),
            )
            # Choose which GNN operator this layer applies.
            setattr(
                self,
                f"op_{layer}",
                self.setLayerChoice(
                    layer,
                    [
                        gnn_map(op, self.hidden_dim, self.hidden_dim)
                        for op in self.gnn_ops
                    ],
                    key=f"op_{layer}",
                ),
            )
        # One activation and one aggregation mode shared by the whole network
        # (placed after the loop; `layer` holds its final loop value here).
        setattr(
            self,
            "act",
            self.setLayerChoice(
                2 * layer, [act_map_nn(a) for a in self.act_ops], key="act"
            ),
        )
        setattr(
            self,
            "concat",
            self.setLayerChoice(
                2 * layer + 1, map_nn(["add", "product", "concat"]), key="concat"
            ),
        )
        self._initialized = True
        # classifier1 is used for "concat" aggregation, classifier2 otherwise.
        self.classifier1 = nn.Linear(
            self.hidden_dim * self.layer_number, self.output_dim
        )
        self.classifier2 = nn.Linear(self.hidden_dim, self.output_dim)

    def forward(self, data):
        """Run the sampled architecture on a PyG-style ``data`` object and
        return per-node log-probabilities."""
        x, edges = data.x, data.edge_index  # x [2708,1433] ,[2, 10556]
        x = F.dropout(x, p=self.dropout, training=self.training)
        pprev_, prev_ = self.preproc0(x), self.preproc1(x)
        prev_nodes_out = [pprev_, prev_]
        for layer in range(2, self.layer_number + 2):
            # Each cell: pick an input, then apply the chosen GNN operator.
            node_in = getattr(self, f"in_{layer}")(prev_nodes_out)
            node_out = getattr(self, f"op_{layer}")(node_in, edges)
            prev_nodes_out.append(node_out)
        act = getattr(self, "act")
        # The "concat" choice resolves to a StrModule: calling it yields the
        # aggregation-mode string.
        con = getattr(self, "concat")()

        states = prev_nodes_out
        if con == "concat":
            x = torch.cat(states[2:], dim=1)
        else:
            # Fold the cell outputs element-wise ("add" or "product").
            # NOTE(review): the fold starts at i=2 with tmp already equal to
            # states[2], so states[2] is combined with itself -- confirm this
            # matches the intended GraphNAS behavior.
            tmp = states[2]
            for i in range(2, len(states)):
                if con == "add":
                    tmp = torch.add(tmp, states[i])
                elif con == "product":
                    tmp = torch.mul(tmp, states[i])
            x = tmp
        x = act(x)
        if con == "concat":
            x = self.classifier1(x)
        else:
            x = self.classifier2(x)
        return F.log_softmax(x, dim=1)

    def parse_model(self, selection, device) -> BaseModel:
        """Fix the space with ``selection`` and return it wrapped as a BoxModel."""
        # return AutoGCN(self.input_dim, self.output_dim, device)
        return self.wrap(device).fix(selection)
| @@ -0,0 +1,741 @@ | |||
| import torch | |||
| import typing as _typ | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from . import register_nas_space | |||
| from .base import BaseSpace, map_nn | |||
| from ...model import BaseModel | |||
| from .operation import act_map | |||
| from torch.nn import Parameter | |||
| from torch_geometric.nn.inits import glorot, zeros | |||
| from torch_geometric.utils import ( | |||
| remove_self_loops, | |||
| add_self_loops, | |||
| add_remaining_self_loops, | |||
| softmax, | |||
| ) | |||
| from torch_scatter import scatter_add | |||
| import torch_scatter | |||
| import inspect | |||
| import sys | |||
# Argument names with special meaning inside `MessagePassing.message`;
# these are filled in by `propagate` itself rather than taken from **kwargs.
special_args = [
    "edge_index",
    "edge_index_i",
    "edge_index_j",
    "size",
    "size_i",
    "size_j",
]
__size_error_msg__ = (
    "All tensors which should get mapped to the same source "
    "or target nodes must be of same size in dimension 0."
)

# Python 2 fallback for introspecting `message`/`update` signatures.
is_python2 = sys.version_info[0] < 3
getargspec = inspect.getargspec if is_python2 else inspect.getfullargspec
def scatter_(name, src, index, dim_size=None):
    r"""Aggregates all values from the :attr:`src` tensor at the indices
    specified in the :attr:`index` tensor along the first dimension.
    If multiple indices reference the same location, their contributions
    are aggregated according to :attr:`name` (either :obj:`"add"`,
    :obj:`"mean"` or :obj:`"max"`).

    Args:
        name (string): The aggregation to use (:obj:`"add"`, :obj:`"mean"`,
            :obj:`"max"`).
        src (Tensor): The source tensor.
        index (LongTensor): The indices of elements to scatter.
        dim_size (int, optional): Automatically create output tensor with size
            :attr:`dim_size` in the first dimension. If set to :attr:`None`, a
            minimal sized output tensor is returned. (default: :obj:`None`)

    :rtype: :class:`Tensor`
    """
    assert name in ["add", "mean", "max"]

    op = getattr(torch_scatter, "scatter_{}".format(name))
    # Sentinel for "max": entries still equal to it afterwards are zeroed below.
    fill_value = -1e9 if name == "max" else 0
    out = op(src, index, 0, None, dim_size)
    if isinstance(out, tuple):
        # scatter_max returns (values, argmax); keep only the values.
        out = out[0]

    if name == "max":
        # Zero out slots that match the sentinel (i.e. received no contribution).
        out[out == fill_value] = 0

    return out
class MessagePassing(torch.nn.Module):
    """
    Minimal message-passing base class (reproduced from an early
    torch_geometric design): subclasses override ``message``/``update`` and
    call ``propagate`` to gather, aggregate and transform neighbor features.

    Parameters
    ----------
    aggr : str
        Aggregation scheme: "add", "mean" or "max".
    flow : str
        Message direction: "source_to_target" or "target_to_source".
    """

    def __init__(self, aggr="add", flow="source_to_target"):
        super(MessagePassing, self).__init__()

        self.aggr = aggr
        assert self.aggr in ["add", "mean", "max"]

        self.flow = flow
        assert self.flow in ["source_to_target", "target_to_source"]

        # Introspect the subclass' `message` signature once: split its
        # parameters into special ones (filled in by `propagate`) and data
        # ones (looked up in **kwargs, optionally suffixed _i/_j).
        self.__message_args__ = getargspec(self.message)[0][1:]
        self.__special_args__ = [
            (i, arg)
            for i, arg in enumerate(self.__message_args__)
            if arg in special_args
        ]
        self.__message_args__ = [
            arg for arg in self.__message_args__ if arg not in special_args
        ]
        self.__update_args__ = getargspec(self.update)[0][2:]

    def propagate(self, edge_index, size=None, **kwargs):
        r"""The initial call to start propagating messages.

        Args:
            edge_index (Tensor): The indices of a general (sparse) assignment
                matrix with shape :obj:`[N, M]` (can be directed or
                undirected).
            size (list or tuple, optional): The size :obj:`[N, M]` of the
                assignment matrix. If set to :obj:`None`, the size is tried to
                get automatically inferred. (default: :obj:`None`)
            **kwargs: Any additional data which is needed to construct messages
                and to update node embeddings.
        """
        size = [None, None] if size is None else list(size)
        assert len(size) == 2

        # `i` indexes the aggregation side, `j` the message-source side.
        i, j = (0, 1) if self.flow == "target_to_source" else (1, 0)
        ij = {"_i": i, "_j": j}

        message_args = []
        for arg in self.__message_args__:
            if arg[-2:] in ij.keys():
                # Node-level tensor lifted to edges via the _i/_j suffix.
                tmp = kwargs.get(arg[:-2], None)
                if tmp is None:  # pragma: no cover
                    message_args.append(tmp)
                else:
                    idx = ij[arg[-2:]]
                    if isinstance(tmp, tuple) or isinstance(tmp, list):
                        # Bipartite input: one tensor per side.
                        assert len(tmp) == 2
                        if tmp[1 - idx] is not None:
                            if size[1 - idx] is None:
                                size[1 - idx] = tmp[1 - idx].size(0)
                            if size[1 - idx] != tmp[1 - idx].size(0):
                                raise ValueError(__size_error_msg__)
                        tmp = tmp[idx]

                    if size[idx] is None:
                        size[idx] = tmp.size(0)
                    if size[idx] != tmp.size(0):
                        raise ValueError(__size_error_msg__)

                    # Gather per-node values for each edge endpoint.
                    tmp = torch.index_select(tmp, 0, edge_index[idx])
                    message_args.append(tmp)
            else:
                # Plain pass-through keyword argument.
                message_args.append(kwargs.get(arg, None))

        size[0] = size[1] if size[0] is None else size[0]
        size[1] = size[0] if size[1] is None else size[1]

        kwargs["edge_index"] = edge_index
        kwargs["size"] = size

        # Re-insert special arguments at their original positions in the
        # `message` signature.
        for (idx, arg) in self.__special_args__:
            if arg[-2:] in ij.keys():
                message_args.insert(idx, kwargs[arg[:-2]][ij[arg[-2:]]])
            else:
                message_args.insert(idx, kwargs[arg])

        update_args = [kwargs[arg] for arg in self.__update_args__]

        out = self.message(*message_args)
        if self.aggr in ["add", "mean", "max"]:
            # Reduce messages onto the aggregation-side nodes.
            out = scatter_(self.aggr, out, edge_index[i], dim_size=size[i])
        else:
            pass
        out = self.update(out, *update_args)

        return out

    def message(self, x_j):  # pragma: no cover
        r"""Constructs messages in analogy to :math:`\phi_{\mathbf{\Theta}}`
        for each edge in :math:`(i,j) \in \mathcal{E}`.
        Can take any argument which was initially passed to :meth:`propagate`.
        In addition, features can be lifted to the source node :math:`i` and
        target node :math:`j` by appending :obj:`_i` or :obj:`_j` to the
        variable name, *.e.g.* :obj:`x_i` and :obj:`x_j`."""
        return x_j

    def update(self, aggr_out):  # pragma: no cover
        r"""Updates node embeddings in analogy to
        :math:`\gamma_{\mathbf{\Theta}}` for each node
        :math:`i \in \mathcal{V}`.
        Takes in the output of aggregation as first argument and any argument
        which was initially passed to :meth:`propagate`."""
        return aggr_out
| class GeoLayer(MessagePassing): | |||
    def __init__(
        self,
        in_channels,
        out_channels,
        heads=1,
        concat=True,
        negative_slope=0.2,
        dropout=0,
        bias=True,
        att_type="gat",
        agg_type="sum",
        pool_dim=0,
    ):
        # "sum"/"mlp" aggregate with scatter-add; "mean"/"max" use their own
        # scatter reduction. NOTE(review): any other agg_type skips the
        # superclass __init__ entirely -- confirm callers never pass one.
        if agg_type in ["sum", "mlp"]:
            super(GeoLayer, self).__init__("add")
        elif agg_type in ["mean", "max"]:
            super(GeoLayer, self).__init__(agg_type)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.concat = concat
        self.negative_slope = negative_slope
        self.dropout = dropout
        self.att_type = att_type
        self.agg_type = agg_type

        # GCN weight (normalization cache), computed lazily in `message`.
        self.gcn_weight = None

        self.weight = Parameter(torch.Tensor(in_channels, heads * out_channels))
        self.att = Parameter(torch.Tensor(1, heads, 2 * out_channels))

        if bias and concat:
            self.bias = Parameter(torch.Tensor(heads * out_channels))
        elif bias and not concat:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter("bias", None)

        if self.att_type in ["generalized_linear"]:
            self.general_att_layer = torch.nn.Linear(out_channels, 1, bias=False)

        # NOTE(review): indentation reconstructed from the upstream GraphNAS
        # source -- pool_dim defaults to 128 only for pooled agg types.
        if self.agg_type in ["mean", "max", "mlp"]:
            if pool_dim <= 0:
                pool_dim = 128
        self.pool_dim = pool_dim
        if pool_dim != 0:
            # Two-layer MLP applied to neighbor messages before aggregation.
            self.pool_layer = torch.nn.ModuleList()
            self.pool_layer.append(torch.nn.Linear(self.out_channels, self.pool_dim))
            self.pool_layer.append(torch.nn.Linear(self.pool_dim, self.out_channels))
        else:
            pass
        self.reset_parameters()
    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
        """Symmetric GCN normalization: returns ``(edge_index, weights)`` with
        weights D^-1/2 * A_hat * D^-1/2 after adding missing self-loops."""
        if edge_weight is None:
            edge_weight = torch.ones(
                (edge_index.size(1),), dtype=dtype, device=edge_index.device
            )

        # Self-loops get weight 1 (or 2 for the "improved" GCN variant).
        fill_value = 1 if not improved else 2
        edge_index, edge_weight = add_remaining_self_loops(
            edge_index, edge_weight, fill_value, num_nodes
        )

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        # Isolated nodes have zero degree; avoid propagating infinities.
        deg_inv_sqrt[deg_inv_sqrt == float("inf")] = 0

        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
| def reset_parameters(self): | |||
| glorot(self.weight) | |||
| glorot(self.att) | |||
| zeros(self.bias) | |||
| if self.att_type in ["generalized_linear"]: | |||
| glorot(self.general_att_layer.weight) | |||
| if self.pool_dim != 0: | |||
| for layer in self.pool_layer: | |||
| glorot(layer.weight) | |||
| zeros(layer.bias) | |||
| def forward(self, x, edge_index): | |||
| """""" | |||
| edge_index, _ = remove_self_loops(edge_index) | |||
| edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0)) | |||
| # prepare | |||
| x = torch.mm(x, self.weight).view(-1, self.heads, self.out_channels) | |||
| return self.propagate(edge_index, x=x, num_nodes=x.size(0)) | |||
| def message(self, x_i, x_j, edge_index, num_nodes): | |||
| if self.att_type == "const": | |||
| if self.training and self.dropout > 0: | |||
| x_j = F.dropout(x_j, p=self.dropout, training=True) | |||
| neighbor = x_j | |||
| elif self.att_type == "gcn": | |||
| if self.gcn_weight is None or self.gcn_weight.size(0) != x_j.size( | |||
| 0 | |||
| ): # 对于不同的图gcn_weight需要重新计算 | |||
| _, norm = self.norm(edge_index, num_nodes, None) | |||
| self.gcn_weight = norm | |||
| neighbor = self.gcn_weight.view(-1, 1, 1) * x_j | |||
| else: | |||
| # Compute attention coefficients. | |||
| alpha = self.apply_attention(edge_index, num_nodes, x_i, x_j) | |||
| alpha = softmax(alpha, edge_index[0], num_nodes=num_nodes) | |||
| # Sample attention coefficients stochastically. | |||
| if self.training and self.dropout > 0: | |||
| alpha = F.dropout(alpha, p=self.dropout, training=True) | |||
| neighbor = x_j * alpha.view(-1, self.heads, 1) | |||
| if self.pool_dim > 0: | |||
| for layer in self.pool_layer: | |||
| neighbor = layer(neighbor) | |||
| return neighbor | |||
| def apply_attention(self, edge_index, num_nodes, x_i, x_j): | |||
| if self.att_type == "gat": | |||
| alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1) | |||
| alpha = F.leaky_relu(alpha, self.negative_slope) | |||
| elif self.att_type == "gat_sym": | |||
| wl = self.att[:, :, : self.out_channels] # weight left | |||
| wr = self.att[:, :, self.out_channels :] # weight right | |||
| alpha = (x_i * wl).sum(dim=-1) + (x_j * wr).sum(dim=-1) | |||
| alpha_2 = (x_j * wl).sum(dim=-1) + (x_i * wr).sum(dim=-1) | |||
| alpha = F.leaky_relu(alpha, self.negative_slope) + F.leaky_relu( | |||
| alpha_2, self.negative_slope | |||
| ) | |||
| elif self.att_type == "linear": | |||
| wl = self.att[:, :, : self.out_channels] # weight left | |||
| wr = self.att[:, :, self.out_channels :] # weight right | |||
| al = x_j * wl | |||
| ar = x_j * wr | |||
| alpha = al.sum(dim=-1) + ar.sum(dim=-1) | |||
| alpha = torch.tanh(alpha) | |||
| elif self.att_type == "cos": | |||
| wl = self.att[:, :, : self.out_channels] # weight left | |||
| wr = self.att[:, :, self.out_channels :] # weight right | |||
| alpha = x_i * wl * x_j * wr | |||
| alpha = alpha.sum(dim=-1) | |||
| elif self.att_type == "generalized_linear": | |||
| wl = self.att[:, :, : self.out_channels] # weight left | |||
| wr = self.att[:, :, self.out_channels :] # weight right | |||
| al = x_i * wl | |||
| ar = x_j * wr | |||
| alpha = al + ar | |||
| alpha = torch.tanh(alpha) | |||
| alpha = self.general_att_layer(alpha) | |||
| else: | |||
| raise Exception("Wrong attention type:", self.att_type) | |||
| return alpha | |||
| def update(self, aggr_out): | |||
| if self.concat is True: | |||
| aggr_out = aggr_out.view(-1, self.heads * self.out_channels) | |||
| else: | |||
| aggr_out = aggr_out.mean(dim=1) | |||
| if self.bias is not None: | |||
| aggr_out = aggr_out + self.bias | |||
| return aggr_out | |||
| def __repr__(self): | |||
| return "{}({}, {}, heads={})".format( | |||
| self.__class__.__name__, self.in_channels, self.out_channels, self.heads | |||
| ) | |||
| def get_param_dict(self): | |||
| params = {} | |||
| key = f"{self.att_type}_{self.agg_type}_{self.in_channels}_{self.out_channels}_{self.heads}" | |||
| weight_key = key + "_weight" | |||
| att_key = key + "_att" | |||
| agg_key = key + "_agg" | |||
| bais_key = key + "_bais" | |||
| params[weight_key] = self.weight | |||
| params[att_key] = self.att | |||
| params[bais_key] = self.bias | |||
| if hasattr(self, "pool_layer"): | |||
| params[agg_key] = self.pool_layer.state_dict() | |||
| return params | |||
| def load_param(self, params): | |||
| key = f"{self.att_type}_{self.agg_type}_{self.in_channels}_{self.out_channels}_{self.heads}" | |||
| weight_key = key + "_weight" | |||
| att_key = key + "_att" | |||
| agg_key = key + "_agg" | |||
| bais_key = key + "_bais" | |||
| if weight_key in params: | |||
| self.weight = params[weight_key] | |||
| if att_key in params: | |||
| self.att = params[att_key] | |||
| if bais_key in params: | |||
| self.bias = params[bais_key] | |||
| if agg_key in params and hasattr(self, "pool_layer"): | |||
| self.pool_layer.load_state_dict(params[agg_key]) | |||
| @register_nas_space("graphnasmacro") | |||
| class GraphNasMacroNodeClassificationSpace(BaseSpace): | |||
| def __init__( | |||
| self, | |||
| hidden_dim: _typ.Optional[int] = 64, | |||
| layer_number: _typ.Optional[int] = 2, | |||
| dropout: _typ.Optional[float] = 0.6, | |||
| input_dim: _typ.Optional[int] = None, | |||
| output_dim: _typ.Optional[int] = None, | |||
| ops: _typ.Tuple = None, | |||
| search_act_con=False, | |||
| ): | |||
| super().__init__() | |||
| self.layer_number = layer_number | |||
| self.hidden_dim = hidden_dim | |||
| self.input_dim = input_dim | |||
| self.output_dim = output_dim | |||
| self.ops = ops | |||
| self.dropout = dropout | |||
| self.search_act_con = search_act_con | |||
| def instantiate( | |||
| self, | |||
| hidden_dim: _typ.Optional[int] = None, | |||
| layer_number: _typ.Optional[int] = None, | |||
| input_dim: _typ.Optional[int] = None, | |||
| output_dim: _typ.Optional[int] = None, | |||
| ops: _typ.Tuple = None, | |||
| dropout=None, | |||
| ): | |||
| super().instantiate() | |||
| self.hidden_dim = hidden_dim or self.hidden_dim | |||
| self.layer_number = layer_number or self.layer_number | |||
| self.input_dim = input_dim or self.input_dim | |||
| self.output_dim = output_dim or self.output_dim | |||
| self.ops = ops or self.ops | |||
| self.dropout = dropout or self.dropout | |||
| num_feat = self.input_dim | |||
| num_label = self.output_dim | |||
| layer_nums = self.layer_number | |||
| state_num = 5 | |||
| # build hidden layer | |||
| for i in range(layer_nums): | |||
| # extract layer information | |||
| setattr( | |||
| self, | |||
| f"attention_{i}", | |||
| self.setLayerChoice( | |||
| i * state_num + 0, | |||
| map_nn( | |||
| [ | |||
| "gat", | |||
| "gcn", | |||
| "cos", | |||
| "const", | |||
| "gat_sym", | |||
| "linear", | |||
| "generalized_linear", | |||
| ] | |||
| ), | |||
| key=f"attention_{i}", | |||
| ), | |||
| ) | |||
| setattr( | |||
| self, | |||
| f"aggregator_{i}", | |||
| self.setLayerChoice( | |||
| i * state_num + 1, | |||
| map_nn( | |||
| [ | |||
| "sum", | |||
| "mean", | |||
| "max", | |||
| "mlp", | |||
| ] | |||
| ), | |||
| key=f"aggregator_{i}", | |||
| ), | |||
| ) | |||
| setattr( | |||
| self, | |||
| f"act_{i}", | |||
| self.setLayerChoice( | |||
| i * state_num + 0, | |||
| map_nn( | |||
| [ | |||
| "sigmoid", | |||
| "tanh", | |||
| "relu", | |||
| "linear", | |||
| "softplus", | |||
| "leaky_relu", | |||
| "relu6", | |||
| "elu", | |||
| ] | |||
| ), | |||
| key=f"act_{i}", | |||
| ), | |||
| ) | |||
| setattr( | |||
| self, | |||
| f"head_{i}", | |||
| self.setLayerChoice( | |||
| i * state_num + 0, map_nn([1, 2, 4, 6, 8, 16]), key=f"head_{i}" | |||
| ), | |||
| ) | |||
| if i < layer_nums - 1: | |||
| setattr( | |||
| self, | |||
| f"out_channels_{i}", | |||
| self.setLayerChoice( | |||
| i * state_num + 0, | |||
| map_nn([4, 8, 16, 32, 64, 128, 256]), | |||
| key=f"out_channels_{i}", | |||
| ), | |||
| ) | |||
| def parse_model(self, selection, device) -> BaseModel: | |||
| sel_list = [] | |||
| for i in range(self.layer_number): | |||
| sel_list.append( | |||
| [ | |||
| "gat", | |||
| "gcn", | |||
| "cos", | |||
| "const", | |||
| "gat_sym", | |||
| "linear", | |||
| "generalized_linear", | |||
| ][selection[f"attention_{i}"]] | |||
| ) | |||
| sel_list.append( | |||
| [ | |||
| "sum", | |||
| "mean", | |||
| "max", | |||
| "mlp", | |||
| ][selection[f"aggregator_{i}"]] | |||
| ) | |||
| sel_list.append( | |||
| [ | |||
| "sigmoid", | |||
| "tanh", | |||
| "relu", | |||
| "linear", | |||
| "softplus", | |||
| "leaky_relu", | |||
| "relu6", | |||
| "elu", | |||
| ][selection[f"act_{i}"]] | |||
| ) | |||
| sel_list.append([1, 2, 4, 6, 8, 16][selection[f"head_{i}"]]) | |||
| if i < self.layer_number - 1: | |||
| sel_list.append( | |||
| [4, 8, 16, 32, 64, 128, 256][selection[f"out_channels_{i}"]] | |||
| ) | |||
| sel_list.append(self.output_dim) | |||
| # sel_list = ['const', 'sum', 'relu6', 2, 128, 'gat', 'sum', 'linear', 2, 7] | |||
| model = GraphNet( | |||
| sel_list, | |||
| self.input_dim, | |||
| self.output_dim, | |||
| self.dropout, | |||
| multi_label=False, | |||
| batch_normal=False, | |||
| layers=self.layer_number, | |||
| ).wrap(device) | |||
| return model | |||
class GraphNet(BaseSpace):
    """Concrete GraphNAS macro network decoded from a flat action list.

    ``actions`` holds, per layer: attention type, aggregator type,
    activation name, head count and output width (``state_num`` entries).
    """

    def __init__(
        self,
        actions,
        num_feat,
        num_label,
        drop_out=0.6,
        multi_label=False,
        batch_normal=True,
        state_num=5,
        residual=False,
        layers=2,
    ):
        # Plain (non-module) attributes are assigned before super().__init__;
        # submodules are only created in build_model afterwards.
        self.residual = residual
        self.batch_normal = batch_normal
        self.layer_nums = layers
        self.multi_label = multi_label
        self.num_feat = num_feat
        self.num_label = num_label
        self.input_dim = num_feat
        self.output_dim = num_label
        self.dropout = drop_out
        super().__init__()
        self.build_model(
            actions, batch_normal, drop_out, num_feat, num_label, state_num
        )

    def build_model(
        self, actions, batch_normal, drop_out, num_feat, num_label, state_num
    ):
        """Create the module containers and populate the hidden layers."""
        if self.residual:
            self.fcs = torch.nn.ModuleList()
        if self.batch_normal:
            self.bns = torch.nn.ModuleList()
        self.layers = torch.nn.ModuleList()
        # Activations are plain callables from act_map, so an ordinary
        # Python list (not a ModuleList) is sufficient.
        self.acts = []
        self.gates = torch.nn.ModuleList()
        self.build_hidden_layers(
            actions,
            batch_normal,
            drop_out,
            self.layer_nums,
            num_feat,
            num_label,
            state_num,
        )

    def build_hidden_layers(
        self,
        actions,
        batch_normal,
        drop_out,
        layer_nums,
        num_feat,
        num_label,
        state_num=6,
    ):
        """Instantiate one GeoLayer (plus optional BN/residual fc) per layer.

        NOTE(review): the default ``state_num=6`` differs from the 5 used by
        the macro space; callers in this file always pass it explicitly.
        """
        # build hidden layer
        for i in range(layer_nums):
            if i == 0:
                in_channels = num_feat
            else:
                # Uses out_channels/head_num assigned in the previous
                # iteration of this loop.
                in_channels = out_channels * head_num
            # extract layer information
            attention_type = actions[i * state_num + 0]
            aggregator_type = actions[i * state_num + 1]
            act = actions[i * state_num + 2]
            head_num = actions[i * state_num + 3]
            out_channels = actions[i * state_num + 4]
            # Heads are concatenated everywhere except the output layer.
            concat = True
            if i == layer_nums - 1:
                concat = False
            if self.batch_normal:
                self.bns.append(torch.nn.BatchNorm1d(in_channels, momentum=0.5))
            self.layers.append(
                GeoLayer(
                    in_channels,
                    out_channels,
                    head_num,
                    concat,
                    dropout=self.dropout,
                    att_type=attention_type,
                    agg_type=aggregator_type,
                )
            )
            self.acts.append(act_map(act))
            if self.residual:
                if concat:
                    self.fcs.append(
                        torch.nn.Linear(in_channels, out_channels * head_num)
                    )
                else:
                    self.fcs.append(torch.nn.Linear(in_channels, out_channels))

    def forward(self, data):
        """Run the stacked layers on a PyG-style data object.

        Per layer: dropout -> (optional) batch norm -> GeoLayer
        (+ residual fc) -> activation; log-softmax unless multi-label.
        """
        output, edge_index_all = data.x, data.edge_index  # x [2708,1433] ,[2, 10556]
        if self.residual:
            for i, (act, layer, fc) in enumerate(zip(self.acts, self.layers, self.fcs)):
                output = F.dropout(output, p=self.dropout, training=self.training)
                if self.batch_normal:
                    output = self.bns[i](output)
                output = act(layer(output, edge_index_all) + fc(output))
        else:
            for i, (act, layer) in enumerate(zip(self.acts, self.layers)):
                output = F.dropout(output, p=self.dropout, training=self.training)
                if self.batch_normal:
                    output = self.bns[i](output)
                output = act(layer(output, edge_index_all))
        if not self.multi_label:
            output = F.log_softmax(output, dim=1)
        return output

    def __repr__(self):
        # Concatenated reprs of the stacked GeoLayers.
        result_lines = ""
        for each in self.layers:
            result_lines += str(each)
        return result_lines

    @staticmethod
    def merge_param(old_param, new_param, update_all):
        """Merge new_param into old_param; overwrite only when update_all."""
        for key in new_param:
            if update_all or key not in old_param:
                old_param[key] = new_param[key]
        return old_param

    def get_param_dict(self, old_param=None, update_all=True):
        """Collect shareable parameters from every layer (and fc/bn modules).

        NOTE(review): the batch-norm key reuses the "fc" prefix
        (``layer_{i}_fc_{dim}``); it cannot collide with the two-dimension
        fc keys, but the naming looks like a historical typo.
        """
        if old_param is None:
            result = {}
        else:
            result = old_param
        for i in range(self.layer_nums):
            key = "layer_%d" % i
            new_param = self.layers[i].get_param_dict()
            if key in result:
                new_param = self.merge_param(result[key], new_param, update_all)
                result[key] = new_param
            else:
                result[key] = new_param
        if self.residual:
            for i, fc in enumerate(self.fcs):
                key = f"layer_{i}_fc_{fc.weight.size(0)}_{fc.weight.size(1)}"
                result[key] = self.fcs[i]
        if self.batch_normal:
            for i, bn in enumerate(self.bns):
                key = f"layer_{i}_fc_{bn.weight.size(0)}"
                result[key] = self.bns[i]
        return result

    def load_param(self, param):
        """Restore parameters previously produced by get_param_dict."""
        if param is None:
            return
        for i in range(self.layer_nums):
            self.layers[i].load_param(param["layer_%d" % i])
        if self.residual:
            for i, fc in enumerate(self.fcs):
                key = f"layer_{i}_fc_{fc.weight.size(0)}_{fc.weight.size(1)}"
                if key in param:
                    self.fcs[i] = param[key]
        if self.batch_normal:
            for i, bn in enumerate(self.bns):
                key = f"layer_{i}_fc_{bn.weight.size(0)}"
                if key in param:
                    self.bns[i] = param[key]
| @@ -0,0 +1,126 @@ | |||
| # codes in this file are reproduced from https://github.com/GraphNAS/GraphNAS with some changes. | |||
| from torch_geometric.nn import ( | |||
| GATConv, | |||
| GCNConv, | |||
| ChebConv, | |||
| SAGEConv, | |||
| GatedGraphConv, | |||
| ARMAConv, | |||
| SGConv, | |||
| ) | |||
| import torch_geometric.nn | |||
| import torch | |||
| from torch import nn | |||
| import torch.nn.functional as F | |||
class LinearConv(nn.Module):
    """A graph-agnostic "convolution": a plain linear layer that ignores edges."""

    def __init__(self, in_channels, out_channels, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.linear = torch.nn.Linear(in_channels, out_channels, bias)

    def forward(self, x, edge_index, edge_weight=None):
        # Edge information is deliberately unused.
        return self.linear(x)

    def __repr__(self):
        return "{}({}, {})".format(
            type(self).__name__, self.in_channels, self.out_channels
        )
class ZeroConv(nn.Module):
    """Outputs all-zero features of the same shape as the input."""

    def forward(self, x, edge_index, edge_weight=None):
        result = torch.zeros_like(x)
        # Keep the output attached to autograd like a real layer's output.
        result.requires_grad = True
        return result

    def __repr__(self):
        return "ZeroConv()"
class Identity(nn.Module):
    """Passes node features through unchanged, ignoring the graph."""

    def forward(self, x, edge_index, edge_weight=None):
        return x

    def __repr__(self):
        return "Identity()"
def act_map(act):
    """Map an activation name to the corresponding callable.

    Parameters
    ----------
    act : str
        One of: "linear", "elu", "sigmoid", "tanh", "relu", "relu6",
        "softplus", "leaky_relu".

    Raises
    ------
    Exception
        If ``act`` is not a recognized activation name.
    """
    activations = {
        "linear": lambda x: x,
        "elu": F.elu,
        "sigmoid": torch.sigmoid,
        "tanh": torch.tanh,
        "relu": torch.nn.functional.relu,
        "relu6": torch.nn.functional.relu6,
        "softplus": torch.nn.functional.softplus,
        "leaky_relu": torch.nn.functional.leaky_relu,
    }
    try:
        return activations[act]
    except KeyError:
        raise Exception("wrong activate function")
def gnn_map(gnn_name, in_dim, out_dim, concat=False, bias=True) -> nn.Module:
    """
    Build a GNN layer from its searchable name.

    :param gnn_name: layer name, e.g. "gat_8", "gcn", "cheb", or the name of
        any module class available in ``torch_geometric.nn``
    :param in_dim: input feature dimension
    :param out_dim: output feature dimension
    :param concat: for gat, concat multi-head output or not
    :param bias: whether the constructed layer uses a bias term
    :return: GNN model
    """
    if gnn_name == "gat_8":
        return GATConv(in_dim, out_dim, 8, concat=concat, bias=bias)
    elif gnn_name == "gat_6":
        return GATConv(in_dim, out_dim, 6, concat=concat, bias=bias)
    elif gnn_name == "gat_4":
        return GATConv(in_dim, out_dim, 4, concat=concat, bias=bias)
    elif gnn_name == "gat_2":
        return GATConv(in_dim, out_dim, 2, concat=concat, bias=bias)
    elif gnn_name in ["gat_1", "gat"]:
        return GATConv(in_dim, out_dim, 1, concat=concat, bias=bias)
    elif gnn_name == "gcn":
        return GCNConv(in_dim, out_dim)
    elif gnn_name == "cheb":
        return ChebConv(in_dim, out_dim, K=2, bias=bias)
    elif gnn_name == "sage":
        return SAGEConv(in_dim, out_dim, bias=bias)
    elif gnn_name == "gated":
        return GatedGraphConv(in_dim, out_dim, bias=bias)
    elif gnn_name == "arma":
        return ARMAConv(in_dim, out_dim, bias=bias)
    elif gnn_name == "sg":
        return SGConv(in_dim, out_dim, bias=bias)
    elif gnn_name == "linear":
        return LinearConv(in_dim, out_dim, bias=bias)
    elif gnn_name == "zero":
        return ZeroConv()
    elif gnn_name == "identity":
        return Identity()
    elif hasattr(torch_geometric.nn, gnn_name):
        cls = getattr(torch_geometric.nn, gnn_name)
        assert isinstance(cls, type), "Only support modules, get %s" % (gnn_name)
        import inspect

        # Only forward the kwargs the constructor actually declares.  The
        # previous filter used ``cls.__init__.__code__.co_varnames``, which
        # also contains __init__'s *local variables* and could therefore
        # pass unexpected keyword arguments.
        accepted = inspect.signature(cls.__init__).parameters
        candidate_kwargs = {
            "in_channels": in_dim,
            "out_channels": out_dim,
            "concat": concat,
            "bias": bias,
        }
        kwargs = {
            key: value for key, value in candidate_kwargs.items() if key in accepted
        }
        return cls(**kwargs)
    raise KeyError("Cannot parse key %s" % (gnn_name))
| @@ -0,0 +1,89 @@ | |||
| from autogl.module.nas.space.operation import gnn_map | |||
| import typing as _typ | |||
| import torch | |||
| import torch.nn.functional as F | |||
| from . import register_nas_space | |||
| from .base import apply_fixed_architecture | |||
| from .base import BaseSpace | |||
| from ...model import BaseModel | |||
| from ....utils import get_logger | |||
| from ...model import AutoGCN | |||
| @register_nas_space("singlepath") | |||
| class SinglePathNodeClassificationSpace(BaseSpace): | |||
| def __init__( | |||
| self, | |||
| hidden_dim: _typ.Optional[int] = 64, | |||
| layer_number: _typ.Optional[int] = 2, | |||
| dropout: _typ.Optional[float] = 0.2, | |||
| input_dim: _typ.Optional[int] = None, | |||
| output_dim: _typ.Optional[int] = None, | |||
| ops: _typ.Tuple = ["GCNConv", "GATConv"], | |||
| ): | |||
| super().__init__() | |||
| self.layer_number = layer_number | |||
| self.hidden_dim = hidden_dim | |||
| self.input_dim = input_dim | |||
| self.output_dim = output_dim | |||
| self.ops = ops | |||
| self.dropout = dropout | |||
| def instantiate( | |||
| self, | |||
| hidden_dim: _typ.Optional[int] = None, | |||
| layer_number: _typ.Optional[int] = None, | |||
| input_dim: _typ.Optional[int] = None, | |||
| output_dim: _typ.Optional[int] = None, | |||
| ops: _typ.Tuple = None, | |||
| dropout=None, | |||
| ): | |||
| super().instantiate() | |||
| self.hidden_dim = hidden_dim or self.hidden_dim | |||
| self.layer_number = layer_number or self.layer_number | |||
| self.input_dim = input_dim or self.input_dim | |||
| self.output_dim = output_dim or self.output_dim | |||
| self.ops = ops or self.ops | |||
| self.dropout = dropout or self.dropout | |||
| for layer in range(self.layer_number): | |||
| setattr( | |||
| self, | |||
| f"op_{layer}", | |||
| self.setLayerChoice( | |||
| layer, | |||
| [ | |||
| op( | |||
| self.input_dim if layer == 0 else self.hidden_dim, | |||
| self.output_dim | |||
| if layer == self.layer_number - 1 | |||
| else self.hidden_dim, | |||
| ) | |||
| if isinstance(op, type) | |||
| else gnn_map( | |||
| op, | |||
| self.input_dim if layer == 0 else self.hidden_dim, | |||
| self.output_dim | |||
| if layer == self.layer_number - 1 | |||
| else self.hidden_dim, | |||
| ) | |||
| for op in self.ops | |||
| ], | |||
| ), | |||
| ) | |||
| self._initialized = True | |||
| def forward(self, data): | |||
| x, edges = data.x, data.edge_index | |||
| for layer in range(self.layer_number): | |||
| x = getattr(self, f"op_{layer}")(x, edges) | |||
| if layer != self.layer_number - 1: | |||
| x = F.leaky_relu(x) | |||
| x = F.dropout(x, p=self.dropout, training=self.training) | |||
| return F.log_softmax(x, dim=1) | |||
| def parse_model(self, selection, device) -> BaseModel: | |||
| # return AutoGCN(self.input_dim, self.output_dim, device) | |||
| return self.wrap(device).fix(selection) | |||
| @@ -0,0 +1,205 @@ | |||
| # Copyright (c) Microsoft Corporation. | |||
| # Licensed under the MIT license. | |||
| import logging | |||
| from collections import OrderedDict | |||
| import numpy as np | |||
| import torch | |||
| import nni.retiarii.nn.pytorch as nn | |||
| from nni.nas.pytorch.mutables import Mutable, InputChoice, LayerChoice | |||
| _logger = logging.getLogger(__name__) | |||
def to_device(obj, device):
    """
    Recursively move a tensor, tuple, list, or dict onto device.
    Plain ints, floats and strings are returned unchanged.
    """
    if torch.is_tensor(obj):
        return obj.to(device)
    if isinstance(obj, (int, float, str)):
        return obj
    if isinstance(obj, tuple):
        return tuple(to_device(item, device) for item in obj)
    if isinstance(obj, list):
        return [to_device(item, device) for item in obj]
    if isinstance(obj, dict):
        return {key: to_device(value, device) for key, value in obj.items()}
    raise ValueError("'%s' has unsupported type '%s'" % (obj, type(obj)))
def to_list(arr):
    """Best-effort conversion of ``arr`` to a plain Python list.

    Tensors and ndarrays become (possibly nested) lists, tuples/lists become
    lists, and anything else is returned unchanged.
    """
    if torch.is_tensor(arr):
        arr = arr.cpu().numpy()
    if isinstance(arr, np.ndarray):
        return arr.tolist()
    return list(arr) if isinstance(arr, (list, tuple)) else arr
class AverageMeterGroup:
    """
    Average meter group for multiple average meters.

    Meters are created lazily on first update and kept in insertion order.
    """

    def __init__(self):
        self.meters = OrderedDict()

    def update(self, data):
        """
        Update the meter group with a dict of metrics.
        Non-exist average meters will be automatically created.
        """
        for k, v in data.items():
            if k not in self.meters:
                self.meters[k] = AverageMeter(k, ":4f")
            self.meters[k].update(v)

    def __getattr__(self, item):
        # Only called for attributes not found normally.  Look the name up in
        # the meter dict, but raise AttributeError (not KeyError) for missing
        # names so protocols that probe for optional attributes (hasattr,
        # copy, pickle) keep working, and read ``meters`` via __dict__ to
        # avoid infinite recursion when it has not been set yet.
        meters = self.__dict__.get("meters")
        if meters is not None and item in meters:
            return meters[item]
        raise AttributeError(item)

    def __getitem__(self, item):
        return self.meters[item]

    def __str__(self):
        return " ".join(str(v) for v in self.meters.values())

    def summary(self):
        """
        Return a summary string of group data.
        """
        return " ".join(v.summary() for v in self.meters.values())
class AverageMeter:
    """
    Computes and stores the average and current value.

    Parameters
    ----------
    name : str
        Name to display.
    fmt : str
        Format string to print the values.
    """

    def __init__(self, name, fmt=":f"):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """
        Reset the meter to its initial empty state.
        """
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """
        Update with value and weight.

        Parameters
        ----------
        val : float or int
            The new value to be accounted in.
        n : int
            The weight of the new value.
        """
        self.val = val
        self.sum = self.sum + val * n
        self.count = self.count + n
        self.avg = self.sum / self.count

    def __str__(self):
        template = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})"
        return template.format(**self.__dict__)

    def summary(self):
        template = "{name}: {avg" + self.fmt + "}"
        return template.format(**self.__dict__)
def get_module_order(root_module):
    """Collect ``key -> order`` for every Mutable found under root_module.

    Non-mutable children are traversed recursively.
    """
    key2order = {}

    def visit(module):
        for _, child in module.named_children():
            if isinstance(child, Mutable):
                key2order[child.key] = child.order
            else:
                visit(child)

    visit(root_module)
    return key2order
def sort_replaced_module(k2o, modules):
    """Order (key, module) pairs by each key's recorded mutable order."""
    return sorted(modules, key=lambda pair: k2o[pair[0]])
def _replace_module_with_type(root_module, init_fn, type_name, modules):
    """Replace every descendant of type ``type_name`` using ``init_fn``.

    Appends ``(key, replacement)`` pairs to ``modules`` (a fresh list when
    None) and returns it.
    """
    if modules is None:
        modules = []

    def visit(module):
        for name, child in module.named_children():
            if isinstance(child, type_name):
                setattr(module, name, init_fn(child))
                modules.append((child.key, getattr(module, name)))
            else:
                visit(child)

    visit(root_module)
    return modules
def replace_layer_choice(root_module, init_fn, modules=None):
    """
    Replace layer choice modules with modules that are initiated with init_fn.

    Parameters
    ----------
    root_module : nn.Module
        Root module to traverse.
    init_fn : Callable
        Initializing function.
    modules : list, optional
        List that replaced (key, module) pairs are appended to; a new list
        is created when omitted.

    Returns
    -------
    List[Tuple[str, nn.Module]]
        A list of layer choice keys (names) and replaced modules.
    """
    return _replace_module_with_type(
        root_module, init_fn, (LayerChoice, nn.LayerChoice), modules
    )
def replace_input_choice(root_module, init_fn, modules=None):
    """
    Replace input choice modules with modules that are initiated with init_fn.

    Parameters
    ----------
    root_module : nn.Module
        Root module to traverse.
    init_fn : Callable
        Initializing function.
    modules : list, optional
        List that replaced (key, module) pairs are appended to; a new list
        is created when omitted.

    Returns
    -------
    List[Tuple[str, nn.Module]]
        A list of input choice keys (names) and replaced modules.
    """
    return _replace_module_with_type(
        root_module, init_fn, (InputChoice, nn.InputChoice), modules
    )
| @@ -1,8 +1,11 @@ | |||
| import importlib | |||
| import os | |||
| from .base import BaseTrainer, Evaluation, EarlyStopping | |||
| TRAINER_DICT = {} | |||
| from .base import ( | |||
| BaseTrainer, | |||
| Evaluation, | |||
| BaseNodeClassificationTrainer, | |||
| BaseGraphClassificationTrainer, | |||
| BaseLinkPredictionTrainer, | |||
| ) | |||
| def register_trainer(name): | |||
| @@ -19,43 +22,28 @@ def register_trainer(name): | |||
| return register_trainer_cls | |||
| EVALUATE_DICT = {} | |||
def register_evaluate(*name):
    """Class decorator registering an Evaluation subclass under one or more names."""

    def register_evaluate_cls(cls):
        for alias in name:
            # Duplicate check first, then subclass check — same order as a
            # caller would observe errors.
            if alias in EVALUATE_DICT:
                raise ValueError(
                    "Cannot register duplicate evaluator ({})".format(alias)
                )
            if not issubclass(cls, Evaluation):
                raise ValueError(
                    "Evaluator ({}: {}) must extend Evaluation".format(
                        alias, cls.__name__
                    )
                )
            EVALUATE_DICT[alias] = cls
        return cls

    return register_evaluate_cls
def get_feval(feval):
    """Resolve an evaluation spec (name, Evaluation subclass, or a list of
    either) to the corresponding Evaluation class(es)."""
    if isinstance(feval, str):
        return EVALUATE_DICT[feval]
    if isinstance(feval, type) and issubclass(feval, Evaluation):
        return feval
    if isinstance(feval, list):
        return [get_feval(item) for item in feval]
    raise ValueError("feval argument of type", type(feval), "is not supported!")
| from .graph_classification import GraphClassificationTrainer | |||
| from .node_classification import NodeClassificationTrainer | |||
| from .evaluate import Acc, Auc, Logloss | |||
| from .graph_classification_full import GraphClassificationFullTrainer | |||
| from .node_classification_full import NodeClassificationFullTrainer | |||
| from .link_prediction import LinkPredictionTrainer | |||
| from .node_classification_trainer import * | |||
| from .evaluation import get_feval, Acc, Auc, Logloss, Mrr, MicroF1 | |||
| __all__ = [ | |||
| "BaseTrainer", | |||
| "GraphClassificationTrainer", | |||
| "NodeClassificationTrainer", | |||
| "Evaluation", | |||
| "BaseGraphClassificationTrainer", | |||
| "BaseNodeClassificationTrainer", | |||
| "BaseLinkPredictionTrainer", | |||
| "GraphClassificationFullTrainer", | |||
| "NodeClassificationFullTrainer", | |||
| "NodeClassificationGraphSAINTTrainer", | |||
| "NodeClassificationLayerDependentImportanceSamplingTrainer", | |||
| "NodeClassificationNeighborSamplingTrainer", | |||
| "LinkPredictionTrainer", | |||
| "Acc", | |||
| "Auc", | |||
| "Logloss", | |||
| "Mrr", | |||
| "MicroF1", | |||
| "get_feval", | |||
| ] | |||
| @@ -1,7 +1,10 @@ | |||
| import numpy as np | |||
| from typing import Union, Iterable | |||
| from ..model import BaseModel | |||
| import typing as _typing | |||
| import torch | |||
| import pickle | |||
| from ..model import BaseModel, ModelUniversalRegistry | |||
| from .evaluation import Evaluation, get_feval, Acc | |||
| from ...utils import get_logger | |||
| LOGGER_ES = get_logger("early-stopping") | |||
| @@ -81,17 +84,13 @@ class EarlyStopping: | |||
| class BaseTrainer: | |||
| def __init__( | |||
| self, | |||
| model: Union[BaseModel, str], | |||
| optimizer=None, | |||
| lr=None, | |||
| max_epoch=None, | |||
| early_stopping_round=None, | |||
| device=None, | |||
| init=True, | |||
| feval=["acc"], | |||
| loss="nll_loss", | |||
| *args, | |||
| **kwargs, | |||
| model: BaseModel, | |||
| device: _typing.Union[torch.device, str], | |||
| init: bool = True, | |||
| feval: _typing.Union[ | |||
| _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] | |||
| ] = (Acc,), | |||
| loss: str = "nll_loss", | |||
| ): | |||
| """ | |||
| The basic trainer. | |||
| @@ -103,29 +102,65 @@ class BaseTrainer: | |||
| model: `BaseModel` or `str` | |||
| The (name of) model used to train and predict. | |||
| optimizer: `Optimizer` of `str` | |||
| The (name of) optimizer used to train and predict. | |||
| lr: `float` | |||
| The learning rate. | |||
| max_epoch: `int` | |||
| The max number of epochs in training. | |||
| init: `bool` | |||
| If True(False), the model will (not) be initialized. | |||
| """ | |||
| super().__init__() | |||
| self.model: BaseModel = model | |||
| if type(device) == torch.device or ( | |||
| type(device) == str and device.lower() != "auto" | |||
| ): | |||
| self.__device: torch.device = torch.device(device) | |||
| else: | |||
| self.__device: torch.device = torch.device( | |||
| "cuda" | |||
| if torch.cuda.is_available() and torch.cuda.device_count() > 0 | |||
| else "cpu" | |||
| ) | |||
| self.init: bool = init | |||
| self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(feval) | |||
| self.loss: str = loss | |||
| early_stopping_round: `int` | |||
| The round of early stop. | |||
    @property
    def device(self) -> torch.device:
        # The resolved torch.device this trainer runs on.
        return self.__device

    @device.setter
    def device(self, __device: _typing.Union[torch.device, str]):
        # Accept an explicit torch.device or a concrete device string; any
        # other value (notably the string "auto") selects CUDA when a GPU
        # is available, otherwise CPU.
        if type(__device) == torch.device or (
            type(__device) == str and __device.lower() != "auto"
        ):
            self.__device: torch.device = torch.device(__device)
        else:
            self.__device: torch.device = torch.device(
                "cuda"
                if torch.cuda.is_available() and torch.cuda.device_count() > 0
                else "cpu"
            )
| device: `torch.device` or `str` | |||
| The device where model will be running on. | |||
@property
def feval(self) -> _typing.Sequence[_typing.Type[Evaluation]]:
    """The sequence of ``Evaluation`` classes used as metrics by this trainer."""
    return self.__feval
| init: `bool` | |||
| If True(False), the model will (not) be initialized. | |||
@feval.setter
def feval(
    self,
    _feval: _typing.Union[
        _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
    ],
):
    # Normalize metric names and/or classes to Evaluation classes via get_feval.
    self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(_feval)
| args: Other parameters. | |||
| def to(self, device: torch.device): | |||
| """ | |||
| Transfer the trainer to another device | |||
| kwargs: Other parameters. | |||
| Parameters | |||
| ---------- | |||
| device: `str` or `torch.device` | |||
| The device this trainer will use | |||
| """ | |||
| super().__init__() | |||
| self.device = torch.device(device) | |||
| def initialize(self): | |||
| """Initialize the auto model in trainer.""" | |||
| @@ -137,7 +172,9 @@ class BaseTrainer: | |||
| def get_feval( | |||
| self, return_major: bool = False | |||
| ) -> Union["Evaluation", Iterable["Evaluation"]]: | |||
| ) -> _typing.Union[ | |||
| _typing.Type[Evaluation], _typing.Sequence[_typing.Type[Evaluation]] | |||
| ]: | |||
| """ | |||
| Parameters | |||
| ---------- | |||
| @@ -151,17 +188,12 @@ class BaseTrainer: | |||
| Otherwise, will return the ``evaluation`` element passed when constructing. | |||
| """ | |||
| if return_major: | |||
| if isinstance(self.feval, list): | |||
| if isinstance(self.feval, _typing.Sequence): | |||
| return self.feval[0] | |||
| else: | |||
| return self.feval | |||
| return self.feval | |||
@classmethod
def get_task_name(cls):
    """Get task name, e.g., `base`, `NodeClassification`, `GraphClassification`, etc."""
    # Subclasses are expected to override this with their concrete task name.
    return "base"
| @classmethod | |||
| def save(cls, instance, path): | |||
| with open(path, "wb") as output: | |||
| @@ -169,8 +201,8 @@ class BaseTrainer: | |||
| @classmethod | |||
| def load(cls, path): | |||
| with open(path, "rb") as input: | |||
| instance = pickle.load(input) | |||
| with open(path, "rb") as inputs: | |||
| instance = pickle.load(inputs) | |||
| return instance | |||
| @property | |||
| @@ -184,7 +216,7 @@ class BaseTrainer: | |||
| pass | |||
| def duplicate_from_hyper_parameter( | |||
| self, hp, model: Union[BaseModel, str, None] = None | |||
| self, hp, model: _typing.Optional[BaseModel] = ... | |||
| ) -> "BaseTrainer": | |||
| """Create a new trainer with the given hyper parameter.""" | |||
| raise NotImplementedError() | |||
| @@ -255,9 +287,8 @@ class BaseTrainer: | |||
| """Get the validation score.""" | |||
| raise NotImplementedError() | |||
| def get_name_with_hp(self): | |||
| """Get the name of hyperparameter.""" | |||
| raise NotImplementedError() | |||
| def __repr__(self) -> str: | |||
| raise NotImplementedError | |||
| def evaluate(self, dataset, mask=None, feval=None): | |||
| """ | |||
| @@ -275,32 +306,117 @@ class BaseTrainer: | |||
| ------- | |||
| The evaluation result. | |||
| """ | |||
| raise NotImplementedError() | |||
| def set_feval(self, feval): | |||
| """Set the evaluation metrics.""" | |||
| raise NotImplementedError() | |||
| raise NotImplementedError | |||
| # a static class for evaluating results | |||
| class Evaluation: | |||
| @staticmethod | |||
| def get_eval_name(): | |||
| def update_parameters(self, **kwargs): | |||
| """ | |||
| Should return the name of this evaluation method | |||
| Update parameters of this trainer | |||
| """ | |||
| raise NotImplementedError() | |||
| for k, v in kwargs.items(): | |||
| if k == "feval": | |||
| self.feval = get_feval(v) | |||
| elif k == "device": | |||
| self.to(v) | |||
| elif hasattr(self, k): | |||
| setattr(self, k, v) | |||
| else: | |||
| raise KeyError("Cannot set parameter", k, "for trainer", self.__class__) | |||
@staticmethod
def is_higher_better():
    """
    Should return whether this evaluation method is higher better (bool)
    """
    # Bug fix: the original `raise True` raises "TypeError: exceptions must
    # derive from BaseException" at runtime. As an abstract stub this should
    # raise NotImplementedError, matching the sibling abstract methods.
    raise NotImplementedError()
| @staticmethod | |||
| def evaluate(predict, label): | |||
| """ | |||
| Should return: the evaluation result (float) | |||
| """ | |||
| raise NotImplementedError() | |||
class _BaseClassificationTrainer(BaseTrainer):
    """
    Base class of trainer for classification tasks.

    Resolves the target device, instantiates the underlying model when a
    registered model name is provided, and delegates the remaining setup
    to ``BaseTrainer``.

    Parameters
    ----------
    model: `BaseModel` or `str`
        A model instance, or the registered name of the model to construct.
    num_features: `int`
        Number of input features.
    num_classes: `int`
        Number of target classes.
    device: `torch.device`, `str` or `None`
        Device to run on; ``None`` or ``"auto"`` selects CUDA when available.
    init: `bool`
        If True(False), the model will (not) be initialized.
    feval: sequence of `str` or of `Evaluation` subclasses
        The evaluation metrics.
    loss: `str`
        Name of the loss function (looked up in ``torch.nn.functional``).

    Raises
    ------
    TypeError
        If ``model`` is neither a ``str``, a ``BaseModel``, nor ``None``.
    """

    def __init__(
        self,
        model: _typing.Union[BaseModel, str],
        num_features: int,
        num_classes: int,
        device: _typing.Union[torch.device, str, None] = "auto",
        init: bool = True,
        feval: _typing.Union[
            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
        ] = (Acc,),
        loss: str = "nll_loss",
    ):
        self.num_features: int = num_features
        self.num_classes: int = num_classes
        # Resolve "auto"/None into a concrete torch.device.
        # (isinstance is the idiomatic check; `type(x) == str` rejected str subclasses.)
        if isinstance(device, torch.device) or (
            isinstance(device, str) and device.lower() != "auto"
        ):
            _device: torch.device = torch.device(device)
        else:
            _device: torch.device = torch.device(
                "cuda"
                if torch.cuda.is_available() and torch.cuda.device_count() > 0
                else "cpu"
            )
        if isinstance(model, str):
            # Look up the model class by its registered name and construct it.
            _model: BaseModel = ModelUniversalRegistry.get_model(model)(
                num_features, num_classes, _device, init=init
            )
        elif isinstance(model, BaseModel):
            _model: BaseModel = model
        elif model is None:
            # Allow subclasses/solvers to attach the model later.
            _model = None
        else:
            raise TypeError(
                f"Model argument only support str or BaseModel, got {model}."
            )
        super(_BaseClassificationTrainer, self).__init__(
            _model, _device, init, feval, loss
        )
class BaseNodeClassificationTrainer(_BaseClassificationTrainer):
    """Base trainer for the node classification task.

    A thin specialization that forwards every argument unchanged to the
    shared classification base class.
    """

    def __init__(
        self,
        model: _typing.Union[BaseModel, str],
        num_features: int,
        num_classes: int,
        device: _typing.Union[torch.device, str, None] = None,
        init: bool = True,
        feval: _typing.Union[
            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
        ] = (Acc,),
        loss: str = "nll_loss",
    ):
        # Pure delegation: no node-classification-specific state is kept here.
        super().__init__(model, num_features, num_classes, device, init, feval, loss)
class BaseGraphClassificationTrainer(_BaseClassificationTrainer):
    """Base trainer for the graph classification task.

    Stores the graph-level feature dimensionality before delegating the
    remaining setup to the shared classification base class.
    """

    def __init__(
        self,
        model: _typing.Union[BaseModel, str],
        num_features: int,
        num_classes: int,
        num_graph_features: int = 0,
        device: _typing.Union[torch.device, str, None] = None,
        init: bool = True,
        feval: _typing.Union[
            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
        ] = (Acc,),
        loss: str = "nll_loss",
    ):
        # Number of graph-level (as opposed to node-level) input features.
        self.num_graph_features: int = num_graph_features
        super().__init__(model, num_features, num_classes, device, init, feval, loss)
class BaseLinkPredictionTrainer(_BaseClassificationTrainer):
    """Base trainer for the link prediction task.

    Link prediction is modeled as binary classification, so the number of
    classes passed to the base class is fixed at 2.
    """

    def __init__(
        self,
        model: _typing.Union[BaseModel, str],
        num_features: int,
        device: _typing.Union[torch.device, str, None] = None,
        init: bool = True,
        feval: _typing.Union[
            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
        ] = (Acc,),
        loss: str = "nll_loss",
    ):
        # Hard-code num_classes=2: edge exists / edge does not exist.
        super().__init__(model, num_features, 2, device, init, feval, loss)
| @@ -1,94 +0,0 @@ | |||
| import numpy as np | |||
| from . import register_evaluate, Evaluation | |||
| from sklearn.metrics import ( | |||
| log_loss, | |||
| accuracy_score, | |||
| roc_auc_score, | |||
| label_ranking_average_precision_score, | |||
| ) | |||
@register_evaluate("logloss")
class Logloss(Evaluation):
    """Logarithmic loss metric; lower values indicate a better model."""

    @staticmethod
    def get_eval_name():
        """Canonical name of this metric."""
        return "logloss"

    @staticmethod
    def is_higher_better():
        """Logloss is a loss: lower is better, hence False."""
        return False

    @staticmethod
    def evaluate(predict, label):
        """Return the log loss of ``predict`` against ``label`` (float)."""
        return log_loss(label, predict)
@register_evaluate("auc", "ROC-AUC")
class Auc(Evaluation):
    """Area under the ROC curve, computed from positive-class scores."""

    @staticmethod
    def get_eval_name():
        """Canonical name of this metric."""
        return "auc"

    @staticmethod
    def is_higher_better():
        """Higher AUC indicates a better model."""
        return True

    @staticmethod
    def evaluate(predict, label):
        """Return ROC-AUC using column 1 of ``predict`` as the positive score."""
        return roc_auc_score(label, predict[:, 1])
@register_evaluate("acc", "Accuracy")
class Acc(Evaluation):
    """Classification accuracy over argmax predictions."""

    @staticmethod
    def get_eval_name():
        """Canonical name of this metric."""
        return "acc"

    @staticmethod
    def is_higher_better():
        """Higher accuracy indicates a better model."""
        return True

    @staticmethod
    def evaluate(predict, label):
        """Return accuracy after reducing class probabilities with argmax."""
        hard_predictions = np.argmax(predict, axis=1)
        return accuracy_score(label, hard_predictions)
@register_evaluate("mrr")
class Mrr(Evaluation):
    """Label-ranking average precision over positive-class scores."""

    @staticmethod
    def get_eval_name():
        """Canonical name of this metric."""
        return "mrr"

    @staticmethod
    def is_higher_better():
        """Higher values indicate a better model."""
        return True

    @staticmethod
    def evaluate(predict, label):
        """Return the ranking score using column 1 of ``predict`` as positive score."""
        return label_ranking_average_precision_score(label, predict[:, 1])
| @@ -0,0 +1,281 @@ | |||
| import numpy as np | |||
| import typing as _typing | |||
| from sklearn.metrics import ( | |||
| f1_score, | |||
| log_loss, | |||
| accuracy_score, | |||
| roc_auc_score, | |||
| label_ranking_average_precision_score, | |||
| ) | |||
class Evaluation:
    """Abstract interface for evaluation metrics.

    Concrete metrics subclass this and override all three static methods;
    every stub here raises ``NotImplementedError``.
    """

    @staticmethod
    def get_eval_name() -> str:
        """Return the canonical name of this metric."""
        raise NotImplementedError

    @staticmethod
    def is_higher_better() -> bool:
        """Return True when larger metric values indicate better results."""
        raise NotImplementedError

    @staticmethod
    def evaluate(predict, label) -> float:
        """Return the metric value of ``predict`` against ``label``."""
        raise NotImplementedError
class EvaluatorUtility:
    """ Auxiliary utilities for evaluation """

    class PredictionBatchCumulativeBuilder:
        """
        Batch-cumulative builder for prediction.

        For large graph, as it is infeasible to predict all the nodes
        in validation set and test set in single batch,
        and layer-wise prediction mechanism is a practical evaluation approach,
        a batch-cumulative prediction collector `PredictionBatchCumulativeBuilder`
        is implemented for prediction in mini-batch manner.
        """

        def __init__(self):
            # 1-D array of indexes accumulated so far; None before the first batch.
            self.__indexes_in_integral_data: _typing.Optional[np.ndarray] = None
            # Predictions aligned row-for-row with the accumulated indexes.
            self.__prediction: _typing.Optional[np.ndarray] = None

        def clear_batches(
            self, *__args, **__kwargs
        ) -> "EvaluatorUtility.PredictionBatchCumulativeBuilder":
            """Discard all accumulated batches; returns self for chaining."""
            self.__indexes_in_integral_data = None
            self.__prediction = None
            return self

        def add_batch(
            self, indexes_in_integral_data: np.ndarray, batch_prediction: np.ndarray
        ) -> "EvaluatorUtility.PredictionBatchCumulativeBuilder":
            """
            Accumulate one mini-batch of predictions.

            Parameters
            ----------
            indexes_in_integral_data: 1-D ``np.ndarray``
                Indexes of the batch rows in the integral dataset; must not
                duplicate indexes within this batch nor across earlier batches.
            batch_prediction: ``np.ndarray``
                Predictions for the batch; first dimension aligned with the indexes.

            Raises
            ------
            TypeError
                If either argument is not an ``np.ndarray`` or indexes are not 1-D.
            ValueError
                On index/prediction length mismatch or duplicate indexes.
            """
            if not (
                isinstance(indexes_in_integral_data, np.ndarray)
                and isinstance(batch_prediction, np.ndarray)
                and len(indexes_in_integral_data.shape) == 1
            ):
                raise TypeError
            elif indexes_in_integral_data.shape[0] != batch_prediction.shape[0]:
                raise ValueError
            if self.__indexes_in_integral_data is None:
                if (
                    indexes_in_integral_data.shape
                    != np.unique(indexes_in_integral_data).shape
                ):
                    raise ValueError(
                        f"There exists duplicate index "
                        f"in the argument indexes_in_integral_data {indexes_in_integral_data}"
                    )
                # Bug fix: store the indexes in their incoming order. The
                # previous code stored np.unique(...), which silently SORTS the
                # indexes without reordering batch_prediction, misaligning
                # index/prediction pairs whenever the first batch is unsorted.
                self.__indexes_in_integral_data: np.ndarray = (
                    indexes_in_integral_data
                )
            else:
                __indexes_in_integral_data = np.concatenate(
                    (self.__indexes_in_integral_data, indexes_in_integral_data)
                )
                # Reject any index already seen in a previous batch.
                if (
                    __indexes_in_integral_data.shape
                    != np.unique(__indexes_in_integral_data).shape
                ):
                    raise ValueError
                else:
                    self.__indexes_in_integral_data: np.ndarray = (
                        __indexes_in_integral_data
                    )
            if self.__prediction is None:
                self.__prediction: np.ndarray = batch_prediction
            else:
                self.__prediction: np.ndarray = np.concatenate(
                    (self.__prediction, batch_prediction)
                )
            return self

        def compose(
            self, __sorted: bool = True, **__kwargs
        ) -> _typing.Tuple[np.ndarray, np.ndarray]:
            """
            Return the accumulated (indexes, predictions) pair.

            When ``__sorted`` is True (default), both arrays are reordered
            together so indexes are ascending; otherwise they are returned in
            accumulation order. Calling this before any ``add_batch`` will
            fail on the None members.
            """
            if __sorted:
                sorted_index = np.argsort(self.__indexes_in_integral_data)
                return (
                    self.__indexes_in_integral_data[sorted_index],
                    self.__prediction[sorted_index],
                )
            else:
                return self.__indexes_in_integral_data, self.__prediction
# Global registry mapping evaluator names/aliases to their Evaluation classes.
EVALUATE_DICT: _typing.Dict[str, _typing.Type[Evaluation]] = {}


def register_evaluate(*name):
    """Class decorator registering an ``Evaluation`` subclass under the given name(s).

    Raises ``ValueError`` when a name is already taken or the decorated class
    does not extend ``Evaluation``.
    """

    def register_evaluate_cls(cls):
        for alias in name:
            if alias in EVALUATE_DICT:
                raise ValueError("Cannot register duplicate evaluator ({})".format(alias))
            if not issubclass(cls, Evaluation):
                raise ValueError(
                    "Evaluator ({}: {}) must extend Evaluation".format(alias, cls.__name__)
                )
            EVALUATE_DICT[alias] = cls
        return cls

    return register_evaluate_cls
def get_feval(feval):
    """Resolve metric specifications to ``Evaluation`` classes.

    Accepts a registered name, an ``Evaluation`` subclass, or a sequence of
    either (resolved recursively); raises ``ValueError`` otherwise.
    """
    if isinstance(feval, str):
        return EVALUATE_DICT[feval]
    if isinstance(feval, type) and issubclass(feval, Evaluation):
        return feval
    if isinstance(feval, _typing.Sequence):
        return [get_feval(item) for item in feval]
    raise ValueError("feval argument of type", type(feval), "is not supported!")
class EvaluationUniversalRegistry:
    """Registry helper exposing ``register_evaluation`` as a decorator factory
    over the module-level ``EVALUATE_DICT``."""

    @classmethod
    def register_evaluation(
        cls, *names
    ) -> _typing.Callable[[_typing.Type[Evaluation]], _typing.Type[Evaluation]]:
        """
        Return a decorator registering an ``Evaluation`` subclass under ``names``.

        Raises
        ------
        ValueError
            When a name is already registered, or when the decorated class
            does not extend ``Evaluation``.
        """

        def _register_evaluation(
            _class: _typing.Type[Evaluation],
        ) -> _typing.Type[Evaluation]:
            for n in names:
                if n in EVALUATE_DICT:
                    raise ValueError(
                        "Cannot register duplicate evaluator ({})".format(n)
                    )
                if not issubclass(_class, Evaluation):
                    # Bug fix: report the decorated class's name. The original
                    # used cls.__name__, which is always the registry class
                    # itself, producing a misleading error message.
                    raise ValueError(
                        "Evaluator ({}: {}) must extend Evaluation".format(
                            n, _class.__name__
                        )
                    )
                EVALUATE_DICT[n] = _class
            return _class

        return _register_evaluation
@register_evaluate("logloss")
class Logloss(Evaluation):
    """Logarithmic loss metric; lower values indicate a better model."""

    @staticmethod
    def get_eval_name():
        """Canonical name of this metric."""
        return "logloss"

    @staticmethod
    def is_higher_better():
        """Logloss is a loss: lower is better, hence False."""
        return False

    @staticmethod
    def evaluate(predict, label):
        """Return the log loss of ``predict`` against ``label`` (float)."""
        return log_loss(label, predict)
@register_evaluate("auc", "ROC-AUC")
class Auc(Evaluation):
    """Area under the ROC curve for binary classification.

    Accepts either 1-D positive-class scores or an (n, 2) probability matrix.
    """

    @staticmethod
    def get_eval_name():
        """Canonical name of this metric."""
        return "auc"

    @staticmethod
    def is_higher_better():
        """Higher AUC indicates a better model."""
        return True

    @staticmethod
    def evaluate(predict, label):
        """Return ROC-AUC; 1-D input is used directly as positive scores."""
        if len(predict.shape) == 1:
            return roc_auc_score(label, predict)
        # 2-D input must be binary probabilities; column 1 is the positive class.
        assert (
            predict.shape[1] == 2
        ), "Cannot use auc on given data with %d classes!" % (predict.shape[1])
        return roc_auc_score(label, predict[:, 1])
@register_evaluate("acc", "Accuracy")
class Acc(Evaluation):
    """Classification accuracy.

    Uses argmax for 2-D class probabilities and a 0.5 threshold for 1-D scores.
    """

    @staticmethod
    def get_eval_name():
        """Canonical name of this metric."""
        return "acc"

    @staticmethod
    def is_higher_better():
        """Higher accuracy indicates a better model."""
        return True

    @staticmethod
    def evaluate(predict, label):
        """Return accuracy of hard predictions derived from ``predict``."""
        if len(predict.shape) == 2:
            hard_predictions = np.argmax(predict, axis=1)
        else:
            # 1-D scores: threshold at 0.5 for binary decisions.
            hard_predictions = [1 if p > 0.5 else 0 for p in predict]
        return accuracy_score(label, hard_predictions)
@register_evaluate("mrr")
class Mrr(Evaluation):
    """Label-ranking average precision over positive-class scores.

    Accepts either 1-D positive-class scores or an (n, 2) probability matrix.
    """

    @staticmethod
    def get_eval_name():
        """Canonical name of this metric."""
        return "mrr"

    @staticmethod
    def is_higher_better():
        """Higher values indicate a better model."""
        return True

    @staticmethod
    def evaluate(predict, label):
        """Return the ranking score; 2-D input must be binary probabilities."""
        if len(predict.shape) != 2:
            pos_predict = predict
        else:
            assert (
                predict.shape[1] == 2
            ), "Cannot use mrr on given data with %d classes!" % (predict.shape[1])
            pos_predict = predict[:, 1]
        return label_ranking_average_precision_score(label, pos_predict)
@register_evaluate("MicroF1")
class MicroF1(Evaluation):
    """Micro-averaged F1 score computed over argmax predictions."""

    @staticmethod
    def get_eval_name() -> str:
        """Canonical name of this metric."""
        return "MicroF1"

    @staticmethod
    def is_higher_better() -> bool:
        """Higher F1 indicates a better model."""
        return True

    @staticmethod
    def evaluate(predict, label) -> float:
        """Return micro-F1 after reducing class probabilities with argmax."""
        hard_predictions = np.argmax(predict, axis=1)
        return f1_score(label, hard_predictions, average="micro")
| @@ -1,29 +1,27 @@ | |||
| from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping | |||
| from . import register_trainer | |||
| from .base import BaseGraphClassificationTrainer, EarlyStopping, Evaluation | |||
| import torch | |||
| from torch.optim.lr_scheduler import StepLR | |||
| from torch.optim.lr_scheduler import ( | |||
| StepLR, | |||
| MultiStepLR, | |||
| ExponentialLR, | |||
| ReduceLROnPlateau, | |||
| ) | |||
| import torch.nn.functional as F | |||
| from ..model import MODEL_DICT, BaseModel | |||
| from .evaluate import Logloss | |||
| from ..model import BaseModel | |||
| from .evaluation import get_feval, Logloss | |||
| from typing import Union | |||
| from ...datasets import utils | |||
| from copy import deepcopy | |||
| import torch.multiprocessing as mp | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger('graph classification solver') | |||
| LOGGER = get_logger("graph classification solver") | |||
def get_feval(feval):
    """Resolve metric specifications (name, ``Evaluation`` class, or a list
    of either, resolved recursively) to ``Evaluation`` classes."""
    if isinstance(feval, str):
        return EVALUATE_DICT[feval]
    if isinstance(feval, type) and issubclass(feval, Evaluation):
        return feval
    if isinstance(feval, list):
        return [get_feval(item) for item in feval]
    raise ValueError("feval argument of type", type(feval), "is not supported!")
| @register_trainer("GraphClassification") | |||
| class GraphClassificationTrainer(BaseTrainer): | |||
| @register_trainer("GraphClassificationFull") | |||
| class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): | |||
| """ | |||
| The graph classification trainer. | |||
| @@ -65,32 +63,29 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| lr=None, | |||
| max_epoch=None, | |||
| batch_size=None, | |||
| num_workers=None, | |||
| early_stopping_round=7, | |||
| weight_decay=1e-4, | |||
| device=None, | |||
| device="auto", | |||
| init=True, | |||
| feval=[Logloss], | |||
| loss="nll_loss", | |||
| lr_scheduler_type=None, | |||
| *args, | |||
| **kwargs | |||
| ): | |||
| super(GraphClassificationTrainer, self).__init__(model) | |||
| self.loss_type = loss | |||
| # init model | |||
| if isinstance(model, str): | |||
| assert model in MODEL_DICT, "Cannot parse model name " + model | |||
| self.model = MODEL_DICT[model]( | |||
| num_features, | |||
| num_classes, | |||
| device, | |||
| init=init, | |||
| num_graph_features=num_graph_features, | |||
| ) | |||
| elif isinstance(model, BaseModel): | |||
| self.model = model | |||
| super().__init__( | |||
| model, | |||
| num_features, | |||
| num_classes, | |||
| num_graph_features=num_graph_features, | |||
| device=device, | |||
| init=init, | |||
| feval=feval, | |||
| loss=loss, | |||
| ) | |||
| self.opt_received = optimizer | |||
| if type(optimizer) == str and optimizer.lower() == "adam": | |||
| self.optimizer = torch.optim.Adam | |||
| elif type(optimizer) == str and optimizer.lower() == "sgd": | |||
| @@ -98,12 +93,14 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| else: | |||
| self.optimizer = torch.optim.Adam | |||
| self.num_features = num_features | |||
| self.num_classes = num_classes | |||
| self.num_graph_features = num_graph_features | |||
| self.lr_scheduler_type = lr_scheduler_type | |||
| self.lr = lr if lr is not None else 1e-4 | |||
| self.max_epoch = max_epoch if max_epoch is not None else 100 | |||
| self.batch_size = batch_size if batch_size is not None else 64 | |||
| self.num_workers = num_workers if num_workers is not None else 0 | |||
| if self.num_workers > 0: | |||
| mp.set_start_method("fork", force=True) | |||
| self.early_stopping_round = ( | |||
| early_stopping_round if early_stopping_round is not None else 100 | |||
| ) | |||
| @@ -125,8 +122,6 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| self.valid_score = None | |||
| self.initialized = False | |||
| self.num_features = num_features | |||
| self.num_classes = num_classes | |||
| self.device = device | |||
| self.space = [ | |||
| @@ -166,8 +161,6 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| "scalingType": "LOG", | |||
| }, | |||
| ] | |||
| self.space += self.model.space | |||
| GraphClassificationTrainer.space = self.space | |||
| self.hyperparams = { | |||
| "max_epoch": self.max_epoch, | |||
| @@ -176,7 +169,6 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| "lr": self.lr, | |||
| "weight_decay": self.weight_decay, | |||
| } | |||
| self.hyperparams = {**self.hyperparams, **self.model.get_hyper_parameter()} | |||
| if init is True: | |||
| self.initialize() | |||
| @@ -197,9 +189,9 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| # """Get task name, i.e., `GraphClassification`.""" | |||
| return "GraphClassification" | |||
| def to(self, new_device): | |||
| assert isinstance(new_device, torch.device) | |||
| self.device = new_device | |||
| def to(self, device): | |||
| assert isinstance(device, torch.device) | |||
| self.device = device | |||
| if self.model is not None: | |||
| self.model.to(self.device) | |||
| @@ -219,9 +211,24 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| """ | |||
| optimizer = self.optimizer( | |||
| self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| ) | |||
| scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| lr_scheduler_type = self.lr_scheduler_type | |||
| if type(lr_scheduler_type) == str and lr_scheduler_type == "steplr": | |||
| scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| elif type(lr_scheduler_type) == str and lr_scheduler_type == "multisteplr": | |||
| scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1) | |||
| elif type(lr_scheduler_type) == str and lr_scheduler_type == "exponentiallr": | |||
| scheduler = ExponentialLR(optimizer, gamma=0.1) | |||
| elif ( | |||
| type(lr_scheduler_type) == str and lr_scheduler_type == "reducelronplateau" | |||
| ): | |||
| scheduler = ReduceLROnPlateau(optimizer, "min") | |||
| else: | |||
| scheduler = None | |||
| for epoch in range(1, self.max_epoch): | |||
| self.model.model.train() | |||
| loss_all = 0 | |||
| @@ -230,29 +237,33 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| optimizer.zero_grad() | |||
| output = self.model.model(data) | |||
| # loss = F.nll_loss(output, data.y) | |||
| if hasattr(F, self.loss_type): | |||
| loss = getattr(F, self.loss_type)(output, data.y) | |||
| if hasattr(F, self.loss): | |||
| loss = getattr(F, self.loss)(output, data.y) | |||
| else: | |||
| raise TypeError("PyTorch does not support loss type {}".format(self.loss_type)) | |||
| raise TypeError( | |||
| "PyTorch does not support loss type {}".format(self.loss) | |||
| ) | |||
| loss.backward() | |||
| loss_all += data.num_graphs * loss.item() | |||
| optimizer.step() | |||
| scheduler.step() | |||
| if self.lr_scheduler_type: | |||
| scheduler.step() | |||
| # loss = loss_all / len(train_loader.dataset) | |||
| # train_loss = self.evaluate(train_loader) | |||
| eval_func = ( | |||
| self.feval if not isinstance(self.feval, list) else self.feval[0] | |||
| ) | |||
| val_loss = self._evaluate(valid_loader, eval_func) if valid_loader else 0.0 | |||
| if eval_func.is_higher_better(): | |||
| val_loss = -val_loss | |||
| self.early_stopping(val_loss, self.model.model) | |||
| if self.early_stopping.early_stop: | |||
| LOGGER.debug("Early stopping at", epoch) | |||
| self.early_stopping.load_checkpoint(self.model.model) | |||
| break | |||
| if valid_loader is not None: | |||
| eval_func = ( | |||
| self.feval if not isinstance(self.feval, list) else self.feval[0] | |||
| ) | |||
| val_loss = self._evaluate(valid_loader, eval_func) | |||
| if eval_func.is_higher_better(): | |||
| val_loss = -val_loss | |||
| self.early_stopping(val_loss, self.model.model) | |||
| if self.early_stopping.early_stop: | |||
| LOGGER.debug("Early stopping at", epoch) | |||
| break | |||
| if valid_loader is not None: | |||
| self.early_stopping.load_checkpoint(self.model.model) | |||
| def predict_only(self, loader): | |||
| """ | |||
| @@ -294,10 +305,10 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| """ | |||
| train_loader = utils.graph_get_split( | |||
| dataset, "train", batch_size=self.batch_size | |||
| dataset, "train", batch_size=self.batch_size, num_workers=self.num_workers | |||
| ) # DataLoader(dataset['train'], batch_size=self.batch_size) | |||
| valid_loader = utils.graph_get_split( | |||
| dataset, "val", batch_size=self.batch_size | |||
| dataset, "val", batch_size=self.batch_size, num_workers=self.num_workers | |||
| ) # DataLoader(dataset['val'], batch_size=self.batch_size) | |||
| self.train_only(train_loader, valid_loader) | |||
| if keep_valid_result and valid_loader: | |||
| @@ -321,7 +332,9 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| ------- | |||
| The prediction result of ``predict_proba``. | |||
| """ | |||
| loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size) | |||
| loader = utils.graph_get_split( | |||
| dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers | |||
| ) | |||
| return self._predict_proba(loader, in_log_format=True).max(1)[1] | |||
| def predict_proba(self, dataset, mask="test", in_log_format=False): | |||
| @@ -342,7 +355,9 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| ------- | |||
| The prediction result. | |||
| """ | |||
| loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size) | |||
| loader = utils.graph_get_split( | |||
| dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers | |||
| ) | |||
| return self._predict_proba(loader, in_log_format) | |||
| def _predict_proba(self, loader, in_log_format=False): | |||
| @@ -382,29 +397,19 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| else: | |||
| return self.valid_score, self.feval.is_higher_better() | |||
| def get_name_with_hp(self): | |||
| # """Get the name of hyperparameter.""" | |||
| name = "-".join( | |||
| [ | |||
| str(self.optimizer), | |||
| str(self.lr), | |||
| str(self.max_epoch), | |||
| str(self.early_stopping_round), | |||
| str(self.model), | |||
| str(self.device), | |||
| ] | |||
| ) | |||
| name = ( | |||
| name | |||
| + "|" | |||
| + "-".join( | |||
| [ | |||
| str(x[0]) + "-" + str(x[1]) | |||
| for x in self.model.get_hyper_parameter().items() | |||
| ] | |||
| ) | |||
| def __repr__(self) -> str: | |||
| import yaml | |||
| return yaml.dump( | |||
| { | |||
| "trainer_name": self.__class__.__name__, | |||
| "optimizer": self.optimizer, | |||
| "learning_rate": self.lr, | |||
| "max_epoch": self.max_epoch, | |||
| "early_stopping_round": self.early_stopping_round, | |||
| "model": repr(self.model), | |||
| } | |||
| ) | |||
| return name | |||
| def evaluate(self, dataset, mask="val", feval=None): | |||
| """ | |||
| @@ -425,7 +430,9 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| res: The evaluation result on the given dataset. | |||
| """ | |||
| loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size) | |||
| loader = utils.graph_get_split( | |||
| dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers | |||
| ) | |||
| return self._evaluate(loader, feval) | |||
| def _evaluate(self, loader, feval=None): | |||
| @@ -532,7 +539,7 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| num_features=self.num_features, | |||
| num_classes=self.num_classes, | |||
| num_graph_features=self.num_graph_features, | |||
| optimizer=self.optimizer, | |||
| optimizer=self.opt_received, | |||
| lr=hp["lr"], | |||
| max_epoch=hp["max_epoch"], | |||
| batch_size=hp["batch_size"], | |||
| @@ -540,6 +547,8 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| weight_decay=hp["weight_decay"], | |||
| device=self.device, | |||
| feval=self.feval, | |||
| loss=self.loss, | |||
| lr_scheduler_type=self.lr_scheduler_type, | |||
| init=True, | |||
| *self.args, | |||
| **self.kwargs | |||
| @@ -547,10 +556,6 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| return ret | |||
| def set_feval(self, feval): | |||
| # """Get the space of hyperparameter.""" | |||
| self.feval = get_feval(feval) | |||
| @property | |||
| def hyper_parameter_space(self): | |||
| # """Set the space of hyperparameter.""" | |||
| @@ -560,7 +565,6 @@ class GraphClassificationTrainer(BaseTrainer): | |||
| def hyper_parameter_space(self, space): | |||
| # """Set the space of hyperparameter.""" | |||
| self.space = space | |||
| GraphClassificationTrainer.space = space | |||
| def get_hyper_parameter(self): | |||
| # """Get the hyperparameter in this trainer.""" | |||
| @@ -0,0 +1,520 @@ | |||
| from . import register_trainer, Evaluation | |||
| import torch | |||
| from torch.optim.lr_scheduler import StepLR | |||
| import torch.nn.functional as F | |||
| from ..model import MODEL_DICT, BaseModel | |||
| from .evaluation import Auc, EVALUATE_DICT | |||
| from .base import EarlyStopping, BaseLinkPredictionTrainer | |||
| from typing import Union | |||
| from copy import deepcopy | |||
| from torch_geometric.utils import negative_sampling | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger("link prediction trainer") | |||
def get_feval(feval):
    """Resolve metric specifications (name, ``Evaluation`` class, or a list
    of either, resolved recursively) to ``Evaluation`` classes."""
    if isinstance(feval, str):
        return EVALUATE_DICT[feval]
    if isinstance(feval, type) and issubclass(feval, Evaluation):
        return feval
    if isinstance(feval, list):
        return [get_feval(item) for item in feval]
    raise ValueError("feval argument of type", type(feval), "is not supported!")
@register_trainer("LinkPredictionFull")
class LinkPredictionTrainer(BaseLinkPredictionTrainer):
    """
    The link prediction trainer.

    Used to automatically train the link prediction problem.

    Parameters
    ----------
    model: ``BaseModel`` or ``str``
        The (name of) model used to train and predict.
    num_features: ``int``
        The number of input node features.
    optimizer: ``Optimizer`` or ``str``
        The (name of) optimizer used to train and predict.
        Only ``"adam"`` and ``"sgd"`` are recognized; anything else falls back to Adam.
    lr: ``float``
        The learning rate of link prediction task.
    max_epoch: ``int``
        The max number of epochs in training.
    early_stopping_round: ``int``
        The round of early stop.
    weight_decay: ``float``
        Weight decay (L2 penalty) passed to the optimizer.
    device: ``torch.device`` or ``str``
        The device where model will be running on.
    init: ``bool``
        If True(False), the model will (not) be initialized.
    feval: (list of) ``Evaluation`` subclass or ``str``
        The evaluation metric(s); the first one drives early stopping.
    loss: ``str``
        Name of a loss function available in ``torch.nn.functional``.
    """

    # Class-level default hyper-parameter space; each instance overwrites it
    # in __init__ (see the NOTE there).
    space = None

    def __init__(
        self,
        model: Union[BaseModel, str] = None,
        num_features=None,
        optimizer=None,
        lr=1e-4,
        max_epoch=100,
        early_stopping_round=101,
        weight_decay=1e-4,
        device="auto",
        init=True,
        feval=[Auc],
        loss="binary_cross_entropy_with_logits",
        *args,
        **kwargs,
    ):
        super().__init__(model, num_features, device, init, feval, loss)
        # Keep the optimizer argument exactly as received so that
        # duplicate_from_hyper_parameter can re-create an equivalent trainer.
        # (Consistent with NodeClassificationFullTrainer.opt_received; passing
        # the resolved class instead silently turned an "sgd" choice into Adam
        # on duplication.)
        self.opt_received = optimizer
        if type(optimizer) == str and optimizer.lower() == "adam":
            self.optimizer = torch.optim.Adam
        elif type(optimizer) == str and optimizer.lower() == "sgd":
            self.optimizer = torch.optim.SGD
        else:
            # Unrecognized / unspecified optimizer: default to Adam.
            self.optimizer = torch.optim.Adam
        self.lr = lr
        self.max_epoch = max_epoch
        self.early_stopping_round = early_stopping_round
        self.device = device
        self.args = args
        self.kwargs = kwargs
        self.weight_decay = weight_decay
        self.early_stopping = EarlyStopping(
            patience=early_stopping_round, verbose=False
        )
        # Cached validation artifacts, filled by train(..., keep_valid_result=True).
        self.valid_result = None
        self.valid_result_prob = None
        self.valid_score = None
        self.initialized = False
        self.space = [
            {
                "parameterName": "max_epoch",
                "type": "INTEGER",
                "maxValue": 500,
                "minValue": 10,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "early_stopping_round",
                "type": "INTEGER",
                "maxValue": 30,
                "minValue": 10,
                "scalingType": "LINEAR",
            },
            {
                "parameterName": "lr",
                "type": "DOUBLE",
                "maxValue": 1e-1,
                "minValue": 1e-4,
                "scalingType": "LOG",
            },
            {
                "parameterName": "weight_decay",
                "type": "DOUBLE",
                "maxValue": 1e-2,
                "minValue": 1e-4,
                "scalingType": "LOG",
            },
        ]
        # NOTE(review): this clobbers the class attribute with the latest
        # instance's space (shared by all instances) — preserved as-is.
        LinkPredictionTrainer.space = self.space
        self.hyperparams = {
            "max_epoch": self.max_epoch,
            "early_stopping_round": self.early_stopping_round,
            "lr": self.lr,
            "weight_decay": self.weight_decay,
        }
        if init is True:
            self.initialize()

    def initialize(self):
        """Initialize the wrapped auto model once; later calls are no-ops."""
        if self.initialized is True:
            return
        self.initialized = True
        self.model.set_num_classes(self.num_classes)
        self.model.set_num_features(self.num_features)
        self.model.initialize()

    def get_model(self):
        """Get the auto model used in the trainer."""
        return self.model

    @classmethod
    def get_task_name(cls):
        """Get task name, i.e., ``LinkPrediction``."""
        return "LinkPrediction"

    def train_only(self, data, train_mask=None):
        """
        The function of training on the given dataset and mask.

        Parameters
        ----------
        data: The link prediction dataset used to be trained. It should consist of masks, including train_mask, and etc.
        train_mask: The mask used in training stage.

        Returns
        -------
        self: ``autogl.train.LinkPredictionTrainer``
            A reference of current trainer.
        """
        data = data.to(self.device)
        optimizer = self.optimizer(
            self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay
        )
        scheduler = StepLR(optimizer, step_size=100, gamma=0.1)
        # NOTE(review): range(1, max_epoch) runs max_epoch - 1 iterations;
        # preserved to keep training behavior unchanged.
        for epoch in range(1, self.max_epoch):
            self.model.model.train()
            # Draw a fresh set of negative edges each epoch, one per positive edge.
            neg_edge_index = negative_sampling(
                edge_index=data.train_pos_edge_index,
                num_nodes=data.num_nodes,
                num_neg_samples=data.train_pos_edge_index.size(1),
            )
            optimizer.zero_grad()
            z = self.model.model.lp_encode(data)
            link_logits = self.model.model.lp_decode(
                z, data.train_pos_edge_index, neg_edge_index
            )
            link_labels = self.get_link_labels(
                data.train_pos_edge_index, neg_edge_index
            )
            if hasattr(F, self.loss):
                loss = getattr(F, self.loss)(link_logits, link_labels)
            else:
                raise TypeError(
                    "PyTorch does not support loss type {}".format(self.loss)
                )
            loss.backward()
            optimizer.step()
            scheduler.step()
            # Early stopping is driven by the first (primary) metric.
            if type(self.feval) is list:
                feval = self.feval[0]
            else:
                feval = self.feval
            val_loss = self.evaluate([data], mask="val", feval=feval)
            # EarlyStopping minimizes its input, so negate higher-is-better metrics.
            if feval.is_higher_better() is True:
                val_loss = -val_loss
            self.early_stopping(val_loss, self.model.model)
            if self.early_stopping.early_stop:
                LOGGER.debug("Early stopping at %d", epoch)
                break
        # Restore the best weights observed during training.
        self.early_stopping.load_checkpoint(self.model.model)

    def predict_only(self, data, test_mask=None):
        """
        Encode node embeddings for link prediction on the given dataset.

        Parameters
        ----------
        data: The link prediction dataset used to be predicted.
        test_mask: Unused; kept for interface compatibility.

        Returns
        -------
        res: The node embeddings produced by the model's ``lp_encode``.
        """
        data = data.to(self.device)
        self.model.model.eval()
        with torch.no_grad():
            z = self.model.model.lp_encode(data)
        return z

    def train(self, dataset, keep_valid_result=True):
        """
        The function of training on the given dataset and keeping valid result.

        Parameters
        ----------
        dataset: The link prediction dataset used to be trained.
        keep_valid_result: ``bool``
            If True(False), save the validation result after training.

        Returns
        -------
        self: ``autogl.train.LinkPredictionTrainer``
            A reference of current trainer.
        """
        data = dataset[0]
        # Message passing during training only uses the positive training edges.
        data.edge_index = data.train_pos_edge_index
        self.train_only(data)
        if keep_valid_result:
            self.valid_result = self.predict_only(data)
            self.valid_result_prob = self.predict_proba(dataset, "val")
            self.valid_score = self.evaluate(dataset, mask="val", feval=self.feval)

    def predict(self, dataset, mask=None):
        """
        The function of predicting on the given dataset.

        Parameters
        ----------
        dataset: The link prediction dataset used to be predicted.
        mask: ``train``, ``val``, or ``test``.
            The dataset mask.

        Returns
        -------
        The prediction result of ``predict_proba``.
        """
        return self.predict_proba(dataset, mask=mask, in_log_format=False)

    def predict_proba(self, dataset, mask=None, in_log_format=False):
        """
        The function of predicting the probability on the given dataset.

        Parameters
        ----------
        dataset: The link prediction dataset used to be predicted.
        mask: ``train``, ``val``, or ``test``.
            The dataset mask; any other value falls back to the test edges.
        in_log_format: ``bool``.
            If True(False), the probability will (not) be log format.
            NOTE(review): currently ignored — sigmoid probabilities are always
            returned; kept for interface compatibility.

        Returns
        -------
        The predicted edge probabilities for the selected pos/neg edge sets.
        """
        data = dataset[0]
        data.edge_index = data.train_pos_edge_index
        data = data.to(self.device)
        if mask in ["train", "val", "test"]:
            pos_edge_index = data[f"{mask}_pos_edge_index"]
            neg_edge_index = data[f"{mask}_neg_edge_index"]
        else:
            pos_edge_index = data[f"test_pos_edge_index"]
            neg_edge_index = data[f"test_neg_edge_index"]
        self.model.model.eval()
        with torch.no_grad():
            z = self.predict_only(data)
            link_logits = self.model.model.lp_decode(z, pos_edge_index, neg_edge_index)
            link_probs = link_logits.sigmoid()
        return link_probs

    def get_valid_predict(self):
        """Get the cached valid result (node embeddings)."""
        return self.valid_result

    def get_valid_predict_proba(self):
        """Get the cached valid result (prediction probability)."""
        return self.valid_result_prob

    def get_valid_score(self, return_major=True):
        """
        The function of getting the valid score.

        Parameters
        ----------
        return_major: ``bool``.
            If True, the return only consists of the major result.
            If False, the return consists of the all results.

        Returns
        -------
        result: The valid score in training stage, paired with the metric's
            higher-is-better flag(s).
        """
        if isinstance(self.feval, list):
            if return_major:
                return self.valid_score[0], self.feval[0].is_higher_better()
            else:
                return self.valid_score, [f.is_higher_better() for f in self.feval]
        else:
            return self.valid_score, self.feval.is_higher_better()

    def get_name_with_hp(self):
        """Get a name string encoding trainer settings and model hyperparameters."""
        name = "-".join(
            [
                str(self.optimizer),
                str(self.lr),
                str(self.max_epoch),
                str(self.early_stopping_round),
                str(self.model),
                str(self.device),
            ]
        )
        name = (
            name
            + "|"
            + "-".join(
                [
                    str(x[0]) + "-" + str(x[1])
                    for x in self.model.get_hyper_parameter().items()
                ]
            )
        )
        return name

    def evaluate(self, dataset, mask=None, feval=None):
        """
        The function of evaluating the model on the given dataset and mask.

        Parameters
        ----------
        dataset: The link prediction dataset used to be evaluated.
        mask: ``train``, ``val``, or ``test``.
            The dataset mask; any other value falls back to the test edges.
        feval: ``str``.
            The evaluation method used in this function; defaults to the
            trainer's configured metric(s).

        Returns
        -------
        res: The evaluation result on the given dataset — a single value for a
            single metric, a list for a list of metrics.
        """
        data = dataset[0]
        data = data.to(self.device)
        if feval is None:
            feval = self.feval
        else:
            feval = get_feval(feval)
        if mask in ["train", "val", "test"]:
            pos_edge_index = data[f"{mask}_pos_edge_index"]
            neg_edge_index = data[f"{mask}_neg_edge_index"]
        else:
            pos_edge_index = data[f"test_pos_edge_index"]
            neg_edge_index = data[f"test_neg_edge_index"]
        self.model.model.eval()
        with torch.no_grad():
            link_probs = self.predict_proba(dataset, mask)
            link_labels = self.get_link_labels(pos_edge_index, neg_edge_index)
        # Normalize to a list of metrics; remember whether to unwrap the result.
        if not isinstance(feval, list):
            feval = [feval]
            return_single = True
        else:
            return_single = False
        res = []
        for f in feval:
            res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy()))
        if return_single:
            return res[0]
        return res

    def to(self, new_device):
        """Move the trainer (and its model, if any) to ``new_device``."""
        assert isinstance(new_device, torch.device)
        self.device = new_device
        if self.model is not None:
            self.model.to(self.device)

    def duplicate_from_hyper_parameter(self, hp: dict, model=None, restricted=True):
        """
        The function of duplicating a new instance from the given hyperparameter.

        Parameters
        ----------
        hp: ``dict``.
            The hyperparameter used in the new instance.
        model: The model used in the new instance of trainer.
        restricted: ``bool``.
            If False(True), the hyperparameter should (not) be updated from origin hyperparameter.

        Returns
        -------
        self: ``autogl.train.LinkPredictionTrainer``
            A new instance of trainer.
        """
        if not restricted:
            # Fill in any hyperparameters missing from hp with current values.
            origin_hp = deepcopy(self.hyperparams)
            origin_hp.update(hp)
            hp = origin_hp
        if model is None:
            model = self.model
        model.set_num_classes(self.num_classes)
        model.set_num_features(self.num_features)
        # Forward only the hyperparameters the model's own space declares.
        model = model.from_hyper_parameter(
            dict(
                [
                    x
                    for x in hp.items()
                    if x[0] in [y["parameterName"] for y in model.space]
                ]
            )
        )
        ret = self.__class__(
            model=model,
            num_features=self.num_features,
            # Pass the originally received optimizer spec so that e.g. an
            # "sgd" choice survives duplication (passing the resolved class
            # previously fell through to the Adam default).
            optimizer=self.opt_received,
            lr=hp["lr"],
            max_epoch=hp["max_epoch"],
            early_stopping_round=hp["early_stopping_round"],
            device=self.device,
            weight_decay=hp["weight_decay"],
            feval=self.feval,
            init=True,
            *self.args,
            **self.kwargs,
        )
        return ret

    def set_feval(self, feval):
        """Set the evaluation metrics."""
        self.feval = get_feval(feval)

    @property
    def hyper_parameter_space(self):
        """Get the space of hyperparameter."""
        return self.space

    @hyper_parameter_space.setter
    def hyper_parameter_space(self, space):
        """Set the space of hyperparameter."""
        self.space = space
        # NOTE(review): also updates the shared class attribute — preserved as-is.
        LinkPredictionTrainer.space = space

    def get_hyper_parameter(self):
        """Get the hyperparameter in this trainer."""
        return self.hyperparams

    def get_link_labels(self, pos_edge_index, neg_edge_index):
        """Build a 0/1 label vector: 1 for positive edges, 0 for negatives."""
        E = pos_edge_index.size(1) + neg_edge_index.size(1)
        link_labels = torch.zeros(E, dtype=torch.float, device=self.device)
        # Positive edges come first in the concatenated logits.
        link_labels[: pos_edge_index.size(1)] = 1.0
        return link_labels
| @@ -1,9 +1,21 @@ | |||
| from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping | |||
| """ | |||
| Node classification Full Trainer Implementation | |||
| """ | |||
| from . import register_trainer | |||
| from .base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation | |||
| import torch | |||
| from torch.optim.lr_scheduler import StepLR | |||
| from torch.optim.lr_scheduler import ( | |||
| StepLR, | |||
| MultiStepLR, | |||
| ExponentialLR, | |||
| ReduceLROnPlateau, | |||
| ) | |||
| import torch.nn.functional as F | |||
| from ..model import MODEL_DICT, BaseModel | |||
| from .evaluate import Logloss, Acc, Auc | |||
| from ..model.base import ClassificationSupportedSequentialModel | |||
| from .evaluation import get_feval, Logloss | |||
| from typing import Union | |||
| from copy import deepcopy | |||
| @@ -11,18 +23,9 @@ from ...utils import get_logger | |||
| LOGGER = get_logger("node classification trainer") | |||
def get_feval(feval):
    """
    Resolve an evaluation specifier into concrete ``Evaluation`` class(es).

    Accepts a registered metric name (looked up in ``EVALUATE_DICT``), an
    ``Evaluation`` subclass, or a list of either (resolved recursively).

    Raises
    ------
    ValueError
        If the argument is of an unsupported type.
    """
    if isinstance(feval, str):
        return EVALUATE_DICT[feval]
    if isinstance(feval, type) and issubclass(feval, Evaluation):
        return feval
    if isinstance(feval, list):
        return [get_feval(f) for f in feval]
    raise ValueError("feval argument of type", type(feval), "is not supported!")
| @register_trainer("NodeClassification") | |||
| class NodeClassificationTrainer(BaseTrainer): | |||
| @register_trainer("NodeClassificationFull") | |||
| class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): | |||
| """ | |||
| The node classification trainer. | |||
| @@ -52,39 +55,35 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| If True(False), the model will (not) be initialized. | |||
| """ | |||
| space = None | |||
| def __init__( | |||
| self, | |||
| model: Union[BaseModel, str], | |||
| num_features, | |||
| num_classes, | |||
| model: Union[BaseModel, str] = None, | |||
| num_features=None, | |||
| num_classes=None, | |||
| optimizer=None, | |||
| lr=None, | |||
| max_epoch=None, | |||
| early_stopping_round=None, | |||
| weight_decay=1e-4, | |||
| device=None, | |||
| device="auto", | |||
| init=True, | |||
| feval=[Logloss], | |||
| loss="nll_loss", | |||
| lr_scheduler_type=None, | |||
| *args, | |||
| **kwargs | |||
| ): | |||
| super(NodeClassificationTrainer, self).__init__(model) | |||
| self.loss_type = loss | |||
| if device is None: | |||
| device = "cpu" | |||
| # init model | |||
| if isinstance(model, str): | |||
| assert model in MODEL_DICT, "Cannot parse model name " + model | |||
| self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) | |||
| elif isinstance(model, BaseModel): | |||
| self.model = model | |||
| super().__init__( | |||
| model, | |||
| num_features, | |||
| num_classes, | |||
| device=device, | |||
| init=init, | |||
| feval=feval, | |||
| loss=loss, | |||
| ) | |||
| self.opt_received = optimizer | |||
| if type(optimizer) == str and optimizer.lower() == "adam": | |||
| self.optimizer = torch.optim.Adam | |||
| elif type(optimizer) == str and optimizer.lower() == "sgd": | |||
| @@ -92,14 +91,13 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| else: | |||
| self.optimizer = torch.optim.Adam | |||
| self.num_features = num_features | |||
| self.num_classes = num_classes | |||
| self.lr_scheduler_type = lr_scheduler_type | |||
| self.lr = lr if lr is not None else 1e-4 | |||
| self.max_epoch = max_epoch if max_epoch is not None else 100 | |||
| self.early_stopping_round = ( | |||
| early_stopping_round if early_stopping_round is not None else 100 | |||
| ) | |||
| self.device = device | |||
| self.args = args | |||
| self.kwargs = kwargs | |||
| @@ -116,9 +114,6 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| self.valid_score = None | |||
| self.initialized = False | |||
| self.num_features = num_features | |||
| self.num_classes = num_classes | |||
| self.device = device | |||
| self.space = [ | |||
| { | |||
| @@ -150,8 +145,6 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| "scalingType": "LOG", | |||
| }, | |||
| ] | |||
| self.space += self.model.space | |||
| NodeClassificationTrainer.space = self.space | |||
| self.hyperparams = { | |||
| "max_epoch": self.max_epoch, | |||
| @@ -159,7 +152,6 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| "lr": self.lr, | |||
| "weight_decay": self.weight_decay, | |||
| } | |||
| self.hyperparams = {**self.hyperparams, **self.model.get_hyper_parameter()} | |||
| if init is True: | |||
| self.initialize() | |||
| @@ -198,34 +190,56 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| data = data.to(self.device) | |||
| mask = data.train_mask if train_mask is None else train_mask | |||
| optimizer = self.optimizer( | |||
| self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay | |||
| ) | |||
| scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| lr_scheduler_type = self.lr_scheduler_type | |||
| if type(lr_scheduler_type) == str and lr_scheduler_type == "steplr": | |||
| scheduler = StepLR(optimizer, step_size=100, gamma=0.1) | |||
| elif type(lr_scheduler_type) == str and lr_scheduler_type == "multisteplr": | |||
| scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1) | |||
| elif type(lr_scheduler_type) == str and lr_scheduler_type == "exponentiallr": | |||
| scheduler = ExponentialLR(optimizer, gamma=0.1) | |||
| elif ( | |||
| type(lr_scheduler_type) == str and lr_scheduler_type == "reducelronplateau" | |||
| ): | |||
| scheduler = ReduceLROnPlateau(optimizer, "min") | |||
| else: | |||
| scheduler = None | |||
| for epoch in range(1, self.max_epoch): | |||
| self.model.model.train() | |||
| optimizer.zero_grad() | |||
| res = self.model.model.forward(data) | |||
| if hasattr(F, self.loss_type): | |||
| loss = getattr(F, self.loss_type)(res[mask], data.y[mask]) | |||
| if isinstance(self.model.model, ClassificationSupportedSequentialModel): | |||
| res = self.model.model.cls_forward(data) | |||
| else: | |||
| res = self.model.model.forward(data) | |||
| if hasattr(F, self.loss): | |||
| loss = getattr(F, self.loss)(res[mask], data.y[mask]) | |||
| else: | |||
| raise TypeError("PyTorch does not support loss type {}".format(self.loss_type)) | |||
| raise TypeError( | |||
| "PyTorch does not support loss type {}".format(self.loss) | |||
| ) | |||
| loss.backward() | |||
| optimizer.step() | |||
| scheduler.step() | |||
| if type(self.feval) is list: | |||
| feval = self.feval[0] | |||
| else: | |||
| feval = self.feval | |||
| val_loss = self.evaluate([data], mask=data.val_mask, feval=feval) | |||
| if feval.is_higher_better() is True: | |||
| val_loss = -val_loss | |||
| self.early_stopping(val_loss, self.model.model) | |||
| if self.early_stopping.early_stop: | |||
| LOGGER.debug("Early stopping at %d", epoch) | |||
| self.early_stopping.load_checkpoint(self.model.model) | |||
| break | |||
| if self.lr_scheduler_type: | |||
| scheduler.step() | |||
| if hasattr(data, "val_mask") and data.val_mask is not None: | |||
| if type(self.feval) is list: | |||
| feval = self.feval[0] | |||
| else: | |||
| feval = self.feval | |||
| val_loss = self.evaluate([data], mask=data.val_mask, feval=feval) | |||
| if feval.is_higher_better() is True: | |||
| val_loss = -val_loss | |||
| self.early_stopping(val_loss, self.model.model) | |||
| if self.early_stopping.early_stop: | |||
| LOGGER.debug("Early stopping at %d", epoch) | |||
| break | |||
| if hasattr(data, "val_mask") and data.val_mask is not None: | |||
| self.early_stopping.load_checkpoint(self.model.model) | |||
| def predict_only(self, data, test_mask=None): | |||
| """ | |||
| @@ -245,7 +259,10 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| data = data.to(self.device) | |||
| self.model.model.eval() | |||
| with torch.no_grad(): | |||
| res = self.model.model.forward(data) | |||
| if isinstance(self.model.model, ClassificationSupportedSequentialModel): | |||
| res = self.model.model.cls_forward(data) | |||
| else: | |||
| res = self.model.model.forward(data) | |||
| return res | |||
| def train(self, dataset, keep_valid_result=True): | |||
| @@ -356,29 +373,19 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| else: | |||
| return self.valid_score, self.feval.is_higher_better() | |||
| def get_name_with_hp(self): | |||
| # """Get the name of hyperparameter.""" | |||
| name = "-".join( | |||
| [ | |||
| str(self.optimizer), | |||
| str(self.lr), | |||
| str(self.max_epoch), | |||
| str(self.early_stopping_round), | |||
| str(self.model), | |||
| str(self.device), | |||
| ] | |||
| ) | |||
| name = ( | |||
| name | |||
| + "|" | |||
| + "-".join( | |||
| [ | |||
| str(x[0]) + "-" + str(x[1]) | |||
| for x in self.model.get_hyper_parameter().items() | |||
| ] | |||
| ) | |||
| def __repr__(self) -> str: | |||
| import yaml | |||
| return yaml.dump( | |||
| { | |||
| "trainer_name": self.__class__.__name__, | |||
| "optimizer": self.optimizer, | |||
| "learning_rate": self.lr, | |||
| "max_epoch": self.max_epoch, | |||
| "early_stopping_round": self.early_stopping_round, | |||
| "model": repr(self.model), | |||
| } | |||
| ) | |||
| return name | |||
| def evaluate(self, dataset, mask=None, feval=None): | |||
| """ | |||
| @@ -480,13 +487,15 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| model=model, | |||
| num_features=self.num_features, | |||
| num_classes=self.num_classes, | |||
| optimizer=self.optimizer, | |||
| optimizer=self.opt_received, | |||
| lr=hp["lr"], | |||
| max_epoch=hp["max_epoch"], | |||
| early_stopping_round=hp["early_stopping_round"], | |||
| device=self.device, | |||
| weight_decay=hp["weight_decay"], | |||
| feval=self.feval, | |||
| loss=self.loss, | |||
| lr_scheduler_type=self.lr_scheduler_type, | |||
| init=True, | |||
| *self.args, | |||
| **self.kwargs | |||
| @@ -494,10 +503,6 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| return ret | |||
| def set_feval(self, feval): | |||
| # """Set the evaluation metrics.""" | |||
| self.feval = get_feval(feval) | |||
| @property | |||
| def hyper_parameter_space(self): | |||
| # """Get the space of hyperparameter.""" | |||
| @@ -507,7 +512,6 @@ class NodeClassificationTrainer(BaseTrainer): | |||
| def hyper_parameter_space(self, space): | |||
| # """Set the space of hyperparameter.""" | |||
| self.space = space | |||
| NodeClassificationTrainer.space = space | |||
| def get_hyper_parameter(self): | |||
| # """Get the hyperparameter in this trainer.""" | |||
| @@ -0,0 +1 @@ | |||
| from .node_classification_sampled_trainer import * | |||
| @@ -0,0 +1,148 @@ | |||
| import torch_geometric | |||
class GraphSAINTSamplerFactory:
    """
    A simple factory class for creating varieties of
    :class:`torch_geometric.data.GraphSAINTSampler`.

    There exist potential sampling performance issues for
    the implementation of :class:`torch_geometric.data.GraphSAINTEdgeSampler`
    provided by PyTorch Geometric. Considering that the ultimate performance of
    GraphSAINT Edge Sampler and GraphSAINT Random Walk Sampler are similar
    according to the original literature
    `"GraphSAINT: Graph Sampling Based Inductive Learning Method"
    <https://arxiv.org/abs/1907.04931>`_ which introduces the GraphSAINT approach,
    and that, when the walk length for GraphSAINT Random Walk Sampler is specified as `2`,
    the random walk operation is effectively selecting edges,
    an efficient reimplementation of the GraphSAINT Edge Sampler is not urgently needed.
    Meanwhile, the varieties of Subgraph-wise sampling are scheduled to be redesigned
    and refactored, with the aim of abstracting a unified sampling module for
    representative mainstream varieties of Node-wise Sampling, Layer-wise Sampling,
    and Subgraph-wise Sampling.
    """

    @classmethod
    def create_node_sampler(
        cls,
        data,
        num_graphs_per_epoch: int,
        node_budget: int,
        sample_coverage_factor: int = 50,
        **kwargs
    ) -> torch_geometric.data.GraphSAINTNodeSampler:
        """
        A simple class method for instantiating :class:`torch_geometric.data.GraphSAINTNodeSampler`
        with more explicit arguments.

        Arguments
        ------------
        data:
            The conventional data of integral graph for sampling.
        num_graphs_per_epoch:
            number of subgraphs to sample per epoch.
        node_budget:
            budget of nodes to sample for one sampled subgraph.
        sample_coverage_factor:
            The average number of samples per node that should be used to
            compute normalization statistics.
        **kwargs:
            Additional optional arguments of :class:`torch.utils.data.DataLoader`,
            including :obj:`batch_size` or :obj:`num_workers`.

        Returns
        --------
        Instance of :class:`torch_geometric.data.GraphSAINTNodeSampler`.
        """
        # Positional order follows GraphSAINTNodeSampler:
        # (data, batch_size, num_steps, sample_coverage, ...)
        return torch_geometric.data.GraphSAINTNodeSampler(
            data,
            node_budget,
            num_graphs_per_epoch,
            sample_coverage_factor,
            log=False,
            **kwargs
        )

    @classmethod
    def create_edge_sampler(
        cls,
        data,
        num_graphs_per_epoch: int,
        edge_budget: int,
        sample_coverage_factor: int = 50,
        **kwargs
    ) -> torch_geometric.data.GraphSAINTEdgeSampler:
        """
        A simple class method for instantiating :class:`torch_geometric.data.GraphSAINTEdgeSampler`
        with more explicit arguments.

        Arguments
        ------------
        data:
            The conventional data of integral graph for sampling.
        num_graphs_per_epoch:
            number of subgraphs to sample per epoch.
        edge_budget:
            budget of edges to sample for one sampled subgraph.
        sample_coverage_factor:
            The average number of samples per node that should be used to
            compute normalization statistics.
        **kwargs:
            Additional optional arguments of :class:`torch.utils.data.DataLoader`,
            including :obj:`batch_size` or :obj:`num_workers`.

        Returns
        --------
        Instance of :class:`torch_geometric.data.GraphSAINTEdgeSampler`.
        """
        # Positional order follows GraphSAINTEdgeSampler:
        # (data, batch_size, num_steps, sample_coverage, ...)
        return torch_geometric.data.GraphSAINTEdgeSampler(
            data,
            edge_budget,
            num_graphs_per_epoch,
            sample_coverage_factor,
            log=False,
            **kwargs
        )

    @classmethod
    def create_random_walk_sampler(
        cls,
        data,
        num_graphs_per_epoch: int,
        num_walks: int,
        walk_length: int,
        sample_coverage_factor: int = 50,
        **kwargs
    ) -> torch_geometric.data.GraphSAINTRandomWalkSampler:
        """
        A simple class method for instantiating
        :class:`torch_geometric.data.GraphSAINTRandomWalkSampler`
        with more explicit arguments.

        Arguments
        ------------
        data:
            The conventional data of integral graph for sampling.
        num_graphs_per_epoch:
            number of subgraphs to sample per epoch.
        num_walks:
            The number of random walks for sampling.
        walk_length:
            The length of each random walk.
        sample_coverage_factor:
            The average number of samples per node that should be used to
            compute normalization statistics.
        **kwargs:
            Additional optional arguments of :class:`torch.utils.data.DataLoader`,
            including :obj:`batch_size` or :obj:`num_workers`.

        Returns
        --------
        Instance of :class:`torch_geometric.data.GraphSAINTRandomWalkSampler`.
        """
        # Positional order follows GraphSAINTRandomWalkSampler:
        # (data, batch_size, walk_length, num_steps, sample_coverage, ...)
        return torch_geometric.data.GraphSAINTRandomWalkSampler(
            data,
            num_walks,
            walk_length,
            num_graphs_per_epoch,
            sample_coverage_factor,
            log=False,
            **kwargs
        )
| @@ -0,0 +1,479 @@ | |||
| import numpy as np | |||
| import scipy.sparse as sp | |||
| import torch | |||
| import torch.utils.data | |||
| import typing as _typing | |||
| import torch_geometric | |||
| from . import target_dependant_sampler | |||
class _LayerDependentImportanceSampler(
    target_dependant_sampler.BasicLayerWiseTargetDependantSampler
):
    """
    Obsolete implementation, unused.

    Early pure-torch prototype of layer-dependent importance sampling,
    kept for reference only; the maintained implementation is
    :class:`LayerDependentImportanceSampler` below.
    """

    class _Utility:
        """Stateless helper routines used by the sampler."""

        @classmethod
        def compute_edge_weights(
            cls, __all_edge_index_with_self_loops: torch.Tensor
        ) -> torch.Tensor:
            """
            Compute a symmetric-normalization weight for every edge:
            ``deg_out(source) ** -0.5 * deg_in(target) ** -0.5``.

            :param __all_edge_index_with_self_loops:
                integral ``edge_index`` (self loops already added);
                row 0 holds source nodes, row 1 holds target nodes
            :return: one weight per edge
            """
            __out_degree: torch.Tensor = torch_geometric.utils.degree(
                __all_edge_index_with_self_loops[0]
            )
            __in_degree: torch.Tensor = torch_geometric.utils.degree(
                __all_edge_index_with_self_loops[1]
            )
            # Per-edge (out-degree of source, in-degree of target) pairs.
            temp_tensor: torch.Tensor = torch.stack(
                [
                    __out_degree[__all_edge_index_with_self_loops[0]],
                    __in_degree[__all_edge_index_with_self_loops[1]],
                ]
            )
            temp_tensor: torch.Tensor = torch.pow(temp_tensor, -0.5)
            # Degree-0 nodes yield inf after the -0.5 power; zero them out.
            temp_tensor[torch.isinf(temp_tensor)] = 0.0
            return temp_tensor[0] * temp_tensor[1]

        @classmethod
        def get_candidate_source_nodes_probabilities(
            cls,
            all_candidate_edge_indexes: torch.LongTensor,
            all_edge_index_with_self_loops: torch.Tensor,
            all_edge_weights: torch.Tensor,
        ) -> _typing.Tuple[torch.LongTensor, torch.Tensor]:
            """
            Compute, for every candidate source node, a sampling probability
            proportional to the total weight of its candidate out-edges.

            :param all_candidate_edge_indexes: positional indexes of candidate edges
            :param all_edge_index_with_self_loops: integral edge index with self-loops
            :param all_edge_weights: one weight per edge of the integral graph
            :return: (all_source_nodes_indexes, all_source_nodes_probabilities)
            """
            all_candidate_edge_indexes: torch.LongTensor = (
                all_candidate_edge_indexes.unique()
            )
            _all_candidate_edges_weights: torch.Tensor = all_edge_weights[
                all_candidate_edge_indexes
            ]
            all_candidate_source_nodes_indexes: torch.LongTensor = (
                all_edge_index_with_self_loops[0, all_candidate_edge_indexes].unique()
            )
            # Probability of a source node = (sum of weights of its candidate
            # edges) / (sum of all candidate edge weights).
            all_candidate_source_nodes_probabilities: torch.Tensor = torch.tensor(
                [
                    torch.sum(
                        _all_candidate_edges_weights[
                            all_edge_index_with_self_loops[
                                0, all_candidate_edge_indexes
                            ]
                            == _current_source_node_index
                        ]
                    ).item()
                    / torch.sum(_all_candidate_edges_weights).item()
                    for _current_source_node_index in all_candidate_source_nodes_indexes.tolist()
                ]
            )
            assert (
                all_candidate_source_nodes_indexes.size()
                == all_candidate_source_nodes_probabilities.size()
            )
            return (
                all_candidate_source_nodes_indexes,
                all_candidate_source_nodes_probabilities,
            )

        @classmethod
        def filter_selected_edges_by_source_nodes_and_target_nodes(
            cls,
            all_edges_with_self_loops: torch.Tensor,
            selected_source_node_indexes: torch.LongTensor,
            selected_target_node_indexes: torch.LongTensor,
        ) -> torch.Tensor:
            """
            Keep only the edges whose source node AND target node are both
            among the selected nodes.

            :param all_edges_with_self_loops: all edges with self loops
            :param selected_source_node_indexes: selected source node indexes
            :param selected_target_node_indexes: selected target node indexes
            :return: filtered edge indexes
            """
            # Mask over all edges: True where the edge starts at a selected source.
            selected_edges_mask_for_source_nodes: torch.Tensor = torch.zeros(
                all_edges_with_self_loops.size(1), dtype=torch.bool
            )
            selected_edges_mask_for_source_nodes[
                torch.cat(
                    [
                        torch.where(
                            all_edges_with_self_loops[0]
                            == __current_selected_source_node_index
                        )[0]
                        for __current_selected_source_node_index in selected_source_node_indexes.unique().tolist()
                    ]
                ).unique()
            ] = True
            # Mask over all edges: True where the edge ends at a selected target.
            selected_edges_mask_for_target_nodes: torch.Tensor = torch.zeros(
                all_edges_with_self_loops.size(1), dtype=torch.bool
            )
            selected_edges_mask_for_target_nodes[
                torch.cat(
                    [
                        torch.where(
                            all_edges_with_self_loops[1]
                            == __current_selected_target_node_index
                        )[0]
                        for __current_selected_target_node_index in selected_target_node_indexes.unique().tolist()
                    ]
                )
            ] = True
            # An edge survives only if both of its endpoints were selected.
            return torch.where(
                selected_edges_mask_for_source_nodes
                & selected_edges_mask_for_target_nodes
            )[0]

    def __init__(
        self,
        edge_index: torch.LongTensor,
        target_nodes_indexes: torch.LongTensor,
        layer_wise_arguments: _typing.Sequence,
        batch_size: _typing.Optional[int] = 1,
        num_workers: int = 0,
        shuffle: bool = True,
        **kwargs
    ):
        """
        :param edge_index: edge index of the integral graph
        :param target_nodes_indexes: indexes of target nodes to learn representation
        :param layer_wise_arguments: per-layer number of source nodes to sample
        :param batch_size: number of target nodes per mini-batch, default 1
        :param num_workers: workers for the inner DataLoader, default 0
        :param shuffle: whether to shuffle target nodes, default True
        :param kwargs: remaining keyword arguments
        """
        # Self loops are added so every node keeps an incoming edge to itself.
        super().__init__(
            torch_geometric.utils.add_remaining_self_loops(edge_index)[0],
            target_nodes_indexes,
            layer_wise_arguments,
            batch_size,
            num_workers,
            shuffle,
            **kwargs
        )
        # Pre-computed normalization weight for every edge of the integral graph.
        self.__all_edge_weights: torch.Tensor = self._Utility.compute_edge_weights(
            self._edge_index
        )

    def _sample_edges_for_layer(
        self,
        __current_layer_target_nodes_indexes: torch.LongTensor,
        __top_layer_target_nodes_indexes: torch.LongTensor,
        layer_argument: _typing.Any,
        *args,
        **kwargs
    ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]:
        """
        Sample edges for one layer
        :param __current_layer_target_nodes_indexes: target nodes for current layer
        :param __top_layer_target_nodes_indexes: target nodes for top layer
        :param layer_argument: argument for current layer, expected to be the
            positive number of source nodes to sample for this layer
        :param args: remaining positional arguments
        :param kwargs: remaining keyword arguments
        :return: (edge_id_in_integral_graph, edge_weight)
        """
        if type(layer_argument) != int:
            raise TypeError
        elif not layer_argument > 0:
            raise ValueError
        else:
            sampled_node_size_budget: int = layer_argument
        # Candidate edges: every edge pointing at a target node of this layer.
        all_candidate_edge_indexes: torch.LongTensor = torch.cat(
            [
                torch.where(self._edge_index[1] == current_target_node_index)[0]
                for current_target_node_index in __current_layer_target_nodes_indexes.unique().tolist()
            ]
        ).unique()
        # Importance probability per candidate source node, proportional to
        # the sum of its SQUARED edge weights (note the weights are squared
        # by passing weight * weight below).
        (
            __all_candidate_source_nodes_indexes,
            all_candidate_source_nodes_probabilities,
        ) = self._Utility.get_candidate_source_nodes_probabilities(
            all_candidate_edge_indexes,
            self._edge_index,
            self.__all_edge_weights * self.__all_edge_weights,
        )
        assert (
            __all_candidate_source_nodes_indexes.size()
            == all_candidate_source_nodes_probabilities.size()
        )
        """ Sampling """
        if sampled_node_size_budget < __all_candidate_source_nodes_indexes.numel():
            # More candidates than budget: importance-sample without replacement.
            selected_source_node_indexes: torch.LongTensor = (
                __all_candidate_source_nodes_indexes[
                    torch.from_numpy(
                        np.unique(
                            np.random.choice(
                                np.arange(__all_candidate_source_nodes_indexes.numel()),
                                sampled_node_size_budget,
                                p=all_candidate_source_nodes_probabilities.numpy(),
                                replace=False,
                            )
                        )
                    ).unique()
                ].unique()
            )
        else:
            # Budget covers all candidates: keep every candidate source node.
            selected_source_node_indexes: torch.LongTensor = (
                __all_candidate_source_nodes_indexes
            )
        # Always preserve the top-layer target nodes as source nodes so their
        # self loops survive into the sampled subgraph.
        selected_source_node_indexes: torch.LongTensor = torch.cat(
            [selected_source_node_indexes, __top_layer_target_nodes_indexes]
        ).unique()
        __selected_edges_indexes: torch.LongTensor = (
            self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes(
                self._edge_index,
                selected_source_node_indexes,
                __current_layer_target_nodes_indexes,
            ).unique()
        )
        # Divide each retained edge weight by its source node's sampling
        # probability (importance-sampling discount).
        non_normalized_selected_edges_weight: torch.Tensor = self.__all_edge_weights[
            __selected_edges_indexes
        ] / torch.tensor(
            [
                all_candidate_source_nodes_probabilities[
                    __all_candidate_source_nodes_indexes == current_source_node_index
                ].item()
                for current_source_node_index in self._edge_index[
                    0, __selected_edges_indexes
                ].tolist()
            ]
        )

        def __normalize_edges_weight_by_target_nodes(
            __edge_index: torch.Tensor, __edge_weight: torch.Tensor
        ) -> torch.Tensor:
            # Row-normalize: weights of edges sharing one target node sum to 1.
            if __edge_index.size(1) != __edge_weight.numel():
                raise ValueError
            for current_target_node_index in __edge_index[1].unique().tolist():
                __current_mask_for_edges: torch.BoolTensor = (
                    __edge_index[1] == current_target_node_index
                )
                __edge_weight[__current_mask_for_edges] = __edge_weight[
                    __current_mask_for_edges
                ] / torch.sum(__edge_weight[__current_mask_for_edges])
            return __edge_weight

        normalized_selected_edges_weight: torch.Tensor = (
            __normalize_edges_weight_by_target_nodes(
                self._edge_index[:, __selected_edges_indexes],
                non_normalized_selected_edges_weight,
            )
        )
        return __selected_edges_indexes, normalized_selected_edges_weight
class LayerDependentImportanceSampler(
    target_dependant_sampler.BasicLayerWiseTargetDependantSampler
):
    """
    The layer-dependent importance sampler from the
    `"Layer-Dependent Importance Sampling for Training Deep and Large Graph Convolutional Networks"
    <https://arxiv.org/abs/1911.07323>`_ literature, which allows
    for mini-batch training of GNNs on large-scale graphs where full-batch training is not feasible.

    Arguments
    ------------
    edge_index:
        A :obj:`torch.LongTensor` that defines the underlying graph
        connectivity/message passing flow.
        :obj:`edge_index` holds the indices of a (sparse) adjacency matrix.
        If :obj:`edge_index` is of type :obj:`torch.LongTensor`, its shape
        must be defined as :obj:`[2, num_edges]`, where messages from nodes
        :obj:`edge_index[0]` are sent to nodes in :obj:`edge_index[1]`
        (in case :obj:`flow="source_to_target"`).
    target_nodes_indexes:
        indexes of target nodes to learn representation.
    layer_wise_arguments:
        The number of nodes to sample for each layer.
        It's noteworthy that the target nodes for a specific layer
        always be preserved as source nodes for that layer,
        such that the self loops for those target nodes
        are generally preserved for representation learning.
    batch_size:
        number of target nodes for each mini-batch.
    num_workers:
        num_workers argument for inner :class:`torch.utils.data.DataLoader`
    shuffle:
        whether to shuffle target nodes for mini-batches.
    """

    @classmethod
    def __compute_edge_weight(cls, edge_index: torch.Tensor) -> torch.Tensor:
        """
        Compute the symmetric-normalization weight of every edge:
        ``deg_out(source) ** -0.5 * deg_in(target) ** -0.5``.

        :param edge_index: integral edge index of shape ``[2, num_edges]``
        :return: one weight per edge
        """
        __num_nodes: int = max(int(edge_index[0].max()), int(edge_index[1].max())) + 1
        # Per-edge (out-degree of source, in-degree of target) pairs.
        _temp_tensor: torch.Tensor = torch.stack(
            [
                torch_geometric.utils.degree(edge_index[0], __num_nodes)[edge_index[0]],
                torch_geometric.utils.degree(edge_index[1], __num_nodes)[edge_index[1]],
            ]
        )
        _temp_tensor: torch.Tensor = torch.pow(_temp_tensor, -0.5)
        # Degree-0 nodes yield inf after the -0.5 power; reset those entries.
        _temp_tensor[torch.isinf(_temp_tensor)] = 0
        return _temp_tensor[0] * _temp_tensor[1]

    def __init__(
        self,
        edge_index: torch.LongTensor,
        target_nodes_indexes: torch.LongTensor,
        layer_wise_arguments: _typing.Sequence,
        batch_size: _typing.Optional[int] = 1,
        num_workers: int = 0,
        shuffle: bool = True,
        **kwargs
    ):
        """
        :param edge_index: edge index of the integral graph
        :param target_nodes_indexes: indexes of target nodes to learn representation
        :param layer_wise_arguments: per-layer number of source nodes to sample
        :param batch_size: number of target nodes per mini-batch, default 1
        :param num_workers: workers for the inner DataLoader, default 0
        :param shuffle: whether to shuffle target nodes, default True
        :param kwargs: remaining keyword arguments
        """
        # Self loops are added so every node keeps an incoming edge to itself.
        super(LayerDependentImportanceSampler, self).__init__(
            torch_geometric.utils.add_remaining_self_loops(edge_index)[0],
            target_nodes_indexes,
            layer_wise_arguments,
            batch_size,
            num_workers,
            shuffle,
            **kwargs
        )
        # Normalized weight for every edge of the integral graph.
        self.__edge_weight: torch.Tensor = self.__compute_edge_weight(self._edge_index)
        # Sparse normalized matrix L with L[target, source] = edge weight.
        self.__integral_normalized_l_matrix: sp.csr_matrix = sp.csr_matrix(
            (
                self.__edge_weight.numpy(),
                (self._edge_index[1].numpy(), self._edge_index[0].numpy()),
            )
        )
        # Companion sparse matrix storing, at the same (target, source)
        # positions, each edge's positional index in the integral edge_index.
        # NOTE(review): edge 0 is stored as an explicit zero entry; if scipy
        # were to eliminate explicit zeros during slicing/conversion, edge 0
        # would be silently dropped — verify, or store np.arange(1, E + 1)
        # and subtract 1 on extraction.
        self.__integral_edges_indexes_sparse_matrix: sp.csr_matrix = sp.csr_matrix(
            (
                np.arange(self._edge_index.size(1)),
                (self._edge_index[1].numpy(), self._edge_index[0].numpy()),
            )
        )

    def __sample_edges(
        self,
        __current_layer_target_nodes_indexes: np.ndarray,
        __top_layer_target_nodes_indexes: np.ndarray,
        sampled_source_nodes_budget: int,
    ) -> _typing.Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        :param __current_layer_target_nodes_indexes: indexes of target nodes for current layer
        :param __top_layer_target_nodes_indexes: indexes of target nodes for top layer
        :param sampled_source_nodes_budget: sampled source nodes budget
        :return: (
                sampled_edges_indexes,
                sampled_source_nodes_indexes,
                corresponding probabilities for sampled_source_nodes_indexes
            )
        """
        # Rows of L restricted to this layer's target nodes.
        partial_l_matrix: sp.csr_matrix = self.__integral_normalized_l_matrix[
            __current_layer_target_nodes_indexes, :
        ]
        # Layer-dependent importance: a source node's probability is
        # proportional to the column sum of squared weights of its edges into
        # the current target nodes.
        p: np.ndarray = np.array(
            np.sum(partial_l_matrix.multiply(partial_l_matrix), axis=0)
        )[0]
        p: np.ndarray = p / np.sum(p)
        # Never request more nodes than have non-zero probability.
        _number_of_nodes_to_sample = np.min(
            [np.sum(p > 0), sampled_source_nodes_budget]
        )
        # Importance-sample source nodes, then force-include the top-layer
        # target nodes so their self loops survive.
        _selected_source_nodes: np.ndarray = np.unique(
            np.concatenate(
                [
                    np.random.choice(
                        p.size, _number_of_nodes_to_sample, replace=False, p=p
                    ),
                    __top_layer_target_nodes_indexes,
                ]
            )
        )
        # Restrict the edge-id matrix to (current targets) x (selected
        # sources); the surviving .data entries are the sampled edge ids.
        _sampled_edges_indexes_sparse_matrix: sp.csr_matrix = (
            self.__integral_edges_indexes_sparse_matrix[
                __current_layer_target_nodes_indexes, :
            ]
        )
        _sampled_edges_indexes_sparse_matrix: sp.csc_matrix = (
            _sampled_edges_indexes_sparse_matrix.tocsc()[:, _selected_source_nodes]
        )
        _sampled_edges_indexes: np.ndarray = np.unique(
            _sampled_edges_indexes_sparse_matrix.data
        )
        return _sampled_edges_indexes, _selected_source_nodes, p[_selected_source_nodes]

    def _sample_edges_for_layer(
        self,
        __current_layer_target_nodes_indexes: torch.LongTensor,
        __top_layer_target_nodes_indexes: torch.LongTensor,
        layer_argument: _typing.Any,
        *args,
        **kwargs
    ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]:
        """
        Sample edges for one specific layer, expected to be implemented in subclass.

        Parameters
        ------------
        __current_layer_target_nodes_indexes:
            target nodes for current layer
        __top_layer_target_nodes_indexes:
            target nodes for top layer
        layer_argument:
            argument for current layer, the number of source nodes to sample
        args:
            remaining positional arguments
        kwargs:
            remaining keyword arguments

        Returns
        --------
        edge_id_in_integral_graph:
            the corresponding positional indexes for the `edge_index` of integral graph
        edge_weight:
            the optional `edge_weight` for aggregation
        """
        # Delegate the actual sampling to the numpy/scipy implementation.
        __wrapped_result: _typing.Tuple[
            np.ndarray, np.ndarray, np.ndarray
        ] = self.__sample_edges(
            __current_layer_target_nodes_indexes.numpy(),
            __top_layer_target_nodes_indexes.numpy(),
            layer_argument,
        )
        _sampled_edges_indexes: torch.Tensor = torch.from_numpy(__wrapped_result[0])
        _selected_source_nodes: torch.Tensor = torch.from_numpy(__wrapped_result[1])
        _selected_source_nodes_probabilities: torch.Tensor = torch.from_numpy(
            __wrapped_result[2]
        )
        """ Multiply corresponding discount weights """
        # Map each selected source node to the probability it was sampled with.
        __selected_source_node_probability_mapping: _typing.Dict[int, float] = dict(
            zip(
                _selected_source_nodes.tolist(),
                _selected_source_nodes_probabilities.tolist(),
            )
        )
        _selected_edges_weight: torch.Tensor = self.__edge_weight[
            _sampled_edges_indexes
        ]
        # Divide each edge weight by its source node's sampling probability
        # (importance-sampling discount).
        # NOTE(review): a force-included top-layer target node may carry
        # probability 0 here, producing inf before normalization — verify.
        _selected_edges_weight: torch.Tensor = _selected_edges_weight / torch.tensor(
            [
                __selected_source_node_probability_mapping.get(
                    _current_source_node_index
                )
                for _current_source_node_index in self._edge_index[
                    0, _sampled_edges_indexes
                ].tolist()
            ]
        )
        """ Normalize edge weight for selected edges by target nodes """
        # Row-normalize: weights of edges sharing one target node sum to 1.
        for _current_target_node_index in (
            self._edge_index[1, _sampled_edges_indexes].unique().tolist()
        ):
            _current_mask_for_selected_edges: torch.BoolTensor = (
                self._edge_index[1, _sampled_edges_indexes]
                == _current_target_node_index
            )
            _selected_edges_weight[
                _current_mask_for_selected_edges
            ] = _selected_edges_weight[_current_mask_for_selected_edges] / torch.sum(
                _selected_edges_weight[_current_mask_for_selected_edges]
            )
        _sampled_edges_indexes: _typing.Union[
            torch.LongTensor, torch.Tensor
        ] = _sampled_edges_indexes
        return _sampled_edges_indexes, _selected_edges_weight
| @@ -0,0 +1,218 @@ | |||
| import typing as _typing | |||
| import torch.utils.data | |||
| import torch_geometric | |||
| from .target_dependant_sampler import TargetDependantSampler, TargetDependantSampledData | |||
class NeighborSampler(TargetDependantSampler, _typing.Iterable):
    """
    The neighbor sampler from the `"Inductive Representation Learning on
    Large Graphs" <https://arxiv.org/abs/1706.02216>`_ literature, which allows
    for mini-batch training of GNNs on large-scale graphs where full-batch
    training is not feasible.

    Arguments
    ------------
    edge_index:
        A :obj:`torch.LongTensor` that defines the underlying graph
        connectivity/message passing flow.
        :obj:`edge_index` holds the indices of a (sparse) adjacency matrix.
        If :obj:`edge_index` is of type :obj:`torch.LongTensor`, its shape
        must be defined as :obj:`[2, num_edges]`, where messages from nodes
        :obj:`edge_index[0]` are sent to nodes in :obj:`edge_index[1]`
        (in case :obj:`flow="source_to_target"`).
    target_nodes_indexes:
        indexes of target nodes to learn representation.
    sampling_sizes:
        The number of neighbors to sample for each node in each layer.
        If set to :obj:`sampling_sizes[l] = -1`, all neighbors are included in layer :obj:`l`.
    batch_size:
        number of target nodes for each mini-batch.
    num_workers:
        num_workers argument for inner :class:`torch.utils.data.DataLoader`
    shuffle:
        whether to shuffle target nodes for mini-batches.

    Notes
    ------------
    When every entry of :obj:`sampling_sizes` is negative (all neighbors are
    taken, so sampling is deterministic) and the keyword argument
    :obj:`cached` is not :obj:`False`, all mini-batches are materialized once
    in the constructor and replayed on every iteration.
    """

    class _SequenceDataset(torch.utils.data.Dataset):
        """Minimal map-style dataset wrapping an in-memory sequence."""

        def __init__(self, sequence):
            self.__sequence = sequence

        def __len__(self):
            return len(self.__sequence)

        def __getitem__(self, idx):
            return self.__sequence[idx]

    @classmethod
    def __compute_edge_weight(cls, edge_index: torch.LongTensor) -> torch.Tensor:
        """
        Compute the symmetric-normalization weight of every edge:
        ``deg_out(source) ** -0.5 * deg_in(target) ** -0.5``.

        :param edge_index: integral edge index of shape ``[2, num_edges]``
        :return: one weight per edge
        """
        __num_nodes = max(int(edge_index[0].max()), int(edge_index[1].max())) + 1
        __out_degree: torch.LongTensor = torch_geometric.utils.degree(
            edge_index[0], __num_nodes
        )
        __in_degree: torch.LongTensor = torch_geometric.utils.degree(
            edge_index[1], __num_nodes
        )
        # Per-edge (out-degree of source, in-degree of target) pairs.
        temp_tensor: torch.Tensor = torch.stack(
            [__out_degree[edge_index[0]], __in_degree[edge_index[1]]]
        )
        temp_tensor: torch.Tensor = torch.pow(temp_tensor, -0.5)
        # Degree-0 nodes yield inf after the -0.5 power; reset those entries.
        temp_tensor[torch.isinf(temp_tensor)] = 0.0
        return temp_tensor[0] * temp_tensor[1]

    def __init__(
        self,
        edge_index: torch.LongTensor,
        target_nodes_indexes: torch.LongTensor,
        sampling_sizes: _typing.Sequence[int],
        batch_size: int = 1,
        num_workers: int = 0,
        shuffle: bool = True,
        **kwargs
    ):
        def is_deterministic(__cached: bool = bool(kwargs.get("cached", True))) -> bool:
            # Deterministic only when caching is enabled AND every layer takes
            # all neighbors (i.e. every sampling size is negative).
            if not __cached:
                return False
            _deterministic: bool = True
            for _sampling_size in sampling_sizes:
                if type(_sampling_size) != int:
                    raise TypeError(
                        "The sampling_sizes argument must be a sequence of integer"
                    )
                if _sampling_size >= 0:
                    _deterministic = False
                    break
            return _deterministic

        self.__edge_weight: torch.Tensor = self.__compute_edge_weight(edge_index)
        # sampling_sizes is reversed to match the layer ordering PyG's
        # NeighborSampler expects for its `sizes` argument.
        # NOTE(review): a caller-supplied ``cached`` kwarg is read above but
        # not removed from kwargs before forwarding to the PyG sampler —
        # verify this does not raise in the inner DataLoader.
        self.__pyg_neighbor_sampler: torch_geometric.data.NeighborSampler = (
            torch_geometric.data.NeighborSampler(
                edge_index,
                list(sampling_sizes[::-1]),
                target_nodes_indexes,
                transform=self._transform,
                batch_size=batch_size,
                num_workers=num_workers,
                shuffle=shuffle,
                **kwargs
            )
        )
        if is_deterministic():
            # Deterministic sampling: materialize every mini-batch once and
            # replay the cached list on each iteration (see __iter__).
            pyg_neighbor_sampler: _typing.Iterable = self.__pyg_neighbor_sampler
            self.__cached_sampled_data_list: _typing.Optional[
                _typing.List[TargetDependantSampledData]
            ] = [sampled_data for sampled_data in pyg_neighbor_sampler]
        else:
            self.__cached_sampled_data_list: _typing.Optional[
                _typing.List[TargetDependantSampledData]
            ] = None

    def _transform(
        self,
        batch_size: int,
        n_id: torch.LongTensor,
        adj_or_adj_list: _typing.Union[
            _typing.Sequence[
                _typing.Tuple[
                    torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]
                ]
            ],
            _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]],
        ],
    ) -> TargetDependantSampledData:
        """
        Convert the raw output of PyG's NeighborSampler into
        :class:`TargetDependantSampledData`.

        :param batch_size: number of target nodes in this mini-batch
        :param n_id: indexes of all sampled nodes; the first ``batch_size``
            entries are assumed to be the target nodes (PyG convention) —
            TODO confirm against the installed PyG version
        :param adj_or_adj_list: either a single ``(edge_index, e_id, size)``
            tuple (one layer) or a sequence of such tuples (multiple layers)
        """
        # NOTE(review): isinstance() against typing.Tuple/typing.Sequence is
        # deprecated since Python 3.9; plain tuple / collections.abc.Sequence
        # would be safer.
        if (
            isinstance(adj_or_adj_list[0], _typing.Tuple)
            and isinstance(adj_or_adj_list, _typing.Sequence)
            and not isinstance(adj_or_adj_list, _typing.Tuple)
        ):
            # Multiple layers: one (edge_index, e_id, size) tuple per layer.
            return TargetDependantSampledData(
                [
                    (
                        current_layer[0],
                        current_layer[1],
                        self.__edge_weight[current_layer[1]],
                    )
                    for current_layer in adj_or_adj_list
                ],
                (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]),
                n_id,
            )
        elif (
            isinstance(adj_or_adj_list, _typing.Tuple)
            and type(adj_or_adj_list[0]) == torch.Tensor
        ):
            # Single layer: PyG hands over the bare tuple rather than a list.
            adj_or_adj_list: _typing.Tuple[
                torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]
            ] = adj_or_adj_list
            return TargetDependantSampledData(
                [
                    (
                        adj_or_adj_list[0],
                        adj_or_adj_list[1],
                        self.__edge_weight[adj_or_adj_list[1]],
                    )
                ],
                (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]),
                n_id,
            )
        # NOTE(review): falls through returning None when neither shape
        # matches — verify callers can never reach this case.

    def __iter__(self):
        # Replay cached mini-batches through a trivial DataLoader when the
        # sampling is deterministic; otherwise sample afresh via PyG.
        if self.__cached_sampled_data_list is not None and isinstance(
            self.__cached_sampled_data_list, _typing.Sequence
        ):
            return iter(
                torch.utils.data.DataLoader(
                    self._SequenceDataset(self.__cached_sampled_data_list),
                    collate_fn=lambda x: x[0],  # unwrap the singleton batch
                )
            )
        else:
            return iter(self.__pyg_neighbor_sampler)

    @classmethod
    def create_basic_sampler(
        cls,
        edge_index: torch.LongTensor,
        target_nodes_indexes: torch.LongTensor,
        layer_wise_arguments: _typing.Sequence,
        batch_size: int = 1,
        num_workers: int = 1,
        shuffle: bool = True,
        *args,
        **kwargs
    ) -> TargetDependantSampler:
        """
        A static factory method to create instance of :class:`NeighborSampler`

        Arguments
        ------------
        edge_index:
            A :obj:`torch.LongTensor` that defines the underlying graph
            connectivity/message passing flow.
            :obj:`edge_index` holds the indices of a (sparse) adjacency matrix.
            If :obj:`edge_index` is of type :obj:`torch.LongTensor`, its shape
            must be defined as :obj:`[2, num_edges]`, where messages from nodes
            :obj:`edge_index[0]` are sent to nodes in :obj:`edge_index[1]`
            (in case :obj:`flow="source_to_target"`).
        target_nodes_indexes:
            indexes of target nodes to learn representation.
        layer_wise_arguments:
            The number of neighbors to sample for each node in each layer.
            If set to :obj:`sampling_sizes[l] = -1`, all neighbors are included in layer :obj:`l`.
        batch_size:
            number of target nodes for each mini-batch.
        num_workers:
            num_workers argument for inner :class:`torch.utils.data.DataLoader`.
            Note the default here is 1, unlike the 0 used elsewhere in this module.
        shuffle:
            whether to shuffle target nodes for mini-batches.
        """
        # Instantiate via cls so subclasses get an instance of themselves.
        return cls(
            edge_index,
            target_nodes_indexes,
            layer_wise_arguments,
            batch_size,
            num_workers,
            shuffle,
            **kwargs
        )
| @@ -0,0 +1,376 @@ | |||
| import torch.utils.data | |||
| import typing as _typing | |||
class TargetDependantSampledData:
    """
    Aggregated result of sampling one mini-batch with a target-dependent
    sampler.

    Node-wise and layer-wise sampling are inherently target-dependent: the
    sampled subgraph is determined by the chosen target nodes.  Subgraph-wise
    sampling can be viewed the same way, except that the target nodes are
    determined by the sampled subgraph instead.

    Parameters
    ------------
    sampled_edges_for_layers:
        sequence of
        ``(edge_index_for_sampled_graph, edge_id_in_integral_graph, edge_weight)``
        tuples, one per layer, where ``edge_index_for_sampled_graph`` is the
        edge index renumbered for the sampled subgraph,
        ``edge_id_in_integral_graph`` holds the positional indexes of those
        edges in the integral graph, and ``edge_weight`` is an optional
        per-edge aggregation weight.
    target_nodes_indexes:
        pair ``(indexes in sampled subgraph, indexes in integral graph)``
        of the target nodes.
    all_sampled_nodes_indexes:
        indexes of every node sampled for the mini-batch.

    Attributes
    ------------
    target_nodes_indexes:
        the target nodes' indexes in both node numberings.
    all_sampled_nodes_indexes:
        indexes of every node sampled for the mini-batch.
    sampled_edges_for_layers:
        the stored per-layer sampled edge data.
    """

    class _LayerSampledEdgeData:
        """Sampled edges of a single layer plus their optional weights."""

        def __init__(
            self,
            edge_index_for_sampled_graph: torch.Tensor,
            edge_id_in_integral_graph: torch.Tensor,
            edge_weight: _typing.Optional[torch.Tensor],
        ):
            self.__local_edge_index = edge_index_for_sampled_graph
            self.__global_edge_id = edge_id_in_integral_graph
            self.__weight = edge_weight

        @property
        def edge_index_for_sampled_graph(self) -> torch.LongTensor:
            """Edge index expressed in the sampled subgraph's numbering."""
            local_edge_index: _typing.Any = self.__local_edge_index
            return local_edge_index

        @property
        def edge_id_in_integral_graph(self) -> torch.LongTensor:
            """Positional indexes of the sampled edges in the integral graph."""
            global_edge_id: _typing.Any = self.__global_edge_id
            return global_edge_id

        @property
        def edge_weight(self) -> _typing.Optional[torch.Tensor]:
            """Optional per-edge aggregation weight."""
            return self.__weight

    class _TargetNodes:
        """Target-node indexes in both the sampled and the integral graph."""

        def __init__(
            self,
            indexes_in_sampled_graph: torch.Tensor,
            indexes_in_integral_graph: torch.Tensor,
        ):
            self.__local_indexes = indexes_in_sampled_graph
            self.__global_indexes = indexes_in_integral_graph

        @property
        def indexes_in_sampled_graph(self) -> torch.LongTensor:
            """Indexes of the target nodes inside the sampled subgraph."""
            local_indexes: _typing.Any = self.__local_indexes
            return local_indexes

        @property
        def indexes_in_integral_graph(self) -> torch.LongTensor:
            """Indexes of the target nodes inside the integral graph."""
            global_indexes: _typing.Any = self.__global_indexes
            return global_indexes

    def __init__(
        self,
        sampled_edges_for_layers: _typing.Sequence[
            _typing.Tuple[torch.Tensor, torch.Tensor, _typing.Optional[torch.Tensor]]
        ],
        target_nodes_indexes: _typing.Tuple[torch.Tensor, torch.Tensor],
        all_sampled_nodes_indexes: torch.Tensor,
    ):
        # Wrap each raw per-layer tuple in its accessor object.
        self.__layers = [
            self._LayerSampledEdgeData(layer_data[0], layer_data[1], layer_data[2])
            for layer_data in sampled_edges_for_layers
        ]
        self.__targets = self._TargetNodes(
            target_nodes_indexes[0], target_nodes_indexes[1]
        )
        self.__all_nodes = all_sampled_nodes_indexes

    @property
    def target_nodes_indexes(self) -> _TargetNodes:
        """The target nodes' indexes, in both node numberings."""
        return self.__targets

    @property
    def all_sampled_nodes_indexes(self) -> torch.LongTensor:
        """Indexes of every node sampled for this mini-batch."""
        all_nodes: _typing.Any = self.__all_nodes
        return all_nodes

    @property
    def sampled_edges_for_layers(self) -> _typing.Sequence[_LayerSampledEdgeData]:
        """Per-layer sampled edge data."""
        return self.__layers
class TargetDependantSampler(torch.utils.data.DataLoader, _typing.Iterable):
    """
    Abstract base class shared by all target-dependent samplers.

    Concrete subclasses iterate over mini-batches of sampled data and must
    provide the :meth:`create_basic_sampler` factory.
    """

    @classmethod
    def create_basic_sampler(
        cls,
        edge_index: torch.LongTensor,
        target_nodes_indexes: torch.LongTensor,
        layer_wise_arguments: _typing.Sequence,
        batch_size: int = 1,
        num_workers: int = 0,
        shuffle: bool = True,
        *args,
        **kwargs
    ) -> "TargetDependantSampler":
        """
        Factory method producing a ready-to-use sampler instance.

        Parameters
        ------------
        edge_index:
            edge index of the integral graph
        target_nodes_indexes:
            indexes of the target nodes in the integral graph
        layer_wise_arguments:
            layer-wise arguments for sampling
        batch_size:
            number of target nodes per mini-batch, default to 1
        num_workers:
            number of workers, default to 0
        shuffle:
            whether to shuffle the target nodes, default to True
        args:
            remaining positional arguments
        kwargs:
            remaining keyword arguments

        Returns
        --------
        instance of TargetDependantSampler
        """
        raise NotImplementedError

    def __iter__(self):
        # Delegate iteration to torch's DataLoader machinery.
        return super().__iter__()
| class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): | |||
| """ | |||
| The base class for various Layer-wise Sampling techniques, | |||
| providing basic functionality of composing sampled data for mini-batches. | |||
| Parameters | |||
| ------------ | |||
| edge_index: | |||
| edge index of integral graph | |||
| target_nodes_indexes: | |||
| indexes of target nodes in the integral graph | |||
| layer_wise_arguments: | |||
| layer-wise arguments for sampling | |||
| batch_size: | |||
| batch size for target nodes | |||
| num_workers: | |||
| number of workers | |||
| shuffle: | |||
| flag for shuffling, default to True | |||
| kwargs: | |||
| remaining keyword arguments | |||
| """ | |||
    def __init__(
        self,
        edge_index: torch.LongTensor,
        target_nodes_indexes: torch.LongTensor,
        layer_wise_arguments: _typing.Sequence,
        batch_size: _typing.Optional[int] = 1,
        num_workers: int = 0,
        shuffle: bool = True,
        **kwargs
    ):
        """
        :param edge_index: edge index of the integral graph
        :param target_nodes_indexes: indexes of target nodes in the integral graph
        :param layer_wise_arguments: layer-wise arguments for sampling
        :param batch_size: batch size for target nodes, default to 1
        :param num_workers: number of workers for the inner DataLoader, default to 0
        :param shuffle: whether to shuffle target nodes, default to True
        :param kwargs: remaining keyword arguments forwarded to the DataLoader
        """
        # Integral edge index, shared with subclasses via this protected attribute.
        self._edge_index: torch.LongTensor = edge_index
        self.__layer_wise_arguments: _typing.Sequence = layer_wise_arguments
        # The inner DataLoader must use this class's own collate function,
        # so any caller-supplied collate_fn is discarded.
        if "collate_fn" in kwargs:
            del kwargs["collate_fn"]
        # The "dataset" is the unique target-node indexes; each mini-batch of
        # target nodes is turned into sampled subgraph data by self._collate_fn.
        super(BasicLayerWiseTargetDependantSampler, self).__init__(
            target_nodes_indexes.unique().numpy(),
            batch_size,
            shuffle,
            num_workers=num_workers,
            collate_fn=self._collate_fn,
            **kwargs
        )
| @classmethod | |||
| def create_basic_sampler( | |||
| cls, | |||
| edge_index: torch.LongTensor, | |||
| target_nodes_indexes: torch.LongTensor, | |||
| layer_wise_arguments: _typing.Sequence, | |||
| batch_size: int = 1, | |||
| num_workers: int = 0, | |||
| shuffle: bool = True, | |||
| *args, | |||
| **kwargs | |||
| ) -> TargetDependantSampler: | |||
| """ | |||
| :param edge_index: edge index of integral graph | |||
| :param target_nodes_indexes: indexes of target nodes in the integral graph | |||
| :param layer_wise_arguments: layer-wise arguments for sampling | |||
| :param batch_size: batch size for target nodes | |||
| :param num_workers: number of workers | |||
| :param shuffle: flag for shuffling, default to True | |||
| :param args: remaining positional arguments | |||
| :param kwargs: remaining keyword arguments | |||
| :return: instance of TargetDependantSampler | |||
| """ | |||
| return BasicLayerWiseTargetDependantSampler( | |||
| edge_index, | |||
| target_nodes_indexes, | |||
| layer_wise_arguments, | |||
| batch_size, | |||
| num_workers, | |||
| shuffle, | |||
| **kwargs | |||
| ) | |||
| def _sample_edges_for_layer( | |||
| self, | |||
| __current_layer_target_nodes_indexes: torch.LongTensor, | |||
| __top_layer_target_nodes_indexes: torch.LongTensor, | |||
| layer_argument: _typing.Any, | |||
| *args, | |||
| **kwargs | |||
| ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: | |||
| """ | |||
| Sample edges for one specific layer, expected to be implemented in subclass. | |||
| Parameters | |||
| ------------ | |||
| __current_layer_target_nodes_indexes: | |||
| target nodes for current layer | |||
| __top_layer_target_nodes_indexes: | |||
| target nodes for top layer | |||
| layer_argument: | |||
| argument for current layer | |||
| args: | |||
| remaining positional arguments | |||
| kwargs: | |||
| remaining keyword arguments | |||
| Returns | |||
| -------- | |||
| edge_id_in_integral_graph: | |||
| the corresponding positional indexes for the `edge_index` of integral graph | |||
| edge_weight: | |||
| the optional `edge_weight` for aggregation | |||
| """ | |||
| raise NotImplementedError | |||
| def _collate_fn( | |||
| self, top_layer_target_nodes_indexes_list: _typing.List[int] | |||
| ) -> TargetDependantSampledData: | |||
| return self.__sample_layers( | |||
| torch.tensor(top_layer_target_nodes_indexes_list).unique() | |||
| ) | |||
| def __sample_layers( | |||
| self, __top_layer_target_nodes_indexes: torch.LongTensor | |||
| ) -> TargetDependantSampledData: | |||
| sampled_edges_for_layers: _typing.List[ | |||
| _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] | |||
| ] = list() | |||
| __current_layer_target_nodes_indexes: torch.LongTensor = ( | |||
| __top_layer_target_nodes_indexes | |||
| ) | |||
| " Reverse self.__layer_wise_arguments from bottom-up to top-down " | |||
| for layer_argument in self.__layer_wise_arguments[::-1]: | |||
| current_layer_result: _typing.Tuple[ | |||
| torch.LongTensor, _typing.Optional[torch.Tensor] | |||
| ] = self._sample_edges_for_layer( | |||
| __current_layer_target_nodes_indexes, | |||
| __top_layer_target_nodes_indexes, | |||
| layer_argument, | |||
| ) | |||
| __source_nodes_indexes_for_current_layer: torch.Tensor = self._edge_index[ | |||
| 0, current_layer_result[0] | |||
| ] | |||
| __current_layer_target_nodes_indexes: torch.LongTensor = ( | |||
| __source_nodes_indexes_for_current_layer.unique() | |||
| ) | |||
| sampled_edges_for_layers.append(current_layer_result) | |||
| """ Reverse sampled_edges_for_layers from top-down to bottom-up """ | |||
| sampled_edges_for_layers: _typing.Sequence[ | |||
| _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] | |||
| ] = sampled_edges_for_layers[::-1] | |||
| sampled_nodes_in_sub_graph: torch.LongTensor = torch.cat( | |||
| [ | |||
| self._edge_index[:, current_layer_result[0]].reshape([-1]) | |||
| for current_layer_result in sampled_edges_for_layers | |||
| ] | |||
| ).unique() | |||
| __sampled_nodes_in_sub_graph_mapping: _typing.Dict[int, int] = dict( | |||
| list( | |||
| zip( | |||
| sampled_nodes_in_sub_graph.tolist(), | |||
| range(sampled_nodes_in_sub_graph.size(0)), | |||
| ) | |||
| ) | |||
| ) | |||
| __sampled_edge_index_for_layers_in_sub_graph: _typing.Sequence[torch.Tensor] = [ | |||
| torch.stack( | |||
| [ | |||
| torch.tensor( | |||
| [ | |||
| __sampled_nodes_in_sub_graph_mapping.get(node_index) | |||
| for node_index in self._edge_index[ | |||
| 0, current_layer_result[0] | |||
| ].tolist() | |||
| ] | |||
| ), | |||
| torch.tensor( | |||
| [ | |||
| __sampled_nodes_in_sub_graph_mapping.get(node_index) | |||
| for node_index in self._edge_index[ | |||
| 1, current_layer_result[0] | |||
| ].tolist() | |||
| ] | |||
| ), | |||
| ] | |||
| ) | |||
| for current_layer_result in sampled_edges_for_layers | |||
| ] | |||
| return TargetDependantSampledData( | |||
| [ | |||
| (temp_tuple[0], temp_tuple[1][0], temp_tuple[1][1]) | |||
| for temp_tuple in zip( | |||
| __sampled_edge_index_for_layers_in_sub_graph, | |||
| sampled_edges_for_layers, | |||
| ) | |||
| ], | |||
| ( | |||
| torch.tensor( | |||
| [ | |||
| __sampled_nodes_in_sub_graph_mapping.get( | |||
| current_target_node_index_in_integral_data | |||
| ) | |||
| for current_target_node_index_in_integral_data in __top_layer_target_nodes_indexes.tolist() | |||
| if current_target_node_index_in_integral_data | |||
| in __sampled_nodes_in_sub_graph_mapping | |||
| ] | |||
| ).long(), # Remap | |||
| torch.tensor( | |||
| [ | |||
| current_target_node_index_in_integral_data | |||
| for current_target_node_index_in_integral_data in __top_layer_target_nodes_indexes.tolist() | |||
| if current_target_node_index_in_integral_data | |||
| in __sampled_nodes_in_sub_graph_mapping | |||
| ] | |||
| ).long(), | |||
| ), | |||
| sampled_nodes_in_sub_graph, | |||
| ) | |||
| @@ -2,7 +2,12 @@ | |||
| Auto solver for various graph tasks | |||
| """ | |||
| from .classifier import AutoGraphClassifier, AutoNodeClassifier | |||
| from .utils import Leaderboard | |||
| from .classifier import AutoGraphClassifier, AutoNodeClassifier, AutoLinkPredictor | |||
| from .utils import LeaderBoard | |||
| __all__ = ["AutoNodeClassifier", "AutoGraphClassifier", "Leaderboard"] | |||
| __all__ = [ | |||
| "AutoNodeClassifier", | |||
| "AutoGraphClassifier", | |||
| "AutoLinkPredictor", | |||
| "LeaderBoard", | |||
| ] | |||
| @@ -5,21 +5,40 @@ Provide some standard solver interface. | |||
| """ | |||
| from typing import Any, Tuple | |||
| from copy import deepcopy | |||
| import torch | |||
| from ..module.feature import FEATURE_DICT | |||
| from ..module.hpo import HPO_DICT | |||
| from ..module.train import NodeClassificationTrainer | |||
| from ..module import BaseFeatureAtom, BaseHPOptimizer, BaseTrainer | |||
| from .utils import Leaderboard | |||
| from ..module.model import MODEL_DICT | |||
| from ..module.nas.algorithm import NAS_ALGO_DICT | |||
| from ..module.nas.estimator import NAS_ESTIMATOR_DICT | |||
| from ..module.nas.space import NAS_SPACE_DICT | |||
| from ..module import BaseFeature, BaseHPOptimizer, BaseTrainer | |||
| from .utils import LeaderBoard | |||
| from ..utils import get_logger | |||
| LOGGER = get_logger("BaseSolver") | |||
| def _initialize_single_model(model_name, parameters=None): | |||
| if parameters: | |||
| return MODEL_DICT[model_name](**parameters) | |||
| return MODEL_DICT[model_name]() | |||
| def _parse_hp_space(spaces): | |||
| if spaces is None: | |||
| return None | |||
| for space in spaces: | |||
| if "cutFunc" in space and isinstance(space["cutFunc"], str): | |||
| space["cutFunc"] = eval(space["cutFunc"]) | |||
| return spaces | |||
| class BaseSolver: | |||
| """ | |||
| r""" | |||
| Base solver class, define some standard solver interfaces. | |||
| Parameters | |||
| @@ -43,6 +62,12 @@ class BaseSolver: | |||
| If given, will set the number eval times the hpo module will use. | |||
| Only be effective when hpo_module is of type ``str``. Default ``50``. | |||
| default_trainer: str or list of str (Optional) | |||
| Default trainer class to be used. | |||
| If a single trainer class is given, will set all trainer to default trainer. | |||
| If a list of trainer class is given, will set every model with corresponding trainer | |||
| cls. Default ``None``. | |||
| trainer_hp_space: list of dict (Optional) | |||
| trainer hp space or list of trainer hp spaces configuration. | |||
| If a single trainer hp is given, will specify the hp space of trainer for every model. | |||
| @@ -68,9 +93,13 @@ class BaseSolver: | |||
| self, | |||
| feature_module, | |||
| graph_models, | |||
| nas_spaces, | |||
| nas_algorithms, | |||
| nas_estimators, | |||
| hpo_module, | |||
| ensemble_module, | |||
| max_evals=50, | |||
| default_trainer=None, | |||
| trainer_hp_space=None, | |||
| model_hp_spaces=None, | |||
| size=4, | |||
| @@ -87,15 +116,18 @@ class BaseSolver: | |||
| elif isinstance(device, str) and (device == "cpu" or device.startswith("cuda")): | |||
| self.runtime_device = torch.device(device) | |||
| else: | |||
| LOGGER.error("Cannor parse device %s", str(device)) | |||
| LOGGER.error("Cannot parse device %s", str(device)) | |||
| raise ValueError("Cannot parse device {}".format(device)) | |||
| # initialize modules | |||
| self.graph_model_list = [] | |||
| self.set_graph_models(graph_models, trainer_hp_space, model_hp_spaces) | |||
| self.set_graph_models( | |||
| graph_models, default_trainer, trainer_hp_space, model_hp_spaces | |||
| ) | |||
| self.set_feature_module(feature_module) | |||
| self.set_hpo_module(hpo_module, max_evals=max_evals) | |||
| self.set_ensemble_module(ensemble_module, size=size) | |||
| self.set_nas_module(nas_algorithms, nas_spaces, nas_estimators) | |||
| # initialize leaderboard | |||
| self.leaderboard = None | |||
| @@ -109,12 +141,12 @@ class BaseSolver: | |||
| *args, | |||
| **kwargs, | |||
| ) -> "BaseSolver": | |||
| """ | |||
| r""" | |||
| Set the feature module of current solver. | |||
| Parameters | |||
| ---------- | |||
| feature_module: autogl.module.feature.BaseFeatureAtom or str or None | |||
| feature_module: autogl.module.feature.BaseFeature or str or None | |||
| The (name of) auto feature engineer used to process the given dataset. | |||
| Disable feature engineer by setting it to ``None``. | |||
| @@ -126,7 +158,7 @@ class BaseSolver: | |||
| # load feature engineer module | |||
| def get_feature(feature_engineer): | |||
| if isinstance(feature_engineer, BaseFeatureAtom): | |||
| if isinstance(feature_engineer, BaseFeature): | |||
| return feature_engineer | |||
| if isinstance(feature_engineer, str): | |||
| if feature_engineer in FEATURE_DICT: | |||
| @@ -141,7 +173,7 @@ class BaseSolver: | |||
| if feature_module is None: | |||
| self.feature_module = None | |||
| elif isinstance(feature_module, (BaseFeatureAtom, str)): | |||
| elif isinstance(feature_module, (BaseFeature, str)): | |||
| self.feature_module = get_feature(feature_module) | |||
| elif isinstance(feature_module, list): | |||
| self.feature_module = get_feature(feature_module[0]) | |||
| @@ -159,10 +191,11 @@ class BaseSolver: | |||
| def set_graph_models( | |||
| self, | |||
| graph_models, | |||
| default_trainer=None, | |||
| trainer_hp_space=None, | |||
| model_hp_spaces=None, | |||
| ) -> "BaseSolver": | |||
| """ | |||
| r""" | |||
| Set the graph models used in current solver. | |||
| Parameters | |||
| @@ -170,6 +203,12 @@ class BaseSolver: | |||
| graph_models: list of autogl.module.model.BaseModel or list of str | |||
| The (name of) models to be optimized as backbone. | |||
| default_trainer: str or list of str (Optional) | |||
| Default trainer class to be used. | |||
| If a single trainer class is given, will set all trainer to default trainer. | |||
| If a list of trainer class is given, will set every model with corresponding trainer | |||
| cls. Default ``None``. | |||
| trainer_hp_space: list of dict (Optional) | |||
| trainer hp space or list of trainer hp spaces configuration. | |||
| If a single trainer hp is given, will specify the hp space of trainer for every model. | |||
| @@ -187,12 +226,13 @@ class BaseSolver: | |||
| A reference of current solver. | |||
| """ | |||
| self.gml = graph_models | |||
| self._default_trainer = default_trainer | |||
| self._trainer_hp_space = trainer_hp_space | |||
| self._model_hp_spaces = model_hp_spaces | |||
| return self | |||
| def set_hpo_module(self, hpo_module, *args, **kwargs) -> "BaseSolver": | |||
| """ | |||
| r""" | |||
| Set the hpo module used in current solver. | |||
| Parameters | |||
| @@ -223,9 +263,105 @@ class BaseSolver: | |||
| type(hpo_module), | |||
| "instead.", | |||
| ) | |||
| return self | |||
| def set_ensemble_module(self, ensemble_module, *args, **kwargs) -> "BaseSolver": | |||
| def set_nas_module( | |||
| self, nas_algorithms=None, nas_spaces=None, nas_estimators=None | |||
| ) -> "BaseSolver": | |||
| """ | |||
| Set the neural architecture search module in current solver. | |||
| Parameters | |||
| ---------- | |||
| nas_spaces: (list of) `autogl.module.hpo.nas.GraphSpace` | |||
| The search space of nas. You can pass a list of space to enable | |||
| multiple space search. If list passed, the length of `nas_spaces`, | |||
| `nas_algorithms` and `nas_estimators` should be the same. If set | |||
| to `None`, will disable the whole nas module. | |||
| nas_algorithms: (list of) `autogl.module.hpo.nas.BaseNAS` | |||
| The search algorithm of nas. You can pass a list of algorithms | |||
| to enable multiple algorithms search. If list passed, the length of | |||
| `nas_spaces`, `nas_algorithms` and `nas_estimators` should be the same. | |||
| Default `None`. | |||
| nas_estimators: (list of) `autogl.module.hpo.nas.BaseEstimators` | |||
| The nas estimators. You can pass a list of estimators to enable multiple | |||
| estimators search. If list passed, the length of `nas_spaces`, `nas_algorithms` | |||
| and `nas_estimators` should be the same. Default `None`. | |||
| Returns | |||
| ------- | |||
| self: autogl.solver.BaseSolver | |||
| A reference of current solver. | |||
| """ | |||
| if nas_algorithms is None and nas_estimators is None and nas_spaces is None: | |||
| self.nas_algorithms = self.nas_estimators = self.nas_spaces = None | |||
| return | |||
| assert None not in [ | |||
| nas_algorithms, | |||
| nas_estimators, | |||
| nas_spaces, | |||
| ], "The algorithms, estimators and spaces should all be set" | |||
| nas_algorithms = ( | |||
| nas_algorithms | |||
| if isinstance(nas_algorithms, (list, tuple)) | |||
| else [nas_algorithms] | |||
| ) | |||
| nas_spaces = ( | |||
| nas_spaces if isinstance(nas_spaces, (list, tuple)) else [nas_spaces] | |||
| ) | |||
| nas_estimators = ( | |||
| nas_estimators | |||
| if isinstance(nas_estimators, (list, tuple)) | |||
| else [nas_estimators] | |||
| ) | |||
| # parse all str elements | |||
| nas_algorithms = [ | |||
| algo if not isinstance(algo, str) else NAS_ALGO_DICT[algo]() | |||
| for algo in nas_algorithms | |||
| ] | |||
| nas_spaces = [ | |||
| space if not isinstance(space, str) else NAS_SPACE_DICT[space]() | |||
| for space in nas_spaces | |||
| ] | |||
| nas_estimators = [ | |||
| estimator | |||
| if not isinstance(estimator, str) | |||
| else NAS_ESTIMATOR_DICT[estimator]() | |||
| for estimator in nas_estimators | |||
| ] | |||
| max_number = max([len(x) for x in [nas_algorithms, nas_spaces, nas_estimators]]) | |||
| assert all( | |||
| [ | |||
| len(x) in [1, max_number] | |||
| for x in [nas_algorithms, nas_spaces, nas_estimators] | |||
| ] | |||
| ), "lengths of algorithms/spaces/estimators do not match!" | |||
| self.nas_algorithms = ( | |||
| [deepcopy(nas_algorithms) for _ in range(max_number)] | |||
| if len(nas_algorithms) == 1 and max_number > 1 | |||
| else nas_algorithms | |||
| ) | |||
| self.nas_spaces = ( | |||
| [deepcopy(nas_spaces) for _ in range(max_number)] | |||
| if len(nas_spaces) == 1 and max_number > 1 | |||
| else nas_spaces | |||
| ) | |||
| self.nas_estimators = ( | |||
| [deepcopy(nas_estimators) for _ in range(max_number)] | |||
| if len(nas_estimators) == 1 and max_number > 1 | |||
| else nas_estimators | |||
| ) | |||
| return self | |||
| def set_ensemble_module(self, ensemble_module, *args, **kwargs) -> "BaseSolver": | |||
| r""" | |||
| Set the ensemble module used in current solver. | |||
| Parameters | |||
| @@ -243,7 +379,7 @@ class BaseSolver: | |||
| raise NotImplementedError() | |||
| def fit(self, *args, **kwargs) -> "BaseSolver": | |||
| """ | |||
| r""" | |||
| Fit current solver on given dataset. | |||
| Returns | |||
| @@ -254,7 +390,7 @@ class BaseSolver: | |||
| raise NotImplementedError() | |||
| def fit_predict(self, *args, **kwargs) -> Any: | |||
| """ | |||
| r""" | |||
| Fit current solver on given dataset and return the predicted value. | |||
| Returns | |||
| @@ -265,7 +401,7 @@ class BaseSolver: | |||
| raise NotImplementedError() | |||
| def predict(self, *args, **kwargs) -> Any: | |||
| """ | |||
| r""" | |||
| Predict the node class number. | |||
| Returns | |||
| @@ -275,8 +411,8 @@ class BaseSolver: | |||
| """ | |||
| raise NotImplementedError() | |||
| def get_leaderboard(self) -> Leaderboard: | |||
| """ | |||
| def get_leaderboard(self) -> LeaderBoard: | |||
| r""" | |||
| Get the current leaderboard of this solver. | |||
| Returns | |||
| @@ -287,7 +423,7 @@ class BaseSolver: | |||
| return self.leaderboard | |||
| def get_model_by_name(self, name) -> BaseTrainer: | |||
| """ | |||
| r""" | |||
| Find and get the model instance by name. | |||
| Parameters | |||
| @@ -303,8 +439,8 @@ class BaseSolver: | |||
| assert name in self.trained_models, "cannot find model by name" + name | |||
| return self.trained_models[name] | |||
| def get_model_by_performance(self, index) -> Tuple[NodeClassificationTrainer, str]: | |||
| """ | |||
| def get_model_by_performance(self, index) -> Tuple[BaseTrainer, str]: | |||
| r""" | |||
| Find and get the model instance by performance. | |||
| Parameters | |||
| @@ -314,7 +450,7 @@ class BaseSolver: | |||
| Returns | |||
| ------- | |||
| trainer: autogl.module.train.NodeClassificationTrainer | |||
| trainer: autogl.module.train.BaseTrainer | |||
| A trainer instance containing the trained models and training status. | |||
| name: str | |||
| The name of current trainer. | |||
| @@ -324,7 +460,7 @@ class BaseSolver: | |||
| @classmethod | |||
| def from_config(cls, path_or_dict, filetype="auto") -> "BaseSolver": | |||
| """ | |||
| r""" | |||
| Load solver from config file. | |||
| You can use this function to directly load a solver from predefined config dict | |||
| @@ -5,5 +5,11 @@ Auto classifier for classification problems. | |||
| from .base import BaseClassifier | |||
| from .graph_classifier import AutoGraphClassifier | |||
| from .node_classifier import AutoNodeClassifier | |||
| from .link_predictor import AutoLinkPredictor | |||
| __all__ = ["BaseClassifier", "AutoGraphClassifier", "AutoNodeClassifier"] | |||
| __all__ = [ | |||
| "BaseClassifier", | |||
| "AutoGraphClassifier", | |||
| "AutoNodeClassifier", | |||
| "AutoLinkPredictor", | |||
| ] | |||
| @@ -12,10 +12,10 @@ import yaml | |||
| from .base import BaseClassifier | |||
| from ...module.feature import FEATURE_DICT | |||
| from ...module.model import MODEL_DICT | |||
| from ...module.train import TRAINER_DICT, get_feval | |||
| from ...module import BaseModel | |||
| from ..utils import Leaderboard, set_seed | |||
| from ...module.model import BaseModel, MODEL_DICT | |||
| from ...module.train import TRAINER_DICT, get_feval, BaseGraphClassificationTrainer | |||
| from ..base import _initialize_single_model, _parse_hp_space | |||
| from ..utils import LeaderBoard, set_seed | |||
| from ...datasets import utils | |||
| from ...utils import get_logger | |||
| @@ -74,9 +74,13 @@ class AutoGraphClassifier(BaseClassifier): | |||
| self, | |||
| feature_module=None, | |||
| graph_models=["gin", "topkpool"], | |||
| # nas_algorithms=None, | |||
| # nas_spaces=None, | |||
| # nas_estimators=None, | |||
| hpo_module="anneal", | |||
| ensemble_module="voting", | |||
| max_evals=50, | |||
| default_trainer=None, | |||
| trainer_hp_space=None, | |||
| model_hp_spaces=None, | |||
| size=4, | |||
| @@ -86,9 +90,13 @@ class AutoGraphClassifier(BaseClassifier): | |||
| super().__init__( | |||
| feature_module=feature_module, | |||
| graph_models=graph_models, | |||
| nas_algorithms=None, # nas_algorithms, | |||
| nas_spaces=None, # nas_spaces, | |||
| nas_estimators=None, # nas_estimators, | |||
| hpo_module=hpo_module, | |||
| ensemble_module=ensemble_module, | |||
| max_evals=max_evals, | |||
| default_trainer=default_trainer or "GraphClassificationFull", | |||
| trainer_hp_space=trainer_hp_space, | |||
| model_hp_spaces=model_hp_spaces, | |||
| size=size, | |||
| @@ -100,23 +108,25 @@ class AutoGraphClassifier(BaseClassifier): | |||
| def _init_graph_module( | |||
| self, | |||
| graph_models, | |||
| num_features, | |||
| num_classes, | |||
| *args, | |||
| **kwargs, | |||
| num_features, | |||
| feval, | |||
| device, | |||
| loss, | |||
| num_graph_features, | |||
| ) -> "AutoGraphClassifier": | |||
| # load graph network module | |||
| self.graph_model_list = [] | |||
| if isinstance(graph_models, list): | |||
| if isinstance(graph_models, (list, tuple)): | |||
| for model in graph_models: | |||
| if isinstance(model, str): | |||
| if model in MODEL_DICT: | |||
| self.graph_model_list.append( | |||
| MODEL_DICT[model]( | |||
| num_features=num_features, | |||
| num_classes=num_classes, | |||
| *args, | |||
| **kwargs, | |||
| num_features=num_features, | |||
| num_graph_features=num_graph_features, | |||
| device=device, | |||
| init=False, | |||
| ) | |||
| ) | |||
| @@ -125,56 +135,94 @@ class AutoGraphClassifier(BaseClassifier): | |||
| elif isinstance(model, type) and issubclass(model, BaseModel): | |||
| self.graph_model_list.append( | |||
| model( | |||
| num_features=num_features, | |||
| num_classes=num_classes, | |||
| *args, | |||
| **kwargs, | |||
| num_features=num_features, | |||
| num_graph_features=num_graph_features, | |||
| device=device, | |||
| init=False, | |||
| ) | |||
| ) | |||
| elif isinstance(model, BaseModel): | |||
| model.set_num_features(num_features) | |||
| # setup the hp of num_classes and num_features | |||
| model.set_num_classes(num_classes) | |||
| model.set_num_graph_features( | |||
| 0 | |||
| if "num_graph_features" not in kwargs | |||
| else kwargs["num_graph_features"] | |||
| model.set_num_features(num_features) | |||
| model.set_num_graph_features(num_graph_features) | |||
| self.graph_model_list.append(model.to(device)) | |||
| elif isinstance(model, BaseGraphClassificationTrainer): | |||
| # receive a trainer list, put trainer to list | |||
| assert ( | |||
| model.get_model() is not None | |||
| ), "Passed trainer should contain a model" | |||
| model.model.set_num_classes(num_classes) | |||
| model.model.set_num_features(num_features) | |||
| model.model.set_num_graph_features(num_graph_features) | |||
| model.update_parameters( | |||
| num_classes=num_classes, | |||
| num_features=num_features, | |||
| num_graph_features=num_graph_features, | |||
| loss=loss, | |||
| feval=feval, | |||
| device=device, | |||
| ) | |||
| self.graph_model_list.append(model) | |||
| else: | |||
| raise KeyError("cannot find graph network %s." % (model)) | |||
| else: | |||
| raise ValueError( | |||
| "need graph network to be str or a BaseModel class/instance, get", | |||
| "need graph network to be (list of) str or a BaseModel class/instance, get", | |||
| graph_models, | |||
| "instead.", | |||
| ) | |||
| # wrap all model_cls with specified trainer | |||
| for i, model in enumerate(self.graph_model_list): | |||
| # set model hp space | |||
| if self._model_hp_spaces is not None: | |||
| if self._model_hp_spaces[i] is not None: | |||
| model.hyper_parameter_space = self._model_hp_spaces[i] | |||
| trainer = TRAINER_DICT["GraphClassification"]( | |||
| model=model, | |||
| num_features=num_features, | |||
| num_classes=num_classes, | |||
| *args, | |||
| **kwargs, | |||
| init=False, | |||
| ) | |||
| if isinstance(model, BaseGraphClassificationTrainer): | |||
| model.model.hyper_parameter_space = self._model_hp_spaces[i] | |||
| else: | |||
| model.hyper_parameter_space = self._model_hp_spaces[i] | |||
| # initialize trainer if needed | |||
| if isinstance(model, BaseModel): | |||
| name = ( | |||
| self._default_trainer | |||
| if isinstance(self._default_trainer, str) | |||
| else self._default_trainer[i] | |||
| ) | |||
| model = TRAINER_DICT[name]( | |||
| model=model, | |||
| num_features=num_features, | |||
| num_classes=num_classes, | |||
| loss=loss, | |||
| feval=feval, | |||
| device=device, | |||
| num_graph_features=num_graph_features, | |||
| init=False, | |||
| ) | |||
| # set trainer hp space | |||
| if self._trainer_hp_space is not None: | |||
| if isinstance(self._trainer_hp_space[0], list): | |||
| current_hp_for_trainer = self._trainer_hp_space[i] | |||
| else: | |||
| current_hp_for_trainer = self._trainer_hp_space | |||
| trainer.hyper_parameter_space = ( | |||
| current_hp_for_trainer + model.hyper_parameter_space | |||
| ) | |||
| self.graph_model_list[i] = trainer | |||
| model.hyper_parameter_space = current_hp_for_trainer | |||
| self.graph_model_list[i] = model | |||
| return self | |||
| """ | |||
| # currently disabled | |||
| def _init_nas_module( | |||
| self, num_features, num_classes, num_graph_features, feval, device, loss | |||
| ): | |||
| for algo, space, estimator in zip( | |||
| self.nas_algorithms, self.nas_spaces, self.nas_estimators | |||
| ): | |||
| # TODO: initialize important parameters | |||
| pass | |||
| """ | |||
| # pylint: disable=arguments-differ | |||
| def fit( | |||
| self, | |||
| @@ -183,8 +231,6 @@ class AutoGraphClassifier(BaseClassifier): | |||
| inplace=False, | |||
| train_split=None, | |||
| val_split=None, | |||
| cross_validation=True, | |||
| cv_split=10, | |||
| evaluation_method="infer", | |||
| seed=None, | |||
| ) -> "AutoGraphClassifier": | |||
| @@ -214,13 +260,6 @@ class AutoGraphClassifier(BaseClassifier): | |||
| use default train/val/test split in dataset, please set this to ``None``. | |||
| Default ``None``. | |||
| cross_validation: bool | |||
| Whether to use cross validation to fit on train dataset. Default ``True``. | |||
| cv_split: int | |||
| The cross validation split number. Only be effective when ``cross_validation=True``. | |||
| Default ``10``. | |||
| evaluation_method: (list of) str autogl.module.train.evaluation | |||
| A (list of) evaluation method for current solver. If ``infer``, will automatically | |||
| determine. Default ``infer``. | |||
| @@ -254,7 +293,7 @@ class AutoGraphClassifier(BaseClassifier): | |||
| assert isinstance(evaluation_method, list) | |||
| evaluator_list = get_feval(evaluation_method) | |||
| self.leaderboard = Leaderboard( | |||
| self.leaderboard = LeaderBoard( | |||
| [e.get_eval_name() for e in evaluator_list], | |||
| {e.get_eval_name(): e.is_higher_better() for e in evaluator_list}, | |||
| ) | |||
| @@ -266,18 +305,11 @@ class AutoGraphClassifier(BaseClassifier): | |||
| "Please manually pass train and val ratio." | |||
| ) | |||
| LOGGER.info("Use the default train/val/test ratio in given dataset") | |||
| if hasattr(dataset.train_split, "n_splits"): | |||
| cross_validation = True | |||
| # if hasattr(dataset.train_split, "n_splits"): | |||
| # cross_validation = True | |||
| elif train_split is not None and val_split is not None: | |||
| utils.graph_random_splits(dataset, train_split, val_split, seed=seed) | |||
| if cross_validation: | |||
| assert ( | |||
| val_split > 0 | |||
| ), "You should set val_split > 0 to use cross_validation" | |||
| utils.graph_cross_validation( | |||
| dataset.train_split, cv_split, random_seed=seed | |||
| ) | |||
| else: | |||
| LOGGER.error( | |||
| "Please set both train_split and val_split explicitly. Detect %s is None.", | |||
| @@ -314,91 +346,65 @@ class AutoGraphClassifier(BaseClassifier): | |||
| else dataset.data.gf.size(1), | |||
| ) | |||
| # currently disabled | |||
| """ | |||
| self._init_nas_module( | |||
| num_features=dataset.num_node_features, | |||
| num_classes=dataset.num_classes, | |||
| feval=evaluator_list, | |||
| device=self.runtime_device, | |||
| loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, | |||
| num_graph_features=0 | |||
| if not hasattr(dataset.data, "gf") | |||
| else dataset.data.gf.size(1), | |||
| ) | |||
| # neural architecture search | |||
| if self.nas_algorithms is not None: | |||
| # perform nas and add them to trainer list | |||
| for algo, space, estimator in zip( | |||
| self.nas_algorithms, self.nas_spaces, self.nas_estimators | |||
| ): | |||
| trainer = algo.search(space, self.dataset, estimator) | |||
| self.graph_model_list.append(trainer) | |||
| """ | |||
| # train the models and tune hpo | |||
| result_valid = [] | |||
| names = [] | |||
| if not cross_validation: | |||
| for idx, model in enumerate(self.graph_model_list): | |||
| if time_limit < 0: | |||
| time_for_each_model = None | |||
| else: | |||
| time_for_each_model = (time_limit - time.time() + time_begin) / ( | |||
| len(self.graph_model_list) - idx | |||
| ) | |||
| if self.hpo_module is None: | |||
| model.initialize() | |||
| model.train(dataset, True) | |||
| optimized = model | |||
| else: | |||
| optimized, _ = self.hpo_module.optimize( | |||
| trainer=model, dataset=dataset, time_limit=time_for_each_model | |||
| ) | |||
| # to save memory, all the trainer derived will be mapped to cpu | |||
| optimized.to(torch.device("cpu")) | |||
| name = optimized.get_name_with_hp() | |||
| names.append(name) | |||
| performance_on_valid, _ = optimized.get_valid_score(return_major=False) | |||
| result_valid.append( | |||
| optimized.get_valid_predict_proba().detach().cpu().numpy() | |||
| for idx, model in enumerate(self.graph_model_list): | |||
| if time_limit < 0: | |||
| time_for_each_model = None | |||
| else: | |||
| time_for_each_model = (time_limit - time.time() + time_begin) / ( | |||
| len(self.graph_model_list) - idx | |||
| ) | |||
| self.leaderboard.insert_model_performance( | |||
| name, | |||
| dict( | |||
| zip( | |||
| [e.get_eval_name() for e in evaluator_list], | |||
| performance_on_valid, | |||
| ) | |||
| ), | |||
| if self.hpo_module is None: | |||
| model.initialize() | |||
| model.train(dataset, True) | |||
| optimized = model | |||
| else: | |||
| optimized, _ = self.hpo_module.optimize( | |||
| trainer=model, dataset=dataset, time_limit=time_for_each_model | |||
| ) | |||
| self.trained_models[name] = optimized | |||
| else: | |||
| for i in range(dataset.train_split.n_splits): | |||
| utils.graph_set_fold_id(dataset.train_split, i) | |||
| if time_limit < 0: | |||
| time_for_each_cv = None | |||
| else: | |||
| time_for_each_cv = (time_limit - time.time() + time_begin) / ( | |||
| dataset.train_split.n_splits - i | |||
| ) | |||
| time_cv_begin = time.time() | |||
| for idx, model in enumerate(self.graph_model_list): | |||
| if time_for_each_cv is None: | |||
| time_for_each_model = None | |||
| else: | |||
| time_for_each_model = ( | |||
| time_for_each_cv - time.time() + time_cv_begin | |||
| ) / (len(self.graph_model_list) - idx) | |||
| if self.hpo_module is None: | |||
| model.train(dataset.train_split, False) | |||
| optimized = model | |||
| else: | |||
| optimized, _ = self.hpo_module.optimize( | |||
| trainer=model, | |||
| dataset=dataset.train_split, | |||
| time_limit=time_for_each_model, | |||
| ) | |||
| # to save memory, all the trainer derived will be mapped to cpu | |||
| optimized.to(torch.device("cpu")) | |||
| name = optimized.get_name_with_hp() + "_cv%d_idx%d" % (i, idx) | |||
| names.append(name) | |||
| # evaluate on val_split of input dataset | |||
| performance_on_valid = optimized.evaluate(dataset, mask="val") | |||
| result_valid.append( | |||
| optimized.predict_proba(dataset, mask="val") | |||
| .detach() | |||
| .cpu() | |||
| .numpy() | |||
| ) | |||
| self.leaderboard.insert_model_performance( | |||
| name, | |||
| dict( | |||
| zip( | |||
| [e.get_eval_name() for e in evaluator_list], | |||
| performance_on_valid, | |||
| ) | |||
| ), | |||
| # to save memory, all the trainer derived will be mapped to cpu | |||
| optimized.to(torch.device("cpu")) | |||
| name = str(optimized) | |||
| names.append(name) | |||
| performance_on_valid, _ = optimized.get_valid_score(return_major=False) | |||
| result_valid.append( | |||
| optimized.get_valid_predict_proba().detach().cpu().numpy() | |||
| ) | |||
| self.leaderboard.insert_model_performance( | |||
| name, | |||
| dict( | |||
| zip( | |||
| [e.get_eval_name() for e in evaluator_list], | |||
| performance_on_valid, | |||
| ) | |||
| self.trained_models[name] = optimized | |||
| ), | |||
| ) | |||
| self.trained_models[name] = optimized | |||
| # fit the ensemble model | |||
| if self.ensemble_module is not None: | |||
| @@ -423,8 +429,6 @@ class AutoGraphClassifier(BaseClassifier): | |||
| inplace=False, | |||
| train_split=None, | |||
| val_split=None, | |||
| cross_validation=True, | |||
| cv_split=10, | |||
| evaluation_method="infer", | |||
| seed=None, | |||
| use_ensemble=True, | |||
| @@ -457,13 +461,6 @@ class AutoGraphClassifier(BaseClassifier): | |||
| to use default train/val/test split in dataset, please set this to ``None``. | |||
| Default ``None``. | |||
| cross_validation: bool | |||
| Whether to use cross validation to fit on train dataset. Default ``True``. | |||
| cv_split: int | |||
| The cross validation split number. Only be effective when ``cross_validation=True``. | |||
| Default ``10``. | |||
| evaluation_method: (list of) str or autogl.module.train.evaluation | |||
| A (list of) evaluation method for current solver. If ``infer``, will automatically | |||
| determine. Default ``infer``. | |||
| @@ -495,8 +492,6 @@ class AutoGraphClassifier(BaseClassifier): | |||
| inplace=inplace, | |||
| train_split=train_split, | |||
| val_split=val_split, | |||
| cross_validation=cross_validation, | |||
| cv_split=cv_split, | |||
| evaluation_method=evaluation_method, | |||
| seed=seed, | |||
| ) | |||
| @@ -700,7 +695,7 @@ class AutoGraphClassifier(BaseClassifier): | |||
| ) | |||
| if isinstance(path_or_dict, str): | |||
| if filetype == "auto": | |||
| if path_or_dict.endswith(".yaml"): | |||
| if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): | |||
| filetype = "yaml" | |||
| elif path_or_dict.endswith(".json"): | |||
| filetype = "json" | |||
| @@ -723,49 +718,70 @@ class AutoGraphClassifier(BaseClassifier): | |||
| # load the dictionary | |||
| path_or_dict = deepcopy(path_or_dict) | |||
| solver = cls(None, [], None, None) | |||
| fe_list = path_or_dict.pop("feature", [{"name": "deepgl"}]) | |||
| fe_list_ele = [] | |||
| for feature_engineer in fe_list: | |||
| name = feature_engineer.pop("name") | |||
| if name is not None: | |||
| fe_list_ele.append(FEATURE_DICT[name](**feature_engineer)) | |||
| if fe_list_ele != []: | |||
| solver.set_feature_module(fe_list_ele) | |||
| models = path_or_dict.pop("models", {"gcn": None, "gat": None}) | |||
| model_list = list(models.keys()) | |||
| model_hp_space = [models[m] for m in model_list] | |||
| trainer_space = path_or_dict.pop("trainer", None) | |||
| # parse lambda function | |||
| if model_hp_space: | |||
| for space in model_hp_space: | |||
| if space is not None: | |||
| for keys in space: | |||
| if "cutFunc" in keys and isinstance(keys["cutFunc"], str): | |||
| keys["cutFunc"] = eval(keys["cutFunc"]) | |||
| if trainer_space: | |||
| for space in trainer_space: | |||
| if ( | |||
| isinstance(space, dict) | |||
| and "cutFunc" in space | |||
| and isinstance(space["cutFunc"], str) | |||
| ): | |||
| space["cutFunc"] = eval(space["cutFunc"]) | |||
| elif space is not None: | |||
| for keys in space: | |||
| if "cutFunc" in keys and isinstance(keys["cutFunc"], str): | |||
| keys["cutFunc"] = eval(keys["cutFunc"]) | |||
| solver.set_graph_models(model_list, trainer_space, model_hp_space) | |||
| fe_list = path_or_dict.pop("feature", None) | |||
| if fe_list is not None: | |||
| fe_list_ele = [] | |||
| for feature_engineer in fe_list: | |||
| name = feature_engineer.pop("name") | |||
| if name is not None: | |||
| fe_list_ele.append(FEATURE_DICT[name](**feature_engineer)) | |||
| if fe_list_ele != []: | |||
| solver.set_feature_module(fe_list_ele) | |||
| models = path_or_dict.pop("models", [{"name": "gin"}, {"name": "topkpool"}]) | |||
| model_hp_space = [ | |||
| _parse_hp_space(model.pop("hp_space", None)) for model in models | |||
| ] | |||
| model_list = [ | |||
| _initialize_single_model(model.pop("name"), model) for model in models | |||
| ] | |||
| trainer = path_or_dict.pop("trainer", None) | |||
| default_trainer = "GraphClassificationFull" | |||
| trainer_space = None | |||
| if isinstance(trainer, dict): | |||
| # global default | |||
| default_trainer = trainer.pop("name", "GraphClassificationFull") | |||
| trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) | |||
| default_kwargs = {"num_features": None, "num_classes": None} | |||
| default_kwargs.update(trainer) | |||
| default_kwargs["init"] = False | |||
| for i in range(len(model_list)): | |||
| model = model_list[i] | |||
| trainer_wrapper = TRAINER_DICT[default_trainer]( | |||
| model=model, **default_kwargs | |||
| ) | |||
| model_list[i] = trainer_wrapper | |||
| elif isinstance(trainer, list): | |||
| # sequential trainer definition | |||
| assert len(trainer) == len( | |||
| model_list | |||
| ), "The number of trainer and model does not match" | |||
| trainer_space = [] | |||
| for i in range(len(model_list)): | |||
| train, model = trainer[i], model_list[i] | |||
| default_trainer = train.pop("name", "GraphClassificationFull") | |||
| trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) | |||
| default_kwargs = {"num_features": None, "num_classes": None} | |||
| default_kwargs.update(train) | |||
| default_kwargs["init"] = False | |||
| trainer_wrap = TRAINER_DICT[default_trainer]( | |||
| model=model, **default_kwargs | |||
| ) | |||
| model_list[i] = trainer_wrap | |||
| solver.set_graph_models( | |||
| model_list, default_trainer, trainer_space, model_hp_space | |||
| ) | |||
| hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) | |||
| name = hpo_dict.pop("name") | |||
| solver.set_hpo_module(name, **hpo_dict) | |||
| if hpo_dict is not None: | |||
| name = hpo_dict.pop("name") | |||
| solver.set_hpo_module(name, **hpo_dict) | |||
| ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"}) | |||
| name = ensemble_dict.pop("name") | |||
| solver.set_ensemble_module(name, **ensemble_dict) | |||
| if ensemble_dict is not None: | |||
| name = ensemble_dict.pop("name") | |||
| solver.set_ensemble_module(name, **ensemble_dict) | |||
| return solver | |||
| @@ -0,0 +1,750 @@ | |||
| """ | |||
| Auto Classfier for Node Classification | |||
| """ | |||
| import time | |||
| import json | |||
| from copy import deepcopy | |||
| import torch | |||
| import numpy as np | |||
| import yaml | |||
| from .base import BaseClassifier | |||
| from ..base import _parse_hp_space, _initialize_single_model | |||
| from ...module.feature import FEATURE_DICT | |||
| from ...module.model import MODEL_DICT, BaseModel | |||
| from ...module.train import TRAINER_DICT, BaseLinkPredictionTrainer | |||
| from ...module.train import get_feval | |||
| from ..utils import LeaderBoard, set_seed | |||
| from ...datasets import utils | |||
| from ...utils import get_logger | |||
| LOGGER = get_logger("LinkPredictor") | |||
| class AutoLinkPredictor(BaseClassifier): | |||
| """ | |||
| Auto Link Predictor. | |||
| Used to automatically solve the link prediction problems. | |||
| Parameters | |||
| ---------- | |||
| feature_module: autogl.module.feature.BaseFeatureEngineer or str or None | |||
| The (name of) auto feature engineer used to process the given dataset. Default ``deepgl``. | |||
| Disable feature engineer by setting it to ``None``. | |||
| graph_models: list of autogl.module.model.BaseModel or list of str | |||
| The (name of) models to be optimized as backbone. Default ``['gat', 'gcn']``. | |||
| hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None | |||
| The (name of) hpo module used to search for best hyper parameters. Default ``anneal``. | |||
| Disable hpo by setting it to ``None``. | |||
| ensemble_module: autogl.module.ensemble.BaseEnsembler or str or None | |||
| The (name of) ensemble module used to ensemble the multi-models found. Default ``voting``. | |||
| Disable ensemble by setting it to ``None``. | |||
| max_evals: int (Optional) | |||
| If given, will set the number eval times the hpo module will use. | |||
| Only be effective when hpo_module is ``str``. Default ``None``. | |||
| trainer_hp_space: list of dict (Optional) | |||
| trainer hp space or list of trainer hp spaces configuration. | |||
| If a single trainer hp is given, will specify the hp space of trainer for every model. | |||
| If a list of trainer hp is given, will specify every model with corrsponding | |||
| trainer hp space. | |||
| Default ``None``. | |||
| model_hp_spaces: list of list of dict (Optional) | |||
| model hp space configuration. | |||
| If given, will specify every hp space of every passed model. Default ``None``. | |||
| size: int (Optional) | |||
| The max models ensemble module will use. Default ``None``. | |||
| device: torch.device or str | |||
| The device where model will be running on. If set to ``auto``, will use gpu when available. | |||
| You can also specify the device by directly giving ``gpu`` or ``cuda:0``, etc. | |||
| Default ``auto``. | |||
| """ | |||
| def __init__( | |||
| self, | |||
| feature_module=None, | |||
| graph_models=("gat", "gcn"), | |||
| hpo_module="anneal", | |||
| ensemble_module="voting", | |||
| max_evals=50, | |||
| default_trainer=None, | |||
| trainer_hp_space=None, | |||
| model_hp_spaces=None, | |||
| size=4, | |||
| device="auto", | |||
| ): | |||
| super().__init__( | |||
| feature_module=feature_module, | |||
| graph_models=graph_models, | |||
| nas_algorithms=None, | |||
| nas_spaces=None, | |||
| nas_estimators=None, | |||
| hpo_module=hpo_module, | |||
| ensemble_module=ensemble_module, | |||
| max_evals=max_evals, | |||
| default_trainer=default_trainer or "LinkPredictionFull", | |||
| trainer_hp_space=trainer_hp_space, | |||
| model_hp_spaces=model_hp_spaces, | |||
| size=size, | |||
| device=device, | |||
| ) | |||
| # data to be kept when fit | |||
| self.dataset = None | |||
| def _init_graph_module( | |||
| self, graph_models, num_features, feval, device, loss | |||
| ) -> "AutoLinkPredictor": | |||
| # load graph network module | |||
| self.graph_model_list = [] | |||
| if isinstance(graph_models, (list, tuple)): | |||
| for model in graph_models: | |||
| if isinstance(model, str): | |||
| if model in MODEL_DICT: | |||
| self.graph_model_list.append( | |||
| MODEL_DICT[model]( | |||
| num_classes=1, | |||
| num_features=num_features, | |||
| device=device, | |||
| init=False, | |||
| ) | |||
| ) | |||
| else: | |||
| raise KeyError("cannot find model %s" % (model)) | |||
| elif isinstance(model, type) and issubclass(model, BaseModel): | |||
| self.graph_model_list.append( | |||
| model( | |||
| num_classes=1, | |||
| num_features=num_features, | |||
| device=device, | |||
| init=False, | |||
| ) | |||
| ) | |||
| elif isinstance(model, BaseModel): | |||
| # setup the hp of num_classes and num_features | |||
| model.set_num_classes(1) | |||
| model.set_num_features(num_features) | |||
| self.graph_model_list.append(model.to(device)) | |||
| elif isinstance(model, BaseLinkPredictionTrainer): | |||
| # receive a trainer list, put trainer to list | |||
| assert ( | |||
| model.get_model() is not None | |||
| ), "Passed trainer should contain a model" | |||
| model.model.set_num_classes(1) | |||
| model.model.set_num_features(num_features) | |||
| model.update_parameters( | |||
| num_classes=1, | |||
| num_features=num_features, | |||
| loss=loss, | |||
| feval=feval, | |||
| device=device, | |||
| ) | |||
| self.graph_model_list.append(model) | |||
| else: | |||
| raise KeyError("cannot find graph network %s." % (model)) | |||
| else: | |||
| raise ValueError( | |||
| "need graph network to be (list of) str or a BaseModel class/instance, get", | |||
| graph_models, | |||
| "instead.", | |||
| ) | |||
| # wrap all model_cls with specified trainer | |||
| for i, model in enumerate(self.graph_model_list): | |||
| # set model hp space | |||
| if self._model_hp_spaces is not None: | |||
| if self._model_hp_spaces[i] is not None: | |||
| if isinstance(model, BaseLinkPredictionTrainer): | |||
| model.model.hyper_parameter_space = self._model_hp_spaces[i] | |||
| else: | |||
| model.hyper_parameter_space = self._model_hp_spaces[i] | |||
| # initialize trainer if needed | |||
| if isinstance(model, BaseModel): | |||
| name = ( | |||
| self._default_trainer | |||
| if isinstance(self._default_trainer, str) | |||
| else self._default_trainer[i] | |||
| ) | |||
| model = TRAINER_DICT[name]( | |||
| model=model, | |||
| num_features=num_features, | |||
| loss=loss, | |||
| feval=feval, | |||
| device=device, | |||
| init=False, | |||
| ) | |||
| # set trainer hp space | |||
| if self._trainer_hp_space is not None: | |||
| if isinstance(self._trainer_hp_space[0], list): | |||
| current_hp_for_trainer = self._trainer_hp_space[i] | |||
| else: | |||
| current_hp_for_trainer = self._trainer_hp_space | |||
| model.hyper_parameter_space = current_hp_for_trainer | |||
| self.graph_model_list[i] = model | |||
| return self | |||
| def _to_prob(self, sig_prob: np.ndarray): | |||
| nelements = len(sig_prob) | |||
| prob = np.zeros([nelements, 2]) | |||
| prob[:, 0] = 1 - sig_prob | |||
| prob[:, 1] = sig_prob | |||
| return prob | |||
| # pylint: disable=arguments-differ | |||
| def fit( | |||
| self, | |||
| dataset, | |||
| time_limit=-1, | |||
| inplace=False, | |||
| train_split=None, | |||
| val_split=None, | |||
| evaluation_method="infer", | |||
| seed=None, | |||
| ) -> "AutoLinkPredictor": | |||
| """ | |||
| Fit current solver on given dataset. | |||
| Parameters | |||
| ---------- | |||
| dataset: torch_geometric.data.dataset.Dataset | |||
| The dataset needed to fit on. This dataset must have only one graph. | |||
| time_limit: int | |||
| The time limit of the whole fit process (in seconds). If set below 0, | |||
| will ignore time limit. Default ``-1``. | |||
| inplace: bool | |||
| Whether we process the given dataset in inplace manner. Default ``False``. | |||
| Set it to True if you want to save memory by modifying the given dataset directly. | |||
| train_split: float or int (Optional) | |||
| The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to | |||
| use default train/val/test split in dataset, please set this to ``None``. | |||
| Default ``None``. | |||
| val_split: float or int (Optional) | |||
| The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want | |||
| to use default train/val/test split in dataset, please set this to ``None``. | |||
| Default ``None``. | |||
| evaluation_method: (list of) str or autogl.module.train.evaluation | |||
| A (list of) evaluation method for current solver. If ``infer``, will automatically | |||
| determine. Default ``infer``. | |||
| seed: int (Optional) | |||
| The random seed. If set to ``None``, will run everything at random. | |||
| Default ``None``. | |||
| Returns | |||
| ------- | |||
| self: autogl.solver.AutoNodeClassifier | |||
| A reference of current solver. | |||
| """ | |||
| set_seed(seed) | |||
| if time_limit < 0: | |||
| time_limit = 3600 * 24 | |||
| time_begin = time.time() | |||
| # initialize leaderboard | |||
| if evaluation_method == "infer": | |||
| if hasattr(dataset, "metric"): | |||
| evaluation_method = [dataset.metric] | |||
| else: | |||
| num_of_label = dataset.num_classes | |||
| if num_of_label == 2: | |||
| evaluation_method = ["auc"] | |||
| else: | |||
| evaluation_method = ["acc"] | |||
| assert isinstance(evaluation_method, list) | |||
| evaluator_list = get_feval(evaluation_method) | |||
| self.leaderboard = LeaderBoard( | |||
| [e.get_eval_name() for e in evaluator_list], | |||
| {e.get_eval_name(): e.is_higher_better() for e in evaluator_list}, | |||
| ) | |||
| # set up the dataset | |||
| if train_split is not None and val_split is not None: | |||
| utils.split_edges(dataset, train_split, val_split) | |||
| else: | |||
| assert all( | |||
| [ | |||
| hasattr(dataset.data, f"{name}") | |||
| for name in [ | |||
| "train_pos_edge_index", | |||
| "train_neg_adj_mask", | |||
| "val_pos_edge_index", | |||
| "val_neg_edge_index", | |||
| "test_pos_edge_index", | |||
| "test_neg_edge_index", | |||
| ] | |||
| ] | |||
| ), ( | |||
| "The dataset has no default train/val split! Please manually pass " | |||
| "train and val ratio." | |||
| ) | |||
| LOGGER.info("Use the default train/val/test ratio in given dataset") | |||
| # feature engineering | |||
| if self.feature_module is not None: | |||
| dataset = self.feature_module.fit_transform(dataset, inplace=inplace) | |||
| self.dataset = dataset | |||
| assert self.dataset[0].x is not None, ( | |||
| "Does not support fit on non node-feature dataset!" | |||
| " Please add node features to dataset or specify feature engineers that generate" | |||
| " node features." | |||
| ) | |||
| # initialize graph networks | |||
| self._init_graph_module( | |||
| self.gml, | |||
| num_features=self.dataset[0].x.shape[1], | |||
| feval=evaluator_list, | |||
| device=self.runtime_device, | |||
| loss="binary_cross_entropy_with_logits" | |||
| if not hasattr(dataset, "loss") | |||
| else dataset.loss, | |||
| ) | |||
| # train the models and tune hpo | |||
| result_valid = [] | |||
| names = [] | |||
| for idx, model in enumerate(self.graph_model_list): | |||
| time_for_each_model = (time_limit - time.time() + time_begin) / ( | |||
| len(self.graph_model_list) - idx | |||
| ) | |||
| if self.hpo_module is None: | |||
| model.initialize() | |||
| model.train(self.dataset, True) | |||
| optimized = model | |||
| else: | |||
| optimized, _ = self.hpo_module.optimize( | |||
| trainer=model, dataset=self.dataset, time_limit=time_for_each_model | |||
| ) | |||
| # to save memory, all the trainer derived will be mapped to cpu | |||
| optimized.to(torch.device("cpu")) | |||
| name = optimized.get_name_with_hp() + "_idx%d" % (idx) | |||
| names.append(name) | |||
| performance_on_valid, _ = optimized.get_valid_score(return_major=False) | |||
| result_valid.append( | |||
| self._to_prob(optimized.get_valid_predict_proba().cpu().numpy()) | |||
| ) | |||
| self.leaderboard.insert_model_performance( | |||
| name, | |||
| dict( | |||
| zip( | |||
| [e.get_eval_name() for e in evaluator_list], | |||
| performance_on_valid, | |||
| ) | |||
| ), | |||
| ) | |||
| self.trained_models[name] = optimized | |||
| # fit the ensemble model | |||
| if self.ensemble_module is not None: | |||
| pos_edge_index, neg_edge_index = ( | |||
| self.dataset[0].val_pos_edge_index, | |||
| self.dataset[0].val_neg_edge_index, | |||
| ) | |||
| E = pos_edge_index.size(1) + neg_edge_index.size(1) | |||
| link_labels = torch.zeros(E, dtype=torch.float) | |||
| link_labels[: pos_edge_index.size(1)] = 1.0 | |||
| performance = self.ensemble_module.fit( | |||
| result_valid, | |||
| link_labels.detach().cpu().numpy(), | |||
| names, | |||
| evaluator_list, | |||
| n_classes=dataset.num_classes, | |||
| ) | |||
| self.leaderboard.insert_model_performance( | |||
| "ensemble", | |||
| dict(zip([e.get_eval_name() for e in evaluator_list], performance)), | |||
| ) | |||
| return self | |||
| def fit_predict( | |||
| self, | |||
| dataset, | |||
| time_limit=-1, | |||
| inplace=False, | |||
| train_split=None, | |||
| val_split=None, | |||
| evaluation_method="infer", | |||
| use_ensemble=True, | |||
| use_best=True, | |||
| name=None, | |||
| ) -> np.ndarray: | |||
| """ | |||
| Fit current solver on given dataset and return the predicted value. | |||
| Parameters | |||
| ---------- | |||
| dataset: torch_geometric.data.dataset.Dataset | |||
| The dataset needed to fit on. This dataset must have only one graph. | |||
| time_limit: int | |||
| The time limit of the whole fit process (in seconds). | |||
| If set below 0, will ignore time limit. Default ``-1``. | |||
| inplace: bool | |||
| Whether we process the given dataset in inplace manner. Default ``False``. | |||
| Set it to True if you want to save memory by modifying the given dataset directly. | |||
| train_split: float or int (Optional) | |||
| The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to | |||
| use default train/val/test split in dataset, please set this to ``None``. | |||
| Default ``None``. | |||
| val_split: float or int (Optional) | |||
| The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want | |||
| to use default train/val/test split in dataset, please set this to ``None``. | |||
| Default ``None``. | |||
| balanced: bool | |||
| Wether to create the train/valid/test split in a balanced way. | |||
| If set to ``True``, the train/valid will have the same number of different classes. | |||
| Default ``False``. | |||
| evaluation_method: (list of) str or autogl.module.train.evaluation | |||
| A (list of) evaluation method for current solver. If ``infer``, will automatically | |||
| determine. Default ``infer``. | |||
| use_ensemble: bool | |||
| Whether to use ensemble to do the predict. Default ``True``. | |||
| use_best: bool | |||
| Whether to use the best single model to do the predict. Will only be effective when | |||
| ``use_ensemble`` is ``False``. | |||
| Default ``True``. | |||
| name: str or None | |||
| The name of model used to predict. Will only be effective when ``use_ensemble`` and | |||
| ``use_best`` both are ``False``. | |||
| Default ``None``. | |||
| Returns | |||
| ------- | |||
| result: np.ndarray | |||
| An array of shape ``(N,)``, where ``N`` is the number of test nodes. The prediction | |||
| on given dataset. | |||
| """ | |||
| self.fit( | |||
| dataset=dataset, | |||
| time_limit=time_limit, | |||
| inplace=inplace, | |||
| train_split=train_split, | |||
| val_split=val_split, | |||
| evaluation_method=evaluation_method, | |||
| ) | |||
| return self.predict( | |||
| dataset=dataset, | |||
| inplaced=inplace, | |||
| inplace=inplace, | |||
| use_ensemble=use_ensemble, | |||
| use_best=use_best, | |||
| name=name, | |||
| ) | |||
| def predict_proba( | |||
| self, | |||
| dataset=None, | |||
| inplaced=False, | |||
| inplace=False, | |||
| use_ensemble=True, | |||
| use_best=True, | |||
| name=None, | |||
| mask="test", | |||
| ) -> np.ndarray: | |||
| """ | |||
| Predict the node probability. | |||
| Parameters | |||
| ---------- | |||
| dataset: torch_geometric.data.dataset.Dataset or None | |||
| The dataset needed to predict. If ``None``, will use the processed dataset passed | |||
| to ``fit()`` instead. Default ``None``. | |||
| inplaced: bool | |||
| Whether the given dataset is processed. Only be effective when ``dataset`` | |||
| is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``, and | |||
| you pass the dataset again to this method, you should set this argument to ``True``. | |||
| Otherwise ``False``. Default ``False``. | |||
| inplace: bool | |||
| Whether we process the given dataset in inplace manner. Default ``False``. Set it to | |||
| True if you want to save memory by modifying the given dataset directly. | |||
| use_ensemble: bool | |||
| Whether to use ensemble to do the predict. Default ``True``. | |||
| use_best: bool | |||
| Whether to use the best single model to do the predict. Will only be effective when | |||
| ``use_ensemble`` is ``False``. Default ``True``. | |||
| name: str or None | |||
| The name of model used to predict. Will only be effective when ``use_ensemble`` and | |||
| ``use_best`` both are ``False``. Default ``None``. | |||
| mask: str | |||
| The data split to give prediction on. Default ``test``. | |||
| Returns | |||
| ------- | |||
| result: np.ndarray | |||
| An array of shape ``(N,C,)``, where ``N`` is the number of test nodes and ``C`` is | |||
| the number of classes. The prediction on given dataset. | |||
| """ | |||
| if dataset is None: | |||
| dataset = self.dataset | |||
| assert dataset is not None, ( | |||
| "Please execute fit() first before" " predicting on remembered dataset" | |||
| ) | |||
| elif not inplaced and self.feature_module is not None: | |||
| dataset = self.feature_module.transform(dataset, inplace=inplace) | |||
| if use_ensemble: | |||
| LOGGER.info("Ensemble argument on, will try using ensemble model.") | |||
| if not use_ensemble and use_best: | |||
| LOGGER.info( | |||
| "Ensemble argument off and best argument on, will try using best model." | |||
| ) | |||
| if (use_ensemble and self.ensemble_module is not None) or ( | |||
| not use_best and name == "ensemble" | |||
| ): | |||
| # we need to get all the prediction of every model trained | |||
| predict_result = [] | |||
| names = [] | |||
| for model_name in self.trained_models: | |||
| predict_result.append( | |||
| self._to_prob( | |||
| self._predict_proba_by_name(dataset, model_name, mask) | |||
| ) | |||
| ) | |||
| names.append(model_name) | |||
| return self.ensemble_module.ensemble(predict_result, names)[:, 1] | |||
| if use_ensemble and self.ensemble_module is None: | |||
| LOGGER.warning( | |||
| "Cannot use ensemble because no ensebmle module is given. " | |||
| "Will use best model instead." | |||
| ) | |||
| if use_best or (use_ensemble and self.ensemble_module is None): | |||
| # just return the best model we have found | |||
| name = self.leaderboard.get_best_model() | |||
| return self._predict_proba_by_name(dataset, name, mask) | |||
| if name is not None: | |||
| # return model performance by name | |||
| return self._predict_proba_by_name(dataset, name, mask) | |||
| LOGGER.error( | |||
| "No model name is given while ensemble and best arguments are off." | |||
| ) | |||
| raise ValueError( | |||
| "You need to specify a model name if you do not want use ensemble and best model." | |||
| ) | |||
| def _predict_proba_by_name(self, dataset, name, mask="test"): | |||
| self.trained_models[name].to(self.runtime_device) | |||
| predicted = ( | |||
| self.trained_models[name].predict_proba(dataset, mask=mask).cpu().numpy() | |||
| ) | |||
| self.trained_models[name].to(torch.device("cpu")) | |||
| return predicted | |||
| def predict( | |||
| self, | |||
| dataset=None, | |||
| inplaced=False, | |||
| inplace=False, | |||
| use_ensemble=True, | |||
| use_best=True, | |||
| name=None, | |||
| mask="test", | |||
| threshold=0.5, | |||
| ) -> np.ndarray: | |||
| """ | |||
| Predict the node class number. | |||
| Parameters | |||
| ---------- | |||
| dataset: torch_geometric.data.dataset.Dataset or None | |||
| The dataset needed to predict. If ``None``, will use the processed dataset passed | |||
| to ``fit()`` instead. Default ``None``. | |||
| inplaced: bool | |||
| Whether the given dataset is processed. Only be effective when ``dataset`` | |||
| is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``, | |||
| and you pass the dataset again to this method, you should set this argument | |||
| to ``True``. Otherwise ``False``. Default ``False``. | |||
| inplace: bool | |||
| Whether we process the given dataset in inplace manner. Default ``False``. | |||
| Set it to True if you want to save memory by modifying the given dataset directly. | |||
| use_ensemble: bool | |||
| Whether to use ensemble to do the predict. Default ``True``. | |||
| use_best: bool | |||
| Whether to use the best single model to do the predict. Will only be effective | |||
| when ``use_ensemble`` is ``False``. Default ``True``. | |||
| name: str or None | |||
| The name of model used to predict. Will only be effective when ``use_ensemble`` | |||
| and ``use_best`` both are ``False``. Default ``None``. | |||
| mask: str | |||
| The data split to give prediction on. Default ``test``. | |||
| threshold: float | |||
| The threshold to judge whether the edges are positive or not. | |||
| Returns | |||
| ------- | |||
| result: np.ndarray | |||
| An array of shape ``(N,)``, where ``N`` is the number of test nodes. | |||
| The prediction on given dataset. | |||
| """ | |||
| proba = self.predict_proba( | |||
| dataset, inplaced, inplace, use_ensemble, use_best, name, mask | |||
| ) | |||
| return (proba > threshold).astype("int") | |||
| @classmethod | |||
| def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor": | |||
| """ | |||
| Load solver from config file. | |||
| You can use this function to directly load a solver from predefined config dict | |||
| or config file path. Currently, only support file type of ``json`` or ``yaml``, | |||
| if you pass a path. | |||
| Parameters | |||
| ---------- | |||
| path_or_dict: str or dict | |||
| The path to the config file or the config dictionary object | |||
| filetype: str | |||
| The filetype the given file if the path is specified. Currently only support | |||
| ``json`` or ``yaml``. You can set to ``auto`` to automatically detect the file | |||
| type (from file name). Default ``auto``. | |||
| Returns | |||
| ------- | |||
| solver: autogl.solver.AutoGraphClassifier | |||
| The solver that is created from given file or dictionary. | |||
| """ | |||
| assert filetype in ["auto", "yaml", "json"], ( | |||
| "currently only support yaml file or json file type, but get type " | |||
| + filetype | |||
| ) | |||
| if isinstance(path_or_dict, str): | |||
| if filetype == "auto": | |||
| if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): | |||
| filetype = "yaml" | |||
| elif path_or_dict.endswith(".json"): | |||
| filetype = "json" | |||
| else: | |||
| LOGGER.error( | |||
| "cannot parse the type of the given file name, " | |||
| "please manually set the file type" | |||
| ) | |||
| raise ValueError( | |||
| "cannot parse the type of the given file name, " | |||
| "please manually set the file type" | |||
| ) | |||
| if filetype == "yaml": | |||
| path_or_dict = yaml.load( | |||
| open(path_or_dict, "r").read(), Loader=yaml.FullLoader | |||
| ) | |||
| else: | |||
| path_or_dict = json.load(open(path_or_dict, "r")) | |||
| path_or_dict = deepcopy(path_or_dict) | |||
| solver = cls(None, [], None, None) | |||
| fe_list = path_or_dict.pop("feature", None) | |||
| if fe_list is not None: | |||
| fe_list_ele = [] | |||
| for feature_engineer in fe_list: | |||
| name = feature_engineer.pop("name") | |||
| if name is not None: | |||
| fe_list_ele.append(FEATURE_DICT[name](**feature_engineer)) | |||
| if fe_list_ele != []: | |||
| solver.set_feature_module(fe_list_ele) | |||
| models = path_or_dict.pop("models", [{"name": "gcn"}, {"name": "gat"}]) | |||
| model_hp_space = [ | |||
| _parse_hp_space(model.pop("hp_space", None)) for model in models | |||
| ] | |||
| model_list = [ | |||
| _initialize_single_model(model.pop("name"), model) for model in models | |||
| ] | |||
| trainer = path_or_dict.pop("trainer", None) | |||
| default_trainer = "LinkPredictionFull" | |||
| trainer_space = None | |||
| if isinstance(trainer, dict): | |||
| # global default | |||
| default_trainer = trainer.pop("name", "LinkPredictionFull") | |||
| trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) | |||
| default_kwargs = {"num_features": None} | |||
| default_kwargs.update(trainer) | |||
| default_kwargs["init"] = False | |||
| for i in range(len(model_list)): | |||
| model = model_list[i] | |||
| trainer_wrap = TRAINER_DICT[default_trainer]( | |||
| model=model, **default_kwargs | |||
| ) | |||
| model_list[i] = trainer_wrap | |||
| elif isinstance(trainer, list): | |||
| # sequential trainer definition | |||
| assert len(trainer) == len( | |||
| model_list | |||
| ), "The number of trainer and model does not match" | |||
| trainer_space = [] | |||
| for i in range(len(model_list)): | |||
| train, model = trainer[i], model_list[i] | |||
| default_trainer = train.pop("name", "LinkPredictionFull") | |||
| trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) | |||
| default_kwargs = {"num_features": None} | |||
| default_kwargs.update(train) | |||
| default_kwargs["init"] = False | |||
| trainer_wrap = TRAINER_DICT[default_trainer]( | |||
| model=model, **default_kwargs | |||
| ) | |||
| model_list[i] = trainer_wrap | |||
| solver.set_graph_models( | |||
| model_list, default_trainer, trainer_space, model_hp_space | |||
| ) | |||
| hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) | |||
| if hpo_dict is not None: | |||
| name = hpo_dict.pop("name") | |||
| solver.set_hpo_module(name, **hpo_dict) | |||
| ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"}) | |||
| if ensemble_dict is not None: | |||
| name = ensemble_dict.pop("name") | |||
| solver.set_ensemble_module(name, **ensemble_dict) | |||
| return solver | |||
| @@ -7,18 +7,25 @@ import json | |||
| from copy import deepcopy | |||
| import torch | |||
| import torch.nn.functional as F | |||
| import numpy as np | |||
| import yaml | |||
| from .base import BaseClassifier | |||
| from ..base import _parse_hp_space, _initialize_single_model | |||
| from ...module.feature import FEATURE_DICT | |||
| from ...module.model import MODEL_DICT | |||
| from ...module.train import TRAINER_DICT, get_feval | |||
| from ...module import BaseModel | |||
| from ..utils import Leaderboard, set_seed | |||
| from ...module.model import MODEL_DICT, BaseModel | |||
| from ...module.train import TRAINER_DICT, BaseNodeClassificationTrainer | |||
| from ...module.train import get_feval | |||
| from ...module.nas.space import NAS_SPACE_DICT | |||
| from ...module.nas.algorithm import NAS_ALGO_DICT | |||
| from ...module.nas.estimator import NAS_ESTIMATOR_DICT, BaseEstimator | |||
| from ..utils import LeaderBoard, set_seed | |||
| from ...datasets import utils | |||
| from ...utils import get_logger | |||
| from torch_geometric.nn import GATConv, GCNConv | |||
| LOGGER = get_logger("NodeClassifier") | |||
| @@ -37,6 +44,15 @@ class AutoNodeClassifier(BaseClassifier): | |||
| graph_models: list of autogl.module.model.BaseModel or list of str | |||
| The (name of) models to be optimized as backbone. Default ``['gat', 'gcn']``. | |||
| nas_algorithms: (list of) autogl.module.nas.algorithm.BaseNAS or str (Optional) | |||
| The (name of) nas algorithms used. Default ``None``. | |||
| nas_spaces: (list of) autogl.module.nas.space.BaseSpace or str (Optional) | |||
| The (name of) nas spaces used. Default ``None``. | |||
| nas_estimators: (list of) autogl.module.nas.estimator.BaseEstimator or str (Optional) | |||
| The (name of) nas estimators used. Default ``None``. | |||
| hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None | |||
| The (name of) hpo module used to search for best hyper parameters. Default ``anneal``. | |||
| Disable hpo by setting it to ``None``. | |||
| @@ -69,15 +85,17 @@ class AutoNodeClassifier(BaseClassifier): | |||
| Default ``auto``. | |||
| """ | |||
| # pylint: disable=W0102 | |||
| def __init__( | |||
| self, | |||
| feature_module="deepgl", | |||
| graph_models=["gat", "gcn"], | |||
| feature_module=None, | |||
| graph_models=("gat", "gcn"), | |||
| nas_algorithms=None, | |||
| nas_spaces=None, | |||
| nas_estimators=None, | |||
| hpo_module="anneal", | |||
| ensemble_module="voting", | |||
| max_evals=50, | |||
| default_trainer=None, | |||
| trainer_hp_space=None, | |||
| model_hp_spaces=None, | |||
| size=4, | |||
| @@ -87,9 +105,13 @@ class AutoNodeClassifier(BaseClassifier): | |||
| super().__init__( | |||
| feature_module=feature_module, | |||
| graph_models=graph_models, | |||
| nas_algorithms=nas_algorithms, | |||
| nas_spaces=nas_spaces, | |||
| nas_estimators=nas_estimators, | |||
| hpo_module=hpo_module, | |||
| ensemble_module=ensemble_module, | |||
| max_evals=max_evals, | |||
| default_trainer=default_trainer or "NodeClassificationFull", | |||
| trainer_hp_space=trainer_hp_space, | |||
| model_hp_spaces=model_hp_spaces, | |||
| size=size, | |||
| @@ -97,19 +119,14 @@ class AutoNodeClassifier(BaseClassifier): | |||
| ) | |||
| # data to be kept when fit | |||
| self.data = None | |||
| self.dataset = None | |||
| def _init_graph_module( | |||
| self, | |||
| graph_models, | |||
| num_classes, | |||
| num_features, | |||
| *args, | |||
| **kwargs, | |||
| self, graph_models, num_classes, num_features, feval, device, loss | |||
| ) -> "AutoNodeClassifier": | |||
| # load graph network module | |||
| self.graph_model_list = [] | |||
| if isinstance(graph_models, list): | |||
| if isinstance(graph_models, (list, tuple)): | |||
| for model in graph_models: | |||
| if isinstance(model, str): | |||
| if model in MODEL_DICT: | |||
| @@ -117,8 +134,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| MODEL_DICT[model]( | |||
| num_classes=num_classes, | |||
| num_features=num_features, | |||
| *args, | |||
| **kwargs, | |||
| device=device, | |||
| init=False, | |||
| ) | |||
| ) | |||
| @@ -129,8 +145,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| model( | |||
| num_classes=num_classes, | |||
| num_features=num_features, | |||
| *args, | |||
| **kwargs, | |||
| device=device, | |||
| init=False, | |||
| ) | |||
| ) | |||
| @@ -138,6 +153,21 @@ class AutoNodeClassifier(BaseClassifier): | |||
| # setup the hp of num_classes and num_features | |||
| model.set_num_classes(num_classes) | |||
| model.set_num_features(num_features) | |||
| self.graph_model_list.append(model.to(device)) | |||
| elif isinstance(model, BaseNodeClassificationTrainer): | |||
| # receive a trainer list, put trainer to list | |||
| assert ( | |||
| model.get_model() is not None | |||
| ), "Passed trainer should contain a model" | |||
| model.model.set_num_classes(num_classes) | |||
| model.model.set_num_features(num_features) | |||
| model.update_parameters( | |||
| num_classes=num_classes, | |||
| num_features=num_features, | |||
| loss=loss, | |||
| feval=feval, | |||
| device=device, | |||
| ) | |||
| self.graph_model_list.append(model) | |||
| else: | |||
| raise KeyError("cannot find graph network %s." % (model)) | |||
| @@ -150,29 +180,50 @@ class AutoNodeClassifier(BaseClassifier): | |||
| # wrap all model_cls with specified trainer | |||
| for i, model in enumerate(self.graph_model_list): | |||
| # set model hp space | |||
| if self._model_hp_spaces is not None: | |||
| if self._model_hp_spaces[i] is not None: | |||
| model.hyper_parameter_space = self._model_hp_spaces[i] | |||
| trainer = TRAINER_DICT["NodeClassification"]( | |||
| model=model, | |||
| num_features=num_features, | |||
| num_classes=num_classes, | |||
| *args, | |||
| **kwargs, | |||
| init=False, | |||
| ) | |||
| if isinstance(model, BaseNodeClassificationTrainer): | |||
| model.model.hyper_parameter_space = self._model_hp_spaces[i] | |||
| else: | |||
| model.hyper_parameter_space = self._model_hp_spaces[i] | |||
| # initialize trainer if needed | |||
| if isinstance(model, BaseModel): | |||
| name = ( | |||
| self._default_trainer | |||
| if isinstance(self._default_trainer, str) | |||
| else self._default_trainer[i] | |||
| ) | |||
| model = TRAINER_DICT[name]( | |||
| model=model, | |||
| num_features=num_features, | |||
| num_classes=num_classes, | |||
| loss=loss, | |||
| feval=feval, | |||
| device=device, | |||
| init=False, | |||
| ) | |||
| # set trainer hp space | |||
| if self._trainer_hp_space is not None: | |||
| if isinstance(self._trainer_hp_space[0], list): | |||
| current_hp_for_trainer = self._trainer_hp_space[i] | |||
| else: | |||
| current_hp_for_trainer = self._trainer_hp_space | |||
| trainer.hyper_parameter_space = ( | |||
| current_hp_for_trainer + model.hyper_parameter_space | |||
| ) | |||
| self.graph_model_list[i] = trainer | |||
| model.hyper_parameter_space = current_hp_for_trainer | |||
| self.graph_model_list[i] = model | |||
| return self | |||
| def _init_nas_module(self, num_features, num_classes, feval, device, loss): | |||
| for algo, space, estimator in zip( | |||
| self.nas_algorithms, self.nas_spaces, self.nas_estimators | |||
| ): | |||
| estimator: BaseEstimator | |||
| algo.to(device) | |||
| space.instantiate(input_dim=num_features, output_dim=num_classes) | |||
| estimator.setEvaluation(feval) | |||
| estimator.setLossFunction(loss) | |||
| # pylint: disable=arguments-differ | |||
| def fit( | |||
| self, | |||
| @@ -248,7 +299,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| assert isinstance(evaluation_method, list) | |||
| evaluator_list = get_feval(evaluation_method) | |||
| self.leaderboard = Leaderboard( | |||
| self.leaderboard = LeaderBoard( | |||
| [e.get_eval_name() for e in evaluator_list], | |||
| {e.get_eval_name(): e.is_higher_better() for e in evaluator_list}, | |||
| ) | |||
| @@ -300,9 +351,63 @@ class AutoNodeClassifier(BaseClassifier): | |||
| num_classes=dataset.num_classes, | |||
| feval=evaluator_list, | |||
| device=self.runtime_device, | |||
| loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, | |||
| loss="nll_loss" if not hasattr(dataset, "loss") else dataset.loss, | |||
| ) | |||
| if self.nas_algorithms is not None: | |||
| # perform neural architecture search | |||
| self._init_nas_module( | |||
| num_features=self.dataset[0].x.shape[1], | |||
| num_classes=self.dataset.num_classes, | |||
| feval=evaluator_list, | |||
| device=self.runtime_device, | |||
| loss="nll_loss" if not hasattr(dataset, "loss") else dataset.loss, | |||
| ) | |||
| assert not isinstance(self._default_trainer, list) or len( | |||
| self.nas_algorithms | |||
| ) == len(self._default_trainer) - len( | |||
| self.graph_model_list | |||
| ), "length of default trainer should match total graph models and nas models passed" | |||
| # perform nas and add them to model list | |||
| idx_trainer = len(self.graph_model_list) | |||
| for algo, space, estimator in zip( | |||
| self.nas_algorithms, self.nas_spaces, self.nas_estimators | |||
| ): | |||
| model = algo.search(space, self.dataset, estimator) | |||
| # insert model into default trainer | |||
| if isinstance(self._default_trainer, list): | |||
| train_name = self._default_trainer[idx_trainer] | |||
| idx_trainer += 1 | |||
| else: | |||
| train_name = self._default_trainer | |||
| if isinstance(train_name, str): | |||
| trainer = TRAINER_DICT[train_name]( | |||
| model=model, | |||
| num_features=self.dataset[0].x.shape[1], | |||
| num_classes=self.dataset.num_classes, | |||
| loss="nll_loss" | |||
| if not hasattr(dataset, "loss") | |||
| else dataset.loss, | |||
| feval=evaluator_list, | |||
| device=self.runtime_device, | |||
| init=False, | |||
| ) | |||
| else: | |||
| trainer = train_name | |||
| trainer.model = model | |||
| trainer.update_parameters( | |||
| num_classes=self.dataset.num_classes, | |||
| num_features=self.dataset[0].x.shape[1], | |||
| loss="nll_loss" | |||
| if not hasattr(dataset, "loss") | |||
| else dataset.loss, | |||
| feval=evaluator_list, | |||
| device=self.runtime_device, | |||
| ) | |||
| self.graph_model_list.append(trainer) | |||
| # train the models and tune hpo | |||
| result_valid = [] | |||
| names = [] | |||
| @@ -312,7 +417,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| ) | |||
| if self.hpo_module is None: | |||
| model.initialize() | |||
| model.train(self.data, True) | |||
| model.train(self.dataset, True) | |||
| optimized = model | |||
| else: | |||
| optimized, _ = self.hpo_module.optimize( | |||
| @@ -320,7 +425,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| ) | |||
| # to save memory, all the trainer derived will be mapped to cpu | |||
| optimized.to(torch.device("cpu")) | |||
| name = optimized.get_name_with_hp() + "_idx%d" % (idx) | |||
| name = str(optimized) + "_idx%d" % (idx) | |||
| names.append(name) | |||
| performance_on_valid, _ = optimized.get_valid_score(return_major=False) | |||
| result_valid.append(optimized.get_valid_predict_proba().cpu().numpy()) | |||
| @@ -517,7 +622,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| if use_ensemble and self.ensemble_module is None: | |||
| LOGGER.warning( | |||
| "Cannot use ensemble because no ensebmle module is given." | |||
| "Will use best model instead." | |||
| " Will use best model instead." | |||
| ) | |||
| if use_best or (use_ensemble and self.ensemble_module is None): | |||
| @@ -628,7 +733,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| ) | |||
| if isinstance(path_or_dict, str): | |||
| if filetype == "auto": | |||
| if path_or_dict.endswith(".yaml"): | |||
| if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): | |||
| filetype = "yaml" | |||
| elif path_or_dict.endswith(".json"): | |||
| filetype = "json" | |||
| @@ -650,7 +755,7 @@ class AutoNodeClassifier(BaseClassifier): | |||
| path_or_dict = deepcopy(path_or_dict) | |||
| solver = cls(None, [], None, None) | |||
| fe_list = path_or_dict.pop("feature", [{"name": "deepgl"}]) | |||
| fe_list = path_or_dict.pop("feature", None) | |||
| if fe_list is not None: | |||
| fe_list_ele = [] | |||
| for feature_engineer in fe_list: | |||
| @@ -660,40 +765,84 @@ class AutoNodeClassifier(BaseClassifier): | |||
| if fe_list_ele != []: | |||
| solver.set_feature_module(fe_list_ele) | |||
| models = path_or_dict.pop("models", {"gcn": None, "gat": None}) | |||
| model_list = list(models.keys()) | |||
| model_hp_space = [models[m] for m in model_list] | |||
| trainer_space = path_or_dict.pop("trainer", None) | |||
| if model_hp_space: | |||
| # parse lambda function | |||
| for space in model_hp_space: | |||
| if space is not None: | |||
| for keys in space: | |||
| if "cutFunc" in keys and isinstance(keys["cutFunc"], str): | |||
| keys["cutFunc"] = eval(keys["cutFunc"]) | |||
| if trainer_space: | |||
| for space in trainer_space: | |||
| if ( | |||
| isinstance(space, dict) | |||
| and "cutFunc" in space | |||
| and isinstance(space["cutFunc"], str) | |||
| ): | |||
| space["cutFunc"] = eval(space["cutFunc"]) | |||
| elif space is not None: | |||
| for keys in space: | |||
| if "cutFunc" in keys and isinstance(keys["cutFunc"], str): | |||
| keys["cutFunc"] = eval(keys["cutFunc"]) | |||
| solver.set_graph_models(model_list, trainer_space, model_hp_space) | |||
| models = path_or_dict.pop("models", [{"name": "gcn"}, {"name": "gat"}]) | |||
| model_hp_space = [ | |||
| _parse_hp_space(model.pop("hp_space", None)) for model in models | |||
| ] | |||
| model_list = [ | |||
| _initialize_single_model(model.pop("name"), model) for model in models | |||
| ] | |||
| trainer = path_or_dict.pop("trainer", None) | |||
| default_trainer = "NodeClassificationFull" | |||
| trainer_space = None | |||
| if isinstance(trainer, dict): | |||
| # global default | |||
| default_trainer = trainer.pop("name", "NodeClassificationFull") | |||
| trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) | |||
| default_kwargs = {"num_features": None, "num_classes": None} | |||
| default_kwargs.update(trainer) | |||
| default_kwargs["init"] = False | |||
| for i in range(len(model_list)): | |||
| model = model_list[i] | |||
| trainer_wrap = TRAINER_DICT[default_trainer]( | |||
| model=model, **default_kwargs | |||
| ) | |||
| model_list[i] = trainer_wrap | |||
| elif isinstance(trainer, list): | |||
| # sequential trainer definition | |||
| assert len(trainer) == len( | |||
| model_list | |||
| ), "The number of trainer and model does not match" | |||
| trainer_space = [] | |||
| for i in range(len(model_list)): | |||
| train, model = trainer[i], model_list[i] | |||
| default_trainer = train.pop("name", "NodeClassificationFull") | |||
| trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) | |||
| default_kwargs = {"num_features": None, "num_classes": None} | |||
| default_kwargs.update(train) | |||
| default_kwargs["init"] = False | |||
| trainer_wrap = TRAINER_DICT[default_trainer]( | |||
| model=model, **default_kwargs | |||
| ) | |||
| model_list[i] = trainer_wrap | |||
| solver.set_graph_models( | |||
| model_list, default_trainer, trainer_space, model_hp_space | |||
| ) | |||
| hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) | |||
| name = hpo_dict.pop("name") | |||
| solver.set_hpo_module(name, **hpo_dict) | |||
| if hpo_dict is not None: | |||
| name = hpo_dict.pop("name") | |||
| solver.set_hpo_module(name, **hpo_dict) | |||
| ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"}) | |||
| name = ensemble_dict.pop("name") | |||
| solver.set_ensemble_module(name, **ensemble_dict) | |||
| if ensemble_dict is not None: | |||
| name = ensemble_dict.pop("name") | |||
| solver.set_ensemble_module(name, **ensemble_dict) | |||
| nas_dict = path_or_dict.pop("nas", None) | |||
| if nas_dict is not None: | |||
| keys: set = set(nas_dict.keys()) | |||
| needed = {"space", "algorithm", "estimator"} | |||
| if keys != needed: | |||
| LOGGER.error("Key mismatch, we need %s, you give %s", needed, keys) | |||
| raise KeyError("Key mismatch, we need %s, you give %s" % (needed, keys)) | |||
| spaces, algorithms, estimators = [], [], [] | |||
| for container, indexer, k in zip( | |||
| [spaces, algorithms, estimators], | |||
| [NAS_SPACE_DICT, NAS_ALGO_DICT, NAS_ESTIMATOR_DICT], | |||
| ["space", "algorithm", "estimator"], | |||
| ): | |||
| configs = nas_dict[k] | |||
| if isinstance(configs, list): | |||
| for item in configs: | |||
| container.append(indexer[item.pop("name")](**item)) | |||
| else: | |||
| container.append(indexer[configs.pop("name")](**configs)) | |||
| solver.set_nas_module(algorithms, spaces, estimators) | |||
| return solver | |||
| @@ -1,23 +1,23 @@ | |||
| """ | |||
| Util tools used by solver | |||
| Utilities used by the solver | |||
| * leaderboard: The leaderboard that maintains the performance of models. | |||
| * LeaderBoard: The LeaderBoard that maintains the performance of models. | |||
| """ | |||
| import random | |||
| import torch | |||
| import typing as _typing | |||
| import torch.backends.cudnn | |||
| import numpy as np | |||
| import pandas as pd | |||
| from ..utils import get_logger | |||
| LOGGER = get_logger("leaderboard") | |||
| LOGGER = get_logger("LeaderBoard") | |||
| class Leaderboard: | |||
| class LeaderBoard: | |||
| """ | |||
| The leaderboard that can be used to store / sort the model performance automatically. | |||
| The leaderBoard that can be used to store / sort the model performance automatically. | |||
| Parameters | |||
| ---------- | |||
| @@ -25,8 +25,8 @@ class Leaderboard: | |||
| A list of field name that shows the model performance. The first field is used as | |||
| the major field for sorting the model performances. | |||
| is_higher_better: list of `bool` | |||
| A list of indicator that whether the field score is higher better. | |||
| is_higher_better: `dict` of *field* -> `bool` | |||
| A mapping of indicator that whether each field is higher better. | |||
| """ | |||
| def __init__(self, fields, is_higher_better): | |||
| @@ -38,7 +38,7 @@ class Leaderboard: | |||
| def set_major_field(self, field) -> None: | |||
| """ | |||
| Set the major field of current leaderboard. | |||
| Set the major field of current LeaderBoard. | |||
| Parameters | |||
| ---------- | |||
| @@ -53,7 +53,7 @@ class Leaderboard: | |||
| self.major_field = field | |||
| else: | |||
| LOGGER.warning( | |||
| "do not find major field %s in current leaderboard, will ignore.", field | |||
| f"Field [{field}] NOT found in the current LeaderBoard, will ignore." | |||
| ) | |||
| def insert_model_performance(self, name, performance) -> None: | |||
| @@ -130,26 +130,49 @@ class Leaderboard: | |||
| name_list.remove("ensemble") | |||
| return name_list[index] | |||
| def show(self, top_k=-1) -> None: | |||
| def show(self, top_k=0) -> None: | |||
| """ | |||
| Show current leaderboard (from good model to bad). | |||
| Show current LeaderBoard (from best model to worst). | |||
| Parameters | |||
| ---------- | |||
| top_k: `int` | |||
| Controls the number model shown. If below `0`, will show all the models. Default `-1`. | |||
| Controls the number model shown. | |||
| If less than or equal to `0`, will show all the models. Default to `0`. | |||
| Returns | |||
| ------- | |||
| None | |||
| """ | |||
| if top_k == -1: | |||
| top_k = len(self.perform_dict["name"]) | |||
| top_k: int = top_k if top_k > 0 else len(self.perform_dict) | |||
| """ | |||
| reindex self.__performance_data_frame | |||
| to ensure the columns of name and representation are in left-side of the data frame | |||
| """ | |||
| _columns = self.perform_dict.columns.tolist() | |||
| maxcolwidths: _typing.List[_typing.Optional[int]] = [] | |||
| if "name" in _columns: | |||
| _columns.remove("name") | |||
| _columns.insert(0, "name") | |||
| maxcolwidths.append(40) | |||
| self.perform_dict = self.perform_dict[_columns] | |||
| sorted_performance_df: pd.DataFrame = self.perform_dict.sort_values( | |||
| self.major_field, ascending=not self.is_higher_better[self.major_field] | |||
| ) | |||
| sorted_performance_df = sorted_performance_df.head(top_k) | |||
| from tabulate import tabulate | |||
| _columns = sorted_performance_df.columns.tolist() | |||
| maxcolwidths.extend([None for _ in range(len(_columns) - len(maxcolwidths))]) | |||
| print( | |||
| self.perform_dict.sort_values( | |||
| by=self.major_field, | |||
| ascending=not self.is_higher_better[self.major_field], | |||
| ).head(top_k) | |||
| tabulate( | |||
| list(zip(*[sorted_performance_df[column] for column in _columns])), | |||
| headers=_columns, | |||
| tablefmt="grid", | |||
| ) | |||
| ) | |||
| @@ -3,5 +3,6 @@ Some utils used by AutoGL | |||
| """ | |||
| from .log import get_logger | |||
| from .device import get_device | |||
| __all__ = ["get_logger"] | |||
| __all__ = ["get_logger", "get_device"] | |||
| @@ -0,0 +1,27 @@ | |||
| import torch | |||
| from typing import Union | |||
| def get_device(device: Union[str, torch.device]): | |||
| """ | |||
| Get device of passed argument. Will return a torch.device based on passed arguments. | |||
| Can parse auto, cpu, gpu, cpu:x, gpu:x, etc. If auto is given, will automatically find | |||
| available devices. | |||
| Parameters | |||
| ---------- | |||
| device: ``str`` or ``torch.device`` | |||
| The device to parse. If ``auto`` if given, will determine automatically. | |||
| Returns | |||
| ------- | |||
| device: ``torch.device`` | |||
| The parsed device. | |||
| """ | |||
| assert isinstance( | |||
| device, (str, torch.device) | |||
| ), "Only support device of str or torch.device, get {} instead".format(device) | |||
| if device == "auto": | |||
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||
| return torch.device(device) | |||
| @@ -1,66 +0,0 @@ | |||
| feature: | |||
| - name: NxLargeCliqueSize | |||
| - name: NxLargeCliqueSize | |||
| models: | |||
| topkpool: | |||
| - parameterName: ratio | |||
| type: DOUBLE | |||
| maxValue: 0.9 | |||
| minValue: 0.1 | |||
| scalingType: LINEAR | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| maxValue: 0.9 | |||
| minValue: 0.1 | |||
| scalingType: LINEAR | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| trainer: | |||
| - parameterName: max_epoch | |||
| type: INTEGER | |||
| maxValue: 300 | |||
| minValue: 10 | |||
| scalingType: LINEAR | |||
| - parameterName: early_stopping_round | |||
| type: INTEGER | |||
| maxValue: 30 | |||
| minValue: 10 | |||
| scalingType: LINEAR | |||
| - parameterName: lr | |||
| type: DOUBLE | |||
| maxValue: 0.1 | |||
| minValue: 0.0001 | |||
| scalingType: LOG | |||
| - parameterName: weight_decay | |||
| type: DOUBLE | |||
| maxValue: 0.005 | |||
| minValue: 0.00005 | |||
| scalingType: LOG | |||
| - parameterName: batch_size | |||
| type: INTEGER | |||
| maxValue: 128 | |||
| minValue: 48 | |||
| scalingType: LINEAR | |||
| hpo: | |||
| name: anneal | |||
| max_evals: 10 | |||
| ensemble: | |||
| name: voting | |||
| size: 2 | |||
| @@ -0,0 +1,53 @@ | |||
| ensemble: | |||
| name: voting | |||
| size: 2 | |||
| hpo: | |||
| max_evals: 10 | |||
| name: anneal | |||
| models: | |||
| - hp_space: | |||
| - maxValue: 0.9 | |||
| minValue: 0.1 | |||
| parameterName: ratio | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - maxValue: 0.9 | |||
| minValue: 0.1 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: topkpool | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 300 | |||
| minValue: 10 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 30 | |||
| minValue: 10 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.1 | |||
| minValue: 0.0001 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 0.005 | |||
| minValue: 5.0e-05 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 128 | |||
| minValue: 48 | |||
| parameterName: batch_size | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| @@ -0,0 +1,70 @@ | |||
| hpo: | |||
| max_evals: 10 | |||
| name: tpe | |||
| models: | |||
| - hp_space: | |||
| - parameterName: num_layers | |||
| type: DISCRETE | |||
| feasiblePoints: '3,4,5' | |||
| - parameterName: hidden | |||
| type: NUMERICAL_LIST | |||
| numericalType: INTEGER | |||
| length: 5 | |||
| minValue: [8, 8, 8, 8, 8] | |||
| maxValue: [64, 64, 64, 64, 64] | |||
| scalingType: LOG | |||
| cutPara: ["num_layers"] | |||
| cutFunc: "lambda x: x[0] - 1" | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| maxValue: 0.9 | |||
| minValue: 0.1 | |||
| scalingType: LINEAR | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| - parameterName: eps | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - True | |||
| - False | |||
| - parameterName: mlp_layers | |||
| type: DISCRETE | |||
| feasiblePoints: '2,3,4' | |||
| name: gin | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 300 | |||
| minValue: 10 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 30 | |||
| minValue: 10 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.1 | |||
| minValue: 0.0001 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 0.005 | |||
| minValue: 5.0e-05 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 128 | |||
| minValue: 48 | |||
| parameterName: batch_size | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| @@ -0,0 +1,50 @@ | |||
| hpo: | |||
| max_evals: 10 | |||
| name: tpe | |||
| models: | |||
| - hp_space: | |||
| - maxValue: 0.9 | |||
| minValue: 0.1 | |||
| parameterName: ratio | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - maxValue: 0.9 | |||
| minValue: 0.1 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: topkpool | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 300 | |||
| minValue: 10 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 30 | |||
| minValue: 10 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.1 | |||
| minValue: 0.0001 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 0.005 | |||
| minValue: 5.0e-05 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 128 | |||
| minValue: 48 | |||
| parameterName: batch_size | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| @@ -0,0 +1,92 @@ | |||
| ensemble: | |||
| name: voting | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| - hp_space: | |||
| - feasiblePoints: 2,3 | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 2 | |||
| maxValue: | |||
| - 256 | |||
| - 256 | |||
| minValue: | |||
| - 64 | |||
| - 64 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.2 | |||
| minValue: 0.0 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gcn | |||
| - name: gat | |||
| hp_space: | |||
| - feasiblePoints: 2,3 | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 2 | |||
| maxValue: | |||
| - 256 | |||
| - 256 | |||
| minValue: | |||
| - 64 | |||
| - 64 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.2 | |||
| minValue: 0.0 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 150 | |||
| minValue: 50 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 40 | |||
| minValue: 25 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.005 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 1.0E-7 | |||
| minValue: 1.0E-10 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| @@ -0,0 +1,61 @@ | |||
| ensemble: | |||
| name: null | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| - name: gat | |||
| hp_space: | |||
| - feasiblePoints: 2,3 | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 2 | |||
| maxValue: | |||
| - 256 | |||
| - 256 | |||
| minValue: | |||
| - 64 | |||
| - 64 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.2 | |||
| minValue: 0.0 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 150 | |||
| minValue: 50 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 40 | |||
| minValue: 25 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.005 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 1.0E-7 | |||
| minValue: 1.0E-10 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| @@ -0,0 +1,61 @@ | |||
| ensemble: | |||
| name: null | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| - hp_space: | |||
| - feasiblePoints: 2,3 | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 2 | |||
| maxValue: | |||
| - 256 | |||
| - 256 | |||
| minValue: | |||
| - 64 | |||
| - 64 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.2 | |||
| minValue: 0.0 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gcn | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 150 | |||
| minValue: 50 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 40 | |||
| minValue: 25 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.005 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 1.0E-7 | |||
| minValue: 1.0E-10 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| @@ -0,0 +1,69 @@ | |||
| ensemble: | |||
| name: null | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| - name: sage | |||
| hp_space: | |||
| - parameterName: num_layers | |||
| type: DISCRETE | |||
| feasiblePoints: 2,3 | |||
| - parameterName: hidden | |||
| type: NUMERICAL_LIST | |||
| scalingType: LOG | |||
| numericalType: INTEGER | |||
| cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 2 | |||
| maxValue: | |||
| - 256 | |||
| - 256 | |||
| minValue: | |||
| - 64 | |||
| - 64 | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| scalingType: LINEAR | |||
| maxValue: 0.2 | |||
| minValue: 0.0 | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| - parameterName: agg | |||
| type: CATEGORICAL | |||
| feasiblePoints: ["mean", "add", "max"] | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 150 | |||
| minValue: 50 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 40 | |||
| minValue: 25 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.005 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 1.0E-7 | |||
| minValue: 1.0E-10 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| @@ -1,70 +0,0 @@ | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| - name: pagerank | |||
| models: | |||
| gat: | |||
| - parameterName: num_layers | |||
| type: DISCRETE | |||
| feasiblePoints: '2,3,4' | |||
| - parameterName: heads | |||
| type: DISCRETE | |||
| feasiblePoints: '4,8,16' | |||
| - parameterName: hidden | |||
| type: NUMERICAL_LIST | |||
| numericalType: INTEGER | |||
| length: 3 | |||
| minValue: [8, 8, 8] | |||
| maxValue: [64, 64, 64] | |||
| cutPara: ["num_layers"] | |||
| cutFunc: "lambda x:x[0] - 1" | |||
| scalingType: LOG | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| maxValue: 0.9 | |||
| minValue: 0.1 | |||
| scalingType: LINEAR | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| trainer: | |||
| - parameterName: max_epoch | |||
| type: INTEGER | |||
| maxValue: 300 | |||
| minValue: 10 | |||
| scalingType: LINEAR | |||
| - parameterName: early_stopping_round | |||
| type: INTEGER | |||
| maxValue: 30 | |||
| minValue: 10 | |||
| scalingType: LINEAR | |||
| - parameterName: lr | |||
| type: DOUBLE | |||
| maxValue: 0.1 | |||
| minValue: 0.0001 | |||
| scalingType: LOG | |||
| - parameterName: weight_decay | |||
| type: DOUBLE | |||
| maxValue: 0.005 | |||
| minValue: 0.00005 | |||
| scalingType: LOG | |||
| hpo: | |||
| name: anneal | |||
| max_evals: 10 | |||
| ensemble: | |||
| name: voting | |||
| size: 2 | |||
| @@ -0,0 +1,93 @@ | |||
| ensemble: | |||
| name: voting | |||
| size: 2 | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 50 | |||
| name: tpe | |||
| models: | |||
| - hp_space: | |||
| - feasiblePoints: '2' | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - feasiblePoints: 6,8,10,12 | |||
| parameterName: heads | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 1 | |||
| maxValue: | |||
| - 16 | |||
| minValue: | |||
| - 4 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.8 | |||
| minValue: 0.2 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gat | |||
| - hp_space: | |||
| - feasiblePoints: '2' | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 1 | |||
| maxValue: | |||
| - 64 | |||
| minValue: | |||
| - 16 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.8 | |||
| minValue: 0.2 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gcn | |||
| trainer: | |||
| hp_space: | |||
| - maxValue: 300 | |||
| minValue: 100 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 30 | |||
| minValue: 10 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.01 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| - maxValue: 0.001 | |||
| minValue: 0.0001 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| type: DOUBLE | |||
| @@ -1,69 +1,64 @@ | |||
| # search space for gat on amazon_computers amazon_photo coauthor_cs coauthor_physics | |||
| ensemble: | |||
| name: null | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| gcn: | |||
| - parameterName: num_layers | |||
| type: DISCRETE | |||
| feasiblePoints: '2,3' | |||
| - parameterName: hidden | |||
| type: NUMERICAL_LIST | |||
| numericalType: INTEGER | |||
| length: 2 | |||
| minValue: [8, 8] | |||
| maxValue: [32, 32] | |||
| cutPara: ["num_layers"] | |||
| cutFunc: "lambda x:x[0] - 1" | |||
| scalingType: LOG | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| maxValue: 0.5 | |||
| minValue: 0.2 | |||
| scalingType: LINEAR | |||
| - parameterName: heads | |||
| type: DISCRETE | |||
| feasiblePoints: '8,10,12' | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| - hp_space: | |||
| - feasiblePoints: 2,3 | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 2 | |||
| maxValue: | |||
| - 32 | |||
| - 32 | |||
| minValue: | |||
| - 8 | |||
| - 8 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.5 | |||
| minValue: 0.2 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: 8,10,12 | |||
| parameterName: heads | |||
| type: DISCRETE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gat | |||
| trainer: | |||
| - parameterName: max_epoch | |||
| type: INTEGER | |||
| maxValue: 400 | |||
| hp_space: | |||
| - maxValue: 400 | |||
| minValue: 250 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| - parameterName: early_stopping_round | |||
| type: INTEGER | |||
| maxValue: 40 | |||
| - maxValue: 40 | |||
| minValue: 25 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| - parameterName: lr | |||
| type: DOUBLE | |||
| maxValue: 0.05 | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.01 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| - parameterName: weight_decay | |||
| type: DOUBLE | |||
| maxValue: 0.0005 | |||
| - maxValue: 0.0005 | |||
| minValue: 0.0001 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| hpo: | |||
| name: random | |||
| max_evals: 10 | |||
| ensemble: | |||
| name: ~ | |||
| type: DOUBLE | |||
| @@ -1,70 +1,62 @@ | |||
| # search space for gat on cora, citeseer, pubmed | |||
| ensemble: | |||
| name: null | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| gat: | |||
| - parameterName: num_layers | |||
| type: DISCRETE | |||
| feasiblePoints: '2' | |||
| - parameterName: heads | |||
| type: DISCRETE | |||
| feasiblePoints: '6,8,10,12' | |||
| - parameterName: hidden | |||
| type: NUMERICAL_LIST | |||
| numericalType: INTEGER | |||
| length: 1 | |||
| minValue: [4] | |||
| maxValue: [16] | |||
| cutPara: ["num_layers"] | |||
| cutFunc: "lambda x:x[0] - 1" | |||
| scalingType: LOG | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| maxValue: 0.8 | |||
| minValue: 0.2 | |||
| scalingType: LINEAR | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| - hp_space: | |||
| - feasiblePoints: '2' | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - feasiblePoints: 6,8,10,12 | |||
| parameterName: heads | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 1 | |||
| maxValue: | |||
| - 16 | |||
| minValue: | |||
| - 4 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.8 | |||
| minValue: 0.2 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gat | |||
| trainer: | |||
| - parameterName: max_epoch | |||
| type: INTEGER | |||
| maxValue: 300 | |||
| hp_space: | |||
| - maxValue: 300 | |||
| minValue: 100 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| - parameterName: early_stopping_round | |||
| type: INTEGER | |||
| maxValue: 30 | |||
| - maxValue: 30 | |||
| minValue: 10 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| - parameterName: lr | |||
| type: DOUBLE | |||
| maxValue: 0.05 | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.01 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| - parameterName: weight_decay | |||
| type: DOUBLE | |||
| maxValue: 0.001 | |||
| - maxValue: 0.001 | |||
| minValue: 0.0001 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| hpo: | |||
| name: random | |||
| max_evals: 10 | |||
| ensemble: | |||
| name: ~ | |||
| type: DOUBLE | |||
| @@ -1,64 +0,0 @@ | |||
| feature: | |||
| - name: ~ # ~ means None | |||
| models: | |||
| gcn: | |||
| - parameterName: num_layers | |||
| type: DISCRETE | |||
| feasiblePoints: '2' | |||
| - parameterName: hidden | |||
| type: NUMERICAL_LIST | |||
| numericalType: INTEGER | |||
| length: 2 | |||
| minValue: [16, 16] | |||
| maxValue: [64, 64] | |||
| cutPara: ["num_layers"] | |||
| cutFunc: "lambda x:x[0] - 1" | |||
| scalingType: LOG | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| maxValue: 0.8 | |||
| minValue: 0.2 | |||
| scalingType: LINEAR | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| trainer: | |||
| - parameterName: max_epoch | |||
| type: INTEGER | |||
| maxValue: 300 | |||
| minValue: 100 | |||
| scalingType: LINEAR | |||
| - parameterName: early_stopping_round | |||
| type: INTEGER | |||
| maxValue: 30 | |||
| minValue: 10 | |||
| scalingType: LINEAR | |||
| - parameterName: lr | |||
| type: DOUBLE | |||
| maxValue: 0.01 | |||
| minValue: 0.0025 | |||
| scalingType: LOG | |||
| - parameterName: weight_decay | |||
| type: DOUBLE | |||
| maxValue: 0.025 | |||
| minValue: 0.0025 | |||
| scalingType: LOG | |||
| hpo: | |||
| name: random | |||
| max_evals: 10 | |||
| ensemble: | |||
| name: ~ | |||
| @@ -1,65 +1,61 @@ | |||
| # search space for gcn on amazon_computers amazon_photo coauthor_cs coauthor_physics | |||
| ensemble: | |||
| name: null | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| gcn: | |||
| - parameterName: num_layers | |||
| type: DISCRETE | |||
| feasiblePoints: '2,3' | |||
| - parameterName: hidden | |||
| type: NUMERICAL_LIST | |||
| numericalType: INTEGER | |||
| length: 2 | |||
| minValue: [32, 32] | |||
| maxValue: [128, 128] | |||
| cutPara: ["num_layers"] | |||
| cutFunc: "lambda x:x[0] - 1" | |||
| scalingType: LOG | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| maxValue: 0.8 | |||
| minValue: 0.2 | |||
| scalingType: LINEAR | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| - hp_space: | |||
| - feasiblePoints: 2,3 | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 2 | |||
| maxValue: | |||
| - 128 | |||
| - 128 | |||
| minValue: | |||
| - 32 | |||
| - 32 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.8 | |||
| minValue: 0.2 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gcn | |||
| trainer: | |||
| - parameterName: max_epoch | |||
| type: INTEGER | |||
| maxValue: 300 | |||
| hp_space: | |||
| - maxValue: 300 | |||
| minValue: 100 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| - parameterName: early_stopping_round | |||
| type: INTEGER | |||
| maxValue: 30 | |||
| - maxValue: 30 | |||
| minValue: 10 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| - parameterName: lr | |||
| type: DOUBLE | |||
| maxValue: 0.05 | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.01 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| - parameterName: weight_decay | |||
| type: DOUBLE | |||
| maxValue: 0.0005 | |||
| minValue: 0.00005 | |||
| - maxValue: 0.0005 | |||
| minValue: 5.0e-05 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| hpo: | |||
| name: random | |||
| max_evals: 10 | |||
| ensemble: | |||
| name: ~ | |||
| type: DOUBLE | |||
| @@ -1,65 +1,59 @@ | |||
| # search space for gcn on cora, citeseer, pubmed | |||
| ensemble: | |||
| name: null | |||
| feature: | |||
| - name: PYGNormalizeFeatures | |||
| - name: PYGNormalizeFeatures | |||
| hpo: | |||
| max_evals: 10 | |||
| name: random | |||
| models: | |||
| gcn: | |||
| - parameterName: num_layers | |||
| type: DISCRETE | |||
| feasiblePoints: '2' | |||
| - parameterName: hidden | |||
| type: NUMERICAL_LIST | |||
| numericalType: INTEGER | |||
| length: 1 | |||
| minValue: [16] | |||
| maxValue: [64] | |||
| cutPara: ["num_layers"] | |||
| cutFunc: "lambda x:x[0] - 1" | |||
| scalingType: LOG | |||
| - parameterName: dropout | |||
| type: DOUBLE | |||
| maxValue: 0.8 | |||
| minValue: 0.2 | |||
| scalingType: LINEAR | |||
| - parameterName: act | |||
| type: CATEGORICAL | |||
| feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| - hp_space: | |||
| - feasiblePoints: '2' | |||
| parameterName: num_layers | |||
| type: DISCRETE | |||
| - cutFunc: lambda x:x[0] - 1 | |||
| cutPara: | |||
| - num_layers | |||
| length: 1 | |||
| maxValue: | |||
| - 64 | |||
| minValue: | |||
| - 16 | |||
| numericalType: INTEGER | |||
| parameterName: hidden | |||
| scalingType: LOG | |||
| type: NUMERICAL_LIST | |||
| - maxValue: 0.8 | |||
| minValue: 0.2 | |||
| parameterName: dropout | |||
| scalingType: LINEAR | |||
| type: DOUBLE | |||
| - feasiblePoints: | |||
| - leaky_relu | |||
| - relu | |||
| - elu | |||
| - tanh | |||
| parameterName: act | |||
| type: CATEGORICAL | |||
| name: gcn | |||
| trainer: | |||
| - parameterName: max_epoch | |||
| type: INTEGER | |||
| maxValue: 300 | |||
| hp_space: | |||
| - maxValue: 300 | |||
| minValue: 100 | |||
| parameterName: max_epoch | |||
| scalingType: LINEAR | |||
| - parameterName: early_stopping_round | |||
| type: INTEGER | |||
| maxValue: 30 | |||
| - maxValue: 30 | |||
| minValue: 10 | |||
| parameterName: early_stopping_round | |||
| scalingType: LINEAR | |||
| - parameterName: lr | |||
| type: DOUBLE | |||
| maxValue: 0.05 | |||
| type: INTEGER | |||
| - maxValue: 0.05 | |||
| minValue: 0.005 | |||
| parameterName: lr | |||
| scalingType: LOG | |||
| - parameterName: weight_decay | |||
| type: DOUBLE | |||
| maxValue: 0.001 | |||
| - maxValue: 0.001 | |||
| minValue: 0.0001 | |||
| parameterName: weight_decay | |||
| scalingType: LOG | |||
| hpo: | |||
| name: random | |||
| max_evals: 10 | |||
| ensemble: | |||
| name: ~ | |||
| type: DOUBLE | |||