From e477663d35539bc7fa9affc8c012e501c272c844 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Wed, 24 Feb 2021 14:05:55 +0000
Subject: [PATCH 001/144] fix bug of non-hpo

---
 autogl/solver/classifier/node_classifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py
index 9274e1b..d663357 100644
--- a/autogl/solver/classifier/node_classifier.py
+++ b/autogl/solver/classifier/node_classifier.py
@@ -312,7 +312,7 @@ class AutoNodeClassifier(BaseClassifier):
             )
             if self.hpo_module is None:
                 model.initialize()
-                model.train(self.data, True)
+                model.train(self.dataset, True)
                 optimized = model
             else:
                 optimized, _ = self.hpo_module.optimize(

From 6726cd5364a319b28f54a6ecfabfb0f86e1bf9bc Mon Sep 17 00:00:00 2001
From: SwiftieH
Date: Thu, 25 Feb 2021 08:44:37 +0000
Subject: [PATCH 002/144] fixed dataset with no masks and gbm config problem

---
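Note: the freshly generated train/val/test masks are now copied onto every
graph before re-collating, so they survive iteration over the dataset. A
minimal usage sketch (the dataset name is illustrative):

    from autogl.datasets import build_dataset_from_name
    from autogl.datasets.utils import random_splits_mask

    dataset = build_dataset_from_name("cora")
    random_splits_mask(dataset, train_ratio=0.2, val_ratio=0.4, seed=2021)
    data = dataset[0]
    # the three masks partition the nodes
    assert int(data.train_mask.sum() + data.val_mask.sum() + data.test_mask.sum()) == data.num_nodes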
 autogl/datasets/utils.py           | 21 ++++++++++++++++++---
 autogl/module/ensemble/stacking.py |  2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py
index 8b677ee..46b68ae 100644
--- a/autogl/datasets/utils.py
+++ b/autogl/datasets/utils.py
@@ -61,8 +61,15 @@ def random_splits_mask(dataset, train_ratio=0.2, val_ratio=0.4, seed=None):
     torch.set_rng_state(r_s)
     if torch.cuda.is_available():
         torch.cuda.set_rng_state(r_s_cuda)
-
-    dataset.data, dataset.slices = dataset.collate([d for d in dataset])
+    datalist = []
+    for d in dataset:
+        setattr(d, "train_mask", data.train_mask)
+        setattr(d, "val_mask", data.val_mask)
+        setattr(d, "test_mask", data.test_mask)
+        datalist.append(d)
+    dataset.data, dataset.slices = dataset.collate(datalist)
+    if hasattr(dataset, '__data_list__'):
+        delattr(dataset, '__data_list__')
     # while type(dataset.data.num_nodes) == list:
     #     dataset.data.num_nodes = dataset.data.num_nodes[0]
     # dataset.data.num_nodes = dataset.data.num_nodes[0]
@@ -160,7 +167,15 @@ def random_splits_mask_class(
     if torch.cuda.is_available():
         torch.cuda.set_rng_state(r_s_cuda)
 
-    dataset.data, dataset.slices = dataset.collate([d for d in dataset])
+    datalist = []
+    for d in dataset:
+        setattr(d, "train_mask", data.train_mask)
+        setattr(d, "val_mask", data.val_mask)
+        setattr(d, "test_mask", data.test_mask)
+        datalist.append(d)
+    dataset.data, dataset.slices = dataset.collate(datalist)
+    if hasattr(dataset, '__data_list__'):
+        delattr(dataset, '__data_list__')
     # while type(dataset.data.num_nodes) == list:
     #     dataset.data.num_nodes = dataset.data.num_nodes[0]
     # dataset.data.num_nodes = dataset.data.num_nodes[0]
diff --git a/autogl/module/ensemble/stacking.py b/autogl/module/ensemble/stacking.py
index c29f849..08337b2 100644
--- a/autogl/module/ensemble/stacking.py
+++ b/autogl/module/ensemble/stacking.py
@@ -100,7 +100,7 @@ class Stacking(BaseEnsembler):
             torch.tensor(predictions).transpose(0, 1).flatten(start_dim=1).numpy()
         )
         meta_Y = np.array(label)
-
+        config = {}
         model = GradientBoostingClassifier(**config)
         model.fit(meta_X, meta_Y)

From 3cdf94842be431bdf9a61f950d56ec3711978065 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Thu, 25 Feb 2021 13:11:49 +0000
Subject: [PATCH 003/144] fix bug in graph model

---
 autogl/module/model/gin.py      | 6 ++++--
 autogl/module/model/topkpool.py | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/autogl/module/model/gin.py b/autogl/module/model/gin.py
index 2839d46..3caa753 100644
--- a/autogl/module/model/gin.py
+++ b/autogl/module/model/gin.py
@@ -80,7 +80,8 @@ class GIN(torch.nn.Module):
     def forward(self, data):
         x, edge_index, batch = data.x, data.edge_index, data.batch
-        graph_feature = data.gf
+        if self.num_graph_features > 0:
+            graph_feature = data.gf
 
         for i in range(self.num_layer - 2):
             x = self.convs[i](x, edge_index)
@@ -88,7 +89,8 @@ class GIN(torch.nn.Module):
             x = self.bns[i](x)
 
         x = global_add_pool(x, batch)
-        x = torch.cat([x, graph_feature], dim=-1)
+        if self.num_graph_features > 0:
+            x = torch.cat([x, graph_feature], dim=-1)
         x = self.fc1(x)
         x = activate_func(x, self.args["act"])
         x = F.dropout(x, p=self.args["dropout"], training=self.training)
diff --git a/autogl/module/model/topkpool.py b/autogl/module/model/topkpool.py
index 703b441..897f7df 100644
--- a/autogl/module/model/topkpool.py
+++ b/autogl/module/model/topkpool.py
@@ -45,7 +45,8 @@ class Topkpool(torch.nn.Module):
     def forward(self, data):
         x, edge_index, batch = data.x, data.edge_index, data.batch
-        graph_feature = data.gf
+        if self.num_graph_features > 0:
+            graph_feature = data.gf
 
         x = F.relu(self.conv1(x, edge_index))
         x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)
@@ -60,7 +61,8 @@ class Topkpool(torch.nn.Module):
         x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)
 
         x = x1 + x2 + x3
-        x = torch.cat([x, graph_feature], dim=-1)
+        if self.num_graph_features > 0:
+            x = torch.cat([x, graph_feature], dim=-1)
         x = self.lin1(x)
         x = activate_func(x, self.args["act"])
         x = F.dropout(x, p=self.dropout, training=self.training)

From d89d2150da61c6b169b9d9a1328c7149a3ff970e Mon Sep 17 00:00:00 2001
From: wondergo2017
Date: Thu, 25 Feb 2021 13:39:35 +0000
Subject: [PATCH 004/144] fix subgraph

---
 autogl/module/feature/generators/pyg.py | 4 ++--
 autogl/module/feature/subgraph/base.py  | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/autogl/module/feature/generators/pyg.py b/autogl/module/feature/generators/pyg.py
index a356fc8..a3f7c8c 100644
--- a/autogl/module/feature/generators/pyg.py
+++ b/autogl/module/feature/generators/pyg.py
@@ -74,13 +74,13 @@ class PYGNormalizeFeatures(PYGGenerator):
 @register_pyg
 @pygfunc(OneHotDegree)
 class PYGOneHotDegree(PYGGenerator):
-    def __init__(self, max_degree=0):
+    def __init__(self, max_degree=1000):
         super(PYGOneHotDegree, self).__init__(max_degree=max_degree)
 
     def _transform(self, data):
         idx, x = data.edge_index[0], data.x
         deg = degree(idx, data.num_nodes, dtype=torch.long)
-        self._kwargs["max_degree"] = np.max(
+        self._kwargs["max_degree"] = np.min(
             [self._kwargs["max_degree"], torch.max(deg).numpy()]
         )
         dsc = self.extract(data)
diff --git a/autogl/module/feature/subgraph/base.py b/autogl/module/feature/subgraph/base.py
index 3f06d93..cfd695e 100644
--- a/autogl/module/feature/subgraph/base.py
+++ b/autogl/module/feature/subgraph/base.py
@@ -1,8 +1,9 @@
 from ..base import BaseFeatureAtom
 import numpy as np
 import torch
+from .. import register_feature
 
-
+@register_feature('subgraph')
 class BaseSubgraph(BaseFeatureAtom):
     def __init__(self, data_t="np", multigraph=True,**kwargs):
         super(BaseSubgraph, self).__init__(
@@ -15,3 +16,5 @@ class BaseSubgraph(BaseFeatureAtom):
 
     def _postprocess(self, data):
         pass
+
+

From 2687014defd9ea1f6a0ff48235e29f862c024eef Mon Sep 17 00:00:00 2001
From: wondergo2017
Date: Thu, 25 Feb 2021 13:40:40 +0000
Subject: [PATCH 005/144] fix dtype inconsistency in fe fittransform pipe

---
 autogl/module/feature/base.py | 37 +++++++++++++++--------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/autogl/module/feature/base.py b/autogl/module/feature/base.py
index 53beab2..1c7b52b 100644
--- a/autogl/module/feature/base.py
+++ b/autogl/module/feature/base.py
@@ -68,7 +68,12 @@ class BaseFeatureAtom:
         elif self._data_t == "nx":
             if not hasattr(data, "G") or data.G is None:
                 data.G = to_networkx(data, to_undirected=True)
-
+    def _adjust_to_tensor(self,data):
+        if self._data_t == "tensor":
+            pass
+        else:
+            data_np2tensor(data)
+
     def _preprocess(self, data):
         pass
 
@@ -98,22 +103,17 @@ class BaseFeatureAtom:
         if not self._check_dataset(dataset):
             return
         dataset = copy.deepcopy(dataset)
-        for p in self._pipe:
-            _dataset = [x for x in dataset]
-            if p._subgraph:
+        with torch.no_grad():
+            for p in self._pipe:
+                _dataset = [x for x in dataset]
                 for i, datai in enumerate(_dataset):
                     p._adjust_t(datai)
                     p._preprocess(datai)
                     p._fit_transform(datai)
                     p._postprocess(datai)
+                    p._adjust_to_tensor(datai)
                     _dataset[i] = datai
-            else:
-                data = dataset.data
-                p._adjust_t(data)
-                p._preprocess(data)
-                data = p._fit_transform(data)
-                p._postprocess(data)
-        dataset = self._rebuild(dataset, _dataset)
+            dataset = self._rebuild(dataset, _dataset)
 
     def transform(self, dataset, inplace=True):
         r"""transform dataset inplace or not w.r.t bool argument ``inplace``"""
             return dataset
         if not inplace:
             dataset = copy.deepcopy(dataset)
-        for p in self._pipe:
-            self._dataset = _dataset = [x for x in dataset]
-            if p._subgraph:
+        with torch.no_grad():
+            for p in self._pipe:
+                self._dataset = _dataset = [x for x in dataset]
                 for i, datai in enumerate(_dataset):
                     p._adjust_t(datai)
                     p._preprocess(datai)
                     datai = p._transform(datai)
                     p._postprocess(datai)
+                    p._adjust_to_tensor(datai)
                     _dataset[i] = datai
-            else:
-                data = dataset.data
-                p._adjust_t(data)
-                p._preprocess(data)
-                data = p._transform(data)
-                p._postprocess(data)
-        dataset = self._rebuild(dataset, _dataset)
+            dataset = self._rebuild(dataset, _dataset)
         dataset.data = data_np2tensor(dataset.data)
         return dataset

From bbc8d4bce5006e8027043e210ff97394f42ea88c Mon Sep 17 00:00:00 2001
From: wondergo2017
Date: Thu, 25 Feb 2021 13:41:12 +0000
Subject: [PATCH 006/144] fix onlyconst for dataset with no features

---
 autogl/module/feature/auto_feature.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/autogl/module/feature/auto_feature.py b/autogl/module/feature/auto_feature.py
index 4b7fb3b..3d7f8c0 100644
--- a/autogl/module/feature/auto_feature.py
+++ b/autogl/module/feature/auto_feature.py
@@ -11,7 +11,7 @@ from .selectors import SeGBDT
 from . import register_feature
 from ...utils import get_logger
-
+import torch
 LOGGER = get_logger("Feature")
 
@@ -28,10 +28,13 @@ class Onlyconst(BaseFeatureEngineer):
     r"""it is a dummy feature engineer , which directly returns identical data"""
 
     def __init__(self, *args, **kwargs):
-        super(Onlyconst, self).__init__(multigraph=True, *args, **kwargs)
+        super(Onlyconst, self).__init__(data_t='tensor',multigraph=True, *args, **kwargs)
 
     def _transform(self, data):
-        data.x = np.ones((data.x.shape[0], 1))
+        if 'x' in data:
+            data.x = torch.ones((data.x.shape[0], 1))
+        else:
+            data.x= torch.ones((torch.unique(data.edge_index).shape[0],1))
         return data

From f15d08e83f6479de26fafbe690b56d033ffd30b1 Mon Sep 17 00:00:00 2001
From: wondergo2017
Date: Thu, 25 Feb 2021 13:42:06 +0000
Subject: [PATCH 007/144] fix dataset change in random_split_mask

---
 autogl/datasets/utils.py | 48 +++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py
index 8b677ee..afd1128 100644
--- a/autogl/datasets/utils.py
+++ b/autogl/datasets/utils.py
@@ -1,3 +1,4 @@
+from pdb import set_trace
 import torch
 import numpy as np
 from torch_geometric.data import DataLoader
@@ -37,32 +38,33 @@ def random_splits_mask(dataset, train_ratio=0.2, val_ratio=0.4, seed=None):
     assert (
         train_ratio + val_ratio <= 1
     ), "the sum of train_ratio and val_ratio is larger than 1"
-    data = dataset[0]
-    r_s = torch.get_rng_state()
-    if torch.cuda.is_available():
-        r_s_cuda = torch.cuda.get_rng_state()
-    if seed is not None:
-        torch.manual_seed(seed)
+    _dataset=[d for d in dataset]
+    for data in _dataset:
+        r_s = torch.get_rng_state()
         if torch.cuda.is_available():
-            torch.cuda.manual_seed(seed)
-
-    perm = torch.randperm(data.num_nodes)
-    train_index = perm[: int(data.num_nodes * train_ratio)]
-    val_index = perm[
-        int(data.num_nodes * train_ratio) : int(
-            data.num_nodes * (train_ratio + val_ratio)
-        )
-    ]
-    test_index = perm[int(data.num_nodes * (train_ratio + val_ratio)) :]
-    data.train_mask = index_to_mask(train_index, size=data.num_nodes)
-    data.val_mask = index_to_mask(val_index, size=data.num_nodes)
-    data.test_mask = index_to_mask(test_index, size=data.num_nodes)
+            r_s_cuda = torch.cuda.get_rng_state()
+        if seed is not None:
+            torch.manual_seed(seed)
+            if torch.cuda.is_available():
+                torch.cuda.manual_seed(seed)
+
+        perm = torch.randperm(data.num_nodes)
+        train_index = perm[: int(data.num_nodes * train_ratio)]
+        val_index = perm[
+            int(data.num_nodes * train_ratio) : int(
+                data.num_nodes * (train_ratio + val_ratio)
+            )
+        ]
+        test_index = perm[int(data.num_nodes * (train_ratio + val_ratio)) :]
+        data.train_mask = index_to_mask(train_index, size=data.num_nodes)
+        data.val_mask = index_to_mask(val_index, size=data.num_nodes)
+        data.test_mask = index_to_mask(test_index, size=data.num_nodes)
 
-    torch.set_rng_state(r_s)
-    if torch.cuda.is_available():
-        torch.cuda.set_rng_state(r_s_cuda)
+        torch.set_rng_state(r_s)
+        if torch.cuda.is_available():
+            torch.cuda.set_rng_state(r_s_cuda)
 
-    dataset.data, dataset.slices = dataset.collate([d for d in dataset])
+    dataset.data, dataset.slices = dataset.collate(_dataset)
     # while type(dataset.data.num_nodes) == list:
     #     dataset.data.num_nodes = dataset.data.num_nodes[0]
     # dataset.data.num_nodes = dataset.data.num_nodes[0]

From 53d82874d01720af8c36c0852d1656b4e0e2aae6 Mon Sep 17 00:00:00 2001
From: wondergo2017
Date: Thu, 25 Feb 2021 13:43:12 +0000
Subject: [PATCH 008/144] add default config in paper for gcn,gat,gin

---
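Note: these YAMLs pin the hyperparameters reported in the original papers
(HPO degenerates to a single random trial and ensembling is disabled). A
sketch of how such a config is consumed, following the test scripts added
later in this series:

    import yaml
    from autogl.solver import AutoNodeClassifier

    config = yaml.load(open("configs/ncl_gcn.yaml", "r").read(), Loader=yaml.FullLoader)
    solver = AutoNodeClassifier.from_config(config)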
 configs/gcl_gin.yaml | 65 ++++++++++++++++++++++++++++++++++++++++++++
 configs/ncl_gat.yaml | 52 +++++++++++++++++++++++++++++++++++
 configs/ncl_gcn.yaml | 45 ++++++++++++++++++++++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 configs/gcl_gin.yaml
 create mode 100644 configs/ncl_gat.yaml
 create mode 100644 configs/ncl_gcn.yaml

diff --git a/configs/gcl_gin.yaml b/configs/gcl_gin.yaml
new file mode 100644
index 0000000..b7e0500
--- /dev/null
+++ b/configs/gcl_gin.yaml
@@ -0,0 +1,65 @@
+feature:
+  - name : ~
+
+models:
+  gin:
+    - parameterName: num_layers
+      type: FIXED
+      value: 6
+
+    - parameterName: hidden
+      type: FIXED
+      value: [32,32,32,32,32]
+
+    - parameterName: dropout
+      type: FIXED
+      value: 0.5
+
+    - parameterName: act
+      type: FIXED
+      value: relu
+
+    - parameterName: eps
+      type: FIXED
+      value: True
+
+    - parameterName: mlp_layers
+      type: FIXED
+      value: 2
+
+trainer:
+  - parameterName: max_epoch
+    type: FIXED
+    value: 350
+
+  - parameterName: early_stopping_round
+    type: FIXED
+    value: 10
+
+  - parameterName: lr
+    type: FIXED
+    value: 0.01
+
+  - parameterName: weight_decay
+    type: FIXED
+    value: 0
+
+  - parameterName: batch_size
+    type: FIXED
+    value: 32
+
+# hidden tuned in {16,32} for bioinformatics, 64 for social
+# batch tuned in {32,128}
+# dropout tuned in {0,0.5}
+
+# weight decay (0.5 every 50 epochs)
+
+# max epoch 350
+# early stop epochs (run to end?), best for 10 folds
+
+hpo:
+  name: random
+  max_evals: 1
+
+ensemble:
+  name: ~
\ No newline at end of file
diff --git a/configs/ncl_gat.yaml b/configs/ncl_gat.yaml
new file mode 100644
index 0000000..9fa384e
--- /dev/null
+++ b/configs/ncl_gat.yaml
@@ -0,0 +1,52 @@
+feature:
+  - name: ~
+
+models:
+  gat:
+    - parameterName: num_layers
+      type: FIXED
+      value: 2
+
+    - parameterName: heads
+      type: FIXED
+      value: 8
+
+    - parameterName: hidden
+      type: FIXED
+      value: [64]
+
+    - parameterName: dropout
+      type: FIXED
+      value: 0.6
+
+    - parameterName: act
+      type: FIXED
+      value: elu
+
+trainer:
+  - parameterName: max_epoch
+    type: FIXED
+    value: 200
+
+  - parameterName: early_stopping_round
+    type: FIXED
+    value: 10
+
+  - parameterName: lr
+    type: FIXED
+    value: 0.005
+
+  - parameterName: weight_decay
+    type: FIXED
+    value: 0.0005
+
+# Glorot initialization
+# for pubmed dataset , heads = 8 for last layer and weight decay =0.001 ,lr=0.01
+# early stopping 100, max epoch 100000
+hpo:
+  name: random
+  max_evals: 1
+
+ensemble:
+  name: ~
+
diff --git a/configs/ncl_gcn.yaml b/configs/ncl_gcn.yaml
new file mode 100644
index 0000000..1a76724
--- /dev/null
+++ b/configs/ncl_gcn.yaml
@@ -0,0 +1,45 @@
+feature:
+  - name: ~ # ~ means None
+
+models:
+  gcn:
+    - parameterName: num_layers
+      type: FIXED
+      value: 3
+
+    - parameterName: hidden
+      type: FIXED
+      value: [16, 16]
+
+    - parameterName: dropout
+      type: FIXED
+      value: 0.5
+
+    - parameterName: act
+      type: FIXED
+      value: relu
+
+trainer:
+  - parameterName: max_epoch
+    type: FIXED
+    value: 200
+
+  - parameterName: early_stopping_round
+    type: FIXED
+    value: 10
+
+  - parameterName: lr
+    type: FIXED
+    value: 0.01
+
+  - parameterName: weight_decay
+    type: FIXED
+    value: 0.0005
+# Glorot initialization
+# weight decay only for the first layer
+hpo:
+  name: random
+  max_evals: 1
+
+ensemble:
+  name: ~
\ No newline at end of file

From 33f75a20d12f1ad2ce3dcf6f14ec2d29060b0071 Mon Sep 17 00:00:00 2001
From: wondergo2017
Date: Thu, 25 Feb 2021 13:44:09 +0000
Subject: [PATCH 009/144] add fe test for node classification

---
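Note: a usage sketch (run from the examples/ directory, since config paths
are resolved relative to it):

    python fe_ncl_test.py --sn 5 --output ./record.txt

Each run appends one CSV row of the form cnt,acc,model,dataset,feature,seed;
a failed run is recorded with acc=-1, and the `cnt<=100` guard is a crude
resume mechanism for continuing an interrupted sweep.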
 examples/fe_ncl_test.py | 137 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 examples/fe_ncl_test.py

diff --git a/examples/fe_ncl_test.py b/examples/fe_ncl_test.py
new file mode 100644
index 0000000..11b61f9
--- /dev/null
+++ b/examples/fe_ncl_test.py
@@ -0,0 +1,137 @@
+import sys
+
+from networkx.algorithms.reciprocity import reciprocity
+sys.path.append('../')
+from autogl.datasets import build_dataset_from_name
+from autogl.solver import AutoNodeClassifier
+from autogl.module import Acc
+import yaml
+import random
+import torch
+import numpy as np
+
+import logging
+logging.basicConfig(level=logging.INFO)
+import sys
+from numpy.core.defchararray import index
+from torch.utils.data import dataset
+
+from yaml import compose, load
+sys.path.append('../')
+import random
+import numpy as np
+import torch
+import os
+import yaml
+import re
+from autogl.module.feature.base import BaseFeatureAtom
+from autogl.module.feature import FEATURE_DICT
+import pandas as pd
+import copy
+from argparse import ArgumentParser
+parser = ArgumentParser()
+# parser.add_argument('--device', default=0, type=int)
+# parser.add_argument('--max_eval', default=10, type=int)
+parser.add_argument('--sn',default=5,type=int)
+parser.add_argument('--output',default='./record.txt',type=str)
+parser.add_argument('--clean',default=False,type=bool)
+args=dict()
+def setseed(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+def renew(record_file):
+    with open(record_file,'w') as file:
+        file.write('')
+
+def run_ncl(dataset,configs,features,seed):
+    print(f'run {dataset} \t {configs} \t {features} \t {seed}')
+    setseed(seed)
+    dataset = build_dataset_from_name(dataset)
+    configs = yaml.load(open(configs, 'r').read(), Loader=yaml.FullLoader)
+    configs['feature']=[]
+    for f in features:
+        configs['feature'].append({'name':f})
+
+    autoClassifier = AutoNodeClassifier.from_config(configs)
+    # train
+    if dataset in ['cora', 'citeseer', 'pubmed']:
+        autoClassifier.fit(dataset, time_limit=3600, evaluation_method=[Acc])
+    else:
+        autoClassifier.fit(dataset, time_limit=3600, evaluation_method=[Acc], seed=seed, train_split=20*dataset.num_classes, val_split=30*dataset.num_classes, balanced=False)
+    val = autoClassifier.get_model_by_performance(0)[0].get_valid_score()[0]
+    # print('val acc: ', val)
+
+    # test
+    predict_result = autoClassifier.predict_proba(use_best=True, use_ensemble=False)
+    test_result=Acc.evaluate(predict_result, dataset.data.y[dataset.data.test_mask].numpy())
+    # print('test acc: ', test_result)
+    return test_result
+
+if __name__ == '__main__':
+    print(f"all FEs {FEATURE_DICT.keys()}")
+    args = parser.parse_args()
+
+    record_file=args.output
+    if not os.path.exists(record_file):
+        renew(record_file)
+    print(f"record file {record_file}")
+    record_file=open(record_file,'a+')
+
+    sn=args.sn # seeds num for each config
+    setseed(2021)
+    seeds=[random.randint(0,12345678) for x in range(sn)]
+    print('setting seeds ',seeds)
+
+    feature_set=[
+        '',
+        'onehot',
+        'PYGOneHotDegree',
+        'eigen',
+        'pagerank',
+        'PYGLocalDegreeProfile',
+        'graphlet',
+    ]
+    datasets=[
+        'cora',
+        'citeseer',
+        'pubmed',
+        'amazon_computers',
+        'amazon_photo',
+        'coauthor_cs',
+        'coauthor_physics',
+        # 'reddit'
+    ]
+    models=['gcn','gat']
+    cnt=0
+    for fi,f in enumerate(feature_set):
+        for mi,m in enumerate(models):
+            for di,d in enumerate(datasets):
+                for si,seed in enumerate(seeds):
+                    cnt+=1
+                    if cnt<=100:
+                        continue
+                    fs=['onlyconst',f] if f !='' else ['onlyconst']
+                    try:
+                        acc=run_ncl(d,f'../configs/ncl_{m}.yaml',fs,seed)
+                    except Exception as e:
+                        print(e)
+                        acc=-1
+                    record_file.write(f'{cnt},{acc},{m},{d},{f},{seed}\n')
+                    record_file.flush()
+
+
+
+
+
+
+
+
+

From ddd7b51e0518d353d6cb9409a418717384f4b952 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Thu, 25 Feb 2021 13:56:58 +0000
Subject: [PATCH 010/144] add support for parsing None in solver

---
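Note: with this change a config can explicitly disable a whole stage by
setting it to YAML null, e.g.

    feature:
      - name: ~
    hpo: ~
    ensemble: ~

Previously the solver popped "name" from the parsed None and crashed.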
 autogl/solver/classifier/graph_classifier.py | 25 +++++++++++---------
 autogl/solver/classifier/node_classifier.py  | 10 ++++----
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py
index 91cb7af..85511b2 100644
--- a/autogl/solver/classifier/graph_classifier.py
+++ b/autogl/solver/classifier/graph_classifier.py
@@ -724,13 +724,14 @@ class AutoGraphClassifier(BaseClassifier):
         path_or_dict = deepcopy(path_or_dict)
         solver = cls(None, [], None, None)
         fe_list = path_or_dict.pop("feature", [{"name": "deepgl"}])
-        fe_list_ele = []
-        for feature_engineer in fe_list:
-            name = feature_engineer.pop("name")
-            if name is not None:
-                fe_list_ele.append(FEATURE_DICT[name](**feature_engineer))
-        if fe_list_ele != []:
-            solver.set_feature_module(fe_list_ele)
+        if fe_list is not None:
+            fe_list_ele = []
+            for feature_engineer in fe_list:
+                name = feature_engineer.pop("name")
+                if name is not None:
+                    fe_list_ele.append(FEATURE_DICT[name](**feature_engineer))
+            if fe_list_ele != []:
+                solver.set_feature_module(fe_list_ele)
 
         models = path_or_dict.pop("models", {"gcn": None, "gat": None})
         model_list = list(models.keys())
@@ -761,11 +762,13 @@ class AutoGraphClassifier(BaseClassifier):
         solver.set_graph_models(model_list, trainer_space, model_hp_space)
 
         hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"})
-        name = hpo_dict.pop("name")
-        solver.set_hpo_module(name, **hpo_dict)
+        if hpo_dict is not None:
+            name = hpo_dict.pop("name")
+            solver.set_hpo_module(name, **hpo_dict)
 
         ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"})
-        name = ensemble_dict.pop("name")
-        solver.set_ensemble_module(name, **ensemble_dict)
+        if ensemble_dict is not None:
+            name = ensemble_dict.pop("name")
+            solver.set_ensemble_module(name, **ensemble_dict)
 
         return solver
diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py
index d663357..054581e 100644
--- a/autogl/solver/classifier/node_classifier.py
+++ b/autogl/solver/classifier/node_classifier.py
@@ -689,11 +689,13 @@ class AutoNodeClassifier(BaseClassifier):
         solver.set_graph_models(model_list, trainer_space, model_hp_space)
 
         hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"})
-        name = hpo_dict.pop("name")
-        solver.set_hpo_module(name, **hpo_dict)
+        if hpo_dict is not None:
+            name = hpo_dict.pop("name")
+            solver.set_hpo_module(name, **hpo_dict)
 
         ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"})
-        name = ensemble_dict.pop("name")
-        solver.set_ensemble_module(name, **ensemble_dict)
+        if ensemble_dict is not None:
+            name = ensemble_dict.pop("name")
+            solver.set_ensemble_module(name, **ensemble_dict)
 
         return solver

From 6912f7801bedbb14b657167cf8497ff1fb616688 Mon Sep 17 00:00:00 2001
From: wondergo2017
Date: Thu, 25 Feb 2021 15:40:04 +0000
Subject: [PATCH 011/144] add fe gcl test

---
 examples/fe_gcl_test.py | 136 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 examples/fe_gcl_test.py

diff --git a/examples/fe_gcl_test.py b/examples/fe_gcl_test.py
new file mode 100644
index 0000000..b44128c
--- /dev/null
+++ b/examples/fe_gcl_test.py
@@ -0,0 +1,136 @@
+import sys
+
+from networkx.algorithms.reciprocity import reciprocity
+sys.path.append('../')
+from autogl.datasets import build_dataset_from_name
+from autogl.solver import AutoNodeClassifier,AutoGraphClassifier
+from autogl.module import Acc
+import yaml
+import random
+import torch
+import numpy as np
+
+import logging
+logging.basicConfig(level=logging.INFO)
+import sys
+from numpy.core.defchararray import index
+from torch.utils.data import dataset
+
+from yaml import compose, load
+sys.path.append('../')
+import random
+import numpy as np
+import torch
+import os
+import yaml
+import re
+from autogl.module.feature.base import BaseFeatureAtom
+from autogl.module.feature import FEATURE_DICT
+import pandas as pd
+import copy
+from argparse import ArgumentParser
+parser = ArgumentParser()
+# parser.add_argument('--device', default=0, type=int)
+# parser.add_argument('--max_eval', default=10, type=int)
+parser.add_argument('--sn',default=5,type=int)
+parser.add_argument('--output',default='./record_gcl2.txt',type=str)
+parser.add_argument('--clean',default=False,type=bool)
+args=dict()
+def setseed(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+def renew(record_file):
+    with open(record_file,'w') as file:
+        file.write('')
+
+def run_gcl(dataset,configs,features,seed):
+
+    print(f'run {dataset} \t {configs} \t {features} \t {seed}')
+    setseed(seed)
+    dataset = build_dataset_from_name(dataset)
+    configs = yaml.load(open(configs, 'r').read(), Loader=yaml.FullLoader)
+    configs['feature']=[]
+    for f in features:
+        configs['feature'].append({'name':f})
+
+    autoClassifier = AutoGraphClassifier.from_config(configs)
+    # train
+    autoClassifier.fit(
+        dataset,
+        time_limit=3600,
+        train_split=0.8,
+        val_split=0.1,
+        cross_validation=True,
+        cv_split=10,
+    )
+    val = autoClassifier.get_model_by_performance(0)[0].get_valid_score()[0]
+
+    # test
+    predict_result = autoClassifier.predict_proba()
+    acc=Acc.evaluate(predict_result, dataset.data.y[dataset.test_index].cpu().detach().numpy())
+    # print(acc)
+    return acc
+
+if __name__ == "__main__":
+    print(f"all FEs {FEATURE_DICT.keys()}")
+    args = parser.parse_args()
+
+    record_file=args.output
+    if not os.path.exists(record_file):
+        renew(record_file)
+    print(f"record file {record_file}")
+    record_file=open(record_file,'a+')
+
+    sn=args.sn # seeds num for each config
+    setseed(2021)
+    seeds=[random.randint(0,12345678) for x in range(sn)]
+    print('setting seeds ',seeds)
+
+    feature_set=[
+        '',
+        'netlsd',
+        'NxSubgraph', 'NxLargeCliqueSize', 'NxAverageClusteringApproximate', 'NxDegreeAssortativityCoefficient', 'NxDegreePearsonCorrelationCoefficient', 'NxHasBridge', 'NxGraphCliqueNumber', 'NxGraphNumberOfCliques', 'NxTransitivity', 'NxAverageClustering', 'NxIsConnected', 'NxNumberConnectedComponents', 'NxIsDistanceRegular', 'NxLocalEfficiency', 'NxGlobalEfficiency', 'NxIsEulerian'
+    ]
+    datasets=[
+        'mutag',
+        'imdb-b',
+        'imdb-m',
+        'proteins',
+        'collab'
+    ]
+    models=['gin']
+    cnt=0
+    for fi,f in enumerate(feature_set):
+        for mi,m in enumerate(models):
+            for di,d in enumerate(datasets):
+                for si,seed in enumerate(seeds):
+                    cnt+=1
+                    if cnt<=0:
+                        continue
+                    fs=['onlyconst',f] if f !='' else ['onlyconst','subgraph']
+                    try:
+                        # queue_configs.append([d,f'../configs/gcl_{m}.yaml',fs,seed])
+                        acc=run_gcl(d,f'../configs/gcl_{m}.yaml',fs,seed)
+                    except Exception as e:
+                        print(e)
+                        acc=-1
+                    record_file.write(f'{cnt},{acc},{m},{d},{f},{seed}\n')
+                    record_file.flush()
+
+
+
+
+
+
+
+
+
+
+

From b9efe5256204bcf67feda6a4e8aae8d3066d98cd Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Fri, 26 Feb 2021 14:32:06 +0000
Subject: [PATCH 012/144] disable cross validation check

---
 autogl/solver/classifier/graph_classifier.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py
index 85511b2..2a70ae9 100644
--- a/autogl/solver/classifier/graph_classifier.py
+++ b/autogl/solver/classifier/graph_classifier.py
@@ -266,8 +266,8 @@ class AutoGraphClassifier(BaseClassifier):
                     "Please manually pass train and val ratio."
                 )
             LOGGER.info("Use the default train/val/test ratio in given dataset")
-            if hasattr(dataset.train_split, "n_splits"):
-                cross_validation = True
+            #if hasattr(dataset.train_split, "n_splits"):
+            #    cross_validation = True
 
         elif train_split is not None and val_split is not None:
             utils.graph_random_splits(dataset, train_split, val_split, seed=seed)

From 8430fac714afca3ba6657cf6011cdc8c89e5e274 Mon Sep 17 00:00:00 2001
From: wondergo2017
Date: Thu, 4 Mar 2021 02:37:52 +0000
Subject: [PATCH 013/144] fix pyg default aggregation for features

---
 autogl/module/feature/generators/pyg.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/autogl/module/feature/generators/pyg.py b/autogl/module/feature/generators/pyg.py
index a356fc8..130fe70 100644
--- a/autogl/module/feature/generators/pyg.py
+++ b/autogl/module/feature/generators/pyg.py
@@ -29,7 +29,8 @@ class PYGGenerator(BaseGenerator):
 
     def _transform(self, data):
         dsc = self.extract(data)
-        data.x = torch.cat([data.x, dsc], dim=1)
+        # data.x = torch.cat([data.x, dsc], dim=1)
+        data.x = dsc
         return data
From e63c95da45a34c3c36ceb2150f3495c3a03e3839 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Thu, 4 Mar 2021 06:54:54 +0000
Subject: [PATCH 014/144] black format

---
 autogl/datasets/__init__.py                  |  17 ++-
 autogl/datasets/modelnet.py                  |  24 ++--
 autogl/datasets/ogb.py                       | 131 ++++++++++---------
 autogl/datasets/pyg.py                       | 123 ++++++++---------
 autogl/datasets/utils.py                     |  10 +-
 autogl/module/ensemble/__init__.py           |   2 +
 autogl/module/ensemble/voting.py             |   2 +-
 autogl/module/feature/__init__.py            |   1 +
 autogl/module/feature/auto_feature.py        |   9 +-
 autogl/module/feature/base.py                |   8 +-
 autogl/module/feature/generators/base.py     |   6 +-
 autogl/module/feature/selectors/base.py      |   6 +-
 autogl/module/feature/subgraph/base.py       |   9 +-
 autogl/module/hpo/autone.py                  |  10 +-
 autogl/module/hpo/suggestion/__init__.py     |   2 +-
 autogl/module/model/__init__.py              |   1 +
 autogl/module/model/gat.py                   |  17 ++-
 autogl/module/model/gcn.py                   |   7 +-
 autogl/module/model/gin.py                   |  27 +++-
 autogl/module/model/graphsage.py             |  20 ++-
 autogl/module/model/topkpool.py              |  17 ++-
 autogl/module/train/__init__.py              |   1 +
 autogl/module/train/graph_classification.py  |   7 +-
 autogl/module/train/node_classification.py   |   5 +-
 autogl/solver/classifier/graph_classifier.py |   2 +-
 25 files changed, 272 insertions(+), 192 deletions(-)

diff --git a/autogl/datasets/__init__.py b/autogl/datasets/__init__.py
index 1f07a51..c1fccae 100644
--- a/autogl/datasets/__init__.py
+++ b/autogl/datasets/__init__.py
@@ -46,6 +46,7 @@ def register_dataset(name):
 
     return register_dataset_cls
 
+
 from .pyg import (
     AmazonComputersDataset,
     AmazonPhotoDataset,
@@ -96,9 +97,12 @@ from .matlab_matrix import (
     PPIDataset,
 )
 from .modelnet import (
-    ModelNet10, ModelNet40,
-    ModelNet10Train, ModelNet10Test,
-    ModelNet40Train, ModelNet40Test
+    ModelNet10,
+    ModelNet40,
+    ModelNet10Train,
+    ModelNet10Test,
+    ModelNet40Train,
+    ModelNet40Test,
 )
 from .utils import (
     get_label_number,
@@ -110,6 +114,7 @@ from .utils import (
     graph_get_split,
 )
 
+
 def build_dataset(args, path="~/.cache-autogl/"):
     path = osp.join(path, "data", args.dataset)
     path = os.path.expanduser(path)
@@ -120,9 +125,9 @@ def build_dataset_from_name(dataset_name, path="~/.cache-autogl/"):
     path = osp.join(path, "data", dataset_name)
     path = os.path.expanduser(path)
     dataset = DATASET_DICT[dataset_name](path)
-    if 'ogbn' in dataset_name:
-        #dataset.data, dataset.slices = dataset.collate([dataset.data])
-        #dataset.data.num_nodes = dataset.data.num_nodes[0]
+    if "ogbn" in dataset_name:
+        # dataset.data, dataset.slices = dataset.collate([dataset.data])
+        # dataset.data.num_nodes = dataset.data.num_nodes[0]
         if dataset.data.y.shape[-1] == 1:
             dataset.data.y = torch.squeeze(dataset.data.y)
     return dataset
diff --git a/autogl/datasets/modelnet.py b/autogl/datasets/modelnet.py
index b91aa1c..67c958f 100644
--- a/autogl/datasets/modelnet.py
+++ b/autogl/datasets/modelnet.py
@@ -21,42 +21,42 @@ class ModelNet40(ModelNet):
 @register_dataset("ModelNet10Train")
 class ModelNet10Train(ModelNet):
     def __init__(self, path: str):
-        super(ModelNet10Train, self).__init__(path, '10', train=True)
+        super(ModelNet10Train, self).__init__(path, "10", train=True)
 
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(ModelNet10Train, self).get(idx)
 
 
 @register_dataset("ModelNet10Test")
 class ModelNet10Test(ModelNet):
     def __init__(self, path: str):
-        super(ModelNet10Test, self).__init__(path, '10', train=False)
+        super(ModelNet10Test, self).__init__(path, "10", train=False)
 
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(ModelNet10Test, self).get(idx)
 
 
 @register_dataset("ModelNet40Train")
 class ModelNet40Train(ModelNet):
     def __init__(self, path: str):
-        super(ModelNet40Train, self).__init__(path, '40', train=True)
+        super(ModelNet40Train, self).__init__(path, "40", train=True)
 
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(ModelNet40Train, self).get(idx)
 
 
 @register_dataset("ModelNet40Test")
 class ModelNet40Test(ModelNet):
     def __init__(self, path: str):
-        super(ModelNet40Test, self).__init__(path, '40', train=False)
+        super(ModelNet40Test, self).__init__(path, "40", train=False)
 
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(ModelNet40Test, self).get(idx)
diff --git a/autogl/datasets/ogb.py b/autogl/datasets/ogb.py
index 1fb8da9..a27ea8e 100644
--- a/autogl/datasets/ogb.py
+++ b/autogl/datasets/ogb.py
@@ -30,15 +30,15 @@ class OGBNproductsDataset(PygNodePropPredDataset):
         split_idx = self.get_idx_split()
         datalist = []
         for d in self:
-            setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0]))
-            setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0]))
-            setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0]))
+            setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0]))
+            setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0]))
+            setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0]))
             datalist.append(d)
         self.data, self.slices = self.collate(datalist)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBNproductsDataset, self).get(idx)
@@ -49,7 +49,9 @@ class OGBNproteinsDataset(PygNodePropPredDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         PygNodePropPredDataset(name=dataset, root=path)
         super(OGBNproteinsDataset, self).__init__(dataset, path)
-        dataset_t = PygNodePropPredDataset(name=dataset, root=path, transform=T.ToSparseTensor())
+        dataset_t = PygNodePropPredDataset(
+            name=dataset, root=path, transform=T.ToSparseTensor()
+        )
 
         # Move edge features to node features.
         self.data.x = dataset_t[0].adj_t.mean(dim=1)
@@ -61,15 +63,15 @@ class OGBNproteinsDataset(PygNodePropPredDataset):
         split_idx = self.get_idx_split()
         datalist = []
         for d in self:
-            setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0]))
-            setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0]))
-            setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0]))
+            setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0]))
+            setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0]))
+            setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0]))
             datalist.append(d)
         self.data, self.slices = self.collate(datalist)
-    
+
    def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBNproteinsDataset, self).get(idx)
@@ -86,15 +88,15 @@ class OGBNarxivDataset(PygNodePropPredDataset):
         datalist = []
         for d in self:
-            setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0]))
-            setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0]))
-            setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0]))
+            setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0]))
+            setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0]))
+            setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0]))
             datalist.append(d)
         self.data, self.slices = self.collate(datalist)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBNarxivDataset, self).get(idx)
@@ -110,15 +112,15 @@ class OGBNpapers100MDataset(PygNodePropPredDataset):
         split_idx = self.get_idx_split()
         datalist = []
         for d in self:
-            setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0]))
-            setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0]))
-            setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0]))
+            setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0]))
+            setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0]))
+            setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0]))
             datalist.append(d)
         self.data, self.slices = self.collate(datalist)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBNpapers100MDataset, self).get(idx)
@@ -134,9 +136,10 @@ class OGBNmagDataset(PygNodePropPredDataset):
         rel_data = self[0]
         # We are only interested in paper <-> paper relations.
         self.data = Data(
-            x=rel_data.x_dict['paper'],
-            edge_index=rel_data.edge_index_dict[('paper', 'cites', 'paper')],
-            y=rel_data.y_dict['paper'])
+            x=rel_data.x_dict["paper"],
+            edge_index=rel_data.edge_index_dict[("paper", "cites", "paper")],
+            y=rel_data.y_dict["paper"],
+        )
 
         # self.data = T.ToSparseTensor()(data)
         # self[0].adj_t = self[0].adj_t.to_symmetric()
@@ -147,15 +150,15 @@ class OGBNmagDataset(PygNodePropPredDataset):
         datalist = []
         for d in self:
-            setattr(d, "train_mask", index_to_mask(split_idx['train'], d.y.shape[0]))
-            setattr(d, "val_mask", index_to_mask(split_idx['valid'], d.y.shape[0]))
-            setattr(d, "test_mask", index_to_mask(split_idx['test'], d.y.shape[0]))
+            setattr(d, "train_mask", index_to_mask(split_idx["train"], d.y.shape[0]))
+            setattr(d, "val_mask", index_to_mask(split_idx["valid"], d.y.shape[0]))
+            setattr(d, "test_mask", index_to_mask(split_idx["test"], d.y.shape[0]))
             datalist.append(d)
         self.data, self.slices = self.collate(datalist)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBNmagDataset, self).get(idx)
@@ -171,10 +174,10 @@ class OGBGmolhivDataset(PygGraphPropPredDataset):
         super(OGBGmolhivDataset, self).__init__(dataset, path)
         setattr(OGBGmolhivDataset, "metric", "ROC-AUC")
         setattr(OGBGmolhivDataset, "loss", "binary_cross_entropy_with_logits")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBGmolhivDataset, self).get(idx)
@@ -187,10 +190,10 @@ class OGBGmolpcbaDataset(PygGraphPropPredDataset):
         super(OGBGmolpcbaDataset, self).__init__(dataset, path)
         setattr(OGBGmolpcbaDataset, "metric", "AP")
         setattr(OGBGmolpcbaDataset, "loss", "binary_cross_entropy_with_logits")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBGmolpcbaDataset, self).get(idx)
@@ -203,10 +206,10 @@ class OGBGppaDataset(PygGraphPropPredDataset):
         super(OGBGppaDataset, self).__init__(dataset, path)
         setattr(OGBGppaDataset, "metric", "Accuracy")
         setattr(OGBGppaDataset, "loss", "cross_entropy")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBGppaDataset, self).get(idx)
@@ -219,10 +222,10 @@ class OGBGcodeDataset(PygGraphPropPredDataset):
         super(OGBGcodeDataset, self).__init__(dataset, path)
         setattr(OGBGcodeDataset, "metric", "F1 score")
         setattr(OGBGcodeDataset, "loss", "cross_entropy")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBGcodeDataset, self).get(idx)
@@ -238,10 +241,10 @@ class OGBLppaDataset(PygLinkPropPredDataset):
         super(OGBLppaDataset, self).__init__(dataset, path)
         setattr(OGBLppaDataset, "metric", "Hits@100")
         setattr(OGBLppaDataset, "loss", "pos_neg_loss")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBLppaDataset, self).get(idx)
@@ -254,10 +257,10 @@ class OGBLcollabDataset(PygLinkPropPredDataset):
         super(OGBLcollabDataset, self).__init__(dataset, path)
         setattr(OGBLcollabDataset, "metric", "Hits@50")
         setattr(OGBLcollabDataset, "loss", "pos_neg_loss")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBLcollabDataset, self).get(idx)
@@ -270,10 +273,10 @@ class OGBLddiDataset(PygLinkPropPredDataset):
         super(OGBLddiDataset, self).__init__(dataset, path)
         setattr(OGBLddiDataset, "metric", "Hits@20")
         setattr(OGBLddiDataset, "loss", "pos_neg_loss")
-    
+
    def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBLddiDataset, self).get(idx)
@@ -286,10 +289,10 @@ class OGBLcitationDataset(PygLinkPropPredDataset):
         super(OGBLcitationDataset, self).__init__(dataset, path)
         setattr(OGBLcitationDataset, "metric", "MRR")
         setattr(OGBLcitationDataset, "loss", "pos_neg_loss")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBLcitationDataset, self).get(idx)
@@ -302,10 +305,10 @@ class OGBLwikikgDataset(PygLinkPropPredDataset):
         super(OGBLwikikgDataset, self).__init__(dataset, path)
         setattr(OGBLwikikgDataset, "metric", "MRR")
         setattr(OGBLwikikgDataset, "loss", "pos_neg_loss")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBLwikikgDataset, self).get(idx)
@@ -318,8 +321,8 @@ class OGBLbiokgDataset(PygLinkPropPredDataset):
         super(OGBLbiokgDataset, self).__init__(dataset, path)
         setattr(OGBLbiokgDataset, "metric", "MRR")
         setattr(OGBLbiokgDataset, "loss", "pos_neg_loss")
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(OGBLbiokgDataset, self).get(idx)
diff --git a/autogl/datasets/pyg.py b/autogl/datasets/pyg.py
index cdb833f..d98b927 100644
--- a/autogl/datasets/pyg.py
+++ b/autogl/datasets/pyg.py
@@ -1,6 +1,7 @@
 import os.path as osp
 import torch
+
 # import torch_geometric.transforms as T
 from torch_geometric.datasets import (
     Planetoid,
@@ -21,10 +22,10 @@ class AmazonComputersDataset(Amazon):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         Amazon(path, dataset)
         super(AmazonComputersDataset, self).__init__(path, dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(AmazonComputersDataset, self).get(idx)
@@ -35,10 +36,10 @@ class AmazonPhotoDataset(Amazon):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         Amazon(path, dataset)
         super(AmazonPhotoDataset, self).__init__(path, dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(AmazonPhotoDataset, self).get(idx)
@@ -49,10 +50,10 @@ class CoauthorPhysicsDataset(Coauthor):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         Coauthor(path, dataset)
         super(CoauthorPhysicsDataset, self).__init__(path, dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(CoauthorPhysicsDataset, self).get(idx)
@@ -63,10 +64,10 @@ class CoauthorCSDataset(Coauthor):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         Coauthor(path, dataset)
         super(CoauthorCSDataset, self).__init__(path, dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(CoauthorCSDataset, self).get(idx)
@@ -77,10 +78,10 @@ class CoraDataset(Planetoid):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         Planetoid(path, dataset)
         super(CoraDataset, self).__init__(path, dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(CoraDataset, self).get(idx)
@@ -91,10 +92,10 @@ class CiteSeerDataset(Planetoid):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         Planetoid(path, dataset)
         super(CiteSeerDataset, self).__init__(path, dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(CiteSeerDataset, self).get(idx)
@@ -105,10 +106,10 @@ class PubMedDataset(Planetoid):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         Planetoid(path, dataset)
         super(PubMedDataset, self).__init__(path, dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(PubMedDataset, self).get(idx)
@@ -119,10 +120,10 @@ class RedditDataset(Reddit):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         Reddit(path)
         super(RedditDataset, self).__init__(path)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(RedditDataset, self).get(idx)
@@ -135,8 +136,8 @@ class MUTAGDataset(TUDataset):
         super(MUTAGDataset, self).__init__(path, name=dataset)
 
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(MUTAGDataset, self).get(idx)
@@ -147,10 +148,10 @@ class IMDBBinaryDataset(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(IMDBBinaryDataset, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(IMDBBinaryDataset, self).get(idx)
@@ -161,10 +162,10 @@ class IMDBMultiDataset(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(IMDBMultiDataset, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(IMDBMultiDataset, self).get(idx)
@@ -175,10 +176,10 @@ class CollabDataset(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(CollabDataset, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(CollabDataset, self).get(idx)
@@ -189,10 +190,10 @@ class ProteinsDataset(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(ProteinsDataset, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(ProteinsDataset, self).get(idx)
@@ -203,10 +204,10 @@ class REDDITBinary(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(REDDITBinary, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(REDDITBinary, self).get(idx)
@@ -217,10 +218,10 @@ class REDDITMulti5K(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(REDDITMulti5K, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(REDDITMulti5K, self).get(idx)
@@ -231,10 +232,10 @@ class REDDITMulti12K(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(REDDITMulti12K, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(REDDITMulti12K, self).get(idx)
@@ -247,8 +248,8 @@ class PTCMRDataset(TUDataset):
         super(PTCMRDataset, self).__init__(path, name=dataset)
 
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(PTCMRDataset, self).get(idx)
@@ -259,10 +260,10 @@ class NCI1Dataset(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(NCI1Dataset, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(NCI1Dataset, self).get(idx)
@@ -273,10 +274,10 @@ class NCI109Dataset(TUDataset):
         # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
         TUDataset(path, name=dataset)
         super(NCI109Dataset, self).__init__(path, name=dataset)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(NCI109Dataset, self).get(idx)
@@ -298,10 +299,10 @@ class ENZYMES(TUDataset):
             return data
         else:
             return self.index_select(idx)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(ENZYMES, self).get(idx)
@@ -342,8 +343,8 @@ class QM9Dataset(QM9):
         if not osp.exists(path):
             QM9(path)
         super(QM9Dataset, self).__init__(path)
-    
+
     def get(self, idx):
-        if hasattr(self, '__data_list__'):
-            delattr(self, '__data_list__')
+        if hasattr(self, "__data_list__"):
+            delattr(self, "__data_list__")
         return super(QM9Dataset, self).get(idx)
diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py
index 0dc09eb..7ae5afc 100644
--- a/autogl/datasets/utils.py
+++ b/autogl/datasets/utils.py
@@ -38,7 +38,7 @@ def random_splits_mask(dataset, train_ratio=0.2, val_ratio=0.4, seed=None):
     assert (
         train_ratio + val_ratio <= 1
     ), "the sum of train_ratio and val_ratio is larger than 1"
-    _dataset=[d for d in dataset]
+    _dataset = [d for d in dataset]
     for data in _dataset:
         r_s = torch.get_rng_state()
         if torch.cuda.is_available():
@@ -65,8 +65,8 @@ def random_splits_mask(dataset, train_ratio=0.2, val_ratio=0.4, seed=None):
         torch.cuda.set_rng_state(r_s_cuda)
 
     dataset.data, dataset.slices = dataset.collate(_dataset)
-    if hasattr(dataset, '__data_list__'):
-        delattr(dataset, '__data_list__')
+    if hasattr(dataset, "__data_list__"):
+        delattr(dataset, "__data_list__")
     # while type(dataset.data.num_nodes) == list:
     #     dataset.data.num_nodes = dataset.data.num_nodes[0]
     # dataset.data.num_nodes = dataset.data.num_nodes[0]
@@ -171,8 +171,8 @@ def random_splits_mask_class(
         setattr(d, "test_mask", data.test_mask)
         datalist.append(d)
     dataset.data, dataset.slices = dataset.collate(datalist)
-    if hasattr(dataset, '__data_list__'):
-        delattr(dataset, '__data_list__')
+    if hasattr(dataset, "__data_list__"):
+        delattr(dataset, "__data_list__")
     # while type(dataset.data.num_nodes) == list:
     #     dataset.data.num_nodes = dataset.data.num_nodes[0]
     # dataset.data.num_nodes = dataset.data.num_nodes[0]
diff --git a/autogl/module/ensemble/__init__.py b/autogl/module/ensemble/__init__.py
index 7acf431..92a6c4f 100644
--- a/autogl/module/ensemble/__init__.py
+++ b/autogl/module/ensemble/__init__.py
@@ -16,9 +16,11 @@ def register_ensembler(name):
 
     return register_ensembler_cls
 
+
 from .voting import Voting
 from .stacking import Stacking
 
+
 def build_ensembler_from_name(name: str) -> BaseEnsembler:
     """
     Parameters
diff --git a/autogl/module/ensemble/voting.py b/autogl/module/ensemble/voting.py
index 0515e07..a9b37be 100644
--- a/autogl/module/ensemble/voting.py
+++ b/autogl/module/ensemble/voting.py
@@ -85,7 +85,7 @@ class Voting(BaseEnsembler):
 
         weights = weights / np.sum(weights)
         return np.average(predictions, axis=0, weights=weights)
-    
+
     def _specify_weights(self, predictions, label, feval):
         ensemble_prediction = []
         combinations = []
diff --git a/autogl/module/feature/__init__.py b/autogl/module/feature/__init__.py
index 3885953..20738f7 100644
--- a/autogl/module/feature/__init__.py
+++ b/autogl/module/feature/__init__.py
@@ -24,6 +24,7 @@ def register_feature(name):
 
     return register_feature_cls
 
+
 from .auto_feature import AutoFeatureEngineer
 from .base import BaseFeatureEngineer
diff --git a/autogl/module/feature/auto_feature.py b/autogl/module/feature/auto_feature.py
index 3d7f8c0..345447c 100644
--- a/autogl/module/feature/auto_feature.py
+++ b/autogl/module/feature/auto_feature.py
@@ -12,6 +12,7 @@ from . import register_feature
 from ...utils import get_logger
 import torch
+
 LOGGER = get_logger("Feature")
 
@@ -28,13 +29,15 @@ class Onlyconst(BaseFeatureEngineer):
     r"""it is a dummy feature engineer , which directly returns identical data"""
 
     def __init__(self, *args, **kwargs):
-        super(Onlyconst, self).__init__(data_t='tensor',multigraph=True, *args, **kwargs)
+        super(Onlyconst, self).__init__(
+            data_t="tensor", multigraph=True, *args, **kwargs
+        )
 
     def _transform(self, data):
-        if 'x' in data:
+        if "x" in data:
             data.x = torch.ones((data.x.shape[0], 1))
         else:
-            data.x= torch.ones((torch.unique(data.edge_index).shape[0],1))
+            data.x = torch.ones((torch.unique(data.edge_index).shape[0], 1))
         return data
diff --git a/autogl/module/feature/base.py b/autogl/module/feature/base.py
index 1c7b52b..94ac3bb 100644
--- a/autogl/module/feature/base.py
+++ b/autogl/module/feature/base.py
@@ -68,12 +68,13 @@ class BaseFeatureAtom:
         elif self._data_t == "nx":
             if not hasattr(data, "G") or data.G is None:
                 data.G = to_networkx(data, to_undirected=True)
-    def _adjust_to_tensor(self,data):
+
+    def _adjust_to_tensor(self, data):
         if self._data_t == "tensor":
             pass
         else:
             data_np2tensor(data)
-    
+
     def _preprocess(self, data):
         pass
 
@@ -114,7 +115,6 @@ class BaseFeatureAtom:
                     p._adjust_to_tensor(datai)
                     _dataset[i] = datai
             dataset = self._rebuild(dataset, _dataset)
-
     def transform(self, dataset, inplace=True):
         r"""transform dataset inplace or not w.r.t bool argument ``inplace``"""
@@ -131,7 +131,7 @@ class BaseFeatureAtom:
                     datai = p._transform(datai)
                     p._postprocess(datai)
                     p._adjust_to_tensor(datai)
-                    _dataset[i] = datai 
+                    _dataset[i] = datai
                 dataset = self._rebuild(dataset, _dataset)
         dataset.data = data_np2tensor(dataset.data)
         return dataset
diff --git a/autogl/module/feature/generators/base.py b/autogl/module/feature/generators/base.py
index c7f96c1..1b7bab1 100644
--- a/autogl/module/feature/generators/base.py
+++ b/autogl/module/feature/generators/base.py
@@ -4,8 +4,10 @@ from ..base import BaseFeatureAtom
 
 
 class BaseGenerator(BaseFeatureAtom):
-    def __init__(self, data_t="np", multigraph=True,**kwargs):
-        super(BaseGenerator, self).__init__(data_t=data_t, multigraph=multigraph,**kwargs)
+    def __init__(self, data_t="np", multigraph=True, **kwargs):
+        super(BaseGenerator, self).__init__(
+            data_t=data_t, multigraph=multigraph, **kwargs
+        )
 
 
 @register_feature("onehot")
diff --git a/autogl/module/feature/selectors/base.py b/autogl/module/feature/selectors/base.py
index 14d60ef..01806ee 100644
--- a/autogl/module/feature/selectors/base.py
+++ b/autogl/module/feature/selectors/base.py
@@ -3,8 +3,10 @@ import numpy as np
 
 
 class BaseSelector(BaseFeatureAtom):
-    def __init__(self, data_t="np", multigraph=False,**kwargs):
-        super(BaseSelector, self).__init__(data_t=data_t, multigraph=multigraph,**kwargs)
+    def __init__(self, data_t="np", multigraph=False, **kwargs):
+        super(BaseSelector, self).__init__(
+            data_t=data_t, multigraph=multigraph, **kwargs
+        )
         self._sel = None
 
     def _transform(self, data):
diff --git a/autogl/module/feature/subgraph/base.py b/autogl/module/feature/subgraph/base.py
index cfd695e..a85d77c 100644
--- a/autogl/module/feature/subgraph/base.py
+++ b/autogl/module/feature/subgraph/base.py
@@ -3,11 +3,12 @@ import numpy as np
 import torch
 from .. import register_feature
 
-@register_feature('subgraph')
+
+@register_feature("subgraph")
 class BaseSubgraph(BaseFeatureAtom):
-    def __init__(self, data_t="np", multigraph=True,**kwargs):
+    def __init__(self, data_t="np", multigraph=True, **kwargs):
         super(BaseSubgraph, self).__init__(
-            data_t=data_t, multigraph=multigraph, subgraph=True,**kwargs
+            data_t=data_t, multigraph=multigraph, subgraph=True, **kwargs
         )
 
     def _preprocess(self, data):
@@ -16,5 +17,3 @@ class BaseSubgraph(BaseFeatureAtom):
 
     def _postprocess(self, data):
         pass
-
-
diff --git a/autogl/module/hpo/autone.py b/autogl/module/hpo/autone.py
index d254a19..02b79d7 100644
--- a/autogl/module/hpo/autone.py
+++ b/autogl/module/hpo/autone.py
@@ -17,13 +17,15 @@ from torch_geometric.data import GraphSAINTRandomWalkSampler
 from ..feature.subgraph.nx import NxSubgraph, NxLargeCliqueSize
 from ..feature.subgraph import nx, SgNetLSD
 
-from torch_geometric.data import InMemoryDataset 
+from torch_geometric.data import InMemoryDataset
+
 
 class _MyDataset(InMemoryDataset):
     def __init__(self, datalist) -> None:
         super().__init__()
         self.data, self.slices = self.collate(datalist)
 
+
 @register_hpo("autone")
 class AutoNE(BaseHPOptimizer):
     """
@@ -73,17 +75,17 @@ class AutoNE(BaseHPOptimizer):
         )
         results = []
         for data in loader:
-            in_dataset= _MyDataset([data])
+            in_dataset = _MyDataset([data])
             results.append(in_dataset)
         return results
 
         func = SgNetLSD()
 
         def get_wne(graph):
-            graph=func.fit_transform(graph)
+            graph = func.fit_transform(graph)
             # transform = nx.NxSubgraph.compose(map(lambda x: x(), nx.NX_EXTRACTORS))
             # print(type(graph))
-            #gf = transform.fit_transform(graph).data.gf
+            # gf = transform.fit_transform(graph).data.gf
             gf = graph.data.gf
             fin = list(gf[0]) + list(map(lambda x: float(x), gf[1:]))
             return fin
diff --git a/autogl/module/hpo/suggestion/__init__.py b/autogl/module/hpo/suggestion/__init__.py
index 6dfafe0..91275c9 100644
--- a/autogl/module/hpo/suggestion/__init__.py
+++ b/autogl/module/hpo/suggestion/__init__.py
@@ -1 +1 @@
-# Files in this folder are reproduced from https://github.com/tobegit3hub/advisor with some changes.
\ No newline at end of file
+# Files in this folder are reproduced from https://github.com/tobegit3hub/advisor with some changes.
diff --git a/autogl/module/model/__init__.py b/autogl/module/model/__init__.py
index 9eb8495..e05a3a2 100644
--- a/autogl/module/model/__init__.py
+++ b/autogl/module/model/__init__.py
@@ -17,6 +17,7 @@ def register_model(name):
 
     return register_model_cls
 
+
 from .base import BaseModel
 from .topkpool import AutoTopkpool
 from .graphsage import AutoSAGE
diff --git a/autogl/module/model/gat.py b/autogl/module/model/gat.py
index 4a5a3f2..92b5dff 100644
--- a/autogl/module/model/gat.py
+++ b/autogl/module/model/gat.py
@@ -21,9 +21,22 @@ class GAT(torch.nn.Module):
         self.args = args
         self.num_layer = int(self.args["num_layers"])
 
-        missing_keys = list(set(["features_num", "num_class", "num_layers", "hidden", "heads", "dropout", "act"]) - set(self.args.keys()))
+        missing_keys = list(
+            set(
+                [
+                    "features_num",
+                    "num_class",
+                    "num_layers",
+                    "hidden",
+                    "heads",
+                    "dropout",
+                    "act",
+                ]
+            )
+            - set(self.args.keys())
+        )
         if len(missing_keys) > 0:
-            raise Exception("Missing keys: %s." % ','.join(missing_keys))
+            raise Exception("Missing keys: %s." % ",".join(missing_keys))
 
         if not self.num_layer == len(self.args["hidden"]) + 1:
             LOGGER.warn("Warning: layer size does not match the length of hidden units")
diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py
index 63e1bc4..3e6208f 100644
--- a/autogl/module/model/gcn.py
+++ b/autogl/module/model/gcn.py
@@ -21,9 +21,12 @@ class GCN(torch.nn.Module):
         self.args = args
         self.num_layer = int(self.args["num_layers"])
 
-        missing_keys = list(set(["features_num", "num_class", "num_layers", "hidden", "dropout", "act"]) - set(self.args.keys()))
+        missing_keys = list(
+            set(["features_num", "num_class", "num_layers", "hidden", "dropout", "act"])
+            - set(self.args.keys())
+        )
         if len(missing_keys) > 0:
-            raise Exception("Missing keys: %s." % ','.join(missing_keys))
+            raise Exception("Missing keys: %s." % ",".join(missing_keys))
 
         if not self.num_layer == len(self.args["hidden"]) + 1:
             LOGGER.warn("Warning: layer size does not match the length of hidden units")
diff --git a/autogl/module/model/gin.py b/autogl/module/model/gin.py
index 3caa753..6ea4390 100644
--- a/autogl/module/model/gin.py
+++ b/autogl/module/model/gin.py
@@ -25,14 +25,27 @@ class GIN(torch.nn.Module):
         self.num_layer = int(self.args["num_layers"])
         assert self.num_layer > 2, "Number of layers in GIN should not less than 3"
 
-        missing_keys = list(set(["features_num", "num_class", "num_graph_features",
-            "num_layers", "hidden", "dropout", "act",
-            "mlp_layers", "eps"]) - set(self.args.keys()))
+        missing_keys = list(
+            set(
+                [
+                    "features_num",
+                    "num_class",
+                    "num_graph_features",
+                    "num_layers",
+                    "hidden",
+                    "dropout",
+                    "act",
+                    "mlp_layers",
+                    "eps",
+                ]
+            )
+            - set(self.args.keys())
+        )
         if len(missing_keys) > 0:
-            raise Exception("Missing keys: %s." % ','.join(missing_keys))
-        if not self.num_layer == len(self.args['hidden']) + 1:
-            LOGGER.warn('Warning: layer size does not match the length of hidden units')
-        self.num_graph_features = self.args['num_graph_features']
+            raise Exception("Missing keys: %s." % ",".join(missing_keys))
+        if not self.num_layer == len(self.args["hidden"]) + 1:
+            LOGGER.warn("Warning: layer size does not match the length of hidden units")
+        self.num_graph_features = self.args["num_graph_features"]
 
         if self.args["act"] == "leaky_relu":
             act = LeakyReLU()
diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py
index 6c492a5..9802432 100644
--- a/autogl/module/model/graphsage.py
+++ b/autogl/module/model/graphsage.py
@@ -113,11 +113,23 @@ class GraphSAGE(torch.nn.Module):
         if not self.num_layer == len(self.args["hidden"]) + 1:
             LOGGER.warn("Warning: layer size does not match the length of hidden units")
 
-        missing_keys = list(set(["features_num", "num_class", "num_layers",
-            "hidden", "dropout", "act", "agg"]) - set(self.args.keys()))
+        missing_keys = list(
+            set(
+                [
+                    "features_num",
+                    "num_class",
+                    "num_layers",
+                    "hidden",
+                    "dropout",
+                    "act",
+                    "agg",
+                ]
+            )
+            - set(self.args.keys())
+        )
         if len(missing_keys) > 0:
-            raise Exception("Missing keys: %s."
% ",".join(missing_keys)) + self.convs = torch.nn.ModuleList() self.convs.append( SAGEConv(self.args["features_num"], self.args["hidden"][0], aggr=agg) diff --git a/autogl/module/model/topkpool.py b/autogl/module/model/topkpool.py index 897f7df..9fd64ef 100644 --- a/autogl/module/model/topkpool.py +++ b/autogl/module/model/topkpool.py @@ -21,10 +21,21 @@ class Topkpool(torch.nn.Module): super(Topkpool, self).__init__() self.args = args - missing_keys = list(set(["features_num", "num_class", "num_graph_features", - "ratio", "dropout", "act"]) - set(self.args.keys())) + missing_keys = list( + set( + [ + "features_num", + "num_class", + "num_graph_features", + "ratio", + "dropout", + "act", + ] + ) + - set(self.args.keys()) + ) if len(missing_keys) > 0: - raise Exception("Missing keys: %s." % ','.join(missing_keys)) + raise Exception("Missing keys: %s." % ",".join(missing_keys)) self.num_features = self.args["features_num"] self.num_classes = self.args["num_class"] diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 36fd434..87fa030 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -36,6 +36,7 @@ def register_evaluate(*name): return register_evaluate_cls + def get_feval(feval): if isinstance(feval, str): return EVALUATE_DICT[feval] diff --git a/autogl/module/train/graph_classification.py b/autogl/module/train/graph_classification.py index e365021..82644a1 100644 --- a/autogl/module/train/graph_classification.py +++ b/autogl/module/train/graph_classification.py @@ -10,7 +10,8 @@ from copy import deepcopy from ...utils import get_logger -LOGGER = get_logger('graph classification solver') +LOGGER = get_logger("graph classification solver") + def get_feval(feval): if isinstance(feval, str): @@ -233,7 +234,9 @@ class GraphClassificationTrainer(BaseTrainer): if hasattr(F, self.loss_type): loss = getattr(F, self.loss_type)(output, data.y) else: - raise TypeError("PyTorch does not support loss type {}".format(self.loss_type)) + raise TypeError( + "PyTorch does not support loss type {}".format(self.loss_type) + ) loss.backward() loss_all += data.num_graphs * loss.item() optimizer.step() diff --git a/autogl/module/train/node_classification.py b/autogl/module/train/node_classification.py index 14970f7..608f51e 100644 --- a/autogl/module/train/node_classification.py +++ b/autogl/module/train/node_classification.py @@ -11,6 +11,7 @@ from ...utils import get_logger LOGGER = get_logger("node classification trainer") + def get_feval(feval): if isinstance(feval, str): return EVALUATE_DICT[feval] @@ -208,7 +209,9 @@ class NodeClassificationTrainer(BaseTrainer): if hasattr(F, self.loss_type): loss = getattr(F, self.loss_type)(res[mask], data.y[mask]) else: - raise TypeError("PyTorch does not support loss type {}".format(self.loss_type)) + raise TypeError( + "PyTorch does not support loss type {}".format(self.loss_type) + ) loss.backward() optimizer.step() diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 2a70ae9..c459cf1 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -266,7 +266,7 @@ class AutoGraphClassifier(BaseClassifier): "Please manually pass train and val ratio." 
) LOGGER.info("Use the default train/val/test ratio in given dataset") - #if hasattr(dataset.train_split, "n_splits"): + # if hasattr(dataset.train_split, "n_splits"): # cross_validation = True elif train_split is not None and val_split is not None: From 031ed5710f5a347120959eb75154533d2b79c452 Mon Sep 17 00:00:00 2001 From: lihy96 Date: Mon, 8 Mar 2021 17:26:13 +0800 Subject: [PATCH 015/144] add lr_scheduler setting --- autogl/module/train/graph_classification.py | 20 ++++++++++++++++++-- autogl/module/train/node_classification.py | 18 ++++++++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/autogl/module/train/graph_classification.py b/autogl/module/train/graph_classification.py index 82644a1..f97f63e 100644 --- a/autogl/module/train/graph_classification.py +++ b/autogl/module/train/graph_classification.py @@ -1,6 +1,6 @@ from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping import torch -from torch.optim.lr_scheduler import StepLR +from torch.optim.lr_scheduler import StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel from .evaluate import Logloss @@ -72,6 +72,7 @@ class GraphClassificationTrainer(BaseTrainer): init=True, feval=[Logloss], loss="nll_loss", + lr_scheduler_type='steplr', *args, **kwargs ): @@ -99,6 +100,8 @@ class GraphClassificationTrainer(BaseTrainer): else: self.optimizer = torch.optim.Adam + self.lr_scheduler_type = lr_scheduler_type + self.num_features = num_features self.num_classes = num_classes self.num_graph_features = num_graph_features @@ -222,7 +225,20 @@ class GraphClassificationTrainer(BaseTrainer): optimizer = self.optimizer( self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay ) - scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + + # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + lr_scheduler_type = self.lr_scheduler_type + if type(lr_scheduler_type) == str and lr_scheduler_type == 'steplr': + scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + elif type(lr_scheduler_type) == str and lr_scheduler_type == 'multisteplr': + scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1) + elif type(lr_scheduler_type) == str and lr_scheduler_type == 'exponentiallr': + scheduler = ExponentialLR(optimizer, gamma=0.1) + elif type(lr_scheduler_type) == str and lr_scheduler_type == 'reducelronplateau': + scheduler = ReduceLROnPlateau(optimizer, 'min') + else: + scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + for epoch in range(1, self.max_epoch): self.model.model.train() loss_all = 0 diff --git a/autogl/module/train/node_classification.py b/autogl/module/train/node_classification.py index 608f51e..9f55b82 100644 --- a/autogl/module/train/node_classification.py +++ b/autogl/module/train/node_classification.py @@ -1,6 +1,6 @@ from . 
import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping import torch -from torch.optim.lr_scheduler import StepLR +from torch.optim.lr_scheduler import StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel from .evaluate import Logloss, Acc, Auc @@ -93,6 +93,8 @@ class NodeClassificationTrainer(BaseTrainer): else: self.optimizer = torch.optim.Adam + self.lr_scheduler_type = lr_scheduler_type + self.num_features = num_features self.num_classes = num_classes self.lr = lr if lr is not None else 1e-4 @@ -201,7 +203,19 @@ class NodeClassificationTrainer(BaseTrainer): optimizer = self.optimizer( self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay ) - scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + lr_scheduler_type = self.lr_scheduler_type + if type(lr_scheduler_type) == str and lr_scheduler_type == 'steplr': + scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + elif type(lr_scheduler_type) == str and lr_scheduler_type == 'multisteplr': + scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1) + elif type(lr_scheduler_type) == str and lr_scheduler_type == 'exponentiallr': + scheduler = ExponentialLR(optimizer, gamma=0.1) + elif type(lr_scheduler_type) == str and lr_scheduler_type == 'reducelronplateau': + scheduler = ReduceLROnPlateau(optimizer, 'min') + else: + scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + for epoch in range(1, self.max_epoch): self.model.model.train() optimizer.zero_grad() From 49924154fee8c5090cadcec20cc5d9effd05759a Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 11 Mar 2021 03:08:48 +0000 Subject: [PATCH 016/144] add trainer to init func --- autogl/solver/classifier/graph_classifier.py | 33 +++++++++-------- autogl/solver/classifier/node_classifier.py | 38 ++++++++++++-------- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index c459cf1..53bcd11 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -14,7 +14,7 @@ from .base import BaseClassifier from ...module.feature import FEATURE_DICT from ...module.model import MODEL_DICT from ...module.train import TRAINER_DICT, get_feval -from ...module import BaseModel +from ...module import BaseModel, GraphClassificationTrainer from ..utils import Leaderboard, set_seed from ...datasets import utils from ...utils import get_logger @@ -141,6 +141,9 @@ class AutoGraphClassifier(BaseClassifier): else kwargs["num_graph_features"] ) self.graph_model_list.append(model) + elif isinstance(model, GraphClassificationTrainer): + # receive a trainer list, put trainer to list + self.graph_model_list.append(model) else: raise KeyError("cannot find graph network %s." 
% (model)) else: @@ -154,24 +157,26 @@ class AutoGraphClassifier(BaseClassifier): for i, model in enumerate(self.graph_model_list): if self._model_hp_spaces is not None: if self._model_hp_spaces[i] is not None: - model.hyper_parameter_space = self._model_hp_spaces[i] - trainer = TRAINER_DICT["GraphClassification"]( - model=model, - num_features=num_features, - num_classes=num_classes, - *args, - **kwargs, - init=False, - ) + if isinstance(model, GraphClassificationTrainer): + model.model.hyper_parameter_space = self._model_hp_spaces[i] + else: + model.hyper_parameter_space = self._model_hp_spaces[i] + if isinstance(model, BaseModel): + model = TRAINER_DICT["GraphClassification"]( + model=model, + num_features=num_features, + num_classes=num_classes, + *args, + **kwargs, + init=False, + ) if self._trainer_hp_space is not None: if isinstance(self._trainer_hp_space[0], list): current_hp_for_trainer = self._trainer_hp_space[i] else: current_hp_for_trainer = self._trainer_hp_space - trainer.hyper_parameter_space = ( - current_hp_for_trainer + model.hyper_parameter_space - ) - self.graph_model_list[i] = trainer + model.hyper_parameter_space = current_hp_for_trainer + self.graph_model_list[i] = model return self diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 054581e..1a360b5 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -14,7 +14,7 @@ from .base import BaseClassifier from ...module.feature import FEATURE_DICT from ...module.model import MODEL_DICT from ...module.train import TRAINER_DICT, get_feval -from ...module import BaseModel +from ...module import BaseModel, NodeClassificationTrainer from ..utils import Leaderboard, set_seed from ...datasets import utils from ...utils import get_logger @@ -139,6 +139,9 @@ class AutoNodeClassifier(BaseClassifier): model.set_num_classes(num_classes) model.set_num_features(num_features) self.graph_model_list.append(model) + elif isinstance(model, NodeClassificationTrainer): + # receive a trainer list, put trainer to list + self.graph_model_list.append(model) else: raise KeyError("cannot find graph network %s." 
% (model)) else: @@ -147,29 +150,34 @@ class AutoNodeClassifier(BaseClassifier): graph_models, "instead.", ) - + # wrap all model_cls with specified trainer for i, model in enumerate(self.graph_model_list): + # set model hp space if self._model_hp_spaces is not None: if self._model_hp_spaces[i] is not None: - model.hyper_parameter_space = self._model_hp_spaces[i] - trainer = TRAINER_DICT["NodeClassification"]( - model=model, - num_features=num_features, - num_classes=num_classes, - *args, - **kwargs, - init=False, - ) + if isinstance(model, NodeClassificationTrainer): + model.model.hyper_parameter_space = self._model_hp_spaces[i] + else: + model.hyper_parameter_space = self._model_hp_spaces[i] + # initialize trainer if needed + if isinstance(model, BaseModel): + model = TRAINER_DICT["NodeClassification"]( + model=model, + num_features=num_features, + num_classes=num_classes, + *args, + **kwargs, + init=False, + ) + # set trainer hp space if self._trainer_hp_space is not None: if isinstance(self._trainer_hp_space[0], list): current_hp_for_trainer = self._trainer_hp_space[i] else: current_hp_for_trainer = self._trainer_hp_space - trainer.hyper_parameter_space = ( - current_hp_for_trainer + model.hyper_parameter_space - ) - self.graph_model_list[i] = trainer + model.hyper_parameter_space = current_hp_for_trainer + self.graph_model_list[i] = model return self From 0e0c5528b327172d311501b4000d5281cfa1fbc0 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 14 Mar 2021 03:51:09 +0000 Subject: [PATCH 017/144] add trainer when loading configs --- autogl/solver/base.py | 59 ++++++-- autogl/solver/classifier/base.py | 91 +++++++++++- autogl/solver/classifier/graph_classifier.py | 146 ++++-------------- autogl/solver/classifier/node_classifier.py | 147 +++++-------------- 4 files changed, 201 insertions(+), 242 deletions(-) diff --git a/autogl/solver/base.py b/autogl/solver/base.py index fbe1584..8f2f8c0 100644 --- a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -10,6 +10,7 @@ import torch from ..module.feature import FEATURE_DICT from ..module.hpo import HPO_DICT +from ..module.model import MODEL_DICT from ..module.train import NodeClassificationTrainer from ..module import BaseFeatureAtom, BaseHPOptimizer, BaseTrainer from .utils import Leaderboard @@ -18,8 +19,23 @@ from ..utils import get_logger LOGGER = get_logger("BaseSolver") +def _initialize_single_model(model_name, parameters=None): + if parameters: + return MODEL_DICT[model_name](**parameters) + return MODEL_DICT[model_name]() + + +def _parse_hp_space(spaces): + if spaces is None: + return None + for space in spaces: + if "cutFunc" in space and isinstance(space["cutFunc"], str): + space["cutFunc"] = eval(space["cutFunc"]) + return spaces + + class BaseSolver: - """ + r""" Base solver class, define some standard solver interfaces. Parameters @@ -43,6 +59,12 @@ class BaseSolver: If given, will set the number eval times the hpo module will use. Only be effective when hpo_module is of type ``str``. Default ``50``. + default_trainer: str or list of str (Optional) + Default trainer class to be used. + If a single trainer class is given, will set all trainer to default trainer. + If a list of trainer class is given, will set every model with corresponding trainer + cls. Default ``None``. + trainer_hp_space: list of dict (Optional) trainer hp space or list of trainer hp spaces configuration. If a single trainer hp is given, will specify the hp space of trainer for every model. 
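
The `default_trainer` and `trainer_hp_space` parameters documented above are easiest to read next to a concrete call. A minimal sketch of the intended usage, assuming the usual `autogl.solver` import path for the `AutoNodeClassifier` entry point this series modifies; the hyperparameter values are illustrative only:

    from autogl.solver import AutoNodeClassifier

    solver = AutoNodeClassifier(
        graph_models=["gcn", "gat"],
        default_trainer="NodeClassification",  # one trainer class reused for every model
        trainer_hp_space=[
            {
                "parameterName": "max_epoch",  # illustrative trainer hyperparameter
                "type": "INTEGER",
                "maxValue": 300,
                "minValue": 10,
                "scalingType": "LINEAR",
            }
        ],
        model_hp_spaces=None,  # keep each model's default search space
    )
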
@@ -71,6 +93,7 @@ class BaseSolver: hpo_module, ensemble_module, max_evals=50, + default_trainer=None, trainer_hp_space=None, model_hp_spaces=None, size=4, @@ -92,7 +115,9 @@ class BaseSolver: # initialize modules self.graph_model_list = [] - self.set_graph_models(graph_models, trainer_hp_space, model_hp_spaces) + self.set_graph_models( + graph_models, default_trainer, trainer_hp_space, model_hp_spaces + ) self.set_feature_module(feature_module) self.set_hpo_module(hpo_module, max_evals=max_evals) self.set_ensemble_module(ensemble_module, size=size) @@ -109,7 +134,7 @@ class BaseSolver: *args, **kwargs, ) -> "BaseSolver": - """ + r""" Set the feature module of current solver. Parameters @@ -159,10 +184,11 @@ class BaseSolver: def set_graph_models( self, graph_models, + default_trainer=None, trainer_hp_space=None, model_hp_spaces=None, ) -> "BaseSolver": - """ + r""" Set the graph models used in current solver. Parameters @@ -170,6 +196,12 @@ class BaseSolver: graph_models: list of autogl.module.model.BaseModel or list of str The (name of) models to be optimized as backbone. + default_trainer: str or list of str (Optional) + Default trainer class to be used. + If a single trainer class is given, will set all trainer to default trainer. + If a list of trainer class is given, will set every model with corresponding trainer + cls. Default ``None``. + trainer_hp_space: list of dict (Optional) trainer hp space or list of trainer hp spaces configuration. If a single trainer hp is given, will specify the hp space of trainer for every model. @@ -187,12 +219,13 @@ class BaseSolver: A reference of current solver. """ self.gml = graph_models + self._default_trainer = default_trainer self._trainer_hp_space = trainer_hp_space self._model_hp_spaces = model_hp_spaces return self def set_hpo_module(self, hpo_module, *args, **kwargs) -> "BaseSolver": - """ + r""" Set the hpo module used in current solver. Parameters @@ -225,7 +258,7 @@ class BaseSolver: ) def set_ensemble_module(self, ensemble_module, *args, **kwargs) -> "BaseSolver": - """ + r""" Set the ensemble module used in current solver. Parameters @@ -243,7 +276,7 @@ class BaseSolver: raise NotImplementedError() def fit(self, *args, **kwargs) -> "BaseSolver": - """ + r""" Fit current solver on given dataset. Returns @@ -254,7 +287,7 @@ class BaseSolver: raise NotImplementedError() def fit_predict(self, *args, **kwargs) -> Any: - """ + r""" Fit current solver on given dataset and return the predicted value. Returns @@ -265,7 +298,7 @@ class BaseSolver: raise NotImplementedError() def predict(self, *args, **kwargs) -> Any: - """ + r""" Predict the node class number. Returns @@ -276,7 +309,7 @@ class BaseSolver: raise NotImplementedError() def get_leaderboard(self) -> Leaderboard: - """ + r""" Get the current leaderboard of this solver. Returns @@ -287,7 +320,7 @@ class BaseSolver: return self.leaderboard def get_model_by_name(self, name) -> BaseTrainer: - """ + r""" Find and get the model instance by name. Parameters @@ -304,7 +337,7 @@ class BaseSolver: return self.trained_models[name] def get_model_by_performance(self, index) -> Tuple[NodeClassificationTrainer, str]: - """ + r""" Find and get the model instance by performance. Parameters @@ -324,7 +357,7 @@ class BaseSolver: @classmethod def from_config(cls, path_or_dict, filetype="auto") -> "BaseSolver": - """ + r""" Load solver from config file. 
You can use this function to directly load a solver from predefined config dict diff --git a/autogl/solver/classifier/base.py b/autogl/solver/classifier/base.py index 96f84a3..32f740e 100644 --- a/autogl/solver/classifier/base.py +++ b/autogl/solver/classifier/base.py @@ -5,7 +5,9 @@ Base solver for classification problems from typing import Any from ..base import BaseSolver from ...module.ensemble import ENSEMBLE_DICT -from ...module import BaseEnsembler +from ...module.train import TRAINER_DICT +from ...module.model import MODEL_DICT +from ...module import BaseEnsembler, BaseModel, BaseTrainer class BaseClassifier(BaseSolver): @@ -13,6 +15,93 @@ class BaseClassifier(BaseSolver): Base solver for classification problems """ + def _init_graph_module( + self, + graph_models, + num_classes, + num_features, + *args, + **kwargs, + ) -> "BaseClassifier": + # load graph network module + self.graph_model_list = [] + if isinstance(graph_models, list): + for model in graph_models: + if isinstance(model, str): + if model in MODEL_DICT: + self.graph_model_list.append( + MODEL_DICT[model]( + num_classes=num_classes, + num_features=num_features, + *args, + **kwargs, + init=False, + ) + ) + else: + raise KeyError("cannot find model %s" % (model)) + elif isinstance(model, type) and issubclass(model, BaseModel): + self.graph_model_list.append( + model( + num_classes=num_classes, + num_features=num_features, + *args, + **kwargs, + init=False, + ) + ) + elif isinstance(model, BaseModel): + # setup the hp of num_classes and num_features + model.set_num_classes(num_classes) + model.set_num_features(num_features) + self.graph_model_list.append(model) + elif isinstance(model, BaseTrainer): + # receive a trainer list, put trainer to list + self.graph_model_list.append(model) + else: + raise KeyError("cannot find graph network %s." % (model)) + else: + raise ValueError( + "need graph network to be (list of) str or a BaseModel class/instance, get", + graph_models, + "instead.", + ) + + # wrap all model_cls with specified trainer + for i, model in enumerate(self.graph_model_list): + # set model hp space + if self._model_hp_spaces is not None: + if self._model_hp_spaces[i] is not None: + if isinstance(model, BaseTrainer): + model.model.hyper_parameter_space = self._model_hp_spaces[i] + else: + model.hyper_parameter_space = self._model_hp_spaces[i] + # initialize trainer if needed + if isinstance(model, BaseModel): + name = ( + self._default_trainer + if isinstance(self._default_trainer) + else self._default_trainer[i] + ) + model = TRAINER_DICT[name]( + model=model, + num_features=num_features, + num_classes=num_classes, + *args, + **kwargs, + init=False, + ) + # set trainer hp space + if self._trainer_hp_space is not None: + if isinstance(self._trainer_hp_space[0], list): + current_hp_for_trainer = self._trainer_hp_space[i] + else: + current_hp_for_trainer = self._trainer_hp_space + model.hyper_parameter_space = current_hp_for_trainer + self.graph_model_list[i] = model + + return self + def predict_proba(self, *args, **kwargs) -> Any: """ Predict the node probability. 
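
The consolidated `_init_graph_module` above normalizes four kinds of entries in `graph_models`. A hedged sketch of what each accepted form looks like from the caller's side, using the `MODEL_DICT`/`TRAINER_DICT` registries imported at the top of this file (constructor arguments are abbreviated here, not the full signatures):

    from autogl.module.model import MODEL_DICT
    from autogl.module.train import TRAINER_DICT

    gcn_cls = MODEL_DICT["gcn"]
    graph_models = [
        "gcn",                               # str: looked up in MODEL_DICT
        gcn_cls,                             # class: instantiated with init=False
        gcn_cls(init=False),                 # instance: num_classes/num_features set later
        TRAINER_DICT["NodeClassification"](  # trainer: appended to the list as-is
            model=gcn_cls(init=False), init=False
        ),
    ]
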
diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 53bcd11..f7efedf 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -12,9 +12,8 @@ import yaml from .base import BaseClassifier from ...module.feature import FEATURE_DICT -from ...module.model import MODEL_DICT -from ...module.train import TRAINER_DICT, get_feval -from ...module import BaseModel, GraphClassificationTrainer +from ...module.train import get_feval +from ..base import _initialize_single_model, _parse_hp_space from ..utils import Leaderboard, set_seed from ...datasets import utils from ...utils import get_logger @@ -77,6 +76,7 @@ class AutoGraphClassifier(BaseClassifier): hpo_module="anneal", ensemble_module="voting", max_evals=50, + default_trainer=None, trainer_hp_space=None, model_hp_spaces=None, size=4, @@ -89,6 +89,7 @@ class AutoGraphClassifier(BaseClassifier): hpo_module=hpo_module, ensemble_module=ensemble_module, max_evals=max_evals, + default_trainer=default_trainer or "GraphClassification", trainer_hp_space=trainer_hp_space, model_hp_spaces=model_hp_spaces, size=size, @@ -97,89 +98,6 @@ class AutoGraphClassifier(BaseClassifier): self.dataset = None - def _init_graph_module( - self, - graph_models, - num_features, - num_classes, - *args, - **kwargs, - ) -> "AutoGraphClassifier": - # load graph network module - self.graph_model_list = [] - if isinstance(graph_models, list): - for model in graph_models: - if isinstance(model, str): - if model in MODEL_DICT: - self.graph_model_list.append( - MODEL_DICT[model]( - num_features=num_features, - num_classes=num_classes, - *args, - **kwargs, - init=False, - ) - ) - else: - raise KeyError("cannot find model %s" % (model)) - elif isinstance(model, type) and issubclass(model, BaseModel): - self.graph_model_list.append( - model( - num_features=num_features, - num_classes=num_classes, - *args, - **kwargs, - init=False, - ) - ) - elif isinstance(model, BaseModel): - model.set_num_features(num_features) - model.set_num_classes(num_classes) - model.set_num_graph_features( - 0 - if "num_graph_features" not in kwargs - else kwargs["num_graph_features"] - ) - self.graph_model_list.append(model) - elif isinstance(model, GraphClassificationTrainer): - # receive a trainer list, put trainer to list - self.graph_model_list.append(model) - else: - raise KeyError("cannot find graph network %s." 
% (model)) - else: - raise ValueError( - "need graph network to be str or a BaseModel class/instance, get", - graph_models, - "instead.", - ) - - # wrap all model_cls with specified trainer - for i, model in enumerate(self.graph_model_list): - if self._model_hp_spaces is not None: - if self._model_hp_spaces[i] is not None: - if isinstance(model, GraphClassificationTrainer): - model.model.hyper_parameter_space = self._model_hp_spaces[i] - else: - model.hyper_parameter_space = self._model_hp_spaces[i] - if isinstance(model, BaseModel): - model = TRAINER_DICT["GraphClassification"]( - model=model, - num_features=num_features, - num_classes=num_classes, - *args, - **kwargs, - init=False, - ) - if self._trainer_hp_space is not None: - if isinstance(self._trainer_hp_space[0], list): - current_hp_for_trainer = self._trainer_hp_space[i] - else: - current_hp_for_trainer = self._trainer_hp_space - model.hyper_parameter_space = current_hp_for_trainer - self.graph_model_list[i] = model - - return self - # pylint: disable=arguments-differ def fit( self, @@ -728,7 +646,7 @@ class AutoGraphClassifier(BaseClassifier): # load the dictionary path_or_dict = deepcopy(path_or_dict) solver = cls(None, [], None, None) - fe_list = path_or_dict.pop("feature", [{"name": "deepgl"}]) + fe_list = path_or_dict.pop("feature", None) if fe_list is not None: fe_list_ele = [] for feature_engineer in fe_list: @@ -738,33 +656,33 @@ class AutoGraphClassifier(BaseClassifier): if fe_list_ele != []: solver.set_feature_module(fe_list_ele) - models = path_or_dict.pop("models", {"gcn": None, "gat": None}) - model_list = list(models.keys()) - model_hp_space = [models[m] for m in model_list] - trainer_space = path_or_dict.pop("trainer", None) - - # parse lambda function - if model_hp_space: - for space in model_hp_space: - if space is not None: - for keys in space: - if "cutFunc" in keys and isinstance(keys["cutFunc"], str): - keys["cutFunc"] = eval(keys["cutFunc"]) - - if trainer_space: - for space in trainer_space: - if ( - isinstance(space, dict) - and "cutFunc" in space - and isinstance(space["cutFunc"], str) - ): - space["cutFunc"] = eval(space["cutFunc"]) - elif space is not None: - for keys in space: - if "cutFunc" in keys and isinstance(keys["cutFunc"], str): - keys["cutFunc"] = eval(keys["cutFunc"]) - - solver.set_graph_models(model_list, trainer_space, model_hp_space) + models = path_or_dict.pop("models", [{"name": "gin"}, {"name": "topkpool"}]) + model_hp_space = [ + _parse_hp_space(model.pop("hp_space", None)) for model in models + ] + model_list = [ + _initialize_single_model(model.pop("name"), model) for model in models + ] + + trainer = path_or_dict.pop("trainer", None) + default_trainer = "GraphClassification" + trainer_space = None + if isinstance(trainer, dict): + # global default + default_trainer = trainer.pop("name", "GraphClassification") + trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) + elif isinstance(trainer, list): + # sequential trainer definition + default_trainer = [ + train.pop("name", "GraphClassification") for train in trainer + ] + trainer_space = [ + _parse_hp_space(train.pop("hp_space", None)) for train in trainer + ] + + solver.set_graph_models( + model_list, default_trainer, trainer_space, model_hp_space + ) hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) if hpo_dict is not None: diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 1a360b5..25d9560 100644 --- a/autogl/solver/classifier/node_classifier.py +++ 
b/autogl/solver/classifier/node_classifier.py @@ -11,10 +11,9 @@ import numpy as np import yaml from .base import BaseClassifier +from ..base import _parse_hp_space, _initialize_single_model from ...module.feature import FEATURE_DICT -from ...module.model import MODEL_DICT -from ...module.train import TRAINER_DICT, get_feval -from ...module import BaseModel, NodeClassificationTrainer +from ...module.train import get_feval from ..utils import Leaderboard, set_seed from ...datasets import utils from ...utils import get_logger @@ -73,11 +72,12 @@ class AutoNodeClassifier(BaseClassifier): def __init__( self, - feature_module="deepgl", + feature_module=None, graph_models=["gat", "gcn"], hpo_module="anneal", ensemble_module="voting", max_evals=50, + default_trainer=None, trainer_hp_space=None, model_hp_spaces=None, size=4, @@ -90,6 +90,7 @@ class AutoNodeClassifier(BaseClassifier): hpo_module=hpo_module, ensemble_module=ensemble_module, max_evals=max_evals, + default_trainer=default_trainer or "NodeClassification", trainer_hp_space=trainer_hp_space, model_hp_spaces=model_hp_spaces, size=size, @@ -99,88 +100,6 @@ class AutoNodeClassifier(BaseClassifier): # data to be kept when fit self.data = None - def _init_graph_module( - self, - graph_models, - num_classes, - num_features, - *args, - **kwargs, - ) -> "AutoNodeClassifier": - # load graph network module - self.graph_model_list = [] - if isinstance(graph_models, list): - for model in graph_models: - if isinstance(model, str): - if model in MODEL_DICT: - self.graph_model_list.append( - MODEL_DICT[model]( - num_classes=num_classes, - num_features=num_features, - *args, - **kwargs, - init=False, - ) - ) - else: - raise KeyError("cannot find model %s" % (model)) - elif isinstance(model, type) and issubclass(model, BaseModel): - self.graph_model_list.append( - model( - num_classes=num_classes, - num_features=num_features, - *args, - **kwargs, - init=False, - ) - ) - elif isinstance(model, BaseModel): - # setup the hp of num_classes and num_features - model.set_num_classes(num_classes) - model.set_num_features(num_features) - self.graph_model_list.append(model) - elif isinstance(model, NodeClassificationTrainer): - # receive a trainer list, put trainer to list - self.graph_model_list.append(model) - else: - raise KeyError("cannot find graph network %s." 
% (model)) - else: - raise ValueError( - "need graph network to be (list of) str or a BaseModel class/instance, get", - graph_models, - "instead.", - ) - - # wrap all model_cls with specified trainer - for i, model in enumerate(self.graph_model_list): - # set model hp space - if self._model_hp_spaces is not None: - if self._model_hp_spaces[i] is not None: - if isinstance(model, NodeClassificationTrainer): - model.model.hyper_parameter_space = self._model_hp_spaces[i] - else: - model.hyper_parameter_space = self._model_hp_spaces[i] - # initialize trainer if needed - if isinstance(model, BaseModel): - model = TRAINER_DICT["NodeClassification"]( - model=model, - num_features=num_features, - num_classes=num_classes, - *args, - **kwargs, - init=False, - ) - # set trainer hp space - if self._trainer_hp_space is not None: - if isinstance(self._trainer_hp_space[0], list): - current_hp_for_trainer = self._trainer_hp_space[i] - else: - current_hp_for_trainer = self._trainer_hp_space - model.hyper_parameter_space = current_hp_for_trainer - self.graph_model_list[i] = model - - return self - # pylint: disable=arguments-differ def fit( self, @@ -658,7 +577,7 @@ class AutoNodeClassifier(BaseClassifier): path_or_dict = deepcopy(path_or_dict) solver = cls(None, [], None, None) - fe_list = path_or_dict.pop("feature", [{"name": "deepgl"}]) + fe_list = path_or_dict.pop("feature", None) if fe_list is not None: fe_list_ele = [] for feature_engineer in fe_list: @@ -668,33 +587,33 @@ class AutoNodeClassifier(BaseClassifier): if fe_list_ele != []: solver.set_feature_module(fe_list_ele) - models = path_or_dict.pop("models", {"gcn": None, "gat": None}) - model_list = list(models.keys()) - model_hp_space = [models[m] for m in model_list] - trainer_space = path_or_dict.pop("trainer", None) - - if model_hp_space: - # parse lambda function - for space in model_hp_space: - if space is not None: - for keys in space: - if "cutFunc" in keys and isinstance(keys["cutFunc"], str): - keys["cutFunc"] = eval(keys["cutFunc"]) - - if trainer_space: - for space in trainer_space: - if ( - isinstance(space, dict) - and "cutFunc" in space - and isinstance(space["cutFunc"], str) - ): - space["cutFunc"] = eval(space["cutFunc"]) - elif space is not None: - for keys in space: - if "cutFunc" in keys and isinstance(keys["cutFunc"], str): - keys["cutFunc"] = eval(keys["cutFunc"]) - - solver.set_graph_models(model_list, trainer_space, model_hp_space) + models = path_or_dict.pop("models", [{"name": "gcn"}, {"name": "gat"}]) + model_hp_space = [ + _parse_hp_space(model.pop("hp_space", None)) for model in models + ] + model_list = [ + _initialize_single_model(model.pop("name"), model) for model in models + ] + + trainer = path_or_dict.pop("trainer", None) + default_trainer = "NodeClassification" + trainer_space = None + if isinstance(trainer, dict): + # global default + default_trainer = trainer.pop("name", "NodeClassification") + trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) + elif isinstance(trainer, list): + # sequential trainer definition + default_trainer = [ + train.pop("name", "NodeClassification") for train in trainer + ] + trainer_space = [ + _parse_hp_space(train.pop("hp_space", None)) for train in trainer + ] + + solver.set_graph_models( + model_list, default_trainer, trainer_space, model_hp_space + ) hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) if hpo_dict is not None: From c2a589615e82849f80a5b5bde4ab06d46e99f1a3 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 14 Mar 2021 09:00:43 +0000 Subject: [PATCH 
018/144] fix device inconsistency --- autogl/module/model/base.py | 5 +++++ autogl/solver/base.py | 2 +- autogl/solver/classifier/base.py | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index 4025bda..6e6d8c7 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -43,6 +43,11 @@ class BaseModel(torch.nn.Module): def forward(self): pass + def to(self, device): + if isinstance(device, (str, torch.device)): + self.device = device + return super().to(device) + def from_hyper_parameter(self, hp): ret_self = self.__class__( num_features=self.num_features, diff --git a/autogl/solver/base.py b/autogl/solver/base.py index 8f2f8c0..421306f 100644 --- a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -110,7 +110,7 @@ class BaseSolver: elif isinstance(device, str) and (device == "cpu" or device.startswith("cuda")): self.runtime_device = torch.device(device) else: - LOGGER.error("Cannor parse device %s", str(device)) + LOGGER.error("Cannot parse device %s", str(device)) raise ValueError("Cannot parse device {}".format(device)) # initialize modules diff --git a/autogl/solver/classifier/base.py b/autogl/solver/classifier/base.py index 32f740e..2bb8521 100644 --- a/autogl/solver/classifier/base.py +++ b/autogl/solver/classifier/base.py @@ -54,7 +54,7 @@ class BaseClassifier(BaseSolver): # setup the hp of num_classes and num_features model.set_num_classes(num_classes) model.set_num_features(num_features) - self.graph_model_list.append(model) + self.graph_model_list.append(model.to(self.runtime_device)) elif isinstance(model, BaseTrainer): # receive a trainer list, put trainer to list self.graph_model_list.append(model) @@ -80,7 +80,7 @@ class BaseClassifier(BaseSolver): if isinstance(model, BaseModel): name = ( self._default_trainer - if isinstance(self._default_trainer) + if isinstance(self._default_trainer, str) else self._default_trainer[i] ) model = TRAINER_DICT[name]( From dbaa4dbd0f86372aa3a65b2aeb123a858c1c7188 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 14 Mar 2021 09:05:43 +0000 Subject: [PATCH 019/144] fix hp space setting --- autogl/module/hpo/advisorbase.py | 4 ++-- autogl/module/hpo/autone.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autogl/module/hpo/advisorbase.py b/autogl/module/hpo/advisorbase.py index 22a9712..5bdb5cb 100644 --- a/autogl/module/hpo/advisorbase.py +++ b/autogl/module/hpo/advisorbase.py @@ -43,7 +43,7 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): self.xs = [] self.best_id = None self.best_trainer = None - space = trainer.hyper_parameter_space + space = trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space current_config = self._encode_para(space) for i in range(slaves): @@ -129,7 +129,7 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): self.feval_name = trainer.get_feval(return_major=True).get_eval_name() self.is_higher_better = trainer.get_feval(return_major=True).is_higher_better() - space = trainer.hyper_parameter_space + space = trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space current_space = self._encode_para(space) self._setUp(current_space) diff --git a/autogl/module/hpo/autone.py b/autogl/module/hpo/autone.py index 02b79d7..1021f4d 100644 --- a/autogl/module/hpo/autone.py +++ b/autogl/module/hpo/autone.py @@ -61,7 +61,7 @@ class AutoNE(BaseHPOptimizer): """ self.feval_name = trainer.get_feval(return_major=True).get_eval_name() self.is_higher_better = 
trainer.get_feval(return_major=True).is_higher_better()
-        space = trainer.hyper_parameter_space + trainer.model.hyper_parameter_space
+        space = trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space
         current_space = self._encode_para(space)

         def sample_subgraph(whole_data):

From 4f20b5d956ccba52b258bd28c5373b66854fa0b1 Mon Sep 17 00:00:00 2001
From: lihy96
Date: Sun, 14 Mar 2021 18:22:25 +0800
Subject: [PATCH 020/144] rm model hp space from trainer hp space

---
 autogl/module/train/graph_classification.py | 8 ++++----
 autogl/module/train/node_classification.py  | 8 +++++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/autogl/module/train/graph_classification.py b/autogl/module/train/graph_classification.py
index f97f63e..3dc16e7 100644
--- a/autogl/module/train/graph_classification.py
+++ b/autogl/module/train/graph_classification.py
@@ -170,7 +170,7 @@ class GraphClassificationTrainer(BaseTrainer):
                 "scalingType": "LOG",
             },
         ]
-        self.space += self.model.space
+        # self.space += self.model.space
         GraphClassificationTrainer.space = self.space

         self.hyperparams = {
@@ -237,7 +237,7 @@ class GraphClassificationTrainer(BaseTrainer):
         elif type(lr_scheduler_type) == str and lr_scheduler_type == 'reducelronplateau':
             scheduler = ReduceLROnPlateau(optimizer, 'min')
         else:
-            scheduler = StepLR(optimizer, step_size=100, gamma=0.1)
+            scheduler = None

         for epoch in range(1, self.max_epoch):
             self.model.model.train()
@@ -256,8 +256,8 @@ class GraphClassificationTrainer(BaseTrainer):
                 loss.backward()
                 loss_all += data.num_graphs * loss.item()
                 optimizer.step()
-            scheduler.step()
-
+            if scheduler is not None:
+                scheduler.step()
             # loss = loss_all / len(train_loader.dataset)
             # train_loss = self.evaluate(train_loader)
             eval_func = (
diff --git a/autogl/module/train/node_classification.py b/autogl/module/train/node_classification.py
index 9f55b82..de50ca5 100644
--- a/autogl/module/train/node_classification.py
+++ b/autogl/module/train/node_classification.py
@@ -69,6 +69,7 @@ class NodeClassificationTrainer(BaseTrainer):
         init=True,
         feval=[Logloss],
         loss="nll_loss",
+        lr_scheduler_type='steplr',
         *args,
         **kwargs
     ):
@@ -153,7 +154,7 @@ class NodeClassificationTrainer(BaseTrainer):
                 "scalingType": "LOG",
             },
         ]
-        self.space += self.model.space
+        # self.space += self.model.space
         NodeClassificationTrainer.space = self.space

         self.hyperparams = {
@@ -214,7 +215,7 @@ class NodeClassificationTrainer(BaseTrainer):
         elif type(lr_scheduler_type) == str and lr_scheduler_type == 'reducelronplateau':
             scheduler = ReduceLROnPlateau(optimizer, 'min')
         else:
-            scheduler = StepLR(optimizer, step_size=100, gamma=0.1)
+            scheduler = None

         for epoch in range(1, self.max_epoch):
             self.model.model.train()
@@ -229,7 +230,8 @@ class NodeClassificationTrainer(BaseTrainer):

             loss.backward()
             optimizer.step()
-            scheduler.step()
+            if scheduler is not None:
+                scheduler.step()

             if type(self.feval) is list:
                 feval = self.feval[0]

From d19bc12a222b568fcd20a84ef63f2e05ecac155d Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sun, 14 Mar 2021 13:07:33 +0000
Subject: [PATCH 021/144] customize init process

---
 autogl/solver/classifier/base.py             |  91 +---------------
 autogl/solver/classifier/graph_classifier.py | 105 ++++++++++++++++++-
 autogl/solver/classifier/node_classifier.py  |  97 +++++++++++++++++
 3 files changed, 202 insertions(+), 91 deletions(-)

diff --git a/autogl/solver/classifier/base.py b/autogl/solver/classifier/base.py
index 2bb8521..96f84a3 100644
--- a/autogl/solver/classifier/base.py
+++ b/autogl/solver/classifier/base.py
@@
-5,9 +5,7 @@ Base solver for classification problems from typing import Any from ..base import BaseSolver from ...module.ensemble import ENSEMBLE_DICT -from ...module.train import TRAINER_DICT -from ...module.model import MODEL_DICT -from ...module import BaseEnsembler, BaseModel, BaseTrainer +from ...module import BaseEnsembler class BaseClassifier(BaseSolver): @@ -15,93 +13,6 @@ class BaseClassifier(BaseSolver): Base solver for classification problems """ - def _init_graph_module( - self, - graph_models, - num_classes, - num_features, - *args, - **kwargs, - ) -> "BaseClassifier": - # load graph network module - self.graph_model_list = [] - if isinstance(graph_models, list): - for model in graph_models: - if isinstance(model, str): - if model in MODEL_DICT: - self.graph_model_list.append( - MODEL_DICT[model]( - num_classes=num_classes, - num_features=num_features, - *args, - **kwargs, - init=False, - ) - ) - else: - raise KeyError("cannot find model %s" % (model)) - elif isinstance(model, type) and issubclass(model, BaseModel): - self.graph_model_list.append( - model( - num_classes=num_classes, - num_features=num_features, - *args, - **kwargs, - init=False, - ) - ) - elif isinstance(model, BaseModel): - # setup the hp of num_classes and num_features - model.set_num_classes(num_classes) - model.set_num_features(num_features) - self.graph_model_list.append(model.to(self.runtime_device)) - elif isinstance(model, BaseTrainer): - # receive a trainer list, put trainer to list - self.graph_model_list.append(model) - else: - raise KeyError("cannot find graph network %s." % (model)) - else: - raise ValueError( - "need graph network to be (list of) str or a BaseModel class/instance, get", - graph_models, - "instead.", - ) - - # wrap all model_cls with specified trainer - for i, model in enumerate(self.graph_model_list): - # set model hp space - if self._model_hp_spaces is not None: - if self._model_hp_spaces[i] is not None: - if isinstance(model, BaseTrainer): - model.model.hyper_parameter_space = self._model_hp_spaces[i] - else: - model.hyper_parameter_space = self._model_hp_spaces[i] - # initialize trainer if needed - if isinstance(model, BaseModel): - name = ( - self._default_trainer - if isinstance(self._default_trainer, str) - else self._default_trainer[i] - ) - model = TRAINER_DICT[name]( - model=model, - num_features=num_features, - num_classes=num_classes, - *args, - **kwargs, - init=False, - ) - # set trainer hp space - if self._trainer_hp_space is not None: - if isinstance(self._trainer_hp_space[0], list): - current_hp_for_trainer = self._trainer_hp_space[i] - else: - current_hp_for_trainer = self._trainer_hp_space - model.hyper_parameter_space = current_hp_for_trainer - self.graph_model_list[i] = model - - return self - def predict_proba(self, *args, **kwargs) -> Any: """ Predict the node probability. 
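
With the shared helper removed here, trainer wiring moves into the task-specific `_init_graph_module` re-implementations that follow, which also reconfigure any user-supplied trainer in place (feval, loss type, device, feature counts). A hedged sketch of the calling pattern this enables, with illustrative constructor arguments:

    from autogl.module.train import NodeClassificationTrainer
    from autogl.solver import AutoNodeClassifier

    trainer = NodeClassificationTrainer(
        model="gcn",                 # assumed: the trainer resolves names via MODEL_DICT
        lr_scheduler_type="steplr",  # scheduler switch introduced in patches 015/020
        init=False,
    )
    # num_features, num_classes, feval, loss and device are filled in by
    # _init_graph_module when the solver is fit, so they can be omitted here.
    solver = AutoNodeClassifier(graph_models=[trainer])
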
diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index f7efedf..4ad0987 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -12,7 +12,8 @@ import yaml from .base import BaseClassifier from ...module.feature import FEATURE_DICT -from ...module.train import get_feval +from ...module.model import BaseModel, MODEL_DICT +from ...module.train import TRAINER_DICT, get_feval, GraphClassificationTrainer from ..base import _initialize_single_model, _parse_hp_space from ..utils import Leaderboard, set_seed from ...datasets import utils @@ -98,6 +99,108 @@ class AutoGraphClassifier(BaseClassifier): self.dataset = None + def _init_graph_module( + self, + graph_models, + num_classes, + num_features, + feval, + device, + loss, + num_graph_features + ) -> "AutoGraphClassifier": + # load graph network module + self.graph_model_list = [] + if isinstance(graph_models, list): + for model in graph_models: + if isinstance(model, str): + if model in MODEL_DICT: + self.graph_model_list.append( + MODEL_DICT[model]( + num_classes=num_classes, + num_features=num_features, + num_graph_features=num_graph_features, + device=device, + init=False + ) + ) + else: + raise KeyError("cannot find model %s" % (model)) + elif isinstance(model, type) and issubclass(model, BaseModel): + self.graph_model_list.append( + model( + num_classes=num_classes, + num_features=num_features, + num_graph_features=num_graph_features, + device=device, + init=False + ) + ) + elif isinstance(model, BaseModel): + # setup the hp of num_classes and num_features + model.set_num_classes(num_classes) + model.set_num_features(num_features) + model.set_num_graph_features(num_graph_features) + self.graph_model_list.append(model.to(device)) + elif isinstance(model, GraphClassificationTrainer): + # receive a trainer list, put trainer to list + assert model.get_model() is not None, "Passed trainer should contain a model" + model.set_feval(feval) + model.loss_type = loss + model.to(device) + model.model.set_num_classes(num_classes) + model.model.set_num_features(num_features) + model.model.set_num_graph_features(num_graph_features) + model.num_classes = num_classes + model.num_features = num_features + model.num_graph_features = num_graph_features + self.graph_model_list.append(model) + else: + raise KeyError("cannot find graph network %s." 
% (model)) + else: + raise ValueError( + "need graph network to be (list of) str or a BaseModel class/instance, get", + graph_models, + "instead.", + ) + + # wrap all model_cls with specified trainer + for i, model in enumerate(self.graph_model_list): + # set model hp space + if self._model_hp_spaces is not None: + if self._model_hp_spaces[i] is not None: + if isinstance(model, GraphClassificationTrainer): + model.model.hyper_parameter_space = self._model_hp_spaces[i] + else: + model.hyper_parameter_space = self._model_hp_spaces[i] + # initialize trainer if needed + if isinstance(model, BaseModel): + name = ( + self._default_trainer + if isinstance(self._default_trainer, str) + else self._default_trainer[i] + ) + model = TRAINER_DICT[name]( + model=model, + num_features=num_features, + num_classes=num_classes, + loss=loss, + feval=feval, + device=device, + num_graph_features=num_graph_features, + init=False + ) + # set trainer hp space + if self._trainer_hp_space is not None: + if isinstance(self._trainer_hp_space[0], list): + current_hp_for_trainer = self._trainer_hp_space[i] + else: + current_hp_for_trainer = self._trainer_hp_space + model.hyper_parameter_space = current_hp_for_trainer + self.graph_model_list[i] = model + + return self + # pylint: disable=arguments-differ def fit( self, diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 25d9560..587e513 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -13,6 +13,8 @@ import yaml from .base import BaseClassifier from ..base import _parse_hp_space, _initialize_single_model from ...module.feature import FEATURE_DICT +from ...module.model import MODEL_DICT, BaseModel +from ...module.train import TRAINER_DICT, NodeClassificationTrainer from ...module.train import get_feval from ..utils import Leaderboard, set_seed from ...datasets import utils @@ -100,6 +102,101 @@ class AutoNodeClassifier(BaseClassifier): # data to be kept when fit self.data = None + def _init_graph_module( + self, + graph_models, + num_classes, + num_features, + feval, + device, + loss + ) -> "AutoNodeClassifier": + # load graph network module + self.graph_model_list = [] + if isinstance(graph_models, list): + for model in graph_models: + if isinstance(model, str): + if model in MODEL_DICT: + self.graph_model_list.append( + MODEL_DICT[model]( + num_classes=num_classes, + num_features=num_features, + device=device, + init=False + ) + ) + else: + raise KeyError("cannot find model %s" % (model)) + elif isinstance(model, type) and issubclass(model, BaseModel): + self.graph_model_list.append( + model( + num_classes=num_classes, + num_features=num_features, + device=device, + init=False + ) + ) + elif isinstance(model, BaseModel): + # setup the hp of num_classes and num_features + model.set_num_classes(num_classes) + model.set_num_features(num_features) + self.graph_model_list.append(model.to(device)) + elif isinstance(model, NodeClassificationTrainer): + # receive a trainer list, put trainer to list + assert model.get_model() is not None, "Passed trainer should contain a model" + model.set_feval(feval) + model.loss_type = loss + model.to(device) + model.model.set_num_classes(num_classes) + model.model.set_num_features(num_features) + model.num_classes = num_classes + model.num_features = num_features + self.graph_model_list.append(model) + else: + raise KeyError("cannot find graph network %s." 
% (model)) + else: + raise ValueError( + "need graph network to be (list of) str or a BaseModel class/instance, get", + graph_models, + "instead.", + ) + + # wrap all model_cls with specified trainer + for i, model in enumerate(self.graph_model_list): + # set model hp space + if self._model_hp_spaces is not None: + if self._model_hp_spaces[i] is not None: + if isinstance(model, NodeClassificationTrainer): + model.model.hyper_parameter_space = self._model_hp_spaces[i] + else: + model.hyper_parameter_space = self._model_hp_spaces[i] + # initialize trainer if needed + if isinstance(model, BaseModel): + name = ( + self._default_trainer + if isinstance(self._default_trainer, str) + else self._default_trainer[i] + ) + model = TRAINER_DICT[name]( + model=model, + num_features=num_features, + num_classes=num_classes, + loss=loss, + feval=feval, + device=device, + init=False + ) + # set trainer hp space + if self._trainer_hp_space is not None: + if isinstance(self._trainer_hp_space[0], list): + current_hp_for_trainer = self._trainer_hp_space[i] + else: + current_hp_for_trainer = self._trainer_hp_space + model.hyper_parameter_space = current_hp_for_trainer + self.graph_model_list[i] = model + + return self + # pylint: disable=arguments-differ def fit( self, From 9031fd4618b62827b6212b7d66987751261c9ab7 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 14 Mar 2021 13:31:15 +0000 Subject: [PATCH 022/144] construct trainer when building from configs --- autogl/module/hpo/advisorbase.py | 8 +++- autogl/module/hpo/autone.py | 4 +- autogl/solver/classifier/graph_classifier.py | 40 +++++++++++----- autogl/solver/classifier/node_classifier.py | 48 +++++++++++++------- 4 files changed, 69 insertions(+), 31 deletions(-) diff --git a/autogl/module/hpo/advisorbase.py b/autogl/module/hpo/advisorbase.py index 5bdb5cb..6d9395a 100644 --- a/autogl/module/hpo/advisorbase.py +++ b/autogl/module/hpo/advisorbase.py @@ -43,7 +43,9 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): self.xs = [] self.best_id = None self.best_trainer = None - space = trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space + space = ( + trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space + ) current_config = self._encode_para(space) for i in range(slaves): @@ -129,7 +131,9 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): self.feval_name = trainer.get_feval(return_major=True).get_eval_name() self.is_higher_better = trainer.get_feval(return_major=True).is_higher_better() - space = trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space + space = ( + trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space + ) current_space = self._encode_para(space) self._setUp(current_space) diff --git a/autogl/module/hpo/autone.py b/autogl/module/hpo/autone.py index 1021f4d..30da0b9 100644 --- a/autogl/module/hpo/autone.py +++ b/autogl/module/hpo/autone.py @@ -61,7 +61,9 @@ class AutoNE(BaseHPOptimizer): """ self.feval_name = trainer.get_feval(return_major=True).get_eval_name() self.is_higher_better = trainer.get_feval(return_major=True).is_higher_better() - space = trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space + space = ( + trainer.hyper_parameter_space + trainer.get_model().hyper_parameter_space + ) current_space = self._encode_para(space) def sample_subgraph(whole_data): diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 4ad0987..cedb2bc 100644 --- 
a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -107,7 +107,7 @@ class AutoGraphClassifier(BaseClassifier): feval, device, loss, - num_graph_features + num_graph_features, ) -> "AutoGraphClassifier": # load graph network module self.graph_model_list = [] @@ -121,7 +121,7 @@ class AutoGraphClassifier(BaseClassifier): num_features=num_features, num_graph_features=num_graph_features, device=device, - init=False + init=False, ) ) else: @@ -133,7 +133,7 @@ class AutoGraphClassifier(BaseClassifier): num_features=num_features, num_graph_features=num_graph_features, device=device, - init=False + init=False, ) ) elif isinstance(model, BaseModel): @@ -144,7 +144,9 @@ class AutoGraphClassifier(BaseClassifier): self.graph_model_list.append(model.to(device)) elif isinstance(model, GraphClassificationTrainer): # receive a trainer list, put trainer to list - assert model.get_model() is not None, "Passed trainer should contain a model" + assert ( + model.get_model() is not None + ), "Passed trainer should contain a model" model.set_feval(feval) model.loss_type = loss model.to(device) @@ -188,7 +190,7 @@ class AutoGraphClassifier(BaseClassifier): feval=feval, device=device, num_graph_features=num_graph_features, - init=False + init=False, ) # set trainer hp space if self._trainer_hp_space is not None: @@ -774,14 +776,30 @@ class AutoGraphClassifier(BaseClassifier): # global default default_trainer = trainer.pop("name", "GraphClassification") trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) + default_kwargs = {"num_features": None, "num_classes": None} + default_kwargs.update(trainer) + default_kwargs["init"] = False + for i in range(len(model_list)): + model = model_list[i] + trainer_wrapper = TRAINER_DICT[default_trainer](model=model, **trainer) + model_list[i] = trainer_wrapper elif isinstance(trainer, list): # sequential trainer definition - default_trainer = [ - train.pop("name", "GraphClassification") for train in trainer - ] - trainer_space = [ - _parse_hp_space(train.pop("hp_space", None)) for train in trainer - ] + assert len(trainer) == len( + model_list + ), "The number of trainer and model does not match" + trainer_space = [] + for i in range(len(model_list)): + train, model = trainer[i], model_list[i] + default_trainer = train.pop("name", "GraphClassification") + trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) + default_kwargs = {"num_features": None, "num_classes": None} + default_kwargs.update(train) + default_kwargs["init"] = False + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap solver.set_graph_models( model_list, default_trainer, trainer_space, model_hp_space diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 587e513..4fb37a0 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -103,13 +103,7 @@ class AutoNodeClassifier(BaseClassifier): self.data = None def _init_graph_module( - self, - graph_models, - num_classes, - num_features, - feval, - device, - loss + self, graph_models, num_classes, num_features, feval, device, loss ) -> "AutoNodeClassifier": # load graph network module self.graph_model_list = [] @@ -122,7 +116,7 @@ class AutoNodeClassifier(BaseClassifier): num_classes=num_classes, num_features=num_features, device=device, - init=False + init=False, ) ) else: @@ -133,7 +127,7 @@ class 
AutoNodeClassifier(BaseClassifier): num_classes=num_classes, num_features=num_features, device=device, - init=False + init=False, ) ) elif isinstance(model, BaseModel): @@ -143,7 +137,9 @@ class AutoNodeClassifier(BaseClassifier): self.graph_model_list.append(model.to(device)) elif isinstance(model, NodeClassificationTrainer): # receive a trainer list, put trainer to list - assert model.get_model() is not None, "Passed trainer should contain a model" + assert ( + model.get_model() is not None + ), "Passed trainer should contain a model" model.set_feval(feval) model.loss_type = loss model.to(device) @@ -184,7 +180,7 @@ class AutoNodeClassifier(BaseClassifier): loss=loss, feval=feval, device=device, - init=False + init=False, ) # set trainer hp space if self._trainer_hp_space is not None: @@ -699,14 +695,32 @@ class AutoNodeClassifier(BaseClassifier): # global default default_trainer = trainer.pop("name", "NodeClassification") trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) + default_kwargs = {"num_features": None, "num_classes": None} + default_kwargs.update(trainer) + default_kwargs["init"] = False + for i in range(len(model_list)): + model = model_list[i] + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap elif isinstance(trainer, list): # sequential trainer definition - default_trainer = [ - train.pop("name", "NodeClassification") for train in trainer - ] - trainer_space = [ - _parse_hp_space(train.pop("hp_space", None)) for train in trainer - ] + assert len(trainer) == len( + model_list + ), "The number of trainer and model does not match" + trainer_space = [] + for i in range(len(model_list)): + train, model = trainer[i], model_list[i] + default_trainer = train.pop("name", "NodeClassification") + trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) + default_kwargs = {"num_features": None, "num_classes": None} + default_kwargs.update(train) + default_kwargs["init"] = False + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap solver.set_graph_models( model_list, default_trainer, trainer_space, model_hp_space From 7d5f51c4ffb76b23bac76bd47fc312f5cfcac33f Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 14 Mar 2021 14:01:51 +0000 Subject: [PATCH 023/144] update configs --- configs/gcl_gin.yaml | 65 ------------- configs/graph_classification.yaml | 94 +++++++++---------- configs/ncl_gat.yaml | 52 ----------- configs/ncl_gcn.yaml | 45 --------- configs/node_classification.yaml | 114 +++++++++++------------ configs/nodeclf_gat_benchmark_large.yml | 107 ++++++++++----------- configs/nodeclf_gat_benchmark_small.yml | 106 ++++++++++----------- configs/nodeclf_gcn.yaml | 99 ++++++++++---------- configs/nodeclf_gcn_benchmark_large.yml | 102 ++++++++++---------- configs/nodeclf_gcn_benchmark_small.yml | 98 +++++++++---------- configs/nodeclf_gcn_large.yaml | 99 ++++++++++---------- configs/nodeclf_sage_benchmark_large.yml | 109 +++++++++++----------- configs/nodeclf_sage_benchmark_small.yml | 113 +++++++++++----------- 13 files changed, 495 insertions(+), 708 deletions(-) delete mode 100644 configs/gcl_gin.yaml delete mode 100644 configs/ncl_gat.yaml delete mode 100644 configs/ncl_gcn.yaml diff --git a/configs/gcl_gin.yaml b/configs/gcl_gin.yaml deleted file mode 100644 index b7e0500..0000000 --- a/configs/gcl_gin.yaml +++ /dev/null @@ -1,65 +0,0 @@ -feature: - - name : ~ - -models: - gin: - - parameterName: num_layers - type: FIXED - value: 6 
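The `from_config` changes in the patch above boil down to one construction pattern: pop `name` and `hp_space` out of each trainer config, then instantiate the registered trainer class with the leftover keys plus deferred-init defaults. A minimal sketch of that pattern follows, using a stand-in registry; the real `TRAINER_DICT` maps names to trainer classes and the defaults are resolved at fit time:

# Sketch of the trainer-construction pattern above (stand-in registry, illustrative only).
TRAINER_DICT = {"NodeClassification": dict}  # real entries are trainer classes

def build_trainer(model, trainer_cfg):
    cfg = dict(trainer_cfg)                       # avoid mutating the caller's config
    name = cfg.pop("name", "NodeClassification")  # registry key for the trainer
    hp_space = cfg.pop("hp_space", None)          # handed to the HPO module instead
    kwargs = {"num_features": None, "num_classes": None}  # filled in later, at fit time
    kwargs.update(cfg)
    kwargs["init"] = False                        # defer weight init until HPO decides
    return TRAINER_DICT[name](model=model, **kwargs), hp_space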
- - - parameterName: hidden - type: FIXED - value: [32,32,32,32,32] - - - parameterName: dropout - type: FIXED - value: 0.5 - - - parameterName: act - type: FIXED - value: relu - - - parameterName: eps - type: FIXED - value: True - - - parameterName: mlp_layers - type: FIXED - value: 2 - -trainer: - - parameterName: max_epoch - type: FIXED - value: 350 - - - parameterName: early_stopping_round - type: FIXED - value: 10 - - - parameterName: lr - type: FIXED - value: 0.01 - - - parameterName: weight_decay - type: FIXED - value: 0 - - - parameterName: batch_size - type: FIXED - value: 32 - -# hidden tuned in {16,32} for bioinformatics,64 for social -# batch tuned in {32,128} -# dropout tuned in {0,0.5} - -# weight decay (0.5 every 50 epochs) - -# max epoch 350 -# early stop epochs (run to end?), best for 10 folds - -hpo: - name: random - max_evals: 1 - -ensemble: - name: ~ \ No newline at end of file diff --git a/configs/graph_classification.yaml b/configs/graph_classification.yaml index a86372d..4640ddc 100644 --- a/configs/graph_classification.yaml +++ b/configs/graph_classification.yaml @@ -1,66 +1,56 @@ +ensemble: + name: voting + size: 2 feature: - - name: NxLargeCliqueSize - - name: NxLargeCliqueSize - +- name: NxLargeCliqueSize +- name: NxLargeCliqueSize +hpo: + max_evals: 10 + name: anneal models: - topkpool: - - - parameterName: ratio - type: DOUBLE - maxValue: 0.9 - minValue: 0.1 - scalingType: LINEAR - - - parameterName: dropout - type: DOUBLE - maxValue: 0.9 - minValue: 0.1 - scalingType: LINEAR - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - maxValue: 0.9 + minValue: 0.1 + parameterName: ratio + scalingType: LINEAR + type: DOUBLE + - maxValue: 0.9 + minValue: 0.1 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: topkpool trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 10 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.1 + type: INTEGER + - maxValue: 0.1 minValue: 0.0001 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.005 - minValue: 0.00005 + - maxValue: 0.005 + minValue: 5.0e-05 + parameterName: weight_decay scalingType: LOG - - - parameterName: batch_size - type: INTEGER - maxValue: 128 + type: DOUBLE + - maxValue: 128 minValue: 48 + parameterName: batch_size scalingType: LINEAR - - -hpo: - name: anneal - max_evals: 10 - -ensemble: - name: voting - size: 2 \ No newline at end of file + type: INTEGER diff --git a/configs/ncl_gat.yaml b/configs/ncl_gat.yaml deleted file mode 100644 index 9fa384e..0000000 --- a/configs/ncl_gat.yaml +++ /dev/null @@ -1,52 +0,0 @@ -feature: - - name: ~ - -models: - gat: - - parameterName: num_layers - type: FIXED - value: 2 - - - parameterName: heads - type: FIXED - value: 8 - - - parameterName: hidden - type: FIXED - value: [64] - - - parameterName: dropout - type: FIXED - value: 0.6 - - - parameterName: act - type: FIXED - value: elu - -trainer: - - parameterName: max_epoch - type: FIXED - value: 200 - - - parameterName: early_stopping_round - type: FIXED - value: 10 - - - parameterName: lr - type: FIXED - value: 0.005 - - - 
parameterName: weight_decay - type: FIXED - value: 0.0005 - -# Glorot initialization -# for pumbed dataset , heads = 8 for last layer and weight decay =0.001 ,lr=0.01 -# early stopping 100, max epoch 100000 -hpo: - name: random - max_evals: 1 - -ensemble: - name: ~ - diff --git a/configs/ncl_gcn.yaml b/configs/ncl_gcn.yaml deleted file mode 100644 index 1a76724..0000000 --- a/configs/ncl_gcn.yaml +++ /dev/null @@ -1,45 +0,0 @@ -feature: - - name: ~ # ~ means None - -models: - gcn: - - parameterName: num_layers - type: FIXED - value: 3 - - - parameterName: hidden - type: FIXED - value: [16, 16] - - - parameterName: dropout - type: FIXED - value: 0.5 - - - parameterName: act - type: FIXED - value: relu - -trainer: - - parameterName: max_epoch - type: FIXED - value: 200 - - - parameterName: early_stopping_round - type: FIXED - value: 10 - - - parameterName: lr - type: FIXED - value: 0.01 - - - parameterName: weight_decay - type: FIXED - value: 0.0005 -# Glorot initialization -# weight decay only for the first layer -hpo: - name: random - max_evals: 1 - -ensemble: - name: ~ \ No newline at end of file diff --git a/configs/node_classification.yaml b/configs/node_classification.yaml index 35a7bd0..011fda2 100644 --- a/configs/node_classification.yaml +++ b/configs/node_classification.yaml @@ -1,70 +1,68 @@ +ensemble: + name: voting + size: 2 feature: - - name: PYGNormalizeFeatures - - name: pagerank - +- name: PYGNormalizeFeatures +- name: pagerank +hpo: + max_evals: 10 + name: anneal models: - gat: - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2,3,4' - - - parameterName: heads - type: DISCRETE - feasiblePoints: '4,8,16' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 3 - minValue: [8, 8, 8] - maxValue: [64, 64, 64] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.9 - minValue: 0.1 - scalingType: LINEAR - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: 2,3,4 + parameterName: num_layers + type: DISCRETE + - feasiblePoints: 4,8,16 + parameterName: heads + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 3 + maxValue: + - 64 + - 64 + - 64 + minValue: + - 8 + - 8 + - 8 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.9 + minValue: 0.1 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gat trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 10 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.1 + type: INTEGER + - maxValue: 0.1 minValue: 0.0001 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.005 - minValue: 0.00005 + - maxValue: 0.005 + minValue: 5.0e-05 + parameterName: weight_decay scalingType: LOG - -hpo: - name: anneal - max_evals: 10 - -ensemble: - name: voting - size: 2 \ No newline at end of file + type: DOUBLE diff --git a/configs/nodeclf_gat_benchmark_large.yml b/configs/nodeclf_gat_benchmark_large.yml index 92dd480..1b5933f 100644 --- 
a/configs/nodeclf_gat_benchmark_large.yml +++ b/configs/nodeclf_gat_benchmark_large.yml @@ -1,69 +1,64 @@ -# search space for gat on amazon_computers amazon_photo coauthor_cs coauthor_physics +ensemble: + name: null feature: - - name: PYGNormalizeFeatures - +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random models: - gcn: - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2,3' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 2 - minValue: [8, 8] - maxValue: [32, 32] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.5 - minValue: 0.2 - scalingType: LINEAR - - - parameterName: heads - type: DISCRETE - feasiblePoints: '8,10,12' - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 32 + - 32 + minValue: + - 8 + - 8 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.5 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: 8,10,12 + parameterName: heads + type: DISCRETE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 400 + hp_space: + - maxValue: 400 minValue: 250 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 40 + - maxValue: 40 minValue: 25 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.05 + type: INTEGER + - maxValue: 0.05 minValue: 0.01 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.0005 + - maxValue: 0.0005 minValue: 0.0001 + parameterName: weight_decay scalingType: LOG - -hpo: - name: random - max_evals: 10 - -ensemble: - name: ~ \ No newline at end of file + type: DOUBLE diff --git a/configs/nodeclf_gat_benchmark_small.yml b/configs/nodeclf_gat_benchmark_small.yml index 0762349..318c5c9 100644 --- a/configs/nodeclf_gat_benchmark_small.yml +++ b/configs/nodeclf_gat_benchmark_small.yml @@ -1,70 +1,62 @@ -# search space for gat on cora, citeseer, pubmed +ensemble: + name: null feature: - - name: PYGNormalizeFeatures - +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random models: - gat: - - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2' - - - parameterName: heads - type: DISCRETE - feasiblePoints: '6,8,10,12' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 1 - minValue: [4] - maxValue: [16] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.8 - minValue: 0.2 - scalingType: LINEAR - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: '2' + parameterName: num_layers + type: DISCRETE + - feasiblePoints: 6,8,10,12 + parameterName: heads + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 1 + maxValue: + - 16 + minValue: + - 4 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + 
parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gat trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 100 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.05 + type: INTEGER + - maxValue: 0.05 minValue: 0.01 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.001 + - maxValue: 0.001 minValue: 0.0001 + parameterName: weight_decay scalingType: LOG - -hpo: - name: random - max_evals: 10 - -ensemble: - name: ~ \ No newline at end of file + type: DOUBLE diff --git a/configs/nodeclf_gcn.yaml b/configs/nodeclf_gcn.yaml index e8e2345..0f80b5b 100644 --- a/configs/nodeclf_gcn.yaml +++ b/configs/nodeclf_gcn.yaml @@ -1,64 +1,61 @@ +ensemble: + name: null feature: - - name: ~ # ~ means None - +- name: null +hpo: + max_evals: 10 + name: random models: - gcn: - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 2 - minValue: [16, 16] - maxValue: [64, 64] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.8 - minValue: 0.2 - scalingType: LINEAR - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: '2' + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 64 + - 64 + minValue: + - 16 + - 16 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 100 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.01 + type: INTEGER + - maxValue: 0.01 minValue: 0.0025 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.025 + - maxValue: 0.025 minValue: 0.0025 + parameterName: weight_decay scalingType: LOG - -hpo: - name: random - max_evals: 10 - -ensemble: - name: ~ \ No newline at end of file + type: DOUBLE diff --git a/configs/nodeclf_gcn_benchmark_large.yml b/configs/nodeclf_gcn_benchmark_large.yml index 659ca0a..54a72d7 100644 --- a/configs/nodeclf_gcn_benchmark_large.yml +++ b/configs/nodeclf_gcn_benchmark_large.yml @@ -1,65 +1,61 @@ -# search space for gcn on amazon_computers amazon_photo coauthor_cs coauthor_physics +ensemble: + name: null feature: - - name: PYGNormalizeFeatures - +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random models: - gcn: - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2,3' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 2 - minValue: [32, 32] - 
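These hidden-size spaces rely on the `NUMERICAL_LIST`/`cutPara`/`cutFunc` convention: the list is sampled at its full `length` and then truncated to `cutFunc(cutPara values)` entries, so a `num_layers`-layer network receives `num_layers - 1` hidden sizes. A sketch of the assumed truncation semantics (an illustration of the convention, not the HPO module's actual code):

# Assumed cutPara/cutFunc semantics: sample the full-length list, then truncate it.
def cut_list_parameter(sampled, cut_para_values, cut_func_src):
    cut_func = eval(cut_func_src)  # configs ship the cut function as source text
    return sampled[: cut_func(cut_para_values)]

# "lambda x:x[0] - 1" with cutPara ["num_layers"]: 3 layers -> 2 hidden sizes.
hidden = cut_list_parameter([16, 32, 64], [3], "lambda x:x[0] - 1")
assert hidden == [16, 32]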
maxValue: [128, 128] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.8 - minValue: 0.2 - scalingType: LINEAR - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 128 + - 128 + minValue: + - 32 + - 32 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 100 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.05 + type: INTEGER + - maxValue: 0.05 minValue: 0.01 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.0005 - minValue: 0.00005 + - maxValue: 0.0005 + minValue: 5.0e-05 + parameterName: weight_decay scalingType: LOG - -hpo: - name: random - max_evals: 10 - -ensemble: - name: ~ \ No newline at end of file + type: DOUBLE diff --git a/configs/nodeclf_gcn_benchmark_small.yml b/configs/nodeclf_gcn_benchmark_small.yml index c982b36..febc655 100644 --- a/configs/nodeclf_gcn_benchmark_small.yml +++ b/configs/nodeclf_gcn_benchmark_small.yml @@ -1,65 +1,59 @@ -# search space for gcn on cora, citeseer, pubmed +ensemble: + name: null feature: - - name: PYGNormalizeFeatures - +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random models: - gcn: - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 1 - minValue: [16] - maxValue: [64] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.8 - minValue: 0.2 - scalingType: LINEAR - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: '2' + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 1 + maxValue: + - 64 + minValue: + - 16 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 100 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.05 + type: INTEGER + - maxValue: 0.05 minValue: 0.005 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.001 + - maxValue: 0.001 minValue: 0.0001 + parameterName: weight_decay scalingType: 
LOG - -hpo: - name: random - max_evals: 10 - -ensemble: - name: ~ \ No newline at end of file + type: DOUBLE diff --git a/configs/nodeclf_gcn_large.yaml b/configs/nodeclf_gcn_large.yaml index 5d4e3a4..8303833 100644 --- a/configs/nodeclf_gcn_large.yaml +++ b/configs/nodeclf_gcn_large.yaml @@ -1,64 +1,61 @@ +ensemble: + name: null feature: - - name: ~ # ~ means None - +- name: null +hpo: + max_evals: 10 + name: random models: - gcn: - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2,3' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 2 - minValue: [32, 32] - maxValue: [128, 128] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.8 - minValue: 0.2 - scalingType: LINEAR - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 128 + - 128 + minValue: + - 32 + - 32 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 100 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.01 + type: INTEGER + - maxValue: 0.01 minValue: 0.001 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.01 + - maxValue: 0.01 minValue: 0.001 + parameterName: weight_decay scalingType: LOG - -hpo: - name: random - max_evals: 10 - -ensemble: - name: ~ \ No newline at end of file + type: DOUBLE diff --git a/configs/nodeclf_sage_benchmark_large.yml b/configs/nodeclf_sage_benchmark_large.yml index cb218c8..33833bd 100644 --- a/configs/nodeclf_sage_benchmark_large.yml +++ b/configs/nodeclf_sage_benchmark_large.yml @@ -1,70 +1,65 @@ -# search space for graphsage on amazon_computers amazon_photo coauthor_cs coauthor_physics +ensemble: + name: null feature: - - name: PYGNormalizeFeatures - +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random models: - gcn: - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2,3' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 2 - minValue: [32,128] - maxValue: [32,128] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.8 - minValue: 0.2 - scalingType: LINEAR - - - parameterName: agg, - type: CATEGORICAL, - feasiblePoints": - - mean - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 32 + - 128 + minValue: + - 32 + - 128 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + 
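The mechanical look of these rewritten configs comes from a YAML round-trip: keys are re-sorted alphabetically, comments are dropped, `~` is rendered as `null`, `0.00005` becomes `5.0e-05`, and each model is expressed as a `name`/`hp_space` block that `from_config` consumes directly. Loading one looks roughly like this (a sketch, assuming PyYAML is installed):

import yaml

cfg = yaml.safe_load("""
models:
- name: gcn
  hp_space:
  - parameterName: num_layers
    type: DISCRETE
    feasiblePoints: '2,3'
""")
for block in cfg["models"]:
    # each model block now carries its own search space
    print(block["name"], len(block["hp_space"]))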
type: DOUBLE + - feasiblePoints: + - mean + parameterName: agg + type: CATEGORICAL + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 100 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.05 + type: INTEGER + - maxValue: 0.05 minValue: 0.01 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.0005 + - maxValue: 0.0005 minValue: 0.0001 + parameterName: weight_decay scalingType: LOG - -hpo: - name: random - max_evals: 10 - -ensemble: - name: ~ \ No newline at end of file + type: DOUBLE diff --git a/configs/nodeclf_sage_benchmark_small.yml b/configs/nodeclf_sage_benchmark_small.yml index 88f2bb2..2bd0ffe 100644 --- a/configs/nodeclf_sage_benchmark_small.yml +++ b/configs/nodeclf_sage_benchmark_small.yml @@ -1,72 +1,67 @@ -# search space for graphsage on cora, citeseer, pubmed +ensemble: + name: null feature: - - name: PYGNormalizeFeatures - +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random models: - gcn: - - parameterName: num_layers - type: DISCRETE - feasiblePoints: '2,3' - - - parameterName: hidden - type: NUMERICAL_LIST - numericalType: INTEGER - length: 2 - minValue: [16,64] - maxValue: [16,64] - cutPara: ["num_layers"] - cutFunc: "lambda x:x[0] - 1" - scalingType: LOG - - - parameterName: dropout - type: DOUBLE - maxValue: 0.8 - minValue: 0.2 - scalingType: LINEAR - - - parameterName: agg, - type: CATEGORICAL, - feasiblePoints": - - mean - - add - - max - - - parameterName: act - type: CATEGORICAL - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 16 + - 64 + minValue: + - 16 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - mean + - add + - max + parameterName: agg + type: CATEGORICAL + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn trainer: - - parameterName: max_epoch - type: INTEGER - maxValue: 300 + hp_space: + - maxValue: 300 minValue: 100 + parameterName: max_epoch scalingType: LINEAR - - - parameterName: early_stopping_round type: INTEGER - maxValue: 30 + - maxValue: 30 minValue: 10 + parameterName: early_stopping_round scalingType: LINEAR - - - parameterName: lr - type: DOUBLE - maxValue: 0.05 + type: INTEGER + - maxValue: 0.05 minValue: 0.01 + parameterName: lr scalingType: LOG - - - parameterName: weight_decay type: DOUBLE - maxValue: 0.001 + - maxValue: 0.001 minValue: 0.0001 + parameterName: weight_decay scalingType: LOG - -hpo: - name: random - max_evals: 10 - -ensemble: - name: ~ \ No newline at end of file + type: DOUBLE From 614c9c0b84559a988e150d59b0daa1f49f5fd9f0 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 14 Mar 2021 14:14:42 +0000 Subject: [PATCH 024/144] fix bug in graph trainer --- autogl/solver/classifier/graph_classifier.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git
a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index cedb2bc..bc5bcdb 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -781,7 +781,9 @@ class AutoGraphClassifier(BaseClassifier): default_kwargs["init"] = False for i in range(len(model_list)): model = model_list[i] - trainer_wrapper = TRAINER_DICT[default_trainer](model=model, **trainer) + trainer_wrapper = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) model_list[i] = trainer_wrapper elif isinstance(trainer, list): # sequential trainer definition From d94923fb395b07c0a231ab40a9a0059514e6f19a Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Tue, 16 Mar 2021 15:56:23 +0000 Subject: [PATCH 025/144] fix es bug and duplicate_from_hyperparameter --- autogl/module/train/graph_classification.py | 9 ++++++--- autogl/module/train/node_classification.py | 11 +++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/autogl/module/train/graph_classification.py b/autogl/module/train/graph_classification.py index 3dc16e7..87e9aac 100644 --- a/autogl/module/train/graph_classification.py +++ b/autogl/module/train/graph_classification.py @@ -72,7 +72,7 @@ class GraphClassificationTrainer(BaseTrainer): init=True, feval=[Logloss], loss="nll_loss", - lr_scheduler_type='steplr', + lr_scheduler_type=None, *args, **kwargs ): @@ -93,6 +93,7 @@ class GraphClassificationTrainer(BaseTrainer): elif isinstance(model, BaseModel): self.model = model + self.opt_received = optimizer if type(optimizer) == str and optimizer.lower() == "adam": self.optimizer = torch.optim.Adam elif type(optimizer) == str and optimizer.lower() == "sgd": @@ -270,8 +271,8 @@ class GraphClassificationTrainer(BaseTrainer): self.early_stopping(val_loss, self.model.model) if self.early_stopping.early_stop: LOGGER.debug("Early stopping at", epoch) - self.early_stopping.load_checkpoint(self.model.model) break + self.early_stopping.load_checkpoint(self.model.model) def predict_only(self, loader): """ @@ -551,7 +552,7 @@ class GraphClassificationTrainer(BaseTrainer): num_features=self.num_features, num_classes=self.num_classes, num_graph_features=self.num_graph_features, - optimizer=self.optimizer, + optimizer=self.opt_received, lr=hp["lr"], max_epoch=hp["max_epoch"], batch_size=hp["batch_size"], @@ -559,6 +560,8 @@ class GraphClassificationTrainer(BaseTrainer): weight_decay=hp["weight_decay"], device=self.device, feval=self.feval, + loss=self.loss_type, + lr_scheduler_type=self.lr_scheduler_type, init=True, *self.args, **self.kwargs diff --git a/autogl/module/train/node_classification.py b/autogl/module/train/node_classification.py index de50ca5..061b2b7 100644 --- a/autogl/module/train/node_classification.py +++ b/autogl/module/train/node_classification.py @@ -69,7 +69,7 @@ class NodeClassificationTrainer(BaseTrainer): init=True, feval=[Logloss], loss="nll_loss", - lr_scheduler_type='steplr', + lr_scheduler_type=None, *args, **kwargs ): @@ -86,7 +86,8 @@ class NodeClassificationTrainer(BaseTrainer): self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) elif isinstance(model, BaseModel): self.model = model - + + self.opt_received = optimizer if type(optimizer) == str and optimizer.lower() == "adam": self.optimizer = torch.optim.Adam elif type(optimizer) == str and optimizer.lower() == "sgd": @@ -243,8 +244,8 @@ class NodeClassificationTrainer(BaseTrainer): self.early_stopping(val_loss, self.model.model) if self.early_stopping.early_stop: 
LOGGER.debug("Early stopping at %d", epoch) - self.early_stopping.load_checkpoint(self.model.model) break + self.early_stopping.load_checkpoint(self.model.model) def predict_only(self, data, test_mask=None): """ @@ -499,13 +500,15 @@ class NodeClassificationTrainer(BaseTrainer): model=model, num_features=self.num_features, num_classes=self.num_classes, - optimizer=self.optimizer, + optimizer=self.opt_received, lr=hp["lr"], max_epoch=hp["max_epoch"], early_stopping_round=hp["early_stopping_round"], device=self.device, weight_decay=hp["weight_decay"], feval=self.feval, + loss=self.loss_type, + lr_scheduler_type=self.lr_scheduler_type, init=True, *self.args, **self.kwargs From e6d42bbc4f33aa33d82cfa9ccc021e73c392a46b Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Tue, 16 Mar 2021 16:01:20 +0000 Subject: [PATCH 026/144] black format --- autogl/module/train/graph_classification.py | 21 ++++++++++++++------- autogl/module/train/node_classification.py | 21 ++++++++++++++------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/autogl/module/train/graph_classification.py b/autogl/module/train/graph_classification.py index 87e9aac..5a10689 100644 --- a/autogl/module/train/graph_classification.py +++ b/autogl/module/train/graph_classification.py @@ -1,6 +1,11 @@ from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping import torch -from torch.optim.lr_scheduler import StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau +from torch.optim.lr_scheduler import ( + StepLR, + MultiStepLR, + ExponentialLR, + ReduceLROnPlateau, +) import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel from .evaluate import Logloss @@ -229,14 +234,16 @@ class GraphClassificationTrainer(BaseTrainer): # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) lr_scheduler_type = self.lr_scheduler_type - if type(lr_scheduler_type) == str and lr_scheduler_type == 'steplr': + if type(lr_scheduler_type) == str and lr_scheduler_type == "steplr": scheduler = StepLR(optimizer, step_size=100, gamma=0.1) - elif type(lr_scheduler_type) == str and lr_scheduler_type == 'multisteplr': - scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1) - elif type(lr_scheduler_type) == str and lr_scheduler_type == 'exponentiallr': + elif type(lr_scheduler_type) == str and lr_scheduler_type == "multisteplr": + scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1) + elif type(lr_scheduler_type) == str and lr_scheduler_type == "exponentiallr": scheduler = ExponentialLR(optimizer, gamma=0.1) - elif type(lr_scheduler_type) == str and lr_scheduler_type == 'reducelronplateau': - scheduler = ReduceLROnPlateau(optimizer, 'min') + elif ( + type(lr_scheduler_type) == str and lr_scheduler_type == "reducelronplateau" + ): + scheduler = ReduceLROnPlateau(optimizer, "min") else: scheduler = None diff --git a/autogl/module/train/node_classification.py b/autogl/module/train/node_classification.py index 061b2b7..b5a69cd 100644 --- a/autogl/module/train/node_classification.py +++ b/autogl/module/train/node_classification.py @@ -1,6 +1,11 @@ from . 
import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping import torch -from torch.optim.lr_scheduler import StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau +from torch.optim.lr_scheduler import ( + StepLR, + MultiStepLR, + ExponentialLR, + ReduceLROnPlateau, +) import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel from .evaluate import Logloss, Acc, Auc @@ -86,7 +91,7 @@ class NodeClassificationTrainer(BaseTrainer): self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) elif isinstance(model, BaseModel): self.model = model - + self.opt_received = optimizer if type(optimizer) == str and optimizer.lower() == "adam": self.optimizer = torch.optim.Adam @@ -207,14 +212,16 @@ class NodeClassificationTrainer(BaseTrainer): ) # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) lr_scheduler_type = self.lr_scheduler_type - if type(lr_scheduler_type) == str and lr_scheduler_type == 'steplr': + if type(lr_scheduler_type) == str and lr_scheduler_type == "steplr": scheduler = StepLR(optimizer, step_size=100, gamma=0.1) - elif type(lr_scheduler_type) == str and lr_scheduler_type == 'multisteplr': + elif type(lr_scheduler_type) == str and lr_scheduler_type == "multisteplr": scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1) - elif type(lr_scheduler_type) == str and lr_scheduler_type == 'exponentiallr': + elif type(lr_scheduler_type) == str and lr_scheduler_type == "exponentiallr": scheduler = ExponentialLR(optimizer, gamma=0.1) - elif type(lr_scheduler_type) == str and lr_scheduler_type == 'reducelronplateau': - scheduler = ReduceLROnPlateau(optimizer, 'min') + elif ( + type(lr_scheduler_type) == str and lr_scheduler_type == "reducelronplateau" + ): + scheduler = ReduceLROnPlateau(optimizer, "min") else: scheduler = None From 3badd90bb9b97bec0b1eb1676a6f78248451a80c Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 17 Mar 2021 15:11:57 +0000 Subject: [PATCH 027/144] change es logic --- autogl/module/train/graph_classification.py | 26 +++++++++++---------- autogl/module/train/node_classification.py | 26 +++++++++++---------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/autogl/module/train/graph_classification.py b/autogl/module/train/graph_classification.py index 5a10689..6f0e3bb 100644 --- a/autogl/module/train/graph_classification.py +++ b/autogl/module/train/graph_classification.py @@ -268,18 +268,20 @@ class GraphClassificationTrainer(BaseTrainer): scheduler.step() # loss = loss_all / len(train_loader.dataset) # train_loss = self.evaluate(train_loader) - eval_func = ( - self.feval if not isinstance(self.feval, list) else self.feval[0] - ) - val_loss = self._evaluate(valid_loader, eval_func) if valid_loader else 0.0 - - if eval_func.is_higher_better(): - val_loss = -val_loss - self.early_stopping(val_loss, self.model.model) - if self.early_stopping.early_stop: - LOGGER.debug("Early stopping at", epoch) - break - self.early_stopping.load_checkpoint(self.model.model) + if valid_loader is not None: + eval_func = ( + self.feval if not isinstance(self.feval, list) else self.feval[0] + ) + val_loss = self._evaluate(valid_loader, eval_func) + + if eval_func.is_higher_better(): + val_loss = -val_loss + self.early_stopping(val_loss, self.model.model) + if self.early_stopping.early_stop: + LOGGER.debug("Early stopping at", epoch) + break + if valid_loader is not None: + self.early_stopping.load_checkpoint(self.model.model) def predict_only(self, loader): """ diff --git 
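The `lr_scheduler_type` strings reformatted above dispatch to torch schedulers with hard-coded settings, and after PATCH 025 the default is `None`, meaning no scheduler at all. Condensed into a sketch (step sizes and gammas copied from the patch; the surrounding trainer code is elided):

import torch
from torch.optim.lr_scheduler import (
    StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau,
)

def build_scheduler(optimizer, lr_scheduler_type=None):
    # mirrors the string dispatch in both trainers; anything else means "no scheduler"
    if lr_scheduler_type == "steplr":
        return StepLR(optimizer, step_size=100, gamma=0.1)
    if lr_scheduler_type == "multisteplr":
        return MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)
    if lr_scheduler_type == "exponentiallr":
        return ExponentialLR(optimizer, gamma=0.1)
    if lr_scheduler_type == "reducelronplateau":
        return ReduceLROnPlateau(optimizer, "min")
    return None

opt = torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=0.01)
assert build_scheduler(opt) is None  # the new default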
a/autogl/module/train/node_classification.py b/autogl/module/train/node_classification.py index b5a69cd..a4994fc 100644 --- a/autogl/module/train/node_classification.py +++ b/autogl/module/train/node_classification.py @@ -241,18 +241,20 @@ class NodeClassificationTrainer(BaseTrainer): if self.lr_scheduler_type: scheduler.step() - if type(self.feval) is list: - feval = self.feval[0] - else: - feval = self.feval - val_loss = self.evaluate([data], mask=data.val_mask, feval=feval) - if feval.is_higher_better() is True: - val_loss = -val_loss - self.early_stopping(val_loss, self.model.model) - if self.early_stopping.early_stop: - LOGGER.debug("Early stopping at %d", epoch) - break - self.early_stopping.load_checkpoint(self.model.model) + if hasattr(data, 'val_mask') and data.val_mask is not None: + if type(self.feval) is list: + feval = self.feval[0] + else: + feval = self.feval + val_loss = self.evaluate([data], mask=data.val_mask, feval=feval) + if feval.is_higher_better() is True: + val_loss = -val_loss + self.early_stopping(val_loss, self.model.model) + if self.early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", epoch) + break + if hasattr(data, 'val_mask') and data.val_mask is not None: + self.early_stopping.load_checkpoint(self.model.model) def predict_only(self, data, test_mask=None): """ From 38ab2b06b95cee26d732f5cd6c15dfddb45599d0 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Mon, 22 Mar 2021 14:42:06 +0000 Subject: [PATCH 028/144] rebase trainer --- autogl/module/train/__init__.py | 21 ++- autogl/module/train/base.py | 161 ++++++++++++++---- ...cation.py => graph_classification_full.py} | 58 +++---- ...ication.py => node_classification_full.py} | 53 +++--- autogl/solver/base.py | 5 +- autogl/solver/classifier/graph_classifier.py | 28 +-- autogl/solver/classifier/node_classifier.py | 26 +-- 7 files changed, 221 insertions(+), 131 deletions(-) rename autogl/module/train/{graph_classification.py => graph_classification_full.py} (92%) rename autogl/module/train/{node_classification.py => node_classification_full.py} (93%) diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 87fa030..93cbbf0 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -1,8 +1,14 @@ import importlib import os -from .base import BaseTrainer, Evaluation, EarlyStopping TRAINER_DICT = {} +EVALUATE_DICT = {} +from .base import ( + BaseTrainer, + Evaluation, + BaseNodeClassificationTrainer, + BaseGraphClassificationTrainer, +) def register_trainer(name): @@ -19,9 +25,6 @@ def register_trainer(name): return register_trainer_cls -EVALUATE_DICT = {} - - def register_evaluate(*name): def register_evaluate_cls(cls): for n in name: @@ -47,14 +50,16 @@ def get_feval(feval): raise ValueError("feval argument of type", type(feval), "is not supported!") -from .graph_classification import GraphClassificationTrainer -from .node_classification import NodeClassificationTrainer +from .graph_classification_full import GraphClassificationFullTrainer +from .node_classification_full import NodeClassificationFullTrainer from .evaluate import Acc, Auc, Logloss __all__ = [ "BaseTrainer", - "GraphClassificationTrainer", - "NodeClassificationTrainer", + "BaseNodeClassificationTrainer", + "BaseGraphClassificationTrainer", + "GraphClassificationFullTrainer", + "NodeClassificationFullTrainer", "Evaluation", "Acc", "Auc", diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index b765d80..b0ad872 100644 --- a/autogl/module/train/base.py +++ 
b/autogl/module/train/base.py @@ -1,12 +1,25 @@ import numpy as np from typing import Union, Iterable -from ..model import BaseModel + +import torch +from ..model import BaseModel, MODEL_DICT import pickle from ...utils import get_logger +from . import EVALUATE_DICT LOGGER_ES = get_logger("early-stopping") +def get_feval(feval): + if isinstance(feval, str): + return EVALUATE_DICT[feval] + if isinstance(feval, type) and issubclass(feval, Evaluation): + return feval + if isinstance(feval, list): + return [get_feval(f) for f in feval] + raise ValueError("feval argument of type", type(feval), "is not supported!") + + class EarlyStopping: """Early stops the training if validation loss doesn't improve after a given patience.""" @@ -81,17 +94,11 @@ class EarlyStopping: class BaseTrainer: def __init__( self, - model: Union[BaseModel, str], - optimizer=None, - lr=None, - max_epoch=None, - early_stopping_round=None, - device=None, + model: BaseModel, + device: Union[torch.device, str], init=True, feval=["acc"], loss="nll_loss", - *args, - **kwargs, ): """ The basic trainer. @@ -103,29 +110,26 @@ class BaseTrainer: model: `BaseModel` or `str` The (name of) model used to train and predict. - optimizer: `Optimizer` of `str` - The (name of) optimizer used to train and predict. - - lr: `float` - The learning rate. - - max_epoch: `int` - The max number of epochs in training. - - early_stopping_round: `int` - The round of early stop. - - device: `torch.device` or `str` - The device where model will be running on. - init: `bool` If True(False), the model will (not) be initialized. + """ + super().__init__() + self.model = model + self.to(device) + self.init = init + self.feval = get_feval(feval) + self.loss = loss - args: Other parameters. + def to(self, device): + """ + Migrate trainer to new device - kwargs: Other parameters. 
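The `get_feval` helper added at the top of `base.py` normalizes every accepted `feval` spelling (registry string, `Evaluation` subclass, or a list of either) into evaluation classes. Against a toy registry it behaves like this (a sketch; the real `EVALUATE_DICT` lives in the train package):

# Toy registry demonstrating the feval normalization above (illustrative).
class Evaluation: ...
class Acc(Evaluation): ...

EVALUATE_DICT = {"acc": Acc}

def get_feval(feval):
    if isinstance(feval, str):
        return EVALUATE_DICT[feval]
    if isinstance(feval, type) and issubclass(feval, Evaluation):
        return feval
    if isinstance(feval, list):
        return [get_feval(f) for f in feval]
    raise ValueError("feval argument of type", type(feval), "is not supported!")

assert get_feval(["acc", Acc]) == [Acc, Acc]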
+ Parameters + ---------- + device: `str` or `torch.device` + The device this trainer will use """ - super().__init__() + self.device = torch.device(device) def initialize(self): """Initialize the auto model in trainer.""" @@ -169,8 +173,8 @@ class BaseTrainer: @classmethod def load(cls, path): - with open(path, "rb") as input: - instance = pickle.load(input) + with open(path, "rb") as inputs: + instance = pickle.load(inputs) return instance @property @@ -279,7 +283,21 @@ class BaseTrainer: def set_feval(self, feval): """Set the evaluation metrics.""" - raise NotImplementedError() + self.feval = get_feval(feval) + + def update_parameters(self, **kwargs): + """ + Update parameters of this trainer + """ + for k, v in kwargs.items(): + if k == "feval": + self.set_feval(v) + elif k == "device": + self.to(v) + elif hasattr(self, k): + setattr(self, k, v) + else: + raise KeyError("Cannot set parameter", k, "for trainer", self.__class__) # a static class for evaluating results @@ -296,7 +314,7 @@ class Evaluation: """ Should return whether this evaluation method is higher better (bool) """ - raise True + return True @staticmethod def evaluate(predict, label): @@ -304,3 +322,84 @@ """ Should return: the evaluation result (float) """ raise NotImplementedError() + + +class BaseNodeClassificationTrainer(BaseTrainer): + def __init__( + self, + model: Union[BaseModel, str], + num_features, + num_classes, + device="auto", + init=True, + feval=["acc"], + loss="nll_loss", + ): + self.num_features = num_features + self.num_classes = num_classes + device = ( + torch.device("cuda" if torch.cuda.is_available() else "cpu") + if device == "auto" + else torch.device(device) + ) + if isinstance(model, str): + assert model in MODEL_DICT, "Cannot parse model name " + model + self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) + elif isinstance(model, BaseModel): + self.model = model + else: + raise TypeError( + "Model argument only support str or BaseModel, get", + type(model), + "instead.", + ) + super().__init__(model, device=device, init=init, feval=feval, loss=loss) + + @classmethod + def get_task_name(cls): + return "NodeClassification" + + +class BaseGraphClassificationTrainer(BaseTrainer): + def __init__( + self, + model: Union[BaseModel, str], + num_features, + num_classes, + num_graph_features=0, + device="auto", + init=True, + feval=["acc"], + loss="nll_loss", + ): + self.num_features = num_features + self.num_classes = num_classes + self.num_graph_features = num_graph_features + device = ( + torch.device("cuda" if torch.cuda.is_available() else "cpu") + if device == "auto" + else torch.device(device) + ) + if isinstance(model, str): + assert model in MODEL_DICT, "Cannot parse model name " + model + self.model = MODEL_DICT[model]( + num_features, + num_classes, + device, + init=init, + num_graph_features=num_graph_features, + ) + elif isinstance(model, BaseModel): + self.model = model + else: + raise TypeError( + "Model argument only support str or BaseModel, get", + type(model), + "instead.", + ) + + super().__init__(model, device=device, init=init, feval=feval, loss=loss) + + @classmethod + def get_task_name(cls): + return "GraphClassification" diff --git a/autogl/module/train/graph_classification.py b/autogl/module/train/graph_classification_full.py similarity index 92% rename from autogl/module/train/graph_classification.py rename to autogl/module/train/graph_classification_full.py index 6f0e3bb..ac3a05d 100644 --- a/autogl/module/train/graph_classification.py +++ 
b/autogl/module/train/graph_classification_full.py @@ -1,4 +1,5 @@ -from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping +from . import register_trainer, EVALUATE_DICT +from .base import BaseGraphClassificationTrainer, EarlyStopping, Evaluation import torch from torch.optim.lr_scheduler import ( StepLR, @@ -28,8 +29,8 @@ def get_feval(feval): raise ValueError("feval argument of type", type(feval), "is not supported!") -@register_trainer("GraphClassification") -class GraphClassificationTrainer(BaseTrainer): +@register_trainer("GraphClassificationFull") +class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): """ The graph classification trainer. @@ -73,7 +74,7 @@ class GraphClassificationTrainer(BaseTrainer): batch_size=None, early_stopping_round=7, weight_decay=1e-4, - device=None, + device="auto", init=True, feval=[Logloss], loss="nll_loss", @@ -81,22 +82,16 @@ class GraphClassificationTrainer(BaseTrainer): *args, **kwargs ): - super(GraphClassificationTrainer, self).__init__(model) - - self.loss_type = loss - - # init model - if isinstance(model, str): - assert model in MODEL_DICT, "Cannot parse model name " + model - self.model = MODEL_DICT[model]( - num_features, - num_classes, - device, - init=init, - num_graph_features=num_graph_features, - ) - elif isinstance(model, BaseModel): - self.model = model + super().__init__( + model, + num_features, + num_classes, + num_graph_features=num_graph_features, + device=device, + init=init, + feval=feval, + loss=loss, + ) self.opt_received = optimizer if type(optimizer) == str and optimizer.lower() == "adam": @@ -108,9 +103,6 @@ class GraphClassificationTrainer(BaseTrainer): self.lr_scheduler_type = lr_scheduler_type - self.num_features = num_features - self.num_classes = num_classes - self.num_graph_features = num_graph_features self.lr = lr if lr is not None else 1e-4 self.max_epoch = max_epoch if max_epoch is not None else 100 self.batch_size = batch_size if batch_size is not None else 64 @@ -135,8 +127,6 @@ class GraphClassificationTrainer(BaseTrainer): self.valid_score = None self.initialized = False - self.num_features = num_features - self.num_classes = num_classes self.device = device self.space = [ @@ -176,8 +166,6 @@ class GraphClassificationTrainer(BaseTrainer): "scalingType": "LOG", }, ] - # self.space += self.model.space - GraphClassificationTrainer.space = self.space self.hyperparams = { "max_epoch": self.max_epoch, @@ -186,7 +174,6 @@ class GraphClassificationTrainer(BaseTrainer): "lr": self.lr, "weight_decay": self.weight_decay, } - self.hyperparams = {**self.hyperparams, **self.model.get_hyper_parameter()} if init is True: self.initialize() @@ -207,9 +194,9 @@ class GraphClassificationTrainer(BaseTrainer): # """Get task name, i.e., `GraphClassification`.""" return "GraphClassification" - def to(self, new_device): - assert isinstance(new_device, torch.device) - self.device = new_device + def to(self, device): + assert isinstance(device, torch.device) + self.device = device if self.model is not None: self.model.to(self.device) @@ -255,11 +242,11 @@ class GraphClassificationTrainer(BaseTrainer): optimizer.zero_grad() output = self.model.model(data) # loss = F.nll_loss(output, data.y) - if hasattr(F, self.loss_type): - loss = getattr(F, self.loss_type)(output, data.y) + if hasattr(F, self.loss): + loss = getattr(F, self.loss)(output, data.y) else: raise TypeError( - "PyTorch does not support loss type {}".format(self.loss_type) + "PyTorch does not support loss type {}".format(self.loss) 
) loss.backward() loss_all += data.num_graphs * loss.item() @@ -569,7 +556,7 @@ class GraphClassificationTrainer(BaseTrainer): weight_decay=hp["weight_decay"], device=self.device, feval=self.feval, - loss=self.loss_type, + loss=self.loss, lr_scheduler_type=self.lr_scheduler_type, init=True, *self.args, @@ -591,7 +578,6 @@ class GraphClassificationTrainer(BaseTrainer): def hyper_parameter_space(self, space): # """Set the space of hyperparameter.""" self.space = space - GraphClassificationTrainer.space = space def get_hyper_parameter(self): # """Get the hyperparameter in this trainer.""" diff --git a/autogl/module/train/node_classification.py b/autogl/module/train/node_classification_full.py similarity index 93% rename from autogl/module/train/node_classification.py rename to autogl/module/train/node_classification_full.py index a4994fc..361a391 100644 --- a/autogl/module/train/node_classification.py +++ b/autogl/module/train/node_classification_full.py @@ -1,4 +1,10 @@ -from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping +""" +Node classification Full Trainer Implementation +""" + +from . import register_trainer, EVALUATE_DICT + +from .base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation import torch from torch.optim.lr_scheduler import ( StepLR, @@ -27,8 +33,8 @@ def get_feval(feval): raise ValueError("feval argument of type", type(feval), "is not supported!") -@register_trainer("NodeClassification") -class NodeClassificationTrainer(BaseTrainer): +@register_trainer("NodeClassificationFull") +class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): """ The node classification trainer. @@ -58,8 +64,6 @@ class NodeClassificationTrainer(BaseTrainer): If True(False), the model will (not) be initialized. 
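PATCH 025 and PATCH 027 together rework early stopping in both trainers: the best-weights checkpoint is restored after the epoch loop rather than only when the loop breaks, and both the evaluation and the restore are skipped entirely when no validation split exists. Reduced to a control-flow sketch (names illustrative; the series' `EarlyStopping` minimizes its input, hence the sign flip for higher-is-better metrics):

def train_loop(model, max_epoch, valid_loader, early_stopping, evaluate, higher_better):
    for epoch in range(max_epoch):
        ...  # one optimization epoch elided
        if valid_loader is not None:
            val_loss = evaluate(valid_loader)
            if higher_better:
                val_loss = -val_loss           # EarlyStopping expects lower-is-better
            early_stopping(val_loss, model)    # checkpoints on improvement
            if early_stopping.early_stop:
                break
    if valid_loader is not None:
        early_stopping.load_checkpoint(model)  # restore best weights even without a break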
""" - space = None - def __init__( self, model: Union[BaseModel, str], @@ -70,7 +74,7 @@ class NodeClassificationTrainer(BaseTrainer): max_epoch=None, early_stopping_round=None, weight_decay=1e-4, - device=None, + device="auto", init=True, feval=[Logloss], loss="nll_loss", @@ -78,12 +82,15 @@ class NodeClassificationTrainer(BaseTrainer): *args, **kwargs ): - super(NodeClassificationTrainer, self).__init__(model) - - self.loss_type = loss - - if device is None: - device = "cpu" + super().__init__( + model, + num_features, + num_classes, + device=device, + init=init, + feval=feval, + loss=loss, + ) # init model if isinstance(model, str): @@ -102,14 +109,11 @@ class NodeClassificationTrainer(BaseTrainer): self.lr_scheduler_type = lr_scheduler_type - self.num_features = num_features - self.num_classes = num_classes self.lr = lr if lr is not None else 1e-4 self.max_epoch = max_epoch if max_epoch is not None else 100 self.early_stopping_round = ( early_stopping_round if early_stopping_round is not None else 100 ) - self.device = device self.args = args self.kwargs = kwargs @@ -126,9 +130,6 @@ class NodeClassificationTrainer(BaseTrainer): self.valid_score = None self.initialized = False - self.num_features = num_features - self.num_classes = num_classes - self.device = device self.space = [ { @@ -160,8 +161,6 @@ class NodeClassificationTrainer(BaseTrainer): "scalingType": "LOG", }, ] - # self.space += self.model.space - NodeClassificationTrainer.space = self.space self.hyperparams = { "max_epoch": self.max_epoch, @@ -169,7 +168,6 @@ class NodeClassificationTrainer(BaseTrainer): "lr": self.lr, "weight_decay": self.weight_decay, } - self.hyperparams = {**self.hyperparams, **self.model.get_hyper_parameter()} if init is True: self.initialize() @@ -229,11 +227,11 @@ class NodeClassificationTrainer(BaseTrainer): self.model.model.train() optimizer.zero_grad() res = self.model.model.forward(data) - if hasattr(F, self.loss_type): - loss = getattr(F, self.loss_type)(res[mask], data.y[mask]) + if hasattr(F, self.loss): + loss = getattr(F, self.loss)(res[mask], data.y[mask]) else: raise TypeError( - "PyTorch does not support loss type {}".format(self.loss_type) + "PyTorch does not support loss type {}".format(self.loss) ) loss.backward() @@ -241,7 +239,7 @@ class NodeClassificationTrainer(BaseTrainer): if self.lr_scheduler_type: scheduler.step() - if hasattr(data, 'val_mask') and data.val_mask is not None: + if hasattr(data, "val_mask") and data.val_mask is not None: if type(self.feval) is list: feval = self.feval[0] else: @@ -253,7 +251,7 @@ class NodeClassificationTrainer(BaseTrainer): if self.early_stopping.early_stop: LOGGER.debug("Early stopping at %d", epoch) break - if hasattr(data, 'val_mask') and data.val_mask is not None: + if hasattr(data, "val_mask") and data.val_mask is not None: self.early_stopping.load_checkpoint(self.model.model) def predict_only(self, data, test_mask=None): @@ -516,7 +514,7 @@ class NodeClassificationTrainer(BaseTrainer): device=self.device, weight_decay=hp["weight_decay"], feval=self.feval, - loss=self.loss_type, + loss=self.loss, lr_scheduler_type=self.lr_scheduler_type, init=True, *self.args, @@ -538,7 +536,6 @@ class NodeClassificationTrainer(BaseTrainer): def hyper_parameter_space(self, space): # """Set the space of hyperparameter.""" self.space = space - NodeClassificationTrainer.space = space def get_hyper_parameter(self): # """Get the hyperparameter in this trainer.""" diff --git a/autogl/solver/base.py b/autogl/solver/base.py index 421306f..94e7c0a 100644 --- 
a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -11,7 +11,6 @@ import torch from ..module.feature import FEATURE_DICT from ..module.hpo import HPO_DICT from ..module.model import MODEL_DICT -from ..module.train import NodeClassificationTrainer from ..module import BaseFeatureAtom, BaseHPOptimizer, BaseTrainer from .utils import Leaderboard from ..utils import get_logger @@ -336,7 +335,7 @@ class BaseSolver: assert name in self.trained_models, "cannot find model by name" + name return self.trained_models[name] - def get_model_by_performance(self, index) -> Tuple[NodeClassificationTrainer, str]: + def get_model_by_performance(self, index) -> Tuple[BaseTrainer, str]: r""" Find and get the model instance by performance. @@ -347,7 +346,7 @@ class BaseSolver: Returns ------- - trainer: autogl.module.train.NodeClassificationTrainer + trainer: autogl.module.train.BaseTrainer A trainer instance containing the trained models and training status. name: str The name of current trainer. diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index bc5bcdb..82c5254 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -13,7 +13,7 @@ import yaml from .base import BaseClassifier from ...module.feature import FEATURE_DICT from ...module.model import BaseModel, MODEL_DICT -from ...module.train import TRAINER_DICT, get_feval, GraphClassificationTrainer +from ...module.train import TRAINER_DICT, get_feval, BaseGraphClassificationTrainer from ..base import _initialize_single_model, _parse_hp_space from ..utils import Leaderboard, set_seed from ...datasets import utils @@ -90,7 +90,7 @@ class AutoGraphClassifier(BaseClassifier): hpo_module=hpo_module, ensemble_module=ensemble_module, max_evals=max_evals, - default_trainer=default_trainer or "GraphClassification", + default_trainer=default_trainer or "GraphClassificationFull", trainer_hp_space=trainer_hp_space, model_hp_spaces=model_hp_spaces, size=size, @@ -142,20 +142,22 @@ class AutoGraphClassifier(BaseClassifier): model.set_num_features(num_features) model.set_num_graph_features(num_graph_features) self.graph_model_list.append(model.to(device)) - elif isinstance(model, GraphClassificationTrainer): + elif isinstance(model, BaseGraphClassificationTrainer): # receive a trainer list, put trainer to list assert ( model.get_model() is not None ), "Passed trainer should contain a model" - model.set_feval(feval) - model.loss_type = loss - model.to(device) model.model.set_num_classes(num_classes) model.model.set_num_features(num_features) model.model.set_num_graph_features(num_graph_features) - model.num_classes = num_classes - model.num_features = num_features - model.num_graph_features = num_graph_features + model.update_parameters( + num_classes=num_classes, + num_features=num_features, + num_graph_features=num_graph_features, + loss=loss, + feval=feval, + device=device, + ) self.graph_model_list.append(model) else: raise KeyError("cannot find graph network %s." 
% (model)) @@ -171,7 +173,7 @@ class AutoGraphClassifier(BaseClassifier): # set model hp space if self._model_hp_spaces is not None: if self._model_hp_spaces[i] is not None: - if isinstance(model, GraphClassificationTrainer): + if isinstance(model, BaseGraphClassificationTrainer): model.model.hyper_parameter_space = self._model_hp_spaces[i] else: model.hyper_parameter_space = self._model_hp_spaces[i] @@ -770,11 +772,11 @@ class AutoGraphClassifier(BaseClassifier): ] trainer = path_or_dict.pop("trainer", None) - default_trainer = "GraphClassification" + default_trainer = "GraphClassificationFull" trainer_space = None if isinstance(trainer, dict): # global default - default_trainer = trainer.pop("name", "GraphClassification") + default_trainer = trainer.pop("name", "GraphClassificationFull") trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) default_kwargs = {"num_features": None, "num_classes": None} default_kwargs.update(trainer) @@ -793,7 +795,7 @@ class AutoGraphClassifier(BaseClassifier): trainer_space = [] for i in range(len(model_list)): train, model = trainer[i], model_list[i] - default_trainer = train.pop("name", "GraphClassification") + default_trainer = train.pop("name", "GraphClassificationFull") trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) default_kwargs = {"num_features": None, "num_classes": None} default_kwargs.update(train) diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 4fb37a0..20b915c 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -14,7 +14,7 @@ from .base import BaseClassifier from ..base import _parse_hp_space, _initialize_single_model from ...module.feature import FEATURE_DICT from ...module.model import MODEL_DICT, BaseModel -from ...module.train import TRAINER_DICT, NodeClassificationTrainer +from ...module.train import TRAINER_DICT, BaseNodeClassificationTrainer from ...module.train import get_feval from ..utils import Leaderboard, set_seed from ...datasets import utils @@ -92,7 +92,7 @@ class AutoNodeClassifier(BaseClassifier): hpo_module=hpo_module, ensemble_module=ensemble_module, max_evals=max_evals, - default_trainer=default_trainer or "NodeClassification", + default_trainer=default_trainer or "NodeClassificationFull", trainer_hp_space=trainer_hp_space, model_hp_spaces=model_hp_spaces, size=size, @@ -135,18 +135,20 @@ class AutoNodeClassifier(BaseClassifier): model.set_num_classes(num_classes) model.set_num_features(num_features) self.graph_model_list.append(model.to(device)) - elif isinstance(model, NodeClassificationTrainer): + elif isinstance(model, BaseNodeClassificationTrainer): # receive a trainer list, put trainer to list assert ( model.get_model() is not None ), "Passed trainer should contain a model" - model.set_feval(feval) - model.loss_type = loss - model.to(device) model.model.set_num_classes(num_classes) model.model.set_num_features(num_features) - model.num_classes = num_classes - model.num_features = num_features + model.update_parameters( + num_classes=num_classes, + num_features=num_features, + loss=loss, + feval=feval, + device=device, + ) self.graph_model_list.append(model) else: raise KeyError("cannot find graph network %s." 
% (model)) @@ -162,7 +164,7 @@ class AutoNodeClassifier(BaseClassifier): # set model hp space if self._model_hp_spaces is not None: if self._model_hp_spaces[i] is not None: - if isinstance(model, NodeClassificationTrainer): + if isinstance(model, BaseNodeClassificationTrainer): model.model.hyper_parameter_space = self._model_hp_spaces[i] else: model.hyper_parameter_space = self._model_hp_spaces[i] @@ -689,11 +691,11 @@ class AutoNodeClassifier(BaseClassifier): ] trainer = path_or_dict.pop("trainer", None) - default_trainer = "NodeClassification" + default_trainer = "NodeClassificationFull" trainer_space = None if isinstance(trainer, dict): # global default - default_trainer = trainer.pop("name", "NodeClassification") + default_trainer = trainer.pop("name", "NodeClassificationFull") trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) default_kwargs = {"num_features": None, "num_classes": None} default_kwargs.update(trainer) @@ -712,7 +714,7 @@ class AutoNodeClassifier(BaseClassifier): trainer_space = [] for i in range(len(model_list)): train, model = trainer[i], model_list[i] - default_trainer = train.pop("name", "NodeClassification") + default_trainer = train.pop("name", "NodeClassificationFull") trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) default_kwargs = {"num_features": None, "num_classes": None} default_kwargs.update(train) From 62ee4808e6b4032f9fc8df3abeae8f41370f0be8 Mon Sep 17 00:00:00 2001 From: cluster32 Date: Wed, 24 Mar 2021 11:37:30 +0800 Subject: [PATCH 029/144] test nas --- autogl/module/hpo/__init__.py | 2 + autogl/module/hpo/darts.py | 184 ++++++++++++++++++++++++++++++++ autogl/module/hpo/nas.py | 46 ++++++++ autogl/module/hpo/test.py | 33 ++++++ autogl/module/hpo/utils.py | 182 +++++++++++++++++++++++++++++++ examples/node_classification.py | 2 +- 6 files changed, 448 insertions(+), 1 deletion(-) create mode 100644 autogl/module/hpo/darts.py create mode 100644 autogl/module/hpo/nas.py create mode 100644 autogl/module/hpo/test.py create mode 100644 autogl/module/hpo/utils.py diff --git a/autogl/module/hpo/__init__.py b/autogl/module/hpo/__init__.py index e8fe41a..cd4bbba 100644 --- a/autogl/module/hpo/__init__.py +++ b/autogl/module/hpo/__init__.py @@ -28,6 +28,7 @@ from .mocmaes_advisorchoco import MocmaesAdvisorChoco from .quasi_advisorchoco import QuasiAdvisorChoco from .rand_advisor import RandAdvisor from .tpe_advisorhpo import TpeAdvisorHPO +from .test import TestHPO def build_hpo_from_name(name: str) -> BaseHPOptimizer: @@ -62,5 +63,6 @@ __all__ = [ "QuasiAdvisorChoco", "RandAdvisor", "TpeAdvisorHPO", + "TestHPO", "build_hpo_from_name", ] diff --git a/autogl/module/hpo/darts.py b/autogl/module/hpo/darts.py new file mode 100644 index 0000000..6ac4b86 --- /dev/null +++ b/autogl/module/hpo/darts.py @@ -0,0 +1,184 @@ +# Modified from NNI + +import copy +import logging + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .nas import BaseNAS +from .utils import AverageMeterGroup, replace_layer_choice, replace_input_choice + + +_logger = logging.getLogger(__name__) + + +class DartsLayerChoice(nn.Module): + def __init__(self, layer_choice): + super(DartsLayerChoice, self).__init__() + self.name = layer_choice.key + self.op_choices = nn.ModuleDict(layer_choice.named_children()) + self.alpha = nn.Parameter(torch.randn(len(self.op_choices)) * 1e-3) + + def forward(self, *args, **kwargs): + op_results = torch.stack([op(*args, **kwargs) for op in self.op_choices.values()]) + alpha_shape = [-1] + [1] * 
(len(op_results.size()) - 1) + return torch.sum(op_results * F.softmax(self.alpha, -1).view(*alpha_shape), 0) + + def parameters(self): + for _, p in self.named_parameters(): + yield p + + def named_parameters(self): + for name, p in super(DartsLayerChoice, self).named_parameters(): + if name == 'alpha': + continue + yield name, p + + def export(self): + return torch.argmax(self.alpha).item() + + +class DartsInputChoice(nn.Module): + def __init__(self, input_choice): + super(DartsInputChoice, self).__init__() + self.name = input_choice.key + self.alpha = nn.Parameter(torch.randn(input_choice.n_candidates) * 1e-3) + self.n_chosen = input_choice.n_chosen or 1 + + def forward(self, inputs): + inputs = torch.stack(inputs) + alpha_shape = [-1] + [1] * (len(inputs.size()) - 1) + return torch.sum(inputs * F.softmax(self.alpha, -1).view(*alpha_shape), 0) + + def parameters(self): + for _, p in self.named_parameters(): + yield p + + def named_parameters(self): + for name, p in super(DartsInputChoice, self).named_parameters(): + if name == 'alpha': + continue + yield name, p + + def export(self): + return torch.argsort(-self.alpha).cpu().numpy().tolist()[:self.n_chosen] + + +class DartsTrainer(BaseNAS): + """ + DARTS trainer. + + Parameters + ---------- + model : nn.Module + PyTorch model to be trained. + loss : callable + Receives logits and ground truth label, return a loss tensor. + metrics : callable + Receives logits and ground truth label, return a dict of metrics. + optimizer : Optimizer + The optimizer used for optimizing the model. + num_epochs : int + Number of epochs planned for training. + dataset : Dataset + Dataset for training. Will be split for training weights and architecture weights. + grad_clip : float + Gradient clipping. Set to 0 to disable. Default: 5. + learning_rate : float + Learning rate to optimize the model. + batch_size : int + Batch size. + workers : int + Workers for data loading. + device : torch.device + ``torch.device("cpu")`` or ``torch.device("cuda")``. + log_frequency : int + Step count per logging. + arc_learning_rate : float + Learning rate of architecture parameters. + unrolled : float + ``True`` if using second order optimization, else first order optimization. + """ + + """def __init__(self, model, loss, metrics, optimizer, + num_epochs, dataset, grad_clip=5., + learning_rate=2.5E-3, batch_size=64, workers=4, + device=None, log_frequency=None, + arc_learning_rate=3.0E-4, unrolled=False):""" + def __init__(self, *args, **kwargs): + self.num_epochs = kwargs.get("num_epochs", 5) + self.workers = 4 + self.device = "cuda" + self.log_frequency = None + + #for _, module in self.nas_modules: + # module.to(self.device) + + # use the same architecture weight for modules with duplicated names + + def search(self, space, dset, trainer): + """ + main process + """ + self.model = space + self.dataset = dset + self.trainer = trainer + self.model_optim = torch.optim.SGD( + self.model.parameters(), lr=0.01, weight_decay=3e-4 + ) + + self.nas_modules = [] + replace_layer_choice(self.model, DartsLayerChoice, self.nas_modules) + replace_input_choice(self.model, DartsInputChoice, self.nas_modules) + + ctrl_params = {} + for _, m in self.nas_modules: + if m.name in ctrl_params: + assert m.alpha.size() == ctrl_params[m.name].size(), 'Size of parameters with the same label should be same.' 
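+                    # modules sharing a key reuse one alpha tensor, keeping their architecture weights tied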
+                m.alpha = ctrl_params[m.name]
+            else:
+                ctrl_params[m.name] = m.alpha
+        self.ctrl_optim = torch.optim.Adam(list(ctrl_params.values()), 3e-4, betas=(0.5, 0.999),
+                                           weight_decay=1.0E-3)
+        self.grad_clip = 5.
+
+        for step in range(self.num_epochs):
+            self._train_one_epoch(step)
+            if self.log_frequency is not None and step % self.log_frequency == 0:
+                _logger.info('Epoch [%s/%s] done', step + 1, self.num_epochs)
+
+        return self.export()
+
+    def _train_one_epoch(self, epoch):
+        self.model.train()
+        meters = AverageMeterGroup()
+
+        # phase 1. architecture step
+        self.ctrl_optim.zero_grad()
+        # only the first-order approximation (no unrolling) is implemented here
+        _, loss = self._infer()
+        loss.backward()
+        self.ctrl_optim.step()
+
+        # phase 2: child network step
+        self.model_optim.zero_grad()
+        metric, loss = self._infer()
+        loss.backward()
+        if self.grad_clip > 0:
+            nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)  # gradient clipping
+        self.model_optim.step()
+
+    def _infer(self):
+        metric, loss = self.trainer.infer(self.model, self.dataset)
+        return metric, loss
+
+    @torch.no_grad()
+    def export(self):
+        result = dict()
+        for name, module in self.nas_modules:
+            if name not in result:
+                result[name] = module.export()
+        return result
diff --git a/autogl/module/hpo/nas.py b/autogl/module/hpo/nas.py
new file mode 100644
index 0000000..8091d06
--- /dev/null
+++ b/autogl/module/hpo/nas.py
@@ -0,0 +1,46 @@
+from torch_geometric.nn import GCNConv, SAGEConv
+from nni.nas.pytorch import mutables
+import torch.nn as nn
+
+class BaseNAS:
+    def search(self, space, dset, trainer):
+        """
+        The main process of NAS.
+
+        Parameters
+        ----------
+        space : BaseArchitectureSpace
+            No implementation yet.
+        dset : ...datasets
+            Dataset to train and evaluate on.
+        trainer : ..train.BaseTrainer
+            Holds the model, provides the HP space and is used for training.
+
+        Returns
+        -------
+        model: ..train.BaseTrainer
+            The trainer including the best trained model.
+        """
+
+class GraphSpace(nn.Module):
+    def __init__(self, inp, hid, oup):
+        super().__init__()
+        self.gcn = GCNConv(inp, hid)
+        self.op1 = mutables.LayerChoice([GCNConv(inp, hid), SAGEConv(inp, hid)])
+        self.op2 = mutables.LayerChoice([
+            GCNConv(hid, oup),
+            SAGEConv(hid, oup)
+        ], key="2")
+
+    def forward(self, data):
+        x = self.op1(data.x, data.edge_index)
+        x = self.op2(x, data.edge_index)
+        return x
+
+class BaseTrainer:
+    def infer(self, model, dataset):
+        dset = dataset[0]
+        pred = model(dset)[dset.train_mask]
+        y = dset.y[dset.train_mask]
+        loss_func = nn.CrossEntropyLoss()
+        loss = loss_func(pred, y)
+        # the loss doubles as the metric in this stub trainer
+        return loss, loss
\ No newline at end of file
diff --git a/autogl/module/hpo/test.py b/autogl/module/hpo/test.py
new file mode 100644
index 0000000..fdb0a33
--- /dev/null
+++ b/autogl/module/hpo/test.py
@@ -0,0 +1,33 @@
+import hyperopt
+
+from . import register_hpo
+from .nas import BaseTrainer, GraphSpace
+from .darts import DartsTrainer
+from .base import BaseHPOptimizer, TimeTooLimitedError
+
+@register_hpo("test")
+class TestHPO(BaseHPOptimizer):
+    """
+    A testing optimizer used to smoke-test the NAS pipeline:
+    it builds a small GraphSpace search space and searches it
+    with the DARTS trainer on the given dataset.
+    See .darts.DartsTrainer and .nas.GraphSpace for the components it wires together.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def optimize(self, trainer, dataset, time_limit=None, memory_limit=None):
+        num_features = dataset[0].x.shape[1]
+        num_classes = dataset.num_classes
+        model = GraphSpace(num_features, 64, num_classes)
+        tr = BaseTrainer()
+        nas = DartsTrainer()
+        a = nas.search(model, dataset, tr)
+        print(a)
+        print(type(a))
+        return 1, 2
+
+    @classmethod
+    def build_hpo_from_args(cls, args):
+        """Build a new hpo instance."""
+        return cls(args)
diff --git a/autogl/module/hpo/utils.py b/autogl/module/hpo/utils.py
new file mode 100644
index 0000000..61a4f91
--- /dev/null
+++ b/autogl/module/hpo/utils.py
@@ -0,0 +1,182 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+from collections import OrderedDict
+
+import numpy as np
+import torch
+import nni.retiarii.nn.pytorch as nn
+from nni.nas.pytorch.mutables import InputChoice, LayerChoice
+
+_logger = logging.getLogger(__name__)
+
+
+def to_device(obj, device):
+    """
+    Move a tensor, tuple, list, or dict onto device.
+    """
+    if torch.is_tensor(obj):
+        return obj.to(device)
+    if isinstance(obj, tuple):
+        return tuple(to_device(t, device) for t in obj)
+    if isinstance(obj, list):
+        return [to_device(t, device) for t in obj]
+    if isinstance(obj, dict):
+        return {k: to_device(v, device) for k, v in obj.items()}
+    if isinstance(obj, (int, float, str)):
+        return obj
+    raise ValueError("'%s' has unsupported type '%s'" % (obj, type(obj)))
+
+
+def to_list(arr):
+    if torch.is_tensor(arr):
+        return arr.cpu().numpy().tolist()
+    if isinstance(arr, np.ndarray):
+        return arr.tolist()
+    if isinstance(arr, (list, tuple)):
+        return list(arr)
+    return arr
+
+
+class AverageMeterGroup:
+    """
+    Average meter group for multiple average meters.
+    """
+
+    def __init__(self):
+        self.meters = OrderedDict()
+
+    def update(self, data):
+        """
+        Update the meter group with a dict of metrics.
+        Non-existent average meters are created automatically.
+        """
+        for k, v in data.items():
+            if k not in self.meters:
+                self.meters[k] = AverageMeter(k, ":4f")
+            self.meters[k].update(v)
+
+    def __getattr__(self, item):
+        return self.meters[item]
+
+    def __getitem__(self, item):
+        return self.meters[item]
+
+    def __str__(self):
+        return " ".join(str(v) for v in self.meters.values())
+
+    def summary(self):
+        """
+        Return a summary string of group data.
+        """
+        return " ".join(v.summary() for v in self.meters.values())
+
+
+class AverageMeter:
+    """
+    Computes and stores the average and current value.
+
+    Parameters
+    ----------
+    name : str
+        Name to display.
+    fmt : str
+        Format string to print the values.
+    """
+
+    def __init__(self, name, fmt=':f'):
+        self.name = name
+        self.fmt = fmt
+        self.reset()
+
+    def reset(self):
+        """
+        Reset the meter.
+        """
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        """
+        Update with value and weight.
+
+        Parameters
+        ----------
+        val : float or int
+            The new value to be accounted in.
+        n : int
+            The weight of the new value.
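+
+        Example (illustrative)::
+
+            meter = AverageMeter("loss", ":4f")
+            meter.update(0.25, n=32)  # account for a batch of 32 samples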
+ """ + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + def summary(self): + fmtstr = '{name}: {avg' + self.fmt + '}' + return fmtstr.format(**self.__dict__) + + +def _replace_module_with_type(root_module, init_fn, type_name, modules): + if modules is None: + modules = [] + + def apply(m): + for name, child in m.named_children(): + if isinstance(child, type_name): + setattr(m, name, init_fn(child)) + modules.append((child.key, getattr(m, name))) + else: + apply(child) + + apply(root_module) + return modules + + +def replace_layer_choice(root_module, init_fn, modules=None): + """ + Replace layer choice modules with modules that are initiated with init_fn. + + Parameters + ---------- + root_module : nn.Module + Root module to traverse. + init_fn : Callable + Initializing function. + modules : dict, optional + Update the replaced modules into the dict and check duplicate if provided. + + Returns + ------- + List[Tuple[str, nn.Module]] + A list from layer choice keys (names) and replaced modules. + """ + return _replace_module_with_type(root_module, init_fn, (LayerChoice, nn.LayerChoice), modules) + + +def replace_input_choice(root_module, init_fn, modules=None): + """ + Replace input choice modules with modules that are initiated with init_fn. + + Parameters + ---------- + root_module : nn.Module + Root module to traverse. + init_fn : Callable + Initializing function. + modules : dict, optional + Update the replaced modules into the dict and check duplicate if provided. + + Returns + ------- + List[Tuple[str, nn.Module]] + A list from layer choice keys (names) and replaced modules. 
+ """ + return _replace_module_with_type(root_module, init_fn, (InputChoice, nn.InputChoice), modules) diff --git a/examples/node_classification.py b/examples/node_classification.py index 939b555..f95c07f 100644 --- a/examples/node_classification.py +++ b/examples/node_classification.py @@ -18,7 +18,7 @@ if __name__ == '__main__': parser.add_argument('--dataset', default='cora', type=str) parser.add_argument('--configs', type=str, default='../configs/nodeclf_gcn_benchmark_small.yml') # following arguments will override parameters in the config file - parser.add_argument('--hpo', type=str, default='random') + parser.add_argument('--hpo', type=str, default='test') parser.add_argument('--max_eval', type=int, default=5) parser.add_argument('--seed', type=int, default=0) parser.add_argument('--device', default=0, type=int) From 93159cd363cdf8a9fc902e20a6cb434e97522a29 Mon Sep 17 00:00:00 2001 From: null Date: Tue, 30 Mar 2021 17:36:00 +0800 Subject: [PATCH 030/144] Implement Node-wise(Neighbor) Sampling with GraphSAGE model --- autogl/module/model/__init__.py | 33 +- autogl/module/model/_model_registry.py | 28 ++ autogl/module/model/graph_sage.py | 124 +++++ autogl/module/model/graphsage.py | 2 +- autogl/module/train/__init__.py | 1 + .../node_classification_trainer/__init__.py | 1 + .../node_classification_sampled_trainer.py | 424 ++++++++++++++++++ autogl/module/train/sampling/__init__.py | 0 .../module/train/sampling/sampler/__init__.py | 0 .../sampling/sampler/neighbor_sampler.py | 113 +++++ configs/nodeclf_sage_benchmark_large.yml | 15 +- 11 files changed, 707 insertions(+), 34 deletions(-) create mode 100644 autogl/module/model/_model_registry.py create mode 100644 autogl/module/model/graph_sage.py create mode 100644 autogl/module/train/node_classification_trainer/__init__.py create mode 100644 autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py create mode 100644 autogl/module/train/sampling/__init__.py create mode 100644 autogl/module/train/sampling/sampler/__init__.py create mode 100644 autogl/module/train/sampling/sampler/neighbor_sampler.py diff --git a/autogl/module/model/__init__.py b/autogl/module/model/__init__.py index e05a3a2..ef2a92d 100644 --- a/autogl/module/model/__init__.py +++ b/autogl/module/model/__init__.py @@ -1,36 +1,7 @@ -import importlib -import os - -MODEL_DICT = {} - - -def register_model(name): - def register_model_cls(cls): - if name in MODEL_DICT: - raise ValueError("Cannot register duplicate trainer ({})".format(name)) - if not issubclass(cls, BaseModel): - raise ValueError( - "Trainer ({}: {}) must extend BaseModel".format(name, cls.__name__) - ) - MODEL_DICT[name] = cls - return cls - - return register_model_cls - - +from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model from .base import BaseModel from .topkpool import AutoTopkpool -from .graphsage import AutoSAGE +from .graph_sage import AutoSAGE from .gcn import AutoGCN from .gat import AutoGAT from .gin import AutoGIN - - -__all__ = [ - "BaseModel", - "AutoTopkpool", - "AutoSAGE", - "AutoGCN", - "AutoGAT", - "AutoGIN", -] diff --git a/autogl/module/model/_model_registry.py b/autogl/module/model/_model_registry.py new file mode 100644 index 0000000..d8270eb --- /dev/null +++ b/autogl/module/model/_model_registry.py @@ -0,0 +1,28 @@ +import typing as _typing +from .base import BaseModel + +MODEL_DICT: _typing.Dict[str, _typing.Type[BaseModel]] = {} + + +def register_model(name): + def register_model_cls(cls): + if name in MODEL_DICT: + raise 
ValueError("Cannot register duplicate trainer ({})".format(name)) + if not issubclass(cls, BaseModel): + raise ValueError( + "Trainer ({}: {}) must extend BaseModel".format(name, cls.__name__) + ) + MODEL_DICT[name] = cls + return cls + + return register_model_cls + + +class ModelUniversalRegistry: + @classmethod + def get_model(cls, name: str) -> _typing.Type[BaseModel]: + if type(name) != str: + raise TypeError + if name not in MODEL_DICT: + raise KeyError + return MODEL_DICT.get(name) diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py new file mode 100644 index 0000000..90ee515 --- /dev/null +++ b/autogl/module/model/graph_sage.py @@ -0,0 +1,124 @@ +import typing as _typing +import torch +import torch.nn.functional as F +from torch_geometric.nn.conv import SAGEConv + +from . import register_model +from .base import BaseModel, activate_func + + +class GraphSAGE(torch.nn.Module): + def __init__( + self, num_features: int, num_classes: int, + hidden_features: _typing.Sequence[int], + dropout: float, activation_name: str, + aggr: str = "mean", **kwargs + ): + super(GraphSAGE, self).__init__() + if type(aggr) != str: + raise TypeError + if aggr not in ("add", "max", "mean"): + aggr = "mean" + + self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() + + num_layers: int = len(hidden_features) + 1 + if num_layers == 1: + self.__convolution_layers.append( + SAGEConv(num_features, num_classes, aggr=aggr) + ) + else: + self.__convolution_layers.append( + SAGEConv(num_features, hidden_features[0], aggr=aggr) + ) + for i in range(len(hidden_features)): + if i + 1 < len(hidden_features): + self.__convolution_layers.append( + SAGEConv(hidden_features[i], hidden_features[i + 1], aggr=aggr) + ) + else: + self.__convolution_layers.append( + SAGEConv(hidden_features[i], num_classes, aggr=aggr) + ) + self.__dropout: float = dropout + self.__activation_name: str = activation_name + + def __full_forward(self, data): + x: torch.Tensor = getattr(data, "x") + edge_index: torch.Tensor = getattr(data, "edge_index") + for layer_index in range(len(self.__convolution_layers)): + x: torch.Tensor = self.__convolution_layers[layer_index](x, edge_index) + if layer_index + 1 < len(self.__convolution_layers): + x = activate_func(x, self.__activation_name) + x = F.dropout(x, p=self.__dropout, training=self.training) + return F.log_softmax(x, dim=1) + + def __distributed_forward(self, data): + x: torch.Tensor = getattr(data, "x") + edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") + if len(edge_indexes) != len(self.__convolution_layers): + raise AttributeError + for layer_index in range(len(self.__convolution_layers)): + x: torch.Tensor = self.__convolution_layers[layer_index](x, edge_indexes[layer_index]) + if layer_index + 1 < len(self.__convolution_layers): + x = activate_func(x, self.__activation_name) + x = F.dropout(x, p=self.__dropout, training=self.training) + return F.log_softmax(x, dim=1) + + def forward(self, data): + if ( + hasattr(data, "edge_indexes") and + isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and + len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) + ): + return self.__distributed_forward(data) + else: + return self.__full_forward(data) + + +@register_model("sage") +class AutoSAGE(BaseModel): + def __init__( + self, num_features: int = 1, num_classes: int = 1, + device: _typing.Optional[torch.device] = torch.device("cpu"), + init: bool = False, **kwargs + ): + super(AutoSAGE, 
self).__init__(init) + self.__num_features: int = num_features + self.__num_classes: int = num_classes + self.__device: torch.device = device if device is not None else torch.device("cpu") + + self.hyperparams = { + "num_layers": 3, + "hidden": [64, 32], + "dropout": 0.5, + "act": "relu", + "aggr": "mean", + } + self.params = { + "num_features": self.__num_features, + "num_classes": self.__num_classes + } + + self._model: GraphSAGE = GraphSAGE( + self.__num_features, self.__num_classes, [64, 32], 0.5, "relu" + ) + + self._initialized: bool = False + if init: + self.initialize() + + @property + def model(self) -> GraphSAGE: + return self._model + + def initialize(self): + """ Initialize model """ + if not self._initialized: + self._model: GraphSAGE = GraphSAGE( + self.__num_features, self.__num_classes, + hidden_features=self.hyperparams["hidden"], + activation_name=self.hyperparams["act"], + **self.hyperparams + ) + self._initialized = True diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py index 9802432..ac541b8 100644 --- a/autogl/module/model/graphsage.py +++ b/autogl/module/model/graphsage.py @@ -172,7 +172,7 @@ class GraphSAGE(torch.nn.Module): return F.log_softmax(x, dim=1) -@register_model("sage") +# @register_model("sage") class AutoSAGE(BaseModel): r""" AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 93cbbf0..68b5499 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -52,6 +52,7 @@ def get_feval(feval): from .graph_classification_full import GraphClassificationFullTrainer from .node_classification_full import NodeClassificationFullTrainer +from .node_classification_trainer import * from .evaluate import Acc, Auc, Logloss __all__ = [ diff --git a/autogl/module/train/node_classification_trainer/__init__.py b/autogl/module/train/node_classification_trainer/__init__.py new file mode 100644 index 0000000..4ba55ff --- /dev/null +++ b/autogl/module/train/node_classification_trainer/__init__.py @@ -0,0 +1 @@ +from .node_classification_sampled_trainer import * diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py new file mode 100644 index 0000000..e2d5e97 --- /dev/null +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -0,0 +1,424 @@ +import torch +import logging +import typing as _typing +from torch.nn import functional as F + +from .. 
import EVALUATE_DICT, register_trainer +from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation +from ..evaluate import Logloss +from ..sampling.sampler.neighbor_sampler import NeighborSampler +from ...model import BaseModel, ModelUniversalRegistry + +LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") + + +def get_feval(feval): + if isinstance(feval, str): + return EVALUATE_DICT[feval] + if isinstance(feval, type) and issubclass(feval, Evaluation): + return feval + if isinstance(feval, list): + return [get_feval(f) for f in feval] + raise ValueError("feval argument of type", type(feval), "is not supported!") + + +@register_trainer("NodeClassificationNeighborSampling") +class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): + """ + The node classification trainer + for automatically training the node classification tasks + with neighbour sampling + """ + + def __init__( + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[ + _typing.Type[torch.optim.Optimizer], str, None + ] = None, + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs + ) -> None: + + self._functional_loss_name: str = loss + if device is None: + device: torch.device = torch.device("cpu") + + if type(model) == str: + self._model: BaseModel = ModelUniversalRegistry.get_model(model)( + num_features, num_classes, device, init=init + ) + elif isinstance(model, BaseModel): + self._model: BaseModel = model + else: + raise TypeError + + if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer + elif type(optimizer) == str: + if optimizer.lower() == "adam": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + elif optimizer.lower() == "adam" + "w": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.AdamW + elif optimizer.lower() == "sgd": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.SGD + else: + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + else: + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + + self._num_features: int = num_features + self._num_classes: int = num_classes + self._learning_rate: float = lr if lr > 0 else 1e-4 + self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type + self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 + self._device: torch.device = device + + self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") + + self._feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) + self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 + early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 + self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) + + super(NodeClassificationNeighborSamplingTrainer, self).__init__( + model, num_features, num_classes, + device=device if device is not None else "auto", + init=init, loss=loss + ) + + self._valid_result: torch.Tensor = torch.zeros(0) + 
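+        # per-class probabilities are kept as well, for get_valid_predict_proba()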
self._valid_result_prob: torch.Tensor = torch.zeros(0) + self._valid_score = None + + self._hyper_parameter_space: _typing.List[_typing.Dict[str, _typing.Any]] = [ + { + "parameterName": "max_epoch", + "type": "INTEGER", + "maxValue": 500, + "minValue": 10, + "scalingType": "LINEAR", + }, + { + "parameterName": "early_stopping_round", + "type": "INTEGER", + "maxValue": 30, + "minValue": 10, + "scalingType": "LINEAR", + }, + { + "parameterName": "lr", + "type": "DOUBLE", + "maxValue": 1e-1, + "minValue": 1e-4, + "scalingType": "LOG", + }, + { + "parameterName": "weight_decay", + "type": "DOUBLE", + "maxValue": 1e-2, + "minValue": 1e-4, + "scalingType": "LOG", + } + ] + + self._hyper_parameter: _typing.Dict[str, _typing.Any] = { + "max_epoch": self._max_epoch, + "early_stopping_round": self._early_stopping.patience, + "lr": self._learning_rate, + "weight_decay": self._weight_decay + } + + self.__initialized: bool = False + if init: + self.initialize() + + def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": + if self.__initialized: + return self + self._model.initialize() + self.__initialized = True + return self + + def get_model(self) -> BaseModel: + return self._model + + def __train_only( + self, data + ) -> "NodeClassificationNeighborSamplingTrainer": + """ + The function of training on the given dataset and mask. + :param data: data of a specific graph + :return: self + """ + data = data.to(self._device) + optimizer: torch.optim.Optimizer = self._optimizer_class( + self._model.parameters(), + lr=self._learning_rate, weight_decay=self._weight_decay + ) + if type(self._lr_scheduler_type) == str: + if self._lr_scheduler_type.lower() == "step" + "lr": + lr_scheduler: torch.optim.lr_scheduler.StepLR = \ + torch.optim.lr_scheduler.StepLR( + optimizer, step_size=100, gamma=0.1 + ) + elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = \ + torch.optim.lr_scheduler.MultiStepLR( + optimizer, milestones=[30, 80], gamma=0.1 + ) + elif self._lr_scheduler_type.lower() == "exponential" + "lr": + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = \ + torch.optim.lr_scheduler.ExponentialLR( + optimizer, gamma=0.1 + ) + elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = \ + torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + else: + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + else: + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + + train_sampler: NeighborSampler = NeighborSampler( + data, self.__sampling_sizes, batch_size=20 + ) + + for current_epoch in range(self._max_epoch): + self._model.model.train() + """ epoch start """ + for target_node_indexes, edge_indexes in train_sampler: + optimizer.zero_grad() + data.edge_indexes = edge_indexes + prediction = self._model.model(data) + if not hasattr(F, self._functional_loss_name): + raise TypeError( + "PyTorch does not support loss type {}".format(self._functional_loss_name) + ) + loss_function = getattr(F, self._functional_loss_name) + loss: torch.Tensor = loss_function( + prediction[target_node_indexes], + data.y[target_node_indexes] + ) + loss.backward() + optimizer.step() + + if lr_scheduler is not None: + lr_scheduler.step() + + """ Validate performance """ + if hasattr(data, "val_mask") and getattr(data, "val_mask") is not None: + 
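+                # score this epoch on the validation split; the sign-adjusted value drives early stopping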
validation_results: _typing.Sequence[float] = \ + self.evaluate((data,), "val", [self._feval[0]]) + + if self._feval[0].is_higher_better(): + validation_loss: float = -validation_results[0] + else: + validation_loss: float = validation_results[0] + self._early_stopping(validation_loss, self._model.model) + if self._early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", current_epoch) + break + if hasattr(data, "val_mask") and data.val_mask is not None: + self._early_stopping.load_checkpoint(self._model.model) + return self + + def __predict_only(self, data): + """ + The function of predicting on the given data. + :param data: data of a specific graph + :return: the result of prediction on the given dataset + """ + data = data.to(self._device) + self._model.model.eval() + with torch.no_grad(): + prediction = self._model.model(data) + return prediction + + def train(self, dataset, keep_valid_result: bool = True): + """ + The function of training on the given dataset and keeping valid result. + :param dataset: + :param keep_valid_result: Whether to save the validation result after training + """ + data = dataset[0] + self.__train_only(data) + if keep_valid_result: + prediction: torch.Tensor = self.__predict_only(data) + self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] + self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + self._valid_score = self.evaluate(dataset, "val") + + def predict_proba( + self, dataset, mask: _typing.Optional[str] = None, + in_log_format: bool = False + ) -> torch.Tensor: + """ + The function of predicting the probability on the given dataset. + :param dataset: The node classification dataset used to be predicted. + :param mask: + :param in_log_format: + :return: + """ + data = dataset[0].to(self._device) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask = data.train_mask + elif mask.lower() == "test": + _mask = data.test_mask + elif mask.lower() == "val": + _mask = data.val_mask + else: + _mask = data.test_mask + else: + _mask = data.test_mask + result = self.__predict_only(data)[_mask] + return result if in_log_format else torch.exp(result) + + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: + return self.predict_proba( + dataset, mask, in_log_format=True + ).max(1)[1] + + def get_valid_predict(self) -> torch.Tensor: + return self._valid_result + + def get_valid_predict_proba(self) -> torch.Tensor: + return self._valid_result_prob + + def get_valid_score(self, return_major: bool = True): + if return_major: + return ( + self._valid_score[0], + self._feval[0].is_higher_better() + ) + else: + return ( + self._valid_score, + [f.is_higher_better() for f in self._feval] + ) + + def get_name_with_hp(self) -> str: + # """Get the name of hyperparameter.""" + name = "-".join( + [ + str(self._optimizer_class), + str(self._learning_rate), + str(self._max_epoch), + str(self._early_stopping.patience), + str(self._model), + str(self._device), + ] + ) + name = ( + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) + ) + return name + + def evaluate( + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = None + ) -> _typing.Sequence[float]: + data = dataset[0] + data = data.to(self._device) + if feval is None: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = self._feval + else: + _feval: 
_typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval))
+        if mask is None:
+            _mask = data.test_mask
+        elif mask.lower() == "train":
+            _mask = data.train_mask
+        elif mask.lower() == "test":
+            _mask = data.test_mask
+        elif mask.lower() == "val":
+            _mask = data.val_mask
+        else:
+            _mask = data.test_mask
+        prediction_probability: torch.Tensor = self.predict_proba(dataset, mask)
+        y_ground_truth = data.y[_mask]
+
+        results = []
+        for f in _feval:
+            try:
+                results.append(
+                    f.evaluate(prediction_probability, y_ground_truth)
+                )
+            except Exception:
+                results.append(
+                    f.evaluate(
+                        prediction_probability.cpu().numpy(),
+                        y_ground_truth.cpu().numpy()
+                    )
+                )
+        return results
+
+    def to(self, device: torch.device):
+        self._device = device
+        if self._model is not None:
+            self._model.to(device)
+
+    def duplicate_from_hyper_parameter(
+            self, hp: _typing.Dict[str, _typing.Any],
+            model: _typing.Union[BaseModel, str, None] = None
+    ) -> "NodeClassificationNeighborSamplingTrainer":
+
+        if model is None or not isinstance(model, BaseModel):
+            model = self._model
+        model = model.from_hyper_parameter(
+            dict(
+                [
+                    x for x in hp.items()
+                    if x[0] in [y["parameterName"] for y in model.hyper_parameter_space]
+                ]
+            )
+        )
+
+        return NodeClassificationNeighborSamplingTrainer(
+            model, self._num_features, self._num_classes,
+            self._optimizer_class,
+            device=self._device,
+            init=True,
+            feval=self._feval,
+            loss=self._functional_loss_name,
+            lr_scheduler_type=self._lr_scheduler_type,
+            **hp
+        )
+
+    def set_feval(
+            self, feval: _typing.Union[
+                _typing.Sequence[str],
+                _typing.Sequence[_typing.Type[Evaluation]]
+            ]
+    ):
+        self._feval = get_feval(list(feval))
+
+    @property
+    def hyper_parameter_space(self):
+        return self._hyper_parameter_space
+
+    @hyper_parameter_space.setter
+    def hyper_parameter_space(self, hp_space):
+        self._hyper_parameter_space = hp_space
diff --git a/autogl/module/train/sampling/__init__.py b/autogl/module/train/sampling/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/autogl/module/train/sampling/sampler/__init__.py b/autogl/module/train/sampling/sampler/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py
new file mode 100644
index 0000000..0e62a74
--- /dev/null
+++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py
@@ -0,0 +1,113 @@
+import collections.abc
+import random
+import typing as _typing
+import numpy as np
+import torch.utils.data
+
+
+class NeighborSampler(torch.utils.data.DataLoader, collections.abc.Iterable):
+    class _NodeIndexesDataset(torch.utils.data.Dataset):
+        def __init__(self, node_indexes):
+            self.__node_indexes: _typing.Sequence[int] = node_indexes
+
+        def __getitem__(self, index) -> int:
+            if not 0 <= index < len(self.__node_indexes):
+                raise IndexError("Index out of range")
+            else:
+                return self.__node_indexes[index]
+
+        def __len__(self) -> int:
+            return len(self.__node_indexes)
+
+    def __init__(
+            self, data,
+            sampling_sizes: _typing.Sequence[int],
+            target_node_indexes: _typing.Optional[_typing.Sequence[int]] = None,
+            batch_size: _typing.Optional[int] = 1,
+            *args, **kwargs
+    ):
+        self._data = data
+        self.__sampling_sizes: _typing.Sequence[int] = sampling_sizes
+
+        if not (
+                target_node_indexes is not None and
+                isinstance(target_node_indexes, _typing.Sequence)
+        ):
+            if hasattr(data, "train_mask"):
+                target_node_indexes: _typing.Sequence[int] = \
+                    torch.where(getattr(data, "train_mask"))[0]
+            else:
+                target_node_indexes: _typing.Sequence[int] = \
list(np.arange(0, data.x.shape[0])) + + self.__edge_index_map: _typing.Dict[ + int, _typing.Union[torch.Tensor, _typing.Sequence[int]] + ] = {} + self.__init_edge_index_map() + super(NeighborSampler, self).__init__( + self._NodeIndexesDataset(target_node_indexes), + batch_size=batch_size if batch_size > 0 else 1, + collate_fn=self.__sample, *args, **kwargs + ) + + def __init_edge_index_map(self): + self.__edge_index_map.clear() + all_edge_index: torch.Tensor = getattr(self._data, "edge_index") + target_node_indexes: torch.Tensor = all_edge_index[1] + for target_node_index in target_node_indexes.unique().tolist(): + self.__edge_index_map[target_node_index] = torch.where( + all_edge_index[1] == target_node_index + )[0] + + def __iter__(self): + return super(NeighborSampler, self).__iter__() + + def __sample( + self, target_nodes_indexes: _typing.List[int] + ) -> _typing.Tuple[torch.Tensor, _typing.List[torch.Tensor]]: + """ + Sample a sub-graph with neighborhood sampling + :param target_nodes_indexes: + """ + original_edge_index: torch.Tensor = self._data.edge_index + edges_indexes: _typing.List[torch.Tensor] = [] + + current_target_nodes_indexes: _typing.List[int] = target_nodes_indexes + for current_sampling_size in self.__sampling_sizes: + current_edge_index: _typing.Optional[torch.Tensor] = None + for current_target_node_index in current_target_nodes_indexes: + if current_target_node_index in self.__edge_index_map: + all_indexes: torch.Tensor = \ + self.__edge_index_map.get(current_target_node_index) + else: + all_indexes: torch.Tensor = torch.where( + original_edge_index[1] == current_target_node_index + )[0] + if all_indexes.numel() < current_sampling_size: + sampled_indexes: np.ndarray = np.random.choice( + all_indexes.cpu().numpy(), current_sampling_size + ) + if current_edge_index is not None: + current_edge_index: torch.Tensor = torch.cat( + [current_edge_index, original_edge_index[:, sampled_indexes]], dim=1 + ) + else: + current_edge_index: torch.Tensor = original_edge_index[:, sampled_indexes] + else: + all_indexes_list = all_indexes.tolist() + random.shuffle(all_indexes_list) + shuffled_indexes_list: _typing.List[int] = \ + all_indexes_list[0: current_sampling_size] + if current_edge_index is not None: + current_edge_index: torch.Tensor = torch.cat( + [current_edge_index, original_edge_index[:, shuffled_indexes_list]], dim=1 + ) + else: + current_edge_index: torch.Tensor = original_edge_index[:, shuffled_indexes_list] + edges_indexes.append(current_edge_index) + + if len(edges_indexes) < len(self.__sampling_sizes): + next_target_nodes_indexes: torch.Tensor = current_edge_index[0].unique() + current_target_nodes_indexes = next_target_nodes_indexes.tolist() + + return torch.tensor(target_nodes_indexes), edges_indexes[::-1] diff --git a/configs/nodeclf_sage_benchmark_large.yml b/configs/nodeclf_sage_benchmark_large.yml index 33833bd..8b7c2a5 100644 --- a/configs/nodeclf_sage_benchmark_large.yml +++ b/configs/nodeclf_sage_benchmark_large.yml @@ -31,7 +31,7 @@ models: type: DOUBLE - feasiblePoints": - mean - parameterName: agg, + parameterName: aggr, type: CATEGORICAL, - feasiblePoints: - leaky_relu @@ -40,9 +40,20 @@ models: - tanh parameterName: act type: CATEGORICAL - name: gcn + name: sage trainer: + name: NodeClassificationNeighborSampling hp_space: + - parameterName: sampling_sizes + type: NUMERICAL_LIST + numericalType: INTEGER + length: 3 + cutFunc: lambda x:x[0] + cutPara: + - num_layers + minValue: 3 + maxValue: 8 + scalingType: LOG - maxValue: 300 minValue: 100 
parameterName: max_epoch From b177e104e032dfdebc3cc144de7cfc8919268f4a Mon Sep 17 00:00:00 2001 From: SwiftieH Date: Tue, 30 Mar 2021 18:02:23 +0800 Subject: [PATCH 031/144] fixed num_workers, set_start_method(fork) when num_workers>0 --- autogl/datasets/utils.py | 4 ++-- autogl/module/train/graph_classification.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index 0dc09eb..f3c8f62 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -315,7 +315,7 @@ def graph_random_splits(dataset, train_ratio=0.2, val_ratio=0.4, seed=None): return dataset -def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128): +def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128, num_workers = 0): r"""Get train/test dataset/dataloader after cross validation. Parameters @@ -337,7 +337,7 @@ def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128): dataset, "%s_split" % (mask) ), "Given dataset do not have %s split" % (mask) if is_loader: - return DataLoader(getattr(dataset, "%s_split" % (mask)), batch_size=batch_size) + return DataLoader(getattr(dataset, "%s_split" % (mask)), batch_size=batch_size, num_workers = num_workers) else: return getattr(dataset, "%s_split" % (mask)) diff --git a/autogl/module/train/graph_classification.py b/autogl/module/train/graph_classification.py index e365021..81860fe 100644 --- a/autogl/module/train/graph_classification.py +++ b/autogl/module/train/graph_classification.py @@ -7,6 +7,7 @@ from .evaluate import Logloss from typing import Union from ...datasets import utils from copy import deepcopy +import torch.multiprocessing as mp from ...utils import get_logger @@ -65,6 +66,7 @@ class GraphClassificationTrainer(BaseTrainer): lr=None, max_epoch=None, batch_size=None, + num_workers=None, early_stopping_round=7, weight_decay=1e-4, device=None, @@ -104,6 +106,9 @@ class GraphClassificationTrainer(BaseTrainer): self.lr = lr if lr is not None else 1e-4 self.max_epoch = max_epoch if max_epoch is not None else 100 self.batch_size = batch_size if batch_size is not None else 64 + self.num_workers = num_workers if num_workers is not None else 4 + if self.num_workers > 0: + mp.set_start_method('fork', force=True) self.early_stopping_round = ( early_stopping_round if early_stopping_round is not None else 100 ) @@ -294,10 +299,10 @@ class GraphClassificationTrainer(BaseTrainer): """ train_loader = utils.graph_get_split( - dataset, "train", batch_size=self.batch_size + dataset, "train", batch_size=self.batch_size, num_workers = self.num_workers ) # DataLoader(dataset['train'], batch_size=self.batch_size) valid_loader = utils.graph_get_split( - dataset, "val", batch_size=self.batch_size + dataset, "val", batch_size=self.batch_size, num_workers = self.num_workers ) # DataLoader(dataset['val'], batch_size=self.batch_size) self.train_only(train_loader, valid_loader) if keep_valid_result and valid_loader: @@ -321,7 +326,7 @@ class GraphClassificationTrainer(BaseTrainer): ------- The prediction result of ``predict_proba``. """ - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size) + loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) return self._predict_proba(loader, in_log_format=True).max(1)[1] def predict_proba(self, dataset, mask="test", in_log_format=False): @@ -342,7 +347,7 @@ class GraphClassificationTrainer(BaseTrainer): ------- The prediction result. 
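        Examples
        --------
        A sketch (illustrative; assumes a trainer already fitted on ``dataset``):

            proba = trainer.predict_proba(dataset, mask="test")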
""" - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size) + loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) return self._predict_proba(loader, in_log_format) def _predict_proba(self, loader, in_log_format=False): @@ -425,7 +430,7 @@ class GraphClassificationTrainer(BaseTrainer): res: The evaluation result on the given dataset. """ - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size) + loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) return self._evaluate(loader, feval) def _evaluate(self, loader, feval=None): From 8ae5156cdd23d03a94744f434d5deba588c70aca Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Tue, 30 Mar 2021 16:12:53 +0000 Subject: [PATCH 032/144] update examples & fix bugs in fe --- autogl/module/feature/generators/pyg.py | 12 ++- autogl/solver/classifier/graph_classifier.py | 6 +- autogl/solver/classifier/node_classifier.py | 2 +- ..._classification.yaml => graphclf_full.yml} | 3 - configs/graphclf_gin_benchmark.yml | 70 ++++++++++++++ ...ation.yaml => graphclf_topk_benchmark.yml} | 42 +++----- .../{nodeclf_gcn.yaml => nodeclf_full.yml} | 54 ++++++++--- configs/nodeclf_gcn_large.yaml | 61 ------------ examples/graph_classification.py | 96 +++++++++++++++---- examples/graph_cv.py | 96 +++++++++++++++++++ examples/node_classification.py | 80 +++++++++++----- 11 files changed, 364 insertions(+), 158 deletions(-) rename configs/{graph_classification.yaml => graphclf_full.yml} (94%) create mode 100644 configs/graphclf_gin_benchmark.yml rename configs/{node_classification.yaml => graphclf_topk_benchmark.yml} (58%) rename configs/{nodeclf_gcn.yaml => nodeclf_full.yml} (54%) delete mode 100644 configs/nodeclf_gcn_large.yaml create mode 100644 examples/graph_cv.py diff --git a/autogl/module/feature/generators/pyg.py b/autogl/module/feature/generators/pyg.py index f3bd4b3..f7919ef 100644 --- a/autogl/module/feature/generators/pyg.py +++ b/autogl/module/feature/generators/pyg.py @@ -78,12 +78,14 @@ class PYGOneHotDegree(PYGGenerator): def __init__(self, max_degree=1000): super(PYGOneHotDegree, self).__init__(max_degree=max_degree) + """ def _transform(self, data): - idx, x = data.edge_index[0], data.x - deg = degree(idx, data.num_nodes, dtype=torch.long) - self._kwargs["max_degree"] = np.min( - [self._kwargs["max_degree"], torch.max(deg).numpy()] - ) + #idx, x = data.edge_index[0], data.x + #deg = degree(idx, data.num_nodes, dtype=torch.long) + #self._kwargs["max_degree"] = np.min( + # [self._kwargs["max_degree"], torch.max(deg).numpy()] + #) dsc = self.extract(data) data.x = torch.cat([data.x, dsc], dim=1) return data + """ diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 82c5254..7427e13 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -213,7 +213,7 @@ class AutoGraphClassifier(BaseClassifier): inplace=False, train_split=None, val_split=None, - cross_validation=True, + cross_validation=False, cv_split=10, evaluation_method="infer", seed=None, @@ -245,7 +245,7 @@ class AutoGraphClassifier(BaseClassifier): Default ``None``. cross_validation: bool - Whether to use cross validation to fit on train dataset. Default ``True``. + Whether to use cross validation to fit on train dataset. Default ``False``. cv_split: int The cross validation split number. Only be effective when ``cross_validation=True``. 
@@ -730,7 +730,7 @@ class AutoGraphClassifier(BaseClassifier): ) if isinstance(path_or_dict, str): if filetype == "auto": - if path_or_dict.endswith(".yaml"): + if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): filetype = "yaml" elif path_or_dict.endswith(".json"): filetype = "json" diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 20b915c..1d41d1a 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -650,7 +650,7 @@ class AutoNodeClassifier(BaseClassifier): ) if isinstance(path_or_dict, str): if filetype == "auto": - if path_or_dict.endswith(".yaml"): + if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): filetype = "yaml" elif path_or_dict.endswith(".json"): filetype = "json" diff --git a/configs/graph_classification.yaml b/configs/graphclf_full.yml similarity index 94% rename from configs/graph_classification.yaml rename to configs/graphclf_full.yml index 4640ddc..9175778 100644 --- a/configs/graph_classification.yaml +++ b/configs/graphclf_full.yml @@ -1,9 +1,6 @@ ensemble: name: voting size: 2 -feature: -- name: NxLargeCliqueSize -- name: NxLargeCliqueSize hpo: max_evals: 10 name: anneal diff --git a/configs/graphclf_gin_benchmark.yml b/configs/graphclf_gin_benchmark.yml new file mode 100644 index 0000000..5ef7b3a --- /dev/null +++ b/configs/graphclf_gin_benchmark.yml @@ -0,0 +1,70 @@ +hpo: + max_evals: 10 + name: tpe +models: +- hp_space: + - parameterName: num_layers + type: DISCRETE + feasiblePoints: '3,4,5' + + - parameterName: hidden + type: NUMERICAL_LIST + numericalType: INTEGER + length: 5 + minValue: [8, 8, 8, 8, 8] + maxValue: [64, 64, 64, 64, 64] + scalingType: LOG + cutPara: ["num_layers"] + cutFunc: "lambda x: x[0] - 1" + + - parameterName: dropout + type: DOUBLE + maxValue: 0.9 + minValue: 0.1 + scalingType: LINEAR + + - parameterName: act + type: CATEGORICAL + feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + + - parameterName: eps + type: CATEGORICAL + feasiblePoints: + - True + - False + + - parameterName: mlp_layers + type: DISCRETE + feasiblePoints: '2,3,4' + name: gin +trainer: + hp_space: + - maxValue: 300 + minValue: 10 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 30 + minValue: 10 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.1 + minValue: 0.0001 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 0.005 + minValue: 5.0e-05 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE + - maxValue: 128 + minValue: 48 + parameterName: batch_size + scalingType: LINEAR + type: INTEGER diff --git a/configs/node_classification.yaml b/configs/graphclf_topk_benchmark.yml similarity index 58% rename from configs/node_classification.yaml rename to configs/graphclf_topk_benchmark.yml index 011fda2..bd708a9 100644 --- a/configs/node_classification.yaml +++ b/configs/graphclf_topk_benchmark.yml @@ -1,36 +1,13 @@ -ensemble: - name: voting - size: 2 -feature: -- name: PYGNormalizeFeatures -- name: pagerank hpo: max_evals: 10 - name: anneal + name: tpe models: - hp_space: - - feasiblePoints: 2,3,4 - parameterName: num_layers - type: DISCRETE - - feasiblePoints: 4,8,16 - parameterName: heads - type: DISCRETE - - cutFunc: lambda x:x[0] - 1 - cutPara: - - num_layers - length: 3 - maxValue: - - 64 - - 64 - - 64 - minValue: - - 8 - - 8 - - 8 - numericalType: INTEGER - parameterName: hidden - scalingType: LOG - type: NUMERICAL_LIST 
+ - maxValue: 0.9 + minValue: 0.1 + parameterName: ratio + scalingType: LINEAR + type: DOUBLE - maxValue: 0.9 minValue: 0.1 parameterName: dropout @@ -43,7 +20,7 @@ models: - tanh parameterName: act type: CATEGORICAL - name: gat + name: topkpool trainer: hp_space: - maxValue: 300 @@ -66,3 +43,8 @@ trainer: parameterName: weight_decay scalingType: LOG type: DOUBLE + - maxValue: 128 + minValue: 48 + parameterName: batch_size + scalingType: LINEAR + type: INTEGER diff --git a/configs/nodeclf_gcn.yaml b/configs/nodeclf_full.yml similarity index 54% rename from configs/nodeclf_gcn.yaml rename to configs/nodeclf_full.yml index 0f80b5b..8dac1ee 100644 --- a/configs/nodeclf_gcn.yaml +++ b/configs/nodeclf_full.yml @@ -1,25 +1,56 @@ ensemble: - name: null + name: voting + size: 2 feature: -- name: null +- name: PYGNormalizeFeatures hpo: - max_evals: 10 - name: random + max_evals: 50 + name: tpe models: - hp_space: - feasiblePoints: '2' parameterName: num_layers type: DISCRETE + - feasiblePoints: 6,8,10,12 + parameterName: heads + type: DISCRETE - cutFunc: lambda x:x[0] - 1 cutPara: - num_layers - length: 2 + length: 1 + maxValue: + - 16 + minValue: + - 4 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gat +- hp_space: + - feasiblePoints: '2' + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 1 maxValue: - - 64 - 64 minValue: - 16 - - 16 numericalType: INTEGER parameterName: hidden scalingType: LOG @@ -49,13 +80,14 @@ trainer: parameterName: early_stopping_round scalingType: LINEAR type: INTEGER - - maxValue: 0.01 - minValue: 0.0025 + - maxValue: 0.05 + minValue: 0.01 parameterName: lr scalingType: LOG type: DOUBLE - - maxValue: 0.025 - minValue: 0.0025 + - maxValue: 0.001 + minValue: 0.0001 parameterName: weight_decay scalingType: LOG type: DOUBLE + diff --git a/configs/nodeclf_gcn_large.yaml b/configs/nodeclf_gcn_large.yaml deleted file mode 100644 index 8303833..0000000 --- a/configs/nodeclf_gcn_large.yaml +++ /dev/null @@ -1,61 +0,0 @@ -ensemble: - name: null -feature: -- name: null -hpo: - max_evals: 10 - name: random -models: -- hp_space: - - feasiblePoints: 2,3 - parameterName: num_layers - type: DISCRETE - - cutFunc: lambda x:x[0] - 1 - cutPara: - - num_layers - length: 2 - maxValue: - - 128 - - 128 - minValue: - - 32 - - 32 - numericalType: INTEGER - parameterName: hidden - scalingType: LOG - type: NUMERICAL_LIST - - maxValue: 0.8 - minValue: 0.2 - parameterName: dropout - scalingType: LINEAR - type: DOUBLE - - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - parameterName: act - type: CATEGORICAL - name: gcn -trainer: - hp_space: - - maxValue: 300 - minValue: 100 - parameterName: max_epoch - scalingType: LINEAR - type: INTEGER - - maxValue: 30 - minValue: 10 - parameterName: early_stopping_round - scalingType: LINEAR - type: INTEGER - - maxValue: 0.01 - minValue: 0.001 - parameterName: lr - scalingType: LOG - type: DOUBLE - - maxValue: 0.01 - minValue: 0.001 - parameterName: weight_decay - scalingType: LOG - type: DOUBLE diff --git a/examples/graph_classification.py b/examples/graph_classification.py index 9d197b2..9e40ee6 100644 --- a/examples/graph_classification.py +++ b/examples/graph_classification.py @@ -1,27 +1,85 @@ +""" +Example of graph classification on given 
datasets.
+This version uses a random split, only to show the usage of AutoGraphClassifier.
+Refer to `graph_cv.py` for cross validation evaluation of the whole system,
+following the paper `A Fair Comparison of Graph Neural Networks for Graph Classification`.
+"""
+
 import sys
-sys.path.append('../')
+
+sys.path.append("../")
+import random
+import torch
+import numpy as np
 from autogl.datasets import build_dataset_from_name, utils
 from autogl.solver import AutoGraphClassifier
-from autogl.module import Acc, BaseModel
+from autogl.module import Acc
+from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+
+if __name__ == "__main__":
+    parser = ArgumentParser(
+        "auto graph classification", formatter_class=ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "--dataset",
+        default="mutag",
+        type=str,
+        help="graph classification dataset",
+        choices=["mutag", "imdb-b", "imdb-m", "proteins", "collab"],
+    )
+    parser.add_argument(
+        "--configs", default="../configs/graph_classification.yaml", help="config files"
+    )
+    parser.add_argument("--device", type=int, default=0, help="device to run on")
+    parser.add_argument("--seed", type=int, default=0, help="random seed")
+
+    args = parser.parse_args()
+    if torch.cuda.is_available():
+        torch.cuda.set_device(args.device)
+    seed = args.seed
+    # set random seed
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+    dataset = build_dataset_from_name(args.dataset)
+    if args.dataset.startswith("imdb"):
+        from autogl.module.feature.generators import PYGOneHotDegree
+
+        # get max degree
+        from torch_geometric.utils import degree
+
+        max_degree = 0
+        for data in dataset:
+            deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item())
+            max_degree = max(max_degree, deg_max)
+        dataset = PYGOneHotDegree(max_degree).fit_transform(dataset, inplace=False)
+    elif args.dataset == "collab":
+        from autogl.module.feature.auto_feature import Onlyconst
 
-dataset = build_dataset_from_name('mutag')
-utils.graph_random_splits(dataset, train_ratio=0.4, val_ratio=0.4)
+        dataset = Onlyconst().fit_transform(dataset, inplace=False)
+    utils.graph_random_splits(dataset, train_ratio=0.8, val_ratio=0.1, seed=args.seed)
 
-autoClassifier = AutoGraphClassifier.from_config('../configs/graph_classification.yaml')
+    autoClassifier = AutoGraphClassifier.from_config(args.configs)
 
-# train
-autoClassifier.fit(
-    dataset,
-    time_limit=3600,
-    train_split=0.8,
-    val_split=0.1,
-    cross_validation=True,
-    cv_split=10,
-)
-autoClassifier.get_leaderboard().show()
+    # train
+    autoClassifier.fit(dataset, evaluation_method=[Acc], seed=args.seed)
+    autoClassifier.get_leaderboard().show()
 
-print('best single model:\n', autoClassifier.get_leaderboard().get_best_model(0))
+    print("best single model:\n", autoClassifier.get_leaderboard().get_best_model(0))
 
-# test
-predict_result = autoClassifier.predict_proba()
-print(Acc.evaluate(predict_result, dataset.data.y[dataset.test_index].cpu().detach().numpy()))
\ No newline at end of file
+    # test
+    predict_result = autoClassifier.predict_proba()
+    print(
+        "test acc %.4f"
+        % (
+            Acc.evaluate(
+                predict_result,
+                dataset.data.y[dataset.test_index].cpu().detach().numpy(),
+            )
+        )
+    )
diff --git a/examples/graph_cv.py b/examples/graph_cv.py
new file mode 100644
index 0000000..49e409a
--- /dev/null
+++ b/examples/graph_cv.py
@@ -0,0 +1,96 @@
+"""
+Auto graph classification using cross
validation methods proposed in
+the paper `A Fair Comparison of Graph Neural Networks for Graph Classification`.
+"""
+
+import sys
+import random
+import torch
+import numpy as np
+from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+
+sys.path.append("../")
+
+from autogl.datasets import build_dataset_from_name, utils
+from autogl.solver import AutoGraphClassifier
+from autogl.module import Acc
+
+if __name__ == "__main__":
+    parser = ArgumentParser(
+        "auto graph classification", formatter_class=ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "--dataset",
+        default="mutag",
+        type=str,
+        help="graph classification dataset",
+        choices=["mutag", "imdb-b", "imdb-m", "proteins", "collab"],
+    )
+    parser.add_argument(
+        "--configs", default="../configs/graph_classification.yaml", help="config files"
+    )
+    parser.add_argument("--device", type=int, default=0, help="device to run on")
+    parser.add_argument("--seed", type=int, default=0, help="random seed")
+    parser.add_argument("--folds", type=int, default=10, help="fold number")
+
+    args = parser.parse_args()
+    if torch.cuda.is_available():
+        torch.cuda.set_device(args.device)
+    seed = args.seed
+    # set random seed
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+    print("begin processing dataset", args.dataset, "into", args.folds, "folds.")
+    dataset = build_dataset_from_name(args.dataset)
+    if args.dataset.startswith("imdb"):
+        from autogl.module.feature.generators import PYGOneHotDegree
+
+        # get max degree
+        from torch_geometric.utils import degree
+
+        max_degree = 0
+        for data in dataset:
+            deg_max = int(degree(data.edge_index[0], data.num_nodes).max().item())
+            max_degree = max(max_degree, deg_max)
+        dataset = PYGOneHotDegree(max_degree).fit_transform(dataset, inplace=False)
+    elif args.dataset == "collab":
+        from autogl.module.feature.auto_feature import Onlyconst
+
+        dataset = Onlyconst().fit_transform(dataset, inplace=False)
+    utils.graph_cross_validation(dataset, args.folds, random_seed=args.seed)
+
+    accs = []
+    for fold in range(args.folds):
+        print("evaluating on fold number:", fold)
+        utils.graph_set_fold_id(dataset, fold)
+        train_dataset = utils.graph_get_split(dataset, "train", False)
+        autoClassifier = AutoGraphClassifier.from_config(args.configs)
+
+        autoClassifier.fit(
+            train_dataset,
+            train_split=0.9,
+            val_split=0.1,
+            seed=args.seed,
+            evaluation_method=[Acc],
+        )
+        predict_result = autoClassifier.predict_proba(dataset, mask="val")
+        acc = Acc.evaluate(
+            predict_result, dataset.data.y[dataset.val_index].cpu().detach().numpy()
+        )
+        print("test acc %.4f" % acc)
+        accs.append(acc)
+    print("Average acc on", args.dataset, ":", np.mean(accs), "~", np.std(accs))
diff --git a/examples/node_classification.py b/examples/node_classification.py
index 939b555..f60950b 100644
--- a/examples/node_classification.py
+++ b/examples/node_classification.py
@@ -1,5 +1,6 @@
 import sys
-sys.path.append('../')
+
+sys.path.append("../")
 from autogl.datasets import build_dataset_from_name
 from autogl.solver import AutoNodeClassifier
 from autogl.module import Acc
@@ -8,20 +9,42 @@
 import random
 import torch
 import numpy as np
-import logging
-logging.basicConfig(level=logging.INFO)
+if __name__ == "__main__":
 
-if __name__ == '__main__':
+    from argparse
import ArgumentParser, ArgumentDefaultsHelpFormatter - from argparse import ArgumentParser - parser = ArgumentParser() - parser.add_argument('--dataset', default='cora', type=str) - parser.add_argument('--configs', type=str, default='../configs/nodeclf_gcn_benchmark_small.yml') + parser = ArgumentParser( + "auto node classification", formatter_class=ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", + default="cora", + type=str, + help="dataset to use", + choices=[ + "cora", + "pubmed", + "citeseer", + "coauthor_cs", + "coauthor_physics", + "amazon_computers", + "amazon_photo", + ], + ) + parser.add_argument( + "--configs", + type=str, + default="../configs/nodeclf_gcn_benchmark_small.yml", + help="config to use", + ) # following arguments will override parameters in the config file - parser.add_argument('--hpo', type=str, default='random') - parser.add_argument('--max_eval', type=int, default=5) - parser.add_argument('--seed', type=int, default=0) - parser.add_argument('--device', default=0, type=int) + parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods") + parser.add_argument( + "--max_eval", type=int, default=50, help="max hpo evaluation times" + ) + parser.add_argument("--seed", type=int, default=0, help="random seed") + parser.add_argument("--device", default=0, type=int, help="GPU device") + args = parser.parse_args() if torch.cuda.is_available(): torch.cuda.set_device(args.device) @@ -36,23 +59,30 @@ if __name__ == '__main__': torch.backends.cudnn.benchmark = False dataset = build_dataset_from_name(args.dataset) - - configs = yaml.load(open(args.configs, 'r').read(), Loader=yaml.FullLoader) - configs['hpo']['name'] = args.hpo - configs['hpo']['max_evals'] = args.max_eval + + configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader) + configs["hpo"]["name"] = args.hpo + configs["hpo"]["max_evals"] = args.max_eval autoClassifier = AutoNodeClassifier.from_config(configs) # train - if args.dataset in ['cora', 'citeseer', 'pubmed']: + if args.dataset in ["cora", "citeseer", "pubmed"]: autoClassifier.fit(dataset, time_limit=3600, evaluation_method=[Acc]) else: - autoClassifier.fit(dataset, time_limit=3600, evaluation_method=[Acc], seed=seed, train_split=20*dataset.num_classes, val_split=30*dataset.num_classes, balanced=False) - val = autoClassifier.get_model_by_performance(0)[0].get_valid_score()[0] - print('val acc: ', val) + autoClassifier.fit( + dataset, + time_limit=3600, + evaluation_method=[Acc], + seed=seed, + train_split=20 * dataset.num_classes, + val_split=30 * dataset.num_classes, + balanced=False, + ) + autoClassifier.get_leaderboard().show() # test - predict_result = autoClassifier.predict_proba(use_best=True, use_ensemble=False) - print('test acc: ', Acc.evaluate(predict_result, dataset.data.y[dataset.data.test_mask].numpy())) - - - + predict_result = autoClassifier.predict_proba() + print( + "test acc: %.4f" + % (Acc.evaluate(predict_result, dataset.data.y[dataset.data.test_mask].numpy())) + ) From ba81889c90945941a9927c80d2d903fabc248254 Mon Sep 17 00:00:00 2001 From: null Date: Mon, 5 Apr 2021 17:16:00 +0800 Subject: [PATCH 033/144] Refactor module.train and fix a bug in example for graph cls --- autogl/module/train/__init__.py | 44 +-- autogl/module/train/base.py | 261 +++++++++--------- autogl/module/train/evaluate.py | 94 ------- autogl/module/train/evaluation.py | 159 +++++++++++ .../module/train/graph_classification_full.py | 20 +- .../module/train/node_classification_full.py | 18 +- 
.../node_classification_sampled_trainer.py | 88 ++---- examples/graph_classification.py | 2 +- 8 files changed, 318 insertions(+), 368 deletions(-) delete mode 100644 autogl/module/train/evaluate.py create mode 100644 autogl/module/train/evaluation.py diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 68b5499..b10a057 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -1,14 +1,11 @@ -import importlib -import os - TRAINER_DICT = {} -EVALUATE_DICT = {} from .base import ( BaseTrainer, Evaluation, BaseNodeClassificationTrainer, BaseGraphClassificationTrainer, ) +from .evaluation import get_feval def register_trainer(name): @@ -25,44 +22,7 @@ def register_trainer(name): return register_trainer_cls -def register_evaluate(*name): - def register_evaluate_cls(cls): - for n in name: - if n in EVALUATE_DICT: - raise ValueError("Cannot register duplicate evaluator ({})".format(n)) - if not issubclass(cls, Evaluation): - raise ValueError( - "Evaluator ({}: {}) must extend Evaluation".format(n, cls.__name__) - ) - EVALUATE_DICT[n] = cls - return cls - - return register_evaluate_cls - - -def get_feval(feval): - if isinstance(feval, str): - return EVALUATE_DICT[feval] - if isinstance(feval, type) and issubclass(feval, Evaluation): - return feval - if isinstance(feval, list): - return [get_feval(f) for f in feval] - raise ValueError("feval argument of type", type(feval), "is not supported!") - - from .graph_classification_full import GraphClassificationFullTrainer from .node_classification_full import NodeClassificationFullTrainer from .node_classification_trainer import * -from .evaluate import Acc, Auc, Logloss - -__all__ = [ - "BaseTrainer", - "BaseNodeClassificationTrainer", - "BaseGraphClassificationTrainer", - "GraphClassificationFullTrainer", - "NodeClassificationFullTrainer", - "Evaluation", - "Acc", - "Auc", - "Logloss", -] +from .evaluation import get_feval, Acc, Auc, Logloss diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index b0ad872..e022631 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -1,25 +1,15 @@ import numpy as np -from typing import Union, Iterable +import typing as _typing import torch -from ..model import BaseModel, MODEL_DICT import pickle +from ..model import BaseModel, ModelUniversalRegistry +from .evaluation import Evaluation, get_feval, Acc from ...utils import get_logger -from . import EVALUATE_DICT LOGGER_ES = get_logger("early-stopping") -def get_feval(feval): - if isinstance(feval, str): - return EVALUATE_DICT[feval] - if isinstance(feval, type) and issubclass(feval, Evaluation): - return feval - if isinstance(feval, list): - return [get_feval(f) for f in feval] - raise ValueError("feval argument of type", type(feval), "is not supported!") - - class EarlyStopping: """Early stops the training if validation loss doesn't improve after a given patience.""" @@ -93,12 +83,15 @@ class EarlyStopping: class BaseTrainer: def __init__( - self, - model: BaseModel, - device: Union[torch.device, str], - init=True, - feval=["acc"], - loss="nll_loss", + self, + model: BaseModel, + device: _typing.Union[torch.device, str], + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): """ The basic trainer. @@ -114,15 +107,51 @@ class BaseTrainer: If True(False), the model will (not) be initialized. 
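        feval: (Sequence of) ``Evaluation`` types or their registered names.
            The evaluation metrics; the first entry is treated as the major
            metric when a single score is requested.

        loss: str
            Name of a loss function available under ``torch.nn.functional``.

        A minimal subclass sketch (``MyTrainer`` is hypothetical and only
        illustrates the constructor contract; ``train``/``predict`` stay
        abstract here):

            class MyTrainer(BaseTrainer):
                def __init__(self, model, device="auto"):
                    # registered names and Evaluation types are interchangeable
                    super().__init__(
                        model, device, init=True,
                        feval=("acc", "logloss"), loss="nll_loss",
                    )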
""" super().__init__() - self.model = model - self.to(device) - self.init = init - self.feval = get_feval(feval) - self.loss = loss - - def to(self, device): + self.model: BaseModel = model + if ( + type(device) == torch.device or + (type(device) == str and device.lower() != "auto") + ): + self.__device: torch.device = torch.device(device) + else: + self.__device: torch.device = torch.device( + "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + ) + self.init: bool = init + self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(feval) + self.loss: str = loss + + @property + def device(self) -> torch.device: + return self.__device + + @device.setter + def device(self, __device: _typing.Union[torch.device, str]): + if ( + type(__device) == torch.device or + (type(__device) == str and __device.lower() != "auto") + ): + self.__device: torch.device = torch.device(__device) + else: + self.__device: torch.device = torch.device( + "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + ) + + @property + def feval(self) -> _typing.Sequence[_typing.Type[Evaluation]]: + return self.__feval + + @feval.setter + def feval( + self, _feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] + ): + self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(_feval) + + def to(self, device: torch.device): """ - Migrate trainer to new device + Transfer the trainer to another device Parameters ---------- @@ -139,9 +168,9 @@ class BaseTrainer: """Get auto model used in trainer.""" raise NotImplementedError() - def get_feval( - self, return_major: bool = False - ) -> Union["Evaluation", Iterable["Evaluation"]]: + def get_feval(self, return_major: bool = False) -> _typing.Union[ + _typing.Type[Evaluation], _typing.Sequence[_typing.Type[Evaluation]] + ]: """ Parameters ---------- @@ -155,17 +184,12 @@ class BaseTrainer: Otherwise, will return the ``evaluation`` element passed when constructing. """ if return_major: - if isinstance(self.feval, list): + if isinstance(self.feval, _typing.Sequence): return self.feval[0] else: return self.feval return self.feval - @classmethod - def get_task_name(cls): - """Get task name, e.g., `base`, `NodeClassification`, `GraphClassification`, etc.""" - return "base" - @classmethod def save(cls, instance, path): with open(path, "wb") as output: @@ -188,7 +212,7 @@ class BaseTrainer: pass def duplicate_from_hyper_parameter( - self, hp, model: Union[BaseModel, str, None] = None + self, hp, model: _typing.Union[BaseModel, str, None] = None ) -> "BaseTrainer": """Create a new trainer with the given hyper parameter.""" raise NotImplementedError() @@ -279,11 +303,7 @@ class BaseTrainer: ------- The evaluation result. 
""" - raise NotImplementedError() - - def set_feval(self, feval): - """Set the evaluation metrics.""" - self.feval = get_feval(feval) + raise NotImplementedError def update_parameters(self, **kwargs): """ @@ -291,7 +311,7 @@ class BaseTrainer: """ for k, v in kwargs.items(): if k == "feval": - self.set_feval(v) + self.feval = get_feval(v) elif k == "device": self.to(v) elif hasattr(self, k): @@ -300,106 +320,81 @@ class BaseTrainer: raise KeyError("Cannot set parameter", k, "for trainer", self.__class__) -# a static class for evaluating results -class Evaluation: - @staticmethod - def get_eval_name(): - """ - Should return the name of this evaluation method - """ - raise NotImplementedError() - - @staticmethod - def is_higher_better(): - """ - Should return whether this evaluation method is higher better (bool) - """ - return True - - @staticmethod - def evaluate(predict, label): - """ - Should return: the evaluation result (float) - """ - raise NotImplementedError() - - -class BaseNodeClassificationTrainer(BaseTrainer): +class _BaseClassificationTrainer(BaseTrainer): + """ Base class of trainer for classification tasks """ + def __init__( - self, - model: Union[BaseModel, str], - num_features, - num_classes, - device="auto", - init=True, - feval=["acc"], - loss="nll_loss", + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + device: _typing.Union[torch.device, str, None] = "auto", + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): - self.num_features = num_features - self.num_classes = num_classes - device = ( - torch.device("cuda" if torch.cuda.is_available() else "cpu") - if device == "auto" - else torch.device(device) - ) - if isinstance(model, str): - assert model in MODEL_DICT, "Cannot parse model name " + model - self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) + self.num_features: int = num_features + self.num_classes: int = num_classes + if ( + type(device) == torch.device or + (type(device) == str and device.lower() != "auto") + ): + __device: torch.device = torch.device(device) + else: + __device: torch.device = torch.device( + "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + ) + if type(model) == str: + _model: BaseModel = ModelUniversalRegistry.get_model(model)( + num_features, num_classes, device, init=init + ) elif isinstance(model, BaseModel): - self.model = model + _model: BaseModel = model else: raise TypeError( - "Model argument only support str or BaseModel, get", - type(model), - "instead.", + f"Model argument only support str or BaseModel, got ${model}." 
) - super().__init__(model, device=device, init=init, feval=feval, loss=loss) - - @classmethod - def get_task_name(cls): - return "GraphClassification" + super(_BaseClassificationTrainer, self).__init__(_model, __device, init, feval, loss) -class BaseGraphClassificationTrainer(BaseTrainer): +class BaseNodeClassificationTrainer(_BaseClassificationTrainer): def __init__( - self, - model: Union[BaseModel, str], - num_features, - num_classes, - num_graph_features=0, - device=None, - init=True, - feval=["acc"], - loss="nll_loss", + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + device: _typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): - self.num_features = num_features - self.num_classes = num_classes - self.num_graph_features = num_graph_features - device = ( - torch.device("cuda" if torch.cuda.is_available() else "cpu") - if device == "auto" - else torch.device(device) + super(BaseNodeClassificationTrainer, self).__init__( + model, num_features, num_classes, device, init, feval, loss ) - if isinstance(model, str): - assert model in MODEL_DICT, "Cannot parse model name " + model - self.model = MODEL_DICT[model]( - num_features, - num_classes, - device, - init=init, - num_graph_features=num_graph_features, - ) - elif isinstance(model, BaseModel): - self.model = model - else: - raise TypeError( - "Model argument only support str or BaseModel, get", - type(model), - "instead.", - ) - super().__init__(model, device=device, init=init, feval=feval, loss=loss) - @classmethod - def get_task_name(cls): - return "NodeClassification" +class BaseGraphClassificationTrainer(_BaseClassificationTrainer): + def __init__( + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + num_graph_features: int = 0, + device: _typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", + ): + self.num_graph_features: int = num_graph_features + super(BaseGraphClassificationTrainer, self).__init__( + model, num_features, num_classes, device, init, feval, loss + ) diff --git a/autogl/module/train/evaluate.py b/autogl/module/train/evaluate.py deleted file mode 100644 index 290989e..0000000 --- a/autogl/module/train/evaluate.py +++ /dev/null @@ -1,94 +0,0 @@ -import numpy as np -from . 
import register_evaluate, Evaluation -from sklearn.metrics import ( - log_loss, - accuracy_score, - roc_auc_score, - label_ranking_average_precision_score, -) - - -@register_evaluate("logloss") -class Logloss(Evaluation): - @staticmethod - def get_eval_name(): - return "logloss" - - @staticmethod - def is_higher_better(): - """ - Should return whether this evaluation method is higher better (bool) - """ - return False - - @staticmethod - def evaluate(predict, label): - """ - Should return: the evaluation result (float) - """ - return log_loss(label, predict) - - -@register_evaluate("auc", "ROC-AUC") -class Auc(Evaluation): - @staticmethod - def get_eval_name(): - return "auc" - - @staticmethod - def is_higher_better(): - """ - Should return whether this evaluation method is higher better (bool) - """ - return True - - @staticmethod - def evaluate(predict, label): - """ - Should return: the evaluation result (float) - """ - pos_predict = predict[:, 1] - return roc_auc_score(label, pos_predict) - - -@register_evaluate("acc", "Accuracy") -class Acc(Evaluation): - @staticmethod - def get_eval_name(): - return "acc" - - @staticmethod - def is_higher_better(): - """ - Should return whether this evaluation method is higher better (bool) - """ - return True - - @staticmethod - def evaluate(predict, label): - """ - Should return: the evaluation result (float) - """ - return accuracy_score(label, np.argmax(predict, axis=1)) - - -@register_evaluate("mrr") -class Mrr(Evaluation): - @staticmethod - def get_eval_name(): - return "mrr" - - @staticmethod - def is_higher_better(): - """ - Should return whether this evaluation method is higher better (bool) - """ - return True - - @staticmethod - def evaluate(predict, label): - """ - Should return: the evaluation result (float) - """ - pos_predict = predict[:, 1] - return label_ranking_average_precision_score(label, pos_predict) diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py new file mode 100644 index 0000000..63fd81a --- /dev/null +++ b/autogl/module/train/evaluation.py @@ -0,0 +1,159 @@ +import numpy as np +import typing as _typing +from sklearn.metrics import ( + log_loss, + accuracy_score, + roc_auc_score, + label_ranking_average_precision_score, +) + + +class Evaluation: + @staticmethod + def get_eval_name() -> str: + """ Expected to return the name of this evaluation method """ + raise NotImplementedError + + @staticmethod + def is_higher_better() -> bool: + """ Expected to return whether this evaluation method is higher better (bool) """ + return True + + @staticmethod + def evaluate(predict, label) -> float: + """ Expected to return the evaluation result (float) """ + raise NotImplementedError + + +EVALUATE_DICT: _typing.Dict[str, _typing.Type[Evaluation]] = {} + + +def register_evaluate(*name): + def register_evaluate_cls(cls): + for n in name: + if n in EVALUATE_DICT: + raise ValueError("Cannot register duplicate evaluator ({})".format(n)) + if not issubclass(cls, Evaluation): + raise ValueError( + "Evaluator ({}: {}) must extend Evaluation".format(n, cls.__name__) + ) + EVALUATE_DICT[n] = cls + return cls + return register_evaluate_cls + + +def get_feval(feval): + if isinstance(feval, str): + return EVALUATE_DICT[feval] + if isinstance(feval, type) and issubclass(feval, Evaluation): + return feval + if isinstance(feval, _typing.Sequence): + return [get_feval(f) for f in feval] + raise ValueError("feval argument of type", type(feval), "is not supported!") + + +class EvaluationUniversalRegistry: + @classmethod + 
def register_evaluation(cls, *names) -> _typing.Callable[ + [_typing.Type[Evaluation]], _typing.Type[Evaluation] + ]: + def _register_evaluation( + _class: _typing.Type[Evaluation] + ) -> _typing.Type[Evaluation]: + for n in names: + if n in EVALUATE_DICT: + raise ValueError("Cannot register duplicate evaluator ({})".format(n)) + if not issubclass(_class, Evaluation): + raise ValueError( + "Evaluator ({}: {}) must extend Evaluation".format(n, cls.__name__) + ) + EVALUATE_DICT[n] = _class + return _class + + return _register_evaluation + + +@register_evaluate("logloss") +class Logloss(Evaluation): + @staticmethod + def get_eval_name(): + return "logloss" + + @staticmethod + def is_higher_better(): + """ + Should return whether this evaluation method is higher better (bool) + """ + return False + + @staticmethod + def evaluate(predict, label): + """ + Should return: the evaluation result (float) + """ + return log_loss(label, predict) + + +@register_evaluate("auc", "ROC-AUC") +class Auc(Evaluation): + @staticmethod + def get_eval_name(): + return "auc" + + @staticmethod + def is_higher_better(): + """ + Should return whether this evaluation method is higher better (bool) + """ + return True + + @staticmethod + def evaluate(predict, label): + """ + Should return: the evaluation result (float) + """ + pos_predict = predict[:, 1] + return roc_auc_score(label, pos_predict) + + +@register_evaluate("acc", "Accuracy") +class Acc(Evaluation): + @staticmethod + def get_eval_name(): + return "acc" + + @staticmethod + def is_higher_better(): + """ + Should return whether this evaluation method is higher better (bool) + """ + return True + + @staticmethod + def evaluate(predict, label): + """ + Should return: the evaluation result (float) + """ + return accuracy_score(label, np.argmax(predict, axis=1)) + + +@register_evaluate("mrr") +class Mrr(Evaluation): + @staticmethod + def get_eval_name(): + return "mrr" + + @staticmethod + def is_higher_better(): + """ + Should return whether this evaluation method is higher better (bool) + """ + return True + + @staticmethod + def evaluate(predict, label): + """ + Should return: the evaluation result (float) + """ + pos_predict = predict[:, 1] + return label_ranking_average_precision_score(label, pos_predict) diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index cef536d..eed5feb 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -1,4 +1,4 @@ -from . import register_trainer, EVALUATE_DICT +from . 
import register_trainer from .base import BaseGraphClassificationTrainer, EarlyStopping, Evaluation import torch from torch.optim.lr_scheduler import ( @@ -8,8 +8,8 @@ from torch.optim.lr_scheduler import ( ReduceLROnPlateau, ) import torch.nn.functional as F -from ..model import MODEL_DICT, BaseModel -from .evaluate import Logloss +from ..model import BaseModel +from .evaluation import get_feval, Logloss from typing import Union from ...datasets import utils from copy import deepcopy @@ -20,16 +20,6 @@ from ...utils import get_logger LOGGER = get_logger("graph classification solver") -def get_feval(feval): - if isinstance(feval, str): - return EVALUATE_DICT[feval] - if isinstance(feval, type) and issubclass(feval, Evaluation): - return feval - if isinstance(feval, list): - return [get_feval(f) for f in feval] - raise ValueError("feval argument of type", type(feval), "is not supported!") - - @register_trainer("GraphClassificationFull") class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): """ @@ -570,10 +560,6 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): return ret - def set_feval(self, feval): - # """Get the space of hyperparameter.""" - self.feval = get_feval(feval) - @property def hyper_parameter_space(self): # """Set the space of hyperparameter.""" diff --git a/autogl/module/train/node_classification_full.py b/autogl/module/train/node_classification_full.py index 361a391..73a7e06 100644 --- a/autogl/module/train/node_classification_full.py +++ b/autogl/module/train/node_classification_full.py @@ -2,7 +2,7 @@ Node classification Full Trainer Implementation """ -from . import register_trainer, EVALUATE_DICT +from . import register_trainer from .base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation import torch @@ -14,7 +14,7 @@ from torch.optim.lr_scheduler import ( ) import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel -from .evaluate import Logloss, Acc, Auc +from .evaluation import get_feval, Logloss from typing import Union from copy import deepcopy @@ -23,16 +23,6 @@ from ...utils import get_logger LOGGER = get_logger("node classification trainer") -def get_feval(feval): - if isinstance(feval, str): - return EVALUATE_DICT[feval] - if isinstance(feval, type) and issubclass(feval, Evaluation): - return feval - if isinstance(feval, list): - return [get_feval(f) for f in feval] - raise ValueError("feval argument of type", type(feval), "is not supported!") - - @register_trainer("NodeClassificationFull") class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): """ @@ -523,10 +513,6 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): return ret - def set_feval(self, feval): - # """Set the evaluation metrics.""" - self.feval = get_feval(feval) - @property def hyper_parameter_space(self): # """Get the space of hyperparameter.""" diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index e2d5e97..0c3e671 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -3,25 +3,15 @@ import logging import typing as _typing from torch.nn import functional as F -from .. import EVALUATE_DICT, register_trainer +from .. 
import register_trainer from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation -from ..evaluate import Logloss +from ..evaluation import get_feval, Logloss from ..sampling.sampler.neighbor_sampler import NeighborSampler -from ...model import BaseModel, ModelUniversalRegistry +from ...model import BaseModel LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") -def get_feval(feval): - if isinstance(feval, str): - return EVALUATE_DICT[feval] - if isinstance(feval, type) and issubclass(feval, Evaluation): - return feval - if isinstance(feval, list): - return [get_feval(f) for f in feval] - raise ValueError("feval argument of type", type(feval), "is not supported!") - - @register_trainer("NodeClassificationNeighborSampling") class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): """ @@ -52,20 +42,6 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): lr_scheduler_type: _typing.Optional[str] = None, **kwargs ) -> None: - - self._functional_loss_name: str = loss - if device is None: - device: torch.device = torch.device("cpu") - - if type(model) == str: - self._model: BaseModel = ModelUniversalRegistry.get_model(model)( - num_features, num_classes, device, init=init - ) - elif isinstance(model, BaseModel): - self._model: BaseModel = model - else: - raise TypeError - if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer elif type(optimizer) == str: @@ -80,24 +56,17 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): else: self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam - self._num_features: int = num_features - self._num_classes: int = num_classes self._learning_rate: float = lr if lr > 0 else 1e-4 self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 - self._device: torch.device = device self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") - self._feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) - super(NodeClassificationNeighborSamplingTrainer, self).__init__( - model, num_features, num_classes, - device=device if device is not None else "auto", - init=init, loss=loss + model, num_features, num_classes, device, init, feval, loss ) self._valid_result: torch.Tensor = torch.zeros(0) @@ -164,7 +133,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): :param data: data of a specific graph :return: self """ - data = data.to(self._device) + data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( self._model.parameters(), lr=self._learning_rate, weight_decay=self._weight_decay @@ -206,11 +175,11 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): optimizer.zero_grad() data.edge_indexes = edge_indexes prediction = self._model.model(data) - if not hasattr(F, self._functional_loss_name): + if not hasattr(F, self.loss): raise TypeError( - "PyTorch does not support loss type {}".format(self._functional_loss_name) + "PyTorch does not support loss type {}".format(self.loss) ) - loss_function = getattr(F, 
self._functional_loss_name) + loss_function = getattr(F, self.loss) loss: torch.Tensor = loss_function( prediction[target_node_indexes], data.y[target_node_indexes] @@ -224,9 +193,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): """ Validate performance """ if hasattr(data, "val_mask") and getattr(data, "val_mask") is not None: validation_results: _typing.Sequence[float] = \ - self.evaluate((data,), "val", [self._feval[0]]) + self.evaluate((data,), "val", [self.feval[0]]) - if self._feval[0].is_higher_better(): + if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] else: validation_loss: float = validation_results[0] @@ -244,7 +213,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): :param data: data of a specific graph :return: the result of prediction on the given dataset """ - data = data.to(self._device) + data = data.to(self.device) self._model.model.eval() with torch.no_grad(): prediction = self._model.model(data) @@ -275,7 +244,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): :param in_log_format: :return: """ - data = dataset[0].to(self._device) + data = dataset[0].to(self.device) if mask is not None and type(mask) == str: if mask.lower() == "train": _mask = data.train_mask @@ -305,12 +274,11 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): if return_major: return ( self._valid_score[0], - self._feval[0].is_higher_better() + self.feval[0].is_higher_better() ) else: return ( - self._valid_score, - [f.is_higher_better() for f in self._feval] + self._valid_score, [f.is_higher_better() for f in self.feval] ) def get_name_with_hp(self) -> str: @@ -321,8 +289,8 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): str(self._learning_rate), str(self._max_epoch), str(self._early_stopping.patience), - str(self._model), - str(self._device), + str(self.model), + str(self.device), ] ) name = ( @@ -347,9 +315,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ] = None ) -> _typing.Sequence[float]: data = dataset[0] - data = data.to(self._device) + data = data.to(self.device) if feval is None: - _feval: _typing.Sequence[_typing.Type[Evaluation]] = self._feval + _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval else: _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) if mask.lower() == "train": @@ -376,9 +344,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): return results def to(self, device: torch.device): - self._device = device + self.device = device if self._model is not None: - self._model.to(device) + self._model.to(self.device) def duplicate_from_hyper_parameter( self, hp: _typing.Dict[str, _typing.Any], @@ -397,24 +365,14 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) return NodeClassificationNeighborSamplingTrainer( - model, self._num_features, self._num_classes, + model, self.num_features, self.num_classes, self._optimizer_class, - device=self._device, - init=True, - feval=self._feval, - loss=self._functional_loss_name, + device=self.device, init=True, + feval=self.feval, loss=self.loss, lr_scheduler_type=self._lr_scheduler_type, **hp ) - def set_feval( - self, feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] - ): - self._feval = get_feval(list(feval)) - @property def hyper_parameter_space(self): 
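        # The space is a sequence of descriptors mirroring the YAML configs in
        # this repository; one entry looks like (illustrative values only):
        #   {"parameterName": "lr", "type": "DOUBLE", "minValue": 1e-4,
        #    "maxValue": 1e-1, "scalingType": "LOG"}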
return self._hyper_parameter_space diff --git a/examples/graph_classification.py b/examples/graph_classification.py index 9e40ee6..916bb3a 100644 --- a/examples/graph_classification.py +++ b/examples/graph_classification.py @@ -28,7 +28,7 @@ if __name__ == "__main__": choices=["mutag", "imdb-b", "imdb-m", "proteins", "collab"], ) parser.add_argument( - "--configs", default="../configs/graph_classification.yaml", help="config files" + "--configs", default="../configs/graphclf_full.yml", help="config files" ) parser.add_argument("--device", type=int, default=0, help="device to run on") parser.add_argument("--seed", type=int, default=0, help="random seed") From ecba5475b763f5291a05c4e5134a59f60471da8b Mon Sep 17 00:00:00 2001 From: cluster32 Date: Tue, 6 Apr 2021 23:25:52 +0800 Subject: [PATCH 034/144] regularize hp in nas --- autogl/module/hpo/nas.py | 15 ++++++++------- autogl/module/hpo/test.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/autogl/module/hpo/nas.py b/autogl/module/hpo/nas.py index 8091d06..ac3e8b9 100644 --- a/autogl/module/hpo/nas.py +++ b/autogl/module/hpo/nas.py @@ -22,21 +22,22 @@ class BaseNAS: """ class GraphSpace(nn.Module): - def __init__(self, inp, hid, oup): + def __init__(self, input_dim, hidden_dim, output_dim, ops, *arg, **kwargs): super().__init__() - self.gcn = GCNConv(inp, hid) - self.op1 = mutables.LayerChoice([GCNConv(inp, hid),SAGEConv(inp, hid)]) + """self.op1 = mutables.LayerChoice([GCNConv(input_dim, hidden_dim),SAGEConv(input_dim, hidden_dim)]) self.op2 = mutables.LayerChoice([ - GCNConv(hid, oup), - SAGEConv(hid, oup) - ], key = "2") + GCNConv(hidden_dim, output_dim), + SAGEConv(hidden_dim, output_dim) + ], key = "2")""" + self.op1 = mutables.LayerChoice([op(input_dim, hidden_dim) for op in ops]) + self.op2 = mutables.LayerChoice([op(hidden_dim, output_dim) for op in ops]) def forward(self, data): x = self.op1(data.x, data.edge_index) x = self.op2(x, data.edge_index) return x -class BaseTrainer: +class BaseEstimator: def infer(self, model, dataset): dset = dataset[0] pred = model(dset)[dset.train_mask] diff --git a/autogl/module/hpo/test.py b/autogl/module/hpo/test.py index fdb0a33..d1ddfd9 100644 --- a/autogl/module/hpo/test.py +++ b/autogl/module/hpo/test.py @@ -1,7 +1,8 @@ import hyperopt +from torch_geometric.nn import GCNConv, SAGEConv from . 
import register_hpo -from .nas import BaseTrainer, GraphSpace +from .nas import BaseEstimator, GraphSpace from .darts import DartsTrainer from .base import BaseHPOptimizer, TimeTooLimitedError @@ -19,8 +20,12 @@ class TestHPO(BaseHPOptimizer): def optimize(self, trainer, dataset, time_limit=None, memory_limit=None): num_features=dataset[0].x.shape[1] num_classes=dataset.num_classes - model = GraphSpace(num_features, 64, num_classes) - tr = BaseTrainer() + + op1 = lambda x,y: GCNConv(x,y) + op2 = lambda x,y: SAGEConv(x,y) + ops = [op1,op2] + model = GraphSpace(num_features, 64, num_classes, ops) + tr = BaseEstimator() nas = DartsTrainer() a = nas.search(model, dataset, tr) print(a) From c43e5a056f02303d25a8f237d54899c58fec8950 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 7 Apr 2021 08:30:41 +0000 Subject: [PATCH 035/144] adjust trainer --- autogl/module/hpo/darts.py | 38 +++++++---- autogl/module/hpo/nas.py | 7 +- autogl/module/hpo/test.py | 13 ++-- autogl/module/hpo/utils.py | 14 ++-- autogl/solver/base.py | 74 +++++++++++++++++++++ autogl/solver/classifier/node_classifier.py | 15 ++++- 6 files changed, 135 insertions(+), 26 deletions(-) diff --git a/autogl/module/hpo/darts.py b/autogl/module/hpo/darts.py index 6ac4b86..8aee141 100644 --- a/autogl/module/hpo/darts.py +++ b/autogl/module/hpo/darts.py @@ -22,7 +22,9 @@ class DartsLayerChoice(nn.Module): self.alpha = nn.Parameter(torch.randn(len(self.op_choices)) * 1e-3) def forward(self, *args, **kwargs): - op_results = torch.stack([op(*args, **kwargs) for op in self.op_choices.values()]) + op_results = torch.stack( + [op(*args, **kwargs) for op in self.op_choices.values()] + ) alpha_shape = [-1] + [1] * (len(op_results.size()) - 1) return torch.sum(op_results * F.softmax(self.alpha, -1).view(*alpha_shape), 0) @@ -32,7 +34,7 @@ class DartsLayerChoice(nn.Module): def named_parameters(self): for name, p in super(DartsLayerChoice, self).named_parameters(): - if name == 'alpha': + if name == "alpha": continue yield name, p @@ -58,12 +60,12 @@ class DartsInputChoice(nn.Module): def named_parameters(self): for name, p in super(DartsInputChoice, self).named_parameters(): - if name == 'alpha': + if name == "alpha": continue yield name, p def export(self): - return torch.argsort(-self.alpha).cpu().numpy().tolist()[:self.n_chosen] + return torch.argsort(-self.alpha).cpu().numpy().tolist()[: self.n_chosen] class DartsTrainer(BaseNAS): @@ -107,13 +109,14 @@ class DartsTrainer(BaseNAS): learning_rate=2.5E-3, batch_size=64, workers=4, device=None, log_frequency=None, arc_learning_rate=3.0E-4, unrolled=False):""" + def __init__(self, *args, **kwargs): self.num_epochs = kwargs.get("num_epochs", 5) self.workers = 4 self.device = "cuda" self.log_frequency = None - #for _, module in self.nas_modules: + # for _, module in self.nas_modules: # module.to(self.device) # use the same architecture weight for modules with duplicated names @@ -136,19 +139,28 @@ class DartsTrainer(BaseNAS): ctrl_params = {} for _, m in self.nas_modules: if m.name in ctrl_params: - assert m.alpha.size() == ctrl_params[m.name].size(), 'Size of parameters with the same label should be same.' + assert ( + m.alpha.size() == ctrl_params[m.name].size() + ), "Size of parameters with the same label should be same." m.alpha = ctrl_params[m.name] else: ctrl_params[m.name] = m.alpha - self.ctrl_optim = torch.optim.Adam(list(ctrl_params.values()), 3e-4, betas=(0.5, 0.999), - weight_decay=1.0E-3) - self.grad_clip = 5. 
+ self.ctrl_optim = torch.optim.Adam( + list(ctrl_params.values()), 3e-4, betas=(0.5, 0.999), weight_decay=1.0e-3 + ) + self.grad_clip = 5.0 for step in range(self.num_epochs): self._train_one_epoch(step) if self.log_frequency is not None and step % self.log_frequency == 0: - _logger.info('Epoch [%s/%s] Step [%s/%s] %s', epoch + 1, - self.num_epochs, step + 1, len(self.train_loader), meters) + _logger.info( + "Epoch [%s/%s] Step [%s/%s] %s", + epoch + 1, + self.num_epochs, + step + 1, + len(self.train_loader), + meters, + ) return self.export() @@ -168,7 +180,9 @@ class DartsTrainer(BaseNAS): metric, loss = self._infer() loss.backward() if self.grad_clip > 0: - nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip) # gradient clipping + nn.utils.clip_grad_norm_( + self.model.parameters(), self.grad_clip + ) # gradient clipping self.model_optim.step() def _infer(self): diff --git a/autogl/module/hpo/nas.py b/autogl/module/hpo/nas.py index ac3e8b9..9e02d13 100644 --- a/autogl/module/hpo/nas.py +++ b/autogl/module/hpo/nas.py @@ -2,6 +2,7 @@ from torch_geometric.nn import GCNConv, SAGEConv from nni.nas.pytorch import mutables import torch.nn as nn + class BaseNAS: def search(self, space, dset, trainer): """ @@ -21,6 +22,7 @@ class BaseNAS: The trainer including the best trained model """ + class GraphSpace(nn.Module): def __init__(self, input_dim, hidden_dim, output_dim, ops, *arg, **kwargs): super().__init__() @@ -36,7 +38,8 @@ class GraphSpace(nn.Module): x = self.op1(data.x, data.edge_index) x = self.op2(x, data.edge_index) return x - + + class BaseEstimator: def infer(self, model, dataset): dset = dataset[0] @@ -44,4 +47,4 @@ class BaseEstimator: y = dset.y[dset.train_mask] loss_func = nn.CrossEntropyLoss() loss = loss_func(pred, y) - return loss, loss \ No newline at end of file + return loss, loss diff --git a/autogl/module/hpo/test.py b/autogl/module/hpo/test.py index d1ddfd9..721347a 100644 --- a/autogl/module/hpo/test.py +++ b/autogl/module/hpo/test.py @@ -6,6 +6,7 @@ from .nas import BaseEstimator, GraphSpace from .darts import DartsTrainer from .base import BaseHPOptimizer, TimeTooLimitedError + @register_hpo("test") class TestHPO(BaseHPOptimizer): """ @@ -18,19 +19,19 @@ class TestHPO(BaseHPOptimizer): super().__init__(*args, **kwargs) def optimize(self, trainer, dataset, time_limit=None, memory_limit=None): - num_features=dataset[0].x.shape[1] - num_classes=dataset.num_classes + num_features = dataset[0].x.shape[1] + num_classes = dataset.num_classes - op1 = lambda x,y: GCNConv(x,y) - op2 = lambda x,y: SAGEConv(x,y) - ops = [op1,op2] + op1 = lambda x, y: GCNConv(x, y) + op2 = lambda x, y: SAGEConv(x, y) + ops = [op1, op2] model = GraphSpace(num_features, 64, num_classes, ops) tr = BaseEstimator() nas = DartsTrainer() a = nas.search(model, dataset, tr) print(a) print(type(a)) - return 1,2 + return 1, 2 @classmethod def build_hpo_from_args(cls, args): diff --git a/autogl/module/hpo/utils.py b/autogl/module/hpo/utils.py index 61a4f91..4b76d5b 100644 --- a/autogl/module/hpo/utils.py +++ b/autogl/module/hpo/utils.py @@ -85,7 +85,7 @@ class AverageMeter: Format string to print the values. 
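    A small usage sketch (assuming the usual ``update(val, n=1)`` signature):

        meter = AverageMeter("loss", fmt=":.4f")
        meter.update(0.71)
        meter.update(0.65)
        print(meter)  # -> loss 0.6500 (0.6800)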
""" - def __init__(self, name, fmt=':f'): + def __init__(self, name, fmt=":f"): self.name = name self.fmt = fmt self.reset() @@ -116,11 +116,11 @@ class AverageMeter: self.avg = self.sum / self.count def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" return fmtstr.format(**self.__dict__) def summary(self): - fmtstr = '{name}: {avg' + self.fmt + '}' + fmtstr = "{name}: {avg" + self.fmt + "}" return fmtstr.format(**self.__dict__) @@ -158,7 +158,9 @@ def replace_layer_choice(root_module, init_fn, modules=None): List[Tuple[str, nn.Module]] A list from layer choice keys (names) and replaced modules. """ - return _replace_module_with_type(root_module, init_fn, (LayerChoice, nn.LayerChoice), modules) + return _replace_module_with_type( + root_module, init_fn, (LayerChoice, nn.LayerChoice), modules + ) def replace_input_choice(root_module, init_fn, modules=None): @@ -179,4 +181,6 @@ def replace_input_choice(root_module, init_fn, modules=None): List[Tuple[str, nn.Module]] A list from layer choice keys (names) and replaced modules. """ - return _replace_module_with_type(root_module, init_fn, (InputChoice, nn.InputChoice), modules) + return _replace_module_with_type( + root_module, init_fn, (InputChoice, nn.InputChoice), modules + ) diff --git a/autogl/solver/base.py b/autogl/solver/base.py index fbe1584..765ee1d 100644 --- a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -5,6 +5,7 @@ Provide some standard solver interface. """ from typing import Any, Tuple +from copy import deepcopy import torch @@ -68,6 +69,9 @@ class BaseSolver: self, feature_module, graph_models, + nas_spaces, + nas_algorithms, + nas_estimators, hpo_module, ensemble_module, max_evals=50, @@ -96,6 +100,7 @@ class BaseSolver: self.set_feature_module(feature_module) self.set_hpo_module(hpo_module, max_evals=max_evals) self.set_ensemble_module(ensemble_module, size=size) + self.set_nas_module(nas_algorithms, nas_spaces, nas_estimators) # initialize leaderboard self.leaderboard = None @@ -224,6 +229,75 @@ class BaseSolver: "instead.", ) + def set_nas_module( + self, nas_algorithms, nas_spaces=None, nas_estimators=None + ) -> "BaseSolver": + """ + Set the neural architecture search module in current solver. + + Parameters + ---------- + nas_spaces: (list of) `autogl.module.hpo.nas.GraphSpace` + The search space of nas. You can pass a list of space to enable + multiple space search. If list passed, the length of `nas_spaces`, + `nas_algorithms` and `nas_estimators` should be the same. If set + to `None`, will disable the whole nas module. + + nas_algorithms: (list of) `autogl.module.hpo.nas.BaseNAS` + The search algorithm of nas. You can pass a list of algorithms + to enable multiple algorithms search. If list passed, the length of + `nas_spaces`, `nas_algorithms` and `nas_estimators` should be the same. + Default `None`. + + nas_estimators: (list of) `autogl.module.hpo.nas.BaseEstimators` + The nas estimators. You can pass a list of estimators to enable multiple + estimators search. If list passed, the length of `nas_spaces`, `nas_algorithms` + and `nas_estimators` should be the same. Default `None`. + + Returns + ------- + self: autogl.solver.BaseSolver + A reference of current solver. 
+ """ + self.nas_algorithms = nas_algorithms + if self.nas_algorithms is not None: + max_number = -1 + if isinstance(self.nas_algorithms, list): + max_number = len(self.nas_algorithms) + if isinstance(nas_spaces, list): + if max_number == -1: + max_number = len(nas_spaces) + else: + assert ( + len(nas_spaces) == max_number + ), "lengths of algorithms/spaces/estimators do not match!" + if isinstance(nas_estimators, list): + if max_number == -1: + max_number = len(nas_estimators) + else: + assert ( + len(nas_estimators) == max_number + ), "lengths of algorithms/spaces/estimators do not match!" + if max_number < 0: + self.nas_algorithms = [self.nas_algorithms] + self.nas_spaces = [nas_spaces] + self.nas_estimators = [nas_estimators] + else: + if not isinstance(self.nas_algorithms, list): + self.nas_algorithms = [ + deepcopy(self.nas_algorithms) for _ in range(max_number) + ] + if not isinstance(nas_spaces, list): + self.nas_spaces = [deepcopy(nas_spaces) for _ in range(max_number)] + else: + self.nas_spaces = nas_spaces + if not isinstance(nas_estimators, list): + self.nas_estimators = [ + deepcopy(nas_estimators) for _ in range(max_number) + ] + else: + self.nas_estimators = nas_estimators + def set_ensemble_module(self, ensemble_module, *args, **kwargs) -> "BaseSolver": """ Set the ensemble module used in current solver. diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 054581e..f7f8c12 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -14,6 +14,7 @@ from .base import BaseClassifier from ...module.feature import FEATURE_DICT from ...module.model import MODEL_DICT from ...module.train import TRAINER_DICT, get_feval +from ...module.hpo.nas import BaseNAS, BaseEstimator, GraphSpace from ...module import BaseModel from ..utils import Leaderboard, set_seed from ...datasets import utils @@ -75,6 +76,9 @@ class AutoNodeClassifier(BaseClassifier): self, feature_module="deepgl", graph_models=["gat", "gcn"], + nas_spaces=None, + nas_estimators=None, + nas_algorithms=None, hpo_module="anneal", ensemble_module="voting", max_evals=50, @@ -97,7 +101,7 @@ class AutoNodeClassifier(BaseClassifier): ) # data to be kept when fit - self.data = None + self.dataset = None def _init_graph_module( self, @@ -303,6 +307,15 @@ class AutoNodeClassifier(BaseClassifier): loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, ) + # perform neural architecture search + if self.nas_algorithms is not None: + # perform nas and add them to trainer list + for algo, space, estimator in zip( + self.nas_algorithms, self.nas_spaces, self.nas_estimators + ): + trainer = algo.search(space, self.dataset, estimator) + self.graph_model_list.append(trainer) + # train the models and tune hpo result_valid = [] names = [] From 4b5a868585d6329588092d6dbc21677789ed5bf4 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 7 Apr 2021 08:56:53 +0000 Subject: [PATCH 036/144] black style --- autogl/datasets/utils.py | 10 +- autogl/module/model/_model_registry.py | 2 +- autogl/module/model/graph_sage.py | 60 +++-- autogl/module/train/base.py | 142 +++++------ autogl/module/train/evaluation.py | 23 +- .../module/train/graph_classification_full.py | 18 +- .../module/train/node_classification_full.py | 7 - .../node_classification_sampled_trainer.py | 230 +++++++++--------- .../sampling/sampler/neighbor_sampler.py | 84 ++++--- autogl/solver/classifier/node_classifier.py | 3 + 10 files changed, 319 insertions(+), 260 
deletions(-) diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index 4afe00a..07a8459 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -315,7 +315,9 @@ def graph_random_splits(dataset, train_ratio=0.2, val_ratio=0.4, seed=None): return dataset -def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128, num_workers = 0): +def graph_get_split( + dataset, mask="train", is_loader=True, batch_size=128, num_workers=0 +): r"""Get train/test dataset/dataloader after cross validation. Parameters @@ -337,7 +339,11 @@ def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128, num_w dataset, "%s_split" % (mask) ), "Given dataset do not have %s split" % (mask) if is_loader: - return DataLoader(getattr(dataset, "%s_split" % (mask)), batch_size=batch_size, num_workers = num_workers) + return DataLoader( + getattr(dataset, "%s_split" % (mask)), + batch_size=batch_size, + num_workers=num_workers, + ) else: return getattr(dataset, "%s_split" % (mask)) diff --git a/autogl/module/model/_model_registry.py b/autogl/module/model/_model_registry.py index d8270eb..14aa2d9 100644 --- a/autogl/module/model/_model_registry.py +++ b/autogl/module/model/_model_registry.py @@ -14,7 +14,7 @@ def register_model(name): ) MODEL_DICT[name] = cls return cls - + return register_model_cls diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 90ee515..b06ff19 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -9,19 +9,23 @@ from .base import BaseModel, activate_func class GraphSAGE(torch.nn.Module): def __init__( - self, num_features: int, num_classes: int, - hidden_features: _typing.Sequence[int], - dropout: float, activation_name: str, - aggr: str = "mean", **kwargs + self, + num_features: int, + num_classes: int, + hidden_features: _typing.Sequence[int], + dropout: float, + activation_name: str, + aggr: str = "mean", + **kwargs ): super(GraphSAGE, self).__init__() if type(aggr) != str: raise TypeError if aggr not in ("add", "max", "mean"): aggr = "mean" - + self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() - + num_layers: int = len(hidden_features) + 1 if num_layers == 1: self.__convolution_layers.append( @@ -42,7 +46,7 @@ class GraphSAGE(torch.nn.Module): ) self.__dropout: float = dropout self.__activation_name: str = activation_name - + def __full_forward(self, data): x: torch.Tensor = getattr(data, "x") edge_index: torch.Tensor = getattr(data, "edge_index") @@ -52,24 +56,26 @@ class GraphSAGE(torch.nn.Module): x = activate_func(x, self.__activation_name) x = F.dropout(x, p=self.__dropout, training=self.training) return F.log_softmax(x, dim=1) - + def __distributed_forward(self, data): x: torch.Tensor = getattr(data, "x") edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") if len(edge_indexes) != len(self.__convolution_layers): raise AttributeError for layer_index in range(len(self.__convolution_layers)): - x: torch.Tensor = self.__convolution_layers[layer_index](x, edge_indexes[layer_index]) + x: torch.Tensor = self.__convolution_layers[layer_index]( + x, edge_indexes[layer_index] + ) if layer_index + 1 < len(self.__convolution_layers): x = activate_func(x, self.__activation_name) x = F.dropout(x, p=self.__dropout, training=self.training) return F.log_softmax(x, dim=1) - + def forward(self, data): if ( - hasattr(data, "edge_indexes") and - isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and - len(getattr(data, 
"edge_indexes")) == len(self.__convolution_layers) + hasattr(data, "edge_indexes") + and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) + and len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) ): return self.__distributed_forward(data) else: @@ -79,15 +85,20 @@ class GraphSAGE(torch.nn.Module): @register_model("sage") class AutoSAGE(BaseModel): def __init__( - self, num_features: int = 1, num_classes: int = 1, - device: _typing.Optional[torch.device] = torch.device("cpu"), - init: bool = False, **kwargs + self, + num_features: int = 1, + num_classes: int = 1, + device: _typing.Optional[torch.device] = torch.device("cpu"), + init: bool = False, + **kwargs ): super(AutoSAGE, self).__init__(init) self.__num_features: int = num_features self.__num_classes: int = num_classes - self.__device: torch.device = device if device is not None else torch.device("cpu") - + self.__device: torch.device = ( + device if device is not None else torch.device("cpu") + ) + self.hyperparams = { "num_layers": 3, "hidden": [64, 32], @@ -97,26 +108,27 @@ class AutoSAGE(BaseModel): } self.params = { "num_features": self.__num_features, - "num_classes": self.__num_classes + "num_classes": self.__num_classes, } - + self._model: GraphSAGE = GraphSAGE( self.__num_features, self.__num_classes, [64, 32], 0.5, "relu" ) - + self._initialized: bool = False if init: self.initialize() - + @property def model(self) -> GraphSAGE: return self._model - + def initialize(self): """ Initialize model """ if not self._initialized: self._model: GraphSAGE = GraphSAGE( - self.__num_features, self.__num_classes, + self.__num_features, + self.__num_classes, hidden_features=self.hyperparams["hidden"], activation_name=self.hyperparams["act"], **self.hyperparams diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index e022631..16227e9 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -83,15 +83,14 @@ class EarlyStopping: class BaseTrainer: def __init__( - self, - model: BaseModel, - device: _typing.Union[torch.device, str], - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Acc,), - loss: str = "nll_loss", + self, + model: BaseModel, + device: _typing.Union[torch.device, str], + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): """ The basic trainer. 
@@ -108,47 +107,50 @@ class BaseTrainer: """ super().__init__() self.model: BaseModel = model - if ( - type(device) == torch.device or - (type(device) == str and device.lower() != "auto") + if type(device) == torch.device or ( + type(device) == str and device.lower() != "auto" ): self.__device: torch.device = torch.device(device) else: self.__device: torch.device = torch.device( - "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + "cuda" + if torch.cuda.is_available() and torch.cuda.device_count() > 0 + else "cpu" ) self.init: bool = init self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(feval) self.loss: str = loss - + @property def device(self) -> torch.device: return self.__device - + @device.setter def device(self, __device: _typing.Union[torch.device, str]): - if ( - type(__device) == torch.device or - (type(__device) == str and __device.lower() != "auto") + if type(__device) == torch.device or ( + type(__device) == str and __device.lower() != "auto" ): self.__device: torch.device = torch.device(__device) else: self.__device: torch.device = torch.device( - "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + "cuda" + if torch.cuda.is_available() and torch.cuda.device_count() > 0 + else "cpu" ) - + @property def feval(self) -> _typing.Sequence[_typing.Type[Evaluation]]: return self.__feval - + @feval.setter def feval( - self, _feval: _typing.Union[ - _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] + self, + _feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ], ): self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(_feval) - + def to(self, device: torch.device): """ Transfer the trainer to another device @@ -168,7 +170,9 @@ class BaseTrainer: """Get auto model used in trainer.""" raise NotImplementedError() - def get_feval(self, return_major: bool = False) -> _typing.Union[ + def get_feval( + self, return_major: bool = False + ) -> _typing.Union[ _typing.Type[Evaluation], _typing.Sequence[_typing.Type[Evaluation]] ]: """ @@ -212,7 +216,7 @@ class BaseTrainer: pass def duplicate_from_hyper_parameter( - self, hp, model: _typing.Union[BaseModel, str, None] = None + self, hp, model: _typing.Union[BaseModel, str, None] = None ) -> "BaseTrainer": """Create a new trainer with the given hyper parameter.""" raise NotImplementedError() @@ -322,57 +326,58 @@ class BaseTrainer: class _BaseClassificationTrainer(BaseTrainer): """ Base class of trainer for classification tasks """ - + def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - device: _typing.Union[torch.device, str, None] = "auto", - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Acc,), - loss: str = "nll_loss", + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + device: _typing.Union[torch.device, str, None] = "auto", + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): self.num_features: int = num_features self.num_classes: int = num_classes - if ( - type(device) == torch.device or - (type(device) == str and device.lower() != "auto") + if type(device) == torch.device or ( + type(device) == str and device.lower() != "auto" ): __device: torch.device = torch.device(device) else: __device: torch.device = torch.device( - 
"cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + "cuda" + if torch.cuda.is_available() and torch.cuda.device_count() > 0 + else "cpu" ) if type(model) == str: _model: BaseModel = ModelUniversalRegistry.get_model(model)( - num_features, num_classes, device, init=init + num_features, num_classes, __device, init=init ) elif isinstance(model, BaseModel): _model: BaseModel = model else: raise TypeError( - f"Model argument only support str or BaseModel, got ${model}." + f"Model argument only support str or BaseModel, got {model}." ) - super(_BaseClassificationTrainer, self).__init__(_model, __device, init, feval, loss) + super(_BaseClassificationTrainer, self).__init__( + _model, __device, init, feval, loss + ) class BaseNodeClassificationTrainer(_BaseClassificationTrainer): def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - device: _typing.Union[torch.device, str, None] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Acc,), - loss: str = "nll_loss", + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + device: _typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): super(BaseNodeClassificationTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss @@ -381,18 +386,17 @@ class BaseNodeClassificationTrainer(_BaseClassificationTrainer): class BaseGraphClassificationTrainer(_BaseClassificationTrainer): def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - num_graph_features: int = 0, - device: _typing.Union[torch.device, str, None] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Acc,), - loss: str = "nll_loss", + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + num_graph_features: int = 0, + device: _typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", ): self.num_graph_features: int = num_graph_features super(BaseGraphClassificationTrainer, self).__init__( diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py index 63fd81a..c3ed320 100644 --- a/autogl/module/train/evaluation.py +++ b/autogl/module/train/evaluation.py @@ -13,12 +13,12 @@ class Evaluation: def get_eval_name() -> str: """ Expected to return the name of this evaluation method """ raise NotImplementedError - + @staticmethod def is_higher_better() -> bool: """ Expected to return whether this evaluation method is higher better (bool) """ return True - + @staticmethod def evaluate(predict, label) -> float: """ Expected to return the evaluation result (float) """ @@ -39,6 +39,7 @@ def register_evaluate(*name): ) EVALUATE_DICT[n] = cls return cls + return register_evaluate_cls @@ -54,22 +55,26 @@ def get_feval(feval): class EvaluationUniversalRegistry: @classmethod - def register_evaluation(cls, *names) -> _typing.Callable[ - [_typing.Type[Evaluation]], _typing.Type[Evaluation] - ]: + def register_evaluation( + cls, *names + ) -> _typing.Callable[[_typing.Type[Evaluation]], _typing.Type[Evaluation]]: def 
_register_evaluation( - _class: _typing.Type[Evaluation] + _class: _typing.Type[Evaluation], ) -> _typing.Type[Evaluation]: for n in names: if n in EVALUATE_DICT: - raise ValueError("Cannot register duplicate evaluator ({})".format(n)) + raise ValueError( + "Cannot register duplicate evaluator ({})".format(n) + ) if not issubclass(_class, Evaluation): raise ValueError( - "Evaluator ({}: {}) must extend Evaluation".format(n, cls.__name__) + "Evaluator ({}: {}) must extend Evaluation".format( + n, cls.__name__ + ) ) EVALUATE_DICT[n] = _class return _class - + return _register_evaluation diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index eed5feb..6a0317e 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -100,7 +100,7 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): self.batch_size = batch_size if batch_size is not None else 64 self.num_workers = num_workers if num_workers is not None else 4 if self.num_workers > 0: - mp.set_start_method('fork', force=True) + mp.set_start_method("fork", force=True) self.early_stopping_round = ( early_stopping_round if early_stopping_round is not None else 100 ) @@ -305,10 +305,10 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): """ train_loader = utils.graph_get_split( - dataset, "train", batch_size=self.batch_size, num_workers = self.num_workers + dataset, "train", batch_size=self.batch_size, num_workers=self.num_workers ) # DataLoader(dataset['train'], batch_size=self.batch_size) valid_loader = utils.graph_get_split( - dataset, "val", batch_size=self.batch_size, num_workers = self.num_workers + dataset, "val", batch_size=self.batch_size, num_workers=self.num_workers ) # DataLoader(dataset['val'], batch_size=self.batch_size) self.train_only(train_loader, valid_loader) if keep_valid_result and valid_loader: @@ -332,7 +332,9 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): ------- The prediction result of ``predict_proba``. """ - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._predict_proba(loader, in_log_format=True).max(1)[1] def predict_proba(self, dataset, mask="test", in_log_format=False): @@ -353,7 +355,9 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): ------- The prediction result. """ - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._predict_proba(loader, in_log_format) def _predict_proba(self, loader, in_log_format=False): @@ -436,7 +440,9 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): res: The evaluation result on the given dataset. 
""" - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._evaluate(loader, feval) def _evaluate(self, loader, feval=None): diff --git a/autogl/module/train/node_classification_full.py b/autogl/module/train/node_classification_full.py index 73a7e06..480cb1e 100644 --- a/autogl/module/train/node_classification_full.py +++ b/autogl/module/train/node_classification_full.py @@ -82,13 +82,6 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): loss=loss, ) - # init model - if isinstance(model, str): - assert model in MODEL_DICT, "Cannot parse model name " + model - self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) - elif isinstance(model, BaseModel): - self.model = model - self.opt_received = optimizer if type(optimizer) == str and optimizer.lower() == "adam": self.optimizer = torch.optim.Adam diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 0c3e671..78df99c 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -19,60 +19,71 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): for automatically training the node classification tasks with neighbour sampling """ - + def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - optimizer: _typing.Union[ - _typing.Type[torch.optim.Optimizer], str, None - ] = None, - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = None, + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs ) -> None: if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer elif type(optimizer) == str: if optimizer.lower() == "adam": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam elif optimizer.lower() == "adam" + "w": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.AdamW + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.AdamW elif optimizer.lower() == "sgd": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.SGD + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.SGD else: - 
self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam else: - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam - + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + self._learning_rate: float = lr if lr > 0 else 1e-4 self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 - + self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") - + self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 - early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 - self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) + early_stopping_round: int = ( + early_stopping_round if early_stopping_round > 0 else 1e2 + ) + self._early_stopping = EarlyStopping( + patience=early_stopping_round, verbose=False + ) super(NodeClassificationNeighborSamplingTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) - + self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score = None - + self._hyper_parameter_space: _typing.List[_typing.Dict[str, _typing.Any]] = [ { "parameterName": "max_epoch", @@ -101,33 +112,31 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): "maxValue": 1e-2, "minValue": 1e-4, "scalingType": "LOG", - } + }, ] - + self._hyper_parameter: _typing.Dict[str, _typing.Any] = { "max_epoch": self._max_epoch, "early_stopping_round": self._early_stopping.patience, "lr": self._learning_rate, - "weight_decay": self._weight_decay + "weight_decay": self._weight_decay, } - + self.__initialized: bool = False if init: self.initialize() - + def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": if self.__initialized: return self self._model.initialize() self.__initialized = True return self - + def get_model(self) -> BaseModel: return self._model - - def __train_only( - self, data - ) -> "NodeClassificationNeighborSamplingTrainer": + + def __train_only(self, data) -> "NodeClassificationNeighborSamplingTrainer": """ The function of training on the given dataset and mask. 
:param data: data of a specific graph @@ -136,38 +145,41 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( self._model.parameters(), - lr=self._learning_rate, weight_decay=self._weight_decay + lr=self._learning_rate, + weight_decay=self._weight_decay, ) if type(self._lr_scheduler_type) == str: if self._lr_scheduler_type.lower() == "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.StepLR = \ - torch.optim.lr_scheduler.StepLR( - optimizer, step_size=100, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.StepLR = ( + torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = \ + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=[30, 80], gamma=0.1 ) + ) elif self._lr_scheduler_type.lower() == "exponential" + "lr": - lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = \ - torch.optim.lr_scheduler.ExponentialLR( - optimizer, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( + torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): - lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = \ + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) - + ) + train_sampler: NeighborSampler = NeighborSampler( data, self.__sampling_sizes, batch_size=20 ) - + for current_epoch in range(self._max_epoch): self._model.model.train() """ epoch start """ @@ -181,20 +193,20 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) loss_function = getattr(F, self.loss) loss: torch.Tensor = loss_function( - prediction[target_node_indexes], - data.y[target_node_indexes] + prediction[target_node_indexes], data.y[target_node_indexes] ) loss.backward() optimizer.step() - + if lr_scheduler is not None: lr_scheduler.step() - + """ Validate performance """ if hasattr(data, "val_mask") and getattr(data, "val_mask") is not None: - validation_results: _typing.Sequence[float] = \ - self.evaluate((data,), "val", [self.feval[0]]) - + validation_results: _typing.Sequence[float] = self.evaluate( + (data,), "val", [self.feval[0]] + ) + if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] else: @@ -206,7 +218,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): if hasattr(data, "val_mask") and data.val_mask is not None: self._early_stopping.load_checkpoint(self._model.model) return self - + def __predict_only(self, data): """ The function of predicting on the given data. @@ -218,7 +230,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): with torch.no_grad(): prediction = self._model.model(data) return prediction - + def train(self, dataset, keep_valid_result: bool = True): """ The function of training on the given dataset and keeping valid result. 
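The training loop above consumes batches of (target node indexes, per-layer edge indexes) from the NeighborSampler defined later in this patch. A hedged consumption sketch, where `data` and `model` stand in for a PyG-style graph and the multi-edge-index GraphSAGE from this series, and `sampling_sizes` is assumed to match the model's layer count:

import torch.nn.functional as F

sampler = NeighborSampler(data, sampling_sizes=[10, 5], batch_size=20)
for target_node_indexes, edge_indexes in sampler:
    # one edge_index per convolution layer, ordered for the forward pass
    data.edge_indexes = edge_indexes
    prediction = model(data)  # GraphSAGE dispatches to its distributed forward
    loss = F.nll_loss(prediction[target_node_indexes], data.y[target_node_indexes])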
@@ -232,10 +244,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] self._valid_result_prob: torch.Tensor = prediction[data.val_mask] self._valid_score = self.evaluate(dataset, "val") - + def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, - in_log_format: bool = False + self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False ) -> torch.Tensor: """ The function of predicting the probability on the given dataset. @@ -258,29 +269,22 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): _mask = data.test_mask result = self.__predict_only(data)[_mask] return result if in_log_format else torch.exp(result) - + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: - return self.predict_proba( - dataset, mask, in_log_format=True - ).max(1)[1] - + return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] + def get_valid_predict(self) -> torch.Tensor: return self._valid_result - + def get_valid_predict_proba(self) -> torch.Tensor: return self._valid_result_prob - + def get_valid_score(self, return_major: bool = True): if return_major: - return ( - self._valid_score[0], - self.feval[0].is_higher_better() - ) + return (self._valid_score[0], self.feval[0].is_higher_better()) else: - return ( - self._valid_score, [f.is_higher_better() for f in self.feval] - ) - + return (self._valid_score, [f.is_higher_better() for f in self.feval]) + def get_name_with_hp(self) -> str: # """Get the name of hyperparameter.""" name = "-".join( @@ -304,15 +308,14 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) ) return name - + def evaluate( - self, - dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = None + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: data = dataset[0] data = data.to(self.device) @@ -330,53 +333,60 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): _mask = data.test_mask prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) y_ground_truth = data.y[_mask] - + results = [] for f in _feval: try: - results.append( - f.evaluate(prediction_probability, y_ground_truth) - ) + results.append(f.evaluate(prediction_probability, y_ground_truth)) except: results.append( - f.evaluate(prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy()) + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), + ) ) return results - + def to(self, device: torch.device): self.device = device if self._model is not None: self._model.to(self.device) - + def duplicate_from_hyper_parameter( - self, hp: _typing.Dict[str, _typing.Any], - model: _typing.Union[BaseModel, str, None] = None + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Union[BaseModel, str, None] = None, ) -> "NodeClassificationNeighborSamplingTrainer": - + if model is None or not isinstance(model, BaseModel): model = self._model model = model.from_hyper_parameter( dict( [ - x for x in hp.items() + x + for x in hp.items() if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] ] ) ) - + return NodeClassificationNeighborSamplingTrainer( - model, self.num_features, 
self.num_classes, + model, + self.num_features, + self.num_classes, self._optimizer_class, - device=self.device, init=True, - feval=self.feval, loss=self.loss, + device=self.device, + init=True, + feval=self.feval, + loss=self.loss, lr_scheduler_type=self._lr_scheduler_type, **hp ) - + @property def hyper_parameter_space(self): return self._hyper_parameter_space - + @hyper_parameter_space.setter def hyper_parameter_space(self, hp_space): self._hyper_parameter_space = hp_space diff --git a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index 0e62a74..53a6b7c 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -9,37 +9,41 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): class _NodeIndexesDataset(torch.utils.data.Dataset): def __init__(self, node_indexes): self.__node_indexes: _typing.Sequence[int] = node_indexes - + def __getitem__(self, index) -> int: if not 0 <= index < len(self.__node_indexes): raise IndexError("Index out of range") else: return self.__node_indexes[index] - + def __len__(self) -> int: return len(self.__node_indexes) - + def __init__( - self, data, - sampling_sizes: _typing.Sequence[int], - target_node_indexes: _typing.Optional[_typing.Sequence[int]] = None, - batch_size: _typing.Optional[int] = 1, - *args, **kwargs + self, + data, + sampling_sizes: _typing.Sequence[int], + target_node_indexes: _typing.Optional[_typing.Sequence[int]] = None, + batch_size: _typing.Optional[int] = 1, + *args, + **kwargs ): self._data = data self.__sampling_sizes: _typing.Sequence[int] = sampling_sizes - + if not ( - target_node_indexes is not None and - isinstance(target_node_indexes, _typing.Sequence) + target_node_indexes is not None + and isinstance(target_node_indexes, _typing.Sequence) ): if hasattr(data, "train_mask"): - target_node_indexes: _typing.Sequence[int] = \ - torch.where(getattr(data, "train_mask"))[0] + target_node_indexes: _typing.Sequence[int] = torch.where( + getattr(data, "train_mask") + )[0] else: - target_node_indexes: _typing.Sequence[int] = \ - list(np.arange(0, data.x.shape[0])) - + target_node_indexes: _typing.Sequence[int] = list( + np.arange(0, data.x.shape[0]) + ) + self.__edge_index_map: _typing.Dict[ int, _typing.Union[torch.Tensor, _typing.Sequence[int]] ] = {} @@ -47,9 +51,11 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): super(NeighborSampler, self).__init__( self._NodeIndexesDataset(target_node_indexes), batch_size=batch_size if batch_size > 0 else 1, - collate_fn=self.__sample, *args, **kwargs + collate_fn=self.__sample, + *args, + **kwargs ) - + def __init_edge_index_map(self): self.__edge_index_map.clear() all_edge_index: torch.Tensor = getattr(self._data, "edge_index") @@ -58,12 +64,12 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): self.__edge_index_map[target_node_index] = torch.where( all_edge_index[1] == target_node_index )[0] - + def __iter__(self): return super(NeighborSampler, self).__iter__() - + def __sample( - self, target_nodes_indexes: _typing.List[int] + self, target_nodes_indexes: _typing.List[int] ) -> _typing.Tuple[torch.Tensor, _typing.List[torch.Tensor]]: """ Sample a sub-graph with neighborhood sampling @@ -71,14 +77,15 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): """ original_edge_index: torch.Tensor = self._data.edge_index edges_indexes: _typing.List[torch.Tensor] 
= [] - + current_target_nodes_indexes: _typing.List[int] = target_nodes_indexes for current_sampling_size in self.__sampling_sizes: current_edge_index: _typing.Optional[torch.Tensor] = None for current_target_node_index in current_target_nodes_indexes: if current_target_node_index in self.__edge_index_map: - all_indexes: torch.Tensor = \ - self.__edge_index_map.get(current_target_node_index) + all_indexes: torch.Tensor = self.__edge_index_map.get( + current_target_node_index + ) else: all_indexes: torch.Tensor = torch.where( original_edge_index[1] == current_target_node_index @@ -89,25 +96,38 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): ) if current_edge_index is not None: current_edge_index: torch.Tensor = torch.cat( - [current_edge_index, original_edge_index[:, sampled_indexes]], dim=1 + [ + current_edge_index, + original_edge_index[:, sampled_indexes], + ], + dim=1, ) else: - current_edge_index: torch.Tensor = original_edge_index[:, sampled_indexes] + current_edge_index: torch.Tensor = original_edge_index[ + :, sampled_indexes + ] else: all_indexes_list = all_indexes.tolist() random.shuffle(all_indexes_list) - shuffled_indexes_list: _typing.List[int] = \ - all_indexes_list[0: current_sampling_size] + shuffled_indexes_list: _typing.List[int] = all_indexes_list[ + 0:current_sampling_size + ] if current_edge_index is not None: current_edge_index: torch.Tensor = torch.cat( - [current_edge_index, original_edge_index[:, shuffled_indexes_list]], dim=1 + [ + current_edge_index, + original_edge_index[:, shuffled_indexes_list], + ], + dim=1, ) else: - current_edge_index: torch.Tensor = original_edge_index[:, shuffled_indexes_list] + current_edge_index: torch.Tensor = original_edge_index[ + :, shuffled_indexes_list + ] edges_indexes.append(current_edge_index) - + if len(edges_indexes) < len(self.__sampling_sizes): next_target_nodes_indexes: torch.Tensor = current_edge_index[0].unique() current_target_nodes_indexes = next_target_nodes_indexes.tolist() - + return torch.tensor(target_nodes_indexes), edges_indexes[::-1] diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 9d6bc83..1086e88 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -92,6 +92,9 @@ class AutoNodeClassifier(BaseClassifier): super().__init__( feature_module=feature_module, graph_models=graph_models, + nas_spaces=nas_spaces, + nas_estimators=nas_estimators, + nas_algorithms=nas_algorithms, hpo_module=hpo_module, ensemble_module=ensemble_module, max_evals=max_evals, From 04e6a61382eb053991f5de63214429971d746321 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 7 Apr 2021 13:50:18 +0000 Subject: [PATCH 037/144] update for nas interface in solver --- autogl/solver/classifier/graph_classifier.py | 35 ++++++++++++++++++++ autogl/solver/classifier/node_classifier.py | 21 ++++++++++-- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 7427e13..5503019 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -74,6 +74,9 @@ class AutoGraphClassifier(BaseClassifier): self, feature_module=None, graph_models=["gin", "topkpool"], + nas_algorithms=None, + nas_spaces=None, + nas_estimators=None, hpo_module="anneal", ensemble_module="voting", max_evals=50, @@ -87,6 +90,9 @@ class AutoGraphClassifier(BaseClassifier): super().__init__( 
feature_module=feature_module, graph_models=graph_models, + nas_algorithms=nas_algorithms, + nas_spaces=nas_spaces, + nas_estimators=nas_estimators, hpo_module=hpo_module, ensemble_module=ensemble_module, max_evals=max_evals, @@ -205,6 +211,15 @@ class AutoGraphClassifier(BaseClassifier): return self + def _init_nas_module( + self, num_features, num_classes, num_graph_features, feval, device, loss + ): + for algo, space, estimator in zip( + self.nas_algorithms, self.nas_spaces, self.nas_estimators + ): + # TODO: initialize important parameters + pass + # pylint: disable=arguments-differ def fit( self, @@ -344,6 +359,26 @@ class AutoGraphClassifier(BaseClassifier): else dataset.data.gf.size(1), ) + self._init_nas_module( + num_features=dataset.num_node_features, + num_classes=dataset.num_classes, + feval=evaluator_list, + device=self.runtime_device, + loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, + num_graph_features=0 + if not hasattr(dataset.data, "gf") + else dataset.data.gf.size(1), + ) + + # neural architecture search + if self.nas_algorithms is not None: + # perform nas and add them to trainer list + for algo, space, estimator in zip( + self.nas_algorithms, self.nas_spaces, self.nas_estimators + ): + trainer = algo.search(space, self.dataset, estimator) + self.graph_model_list.append(trainer) + # train the models and tune hpo result_valid = [] names = [] diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 1086e88..cdb7ac1 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -76,9 +76,9 @@ class AutoNodeClassifier(BaseClassifier): self, feature_module=None, graph_models=["gat", "gcn"], + nas_algorithms=None, nas_spaces=None, nas_estimators=None, - nas_algorithms=None, hpo_module="anneal", ensemble_module="voting", max_evals=50, @@ -92,9 +92,9 @@ class AutoNodeClassifier(BaseClassifier): super().__init__( feature_module=feature_module, graph_models=graph_models, + nas_algorithms=nas_algorithms, nas_spaces=nas_spaces, nas_estimators=nas_estimators, - nas_algorithms=nas_algorithms, hpo_module=hpo_module, ensemble_module=ensemble_module, max_evals=max_evals, @@ -201,6 +201,13 @@ class AutoNodeClassifier(BaseClassifier): return self + def _init_nas_module(self, num_features, num_classes, feval, device, loss): + for algo, space, estimator in zip( + self.nas_algorithms, self.nas_spaces, self.nas_estimators + ): + # TODO: initialize important parameters + pass + # pylint: disable=arguments-differ def fit( self, @@ -332,6 +339,14 @@ class AutoNodeClassifier(BaseClassifier): ) # perform neural architecture search + self._init_nas_module( + num_features=self.dataset[0].x.shape[1], + num_classes=dataset.num_classes, + feval=evaluator_list, + device=self.runtime_device, + loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, + ) + if self.nas_algorithms is not None: # perform nas and add them to trainer list for algo, space, estimator in zip( @@ -554,7 +569,7 @@ class AutoNodeClassifier(BaseClassifier): if use_ensemble and self.ensemble_module is None: LOGGER.warning( "Cannot use ensemble because no ensebmle module is given." - "Will use best model instead." + " Will use best model instead." 
) if use_best or (use_ensemble and self.ensemble_module is None): From 295b6f587faf355ec880a91fa8b666fa4a3a7094 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 7 Apr 2021 13:50:34 +0000 Subject: [PATCH 038/144] add test examples --- examples/test_nas.py | 36 ++++++++++++++++++++++++++++++++++++ examples/test_nas_grah.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 examples/test_nas.py create mode 100644 examples/test_nas_grah.py diff --git a/examples/test_nas.py b/examples/test_nas.py new file mode 100644 index 0000000..270b0b6 --- /dev/null +++ b/examples/test_nas.py @@ -0,0 +1,36 @@ +import sys +sys.path.append('../') +from autogl.solver import AutoNodeClassifier +from autogl.module.hpo.nas import BaseNAS, BaseEstimator, GraphSpace +from autogl.datasets import build_dataset_from_name +from autogl.module.model import AutoGAT +from autogl.module.train import NodeClassificationFullTrainer +from autogl.module.hpo.darts import DartsTrainer +from torch_geometric.nn import GATConv, GCNConv + +class TestNASAlgorithm(BaseNAS): + model = None + def search(self, space, dset, trainer): + num_classes = dset.num_classes + num_features = dset.num_features + return NodeClassificationFullTrainer( + "gat", + num_features=num_features, + num_classes=num_classes, + device="auto" + ) + +if __name__ == '__main__': + dataset = build_dataset_from_name('cora') + solver = AutoNodeClassifier( + feature_module=None, + graph_models=[], + hpo_module="random", + max_evals=10, + ensemble_module=None, + nas_algorithms=[TestNASAlgorithm()], + nas_spaces=[GraphSpace(dataset.num_features, 64, dataset.num_classes, [GATConv, GCNConv])], + nas_estimators=[BaseEstimator()] + ) + solver.fit(dataset) + out = solver.predict(dataset) \ No newline at end of file diff --git a/examples/test_nas_grah.py b/examples/test_nas_grah.py new file mode 100644 index 0000000..7a30a65 --- /dev/null +++ b/examples/test_nas_grah.py @@ -0,0 +1,34 @@ +import sys +sys.path.append('../') +from autogl.solver import AutoGraphClassifier +from autogl.module.hpo.nas import BaseNAS, BaseEstimator, GraphSpace +from autogl.datasets import build_dataset_from_name +from autogl.module.train import GraphClassificationFullTrainer +from torch_geometric.nn import GATConv, GCNConv + +class TestNASAlgorithm(BaseNAS): + model = None + def search(self, space, dset, trainer): + num_classes = dset.num_classes + num_features = dset.num_features + return GraphClassificationFullTrainer( + "gin", + num_features=num_features, + num_classes=num_classes, + device="auto" + ) + +if __name__ == '__main__': + dataset = build_dataset_from_name('mutag') + solver = AutoGraphClassifier( + feature_module=None, + graph_models=[], + hpo_module="random", + max_evals=10, + ensemble_module=None, + nas_algorithms=[TestNASAlgorithm()], + nas_spaces=[GraphSpace(dataset.num_features, 64, dataset.num_classes, [GATConv, GCNConv])], + nas_estimators=[BaseEstimator()] + ) + solver.fit(dataset, train_split=0.8, val_split=0.1) + out = solver.predict() \ No newline at end of file From ad4ef59294d2b9891da061b77b5af177c8574ea1 Mon Sep 17 00:00:00 2001 From: cluster32 Date: Thu, 8 Apr 2021 01:03:17 +0800 Subject: [PATCH 039/144] add progress bar --- autogl/module/hpo/advisorbase.py | 4 +++- autogl/module/hpo/autone.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/autogl/module/hpo/advisorbase.py b/autogl/module/hpo/advisorbase.py index 6d9395a..2a63579 100644 --- a/autogl/module/hpo/advisorbase.py +++ 
b/autogl/module/hpo/advisorbase.py @@ -5,6 +5,7 @@ HPO Module for tuning hyper parameters import time import json import math +from tqdm import trange from .suggestion.models import Study from .base import BaseHPOptimizer, TimeTooLimitedError from .suggestion.algorithm.random_search import RandomSearchAlgorithm @@ -150,7 +151,8 @@ class AdvisorBaseHPOptimizer(BaseHPOptimizer): best_id = None best_trainer = None - for i in range(self.max_evals): + print("HPO Search Phase:\n") + for i in trange(self.max_evals): if time.time() - start_time > time_limit: self.logger.info("Time out of limit, Epoch: {}".format(str(i))) break diff --git a/autogl/module/hpo/autone.py b/autogl/module/hpo/autone.py index 30da0b9..f499eed 100644 --- a/autogl/module/hpo/autone.py +++ b/autogl/module/hpo/autone.py @@ -6,6 +6,7 @@ import time import json import math import numpy as np +from tqdm import trange from . import register_hpo from .suggestion.models import Study from .base import BaseHPOptimizer, TimeTooLimitedError @@ -115,7 +116,8 @@ class AutoNE(BaseHPOptimizer): K = utils.K(len(params.type_)) gp = utils.GaussianProcessRegressor(K) sample_graphs = sample_subgraph(dataset) - for t in range(sampled_number): + print("Sample Phase:\n") + for t in trange(sampled_number): b_t = time.time() i = t subgraph = sample_graphs[t] @@ -133,7 +135,8 @@ class AutoNE(BaseHPOptimizer): best_trainer = None best_para = None wne = get_wne(dataset) - for t in range(s): + print("HPO Search Phase:\n") + for t in trange(s): if time.time() - start_time > time_limit: self.logger.info("Time out of limit, Epoch: {}".format(str(i))) break From 8998e0586f2805d5f88a787032ac958d6dfc7d17 Mon Sep 17 00:00:00 2001 From: cluster32 Date: Sat, 10 Apr 2021 21:34:43 +0800 Subject: [PATCH 040/144] problem 123 --- autogl/module/hpo/darts.py | 10 +++++++--- autogl/module/hpo/nas.py | 21 ++++++++++++++++----- autogl/module/hpo/test.py | 7 ++++--- autogl/solver/classifier/node_classifier.py | 4 ++++ examples/test_nas.py | 11 ++++++----- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/autogl/module/hpo/darts.py b/autogl/module/hpo/darts.py index 8aee141..797c877 100644 --- a/autogl/module/hpo/darts.py +++ b/autogl/module/hpo/darts.py @@ -9,7 +9,7 @@ import torch.nn.functional as F from .nas import BaseNAS from .utils import AverageMeterGroup, replace_layer_choice, replace_input_choice - +from nni.nas.pytorch.fixed import apply_fixed_architecture _logger = logging.getLogger(__name__) @@ -68,7 +68,7 @@ class DartsInputChoice(nn.Module): return torch.argsort(-self.alpha).cpu().numpy().tolist()[: self.n_chosen] -class DartsTrainer(BaseNAS): +class Darts(BaseNAS): """ DARTS trainer. 
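For orientation: the layer-choice relaxation this trainer relies on (not shown in these hunks) is the standard DARTS rule, mixing every candidate op's output by a softmax over the architecture logits `alpha`; the visible `DartsInputChoice.export` correspondingly keeps the top `n_chosen` candidates by sorted `alpha`. A minimal standalone sketch of the mixing rule, with names and shapes purely illustrative:

import torch
import torch.nn.functional as F

def darts_mixed_op(ops, alpha, x):
    # Evaluate every candidate op, then blend the results by softmax(alpha).
    results = torch.stack([op(x) for op in ops])  # (n_candidates, ...)
    weights = F.softmax(alpha, dim=-1).view(-1, *([1] * (results.dim() - 1)))
    return (results * weights).sum(dim=0)
    # export() then discretizes, e.g. torch.argmax(alpha) for a layer choice.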
@@ -162,7 +162,11 @@ class DartsTrainer(BaseNAS): meters, ) - return self.export() + selection = self.export() + space.reinstantiate() + apply_fixed_architecture(space, selection) + return space + #return self.export() def _train_one_epoch(self, epoch): self.model.train() diff --git a/autogl/module/hpo/nas.py b/autogl/module/hpo/nas.py index 9e02d13..c36932f 100644 --- a/autogl/module/hpo/nas.py +++ b/autogl/module/hpo/nas.py @@ -22,25 +22,36 @@ class BaseNAS: The trainer including the best trained model """ - class GraphSpace(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim, ops, *arg, **kwargs): + def __init__(self, *arg, **kwargs): super().__init__() + + def instantiate(self, input_dim, hidden_dim, output_dim, ops, *arg, **kwargs): """self.op1 = mutables.LayerChoice([GCNConv(input_dim, hidden_dim),SAGEConv(input_dim, hidden_dim)]) self.op2 = mutables.LayerChoice([ GCNConv(hidden_dim, output_dim), SAGEConv(hidden_dim, output_dim) ], key = "2")""" - self.op1 = mutables.LayerChoice([op(input_dim, hidden_dim) for op in ops]) - self.op2 = mutables.LayerChoice([op(hidden_dim, output_dim) for op in ops]) + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.output_dim = output_dim + self.ops = ops + self.op1 = mutables.LayerChoice([op(input_dim, hidden_dim) for op in ops], key = "1") + self.op2 = mutables.LayerChoice([op(hidden_dim, output_dim) for op in ops], key = "2") + + def reinstantiate(self): + self.instantiate(self.input_dim, self.hidden_dim, self.output_dim, self.ops) def forward(self, data): x = self.op1(data.x, data.edge_index) x = self.op2(x, data.edge_index) return x - class BaseEstimator: + def infer(self, model, dataset): + pass + +class DartsNodeClfEstimator(BaseEstimator): def infer(self, model, dataset): dset = dataset[0] pred = model(dset)[dset.train_mask] diff --git a/autogl/module/hpo/test.py b/autogl/module/hpo/test.py index 721347a..778e5f7 100644 --- a/autogl/module/hpo/test.py +++ b/autogl/module/hpo/test.py @@ -3,7 +3,7 @@ import hyperopt from torch_geometric.nn import GCNConv, SAGEConv from . 
import register_hpo from .nas import BaseEstimator, GraphSpace -from .darts import DartsTrainer +from .darts import Darts from .base import BaseHPOptimizer, TimeTooLimitedError @@ -25,9 +25,10 @@ class TestHPO(BaseHPOptimizer): op1 = lambda x, y: GCNConv(x, y) op2 = lambda x, y: SAGEConv(x, y) ops = [op1, op2] - model = GraphSpace(num_features, 64, num_classes, ops) + model = GraphSpace() + model.instantiate(num_features, 64, num_classes, ops) tr = BaseEstimator() - nas = DartsTrainer() + nas = Darts() a = nas.search(model, dataset, tr) print(a) print(type(a)) diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index cdb7ac1..d776a58 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -20,6 +20,8 @@ from ..utils import Leaderboard, set_seed from ...datasets import utils from ...utils import get_logger +from torch_geometric.nn import GATConv, GCNConv + LOGGER = get_logger("NodeClassifier") @@ -154,6 +156,7 @@ class AutoNodeClassifier(BaseClassifier): loss=loss, feval=feval, device=device, + ) self.graph_model_list.append(model) else: @@ -206,6 +209,7 @@ class AutoNodeClassifier(BaseClassifier): self.nas_algorithms, self.nas_spaces, self.nas_estimators ): # TODO: initialize important parameters + space.instantiate(num_features, 64, num_classes, [GATConv, GCNConv]) pass # pylint: disable=arguments-differ diff --git a/examples/test_nas.py b/examples/test_nas.py index 270b0b6..d17bb9a 100644 --- a/examples/test_nas.py +++ b/examples/test_nas.py @@ -1,11 +1,11 @@ import sys sys.path.append('../') from autogl.solver import AutoNodeClassifier -from autogl.module.hpo.nas import BaseNAS, BaseEstimator, GraphSpace +from autogl.module.hpo.nas import BaseNAS, BaseEstimator, GraphSpace, DartsNodeClfEstimator from autogl.datasets import build_dataset_from_name from autogl.module.model import AutoGAT from autogl.module.train import NodeClassificationFullTrainer -from autogl.module.hpo.darts import DartsTrainer +from autogl.module.hpo.darts import Darts from torch_geometric.nn import GATConv, GCNConv class TestNASAlgorithm(BaseNAS): @@ -28,9 +28,10 @@ if __name__ == '__main__': hpo_module="random", max_evals=10, ensemble_module=None, - nas_algorithms=[TestNASAlgorithm()], - nas_spaces=[GraphSpace(dataset.num_features, 64, dataset.num_classes, [GATConv, GCNConv])], - nas_estimators=[BaseEstimator()] + nas_algorithms=[Darts()], + nas_spaces=[GraphSpace()], + #nas_spaces=[GraphSpace(dataset.num_features, 64, dataset.num_classes, [GATConv, GCNConv])], + nas_estimators=[DartsNodeClfEstimator()] ) solver.fit(dataset) out = solver.predict(dataset) \ No newline at end of file From 21c9ef828ada13478312460971fe7184b91948a5 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Mon, 12 Apr 2021 04:32:49 +0000 Subject: [PATCH 041/144] update interface --- autogl/solver/classifier/node_classifier.py | 44 +++++-- examples/test_nas.py | 131 +++++++++++++++++--- 2 files changed, 145 insertions(+), 30 deletions(-) diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index d776a58..81c047e 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -209,8 +209,8 @@ class AutoNodeClassifier(BaseClassifier): self.nas_algorithms, self.nas_spaces, self.nas_estimators ): # TODO: initialize important parameters - space.instantiate(num_features, 64, num_classes, [GATConv, GCNConv]) - pass + algo.to(device) + 
space.instantiate(input_dim=num_features, output_dim=num_classes) # pylint: disable=arguments-differ def fit( @@ -342,21 +342,39 @@ class AutoNodeClassifier(BaseClassifier): loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, ) - # perform neural architecture search - self._init_nas_module( - num_features=self.dataset[0].x.shape[1], - num_classes=dataset.num_classes, - feval=evaluator_list, - device=self.runtime_device, - loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, - ) - if self.nas_algorithms is not None: - # perform nas and add them to trainer list + # perform neural architecture search + self._init_nas_module( + num_features=self.dataset[0].x.shape[1], + num_classes=self.dataset.num_classes, + feval=evaluator_list, + device=self.runtime_device, + loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, + ) + + assert isinstance(self._default_trainer, str) or len(self.nas_algorithms) == len(self._default_trainer) - len(self.graph_model_list), "length of default trainer should match total graph models and nas models passed" + + # perform nas and add them to model list + idx_trainer = len(self.graph_model_list) for algo, space, estimator in zip( self.nas_algorithms, self.nas_spaces, self.nas_estimators ): - trainer = algo.search(space, self.dataset, estimator) + model = algo.search(space, self.dataset, estimator) + # insert model into default trainer + if isinstance(self._default_trainer, str): + train_name = self._default_trainer + else: + train_name = self._default_trainer[idx_trainer] + idx_trainer += 1 + trainer = TRAINER_DICT[train_name]( + model=model, + num_features=self.dataset[0].x.shape[1], + num_classes=self.dataset.num_classes, + loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, + feval=evaluator_list, + device=self.runtime_device, + init=False, + ) self.graph_model_list.append(trainer) # train the models and tune hpo diff --git a/examples/test_nas.py b/examples/test_nas.py index d17bb9a..fe039ca 100644 --- a/examples/test_nas.py +++ b/examples/test_nas.py @@ -1,24 +1,122 @@ +from copy import deepcopy import sys +from nni.nas.pytorch.fixed import apply_fixed_architecture +from torch_geometric.nn.conv.gat_conv import GATConv +from torch_geometric.nn.conv.gcn_conv import GCNConv sys.path.append('../') +import torch from autogl.solver import AutoNodeClassifier -from autogl.module.hpo.nas import BaseNAS, BaseEstimator, GraphSpace, DartsNodeClfEstimator +from autogl.module.hpo.nas import GraphSpace, DartsNodeClfEstimator from autogl.datasets import build_dataset_from_name -from autogl.module.model import AutoGAT -from autogl.module.train import NodeClassificationFullTrainer +from autogl.module.model import BaseModel from autogl.module.hpo.darts import Darts -from torch_geometric.nn import GATConv, GCNConv +from autogl.utils import get_logger -class TestNASAlgorithm(BaseNAS): - model = None - def search(self, space, dset, trainer): - num_classes = dset.num_classes - num_features = dset.num_features - return NodeClassificationFullTrainer( - "gat", - num_features=num_features, - num_classes=num_classes, - device="auto" +class MyGraphSpace(GraphSpace): + def __init__(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None, init=False): + super().__init__() + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.output_dim = output_dim + self.ops = ops + self._initialized = False + if init: + self.instantiate() + + def instantiate(self, input_dim=None, hidden_dim=None, 
output_dim=None, ops=None): + self.input_dim = input_dim or self.input_dim + self.hidden_dim = hidden_dim or self.hidden_dim + self.output_dim = output_dim or self.output_dim + self.ops = ops or self.ops + super().instantiate( + input_dim=self.input_dim, + hidden_dim=self.hidden_dim, + output_dim=self.output_dim, + ops=self.ops ) + self._initialized = True + +class SpaceModel(BaseModel): + _logger = get_logger('space model') + def __init__(self, space_model: MyGraphSpace, selection, device=torch.device('cuda')): + super().__init__(init=True) + space_model.reinstantiate() + self.init = True + self.space = [] + self.hyperparams = {} + self._model = space_model.to(device) + self.num_features = self._model.input_dim + self.num_classes = self._model.output_dim + self.selection = selection + apply_fixed_architecture(self._model, selection, verbose=False) + self.params = { + "num_class": self.num_classes, + "features_num": self.num_features + } + self.device = device + + def to(self, device): + if isinstance(device, (str, torch.device)): + self.device = device + return super().to(device) + + def forward(self, *args, **kwargs): + return self._model.forward(*args, **kwargs) + + def from_hyper_parameter(self, hp): + """ + receive no hp, just copy self and reset the learnable parameters. + """ + ret_self = deepcopy(self) + ret_self._model.reinstantiate() + apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False) + ret_self.to(self.device) + return ret_self + + @property + def model(self): + return self._model + + def set_num_classes(self, num_classes): + """ + TODO: if we can support resetting this (suggested), please implement + """ + self._logger.error("Cannot reset num classes for nas model!") + raise ValueError("Cannot reset num classes for nas model!") + + def set_num_features(self, num_features): + """ + TODO: if we can support resetting this (suggested), please implement + """ + self._logger.error("Cannot reset num features for nas model!") + raise ValueError("Cannot reset num features for nas model!") + + def set_num_graph_features(self, num_graph_features): + """ + TODO: if we can support resetting this (suggested), please implement + """ + self._logger.error("Cannot reset num graph features for nas model!") + raise ValueError("Cannot reset num graph features for nas model!") + +class MyDarts(Darts): + def __init__(self, device="cuda", *args, **kwargs): + super().__init__(*args, **kwargs) + self.device = device + + def to(self, device): + """ + change the device of the whole process + """ + self.device = device + + def search(self, space, dset, trainer): + """ + TODO: please manage device when training + current device of search seems to be forced on CPU. 
+        """
+        res = super().search(space, dset, trainer)
+        selection = self.export()
+        return SpaceModel(res, selection, self.device)
 
 if __name__ == '__main__':
     dataset = build_dataset_from_name('cora')
@@ -28,9 +126,8 @@ if __name__ == '__main__':
         hpo_module="random",
         max_evals=10,
         ensemble_module=None,
-        nas_algorithms=[Darts()],
-        nas_spaces=[GraphSpace()],
-        #nas_spaces=[GraphSpace(dataset.num_features, 64, dataset.num_classes, [GATConv, GCNConv])],
+        nas_algorithms=[MyDarts()],
+        nas_spaces=[MyGraphSpace(hidden_dim=64, ops=[GATConv, GCNConv])],
         nas_estimators=[DartsNodeClfEstimator()]
     )
     solver.fit(dataset)

From 3d8e9d1847039ec2c5889b6a12f9fb49d44684af Mon Sep 17 00:00:00 2001
From: null
Date: Tue, 13 Apr 2021 20:12:30 +0800
Subject: [PATCH 042/144] Implement partial GraphSAINT Methodology and refactor BaseModel

Reproduce a partial implementation of GraphSAINT, a representative
subgraph-wise sampling method.
Add an experimental BaseModel for a future major version.
TODO: Migrate the base class of all the concrete models to
ClassificationModel (ClassificationApproach)
---
 autogl/module/model/base.py                   | 288 ++++++++++-
 autogl/module/model/gcn.py                    | 215 ++++----
 autogl/module/model/graph_sage.py             |   2 +-
 autogl/module/train/base.py                   |   2 +-
 .../module/train/graph_classification_full.py |   2 +-
 .../module/train/node_classification_full.py  |   2 +-
 .../node_classification_sampled_trainer.py    | 474 +++++++++++++++---
 .../sampling/sampler/graphsaint_sampler.py    | 105 ++++
 8 files changed, 900 insertions(+), 190 deletions(-)
 create mode 100644 autogl/module/train/sampling/sampler/graphsaint_sampler.py

diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py
index 6e6d8c7..33cc5a9 100644
--- a/autogl/module/model/base.py
+++ b/autogl/module/model/base.py
@@ -3,10 +3,13 @@ auto graph model
 a list of models with their hyper parameters
 NOTE: neural architecture search (NAS) maybe included here
 """
-
+import copy
+import logging
+import typing as _typing
 import torch
 import torch.nn.functional as F
 from copy import deepcopy
+base_approach_logger: logging.Logger = logging.getLogger("BaseModel")
 
 
 def activate_func(x, func):
@@ -22,7 +25,7 @@ def activate_func(x, func):
     return x
 
 
-class BaseModel(torch.nn.Module):
+class BaseModel:
     def __init__(self, init=False, *args, **kwargs):
         super(BaseModel, self).__init__()
 
@@ -46,7 +49,9 @@ class BaseModel(torch.nn.Module):
     def to(self, device):
         if isinstance(device, (str, torch.device)):
             self.device = device
-        return super().to(device)
+        if hasattr(self, "model") and self.model is not None and isinstance(self.model, torch.nn.Module):
+            self.model.to(self.device)
+        return self
 
     def from_hyper_parameter(self, hp):
         ret_self = self.__class__(
@@ -80,3 +85,280 @@ class BaseModel(torch.nn.Module):
         ), "Cannot set graph features for tasks other than graph classification"
         self.num_graph_features = num_graph_features
         self.params["num_graph_features"] = num_graph_features
+
+
+class _BaseBaseModel:
+    # todo: after renaming the experimental base class _BaseModel to BaseModel,
+    #       rename this class to _BaseModel
+    """
+    The base class for class BaseModel,
+    designed to implement some basic functionality of BaseModel.
+    -- Designed by ZiXin Sun
+    """
+    @classmethod
+    def __formulate_device(
+            cls, device: _typing.Union[str, torch.device] = ...
+ ) -> torch.device: + if ( + type(device) == torch.device or + (type(device) == str and device.strip().lower() != "auto") + ): + return torch.device(device) + elif torch.cuda.is_available() and torch.cuda.device_count() > 0: + return torch.device("cuda") + else: + return torch.device("cpu") + + @property + def device(self) -> torch.device: + return self.__device + + @device.setter + def device(self, __device: _typing.Union[str, torch.device, None]): + self.__device: torch.device = self.__formulate_device(__device) + + @property + def model(self) -> _typing.Optional[torch.nn.Module]: + if self._model is None: + base_approach_logger.debug( + "property of model NOT initialized before accessing" + ) + return self._model + + @model.setter + def model(self, _model: torch.nn.Module) -> None: + if not isinstance(_model, torch.nn.Module): + raise TypeError( + "the property of model MUST be an instance of " + "torch.nn.Module" + ) + self._model = _model + + def _initialize(self): + raise NotImplementedError + + def initialize(self) -> bool: + """ + Initialize the model in case that the model has NOT been initialized + :return: whether self._initialize() method called + """ + if not self.__is_initialized: + self._initialize() + self.__is_initialized = True + return True + return False + + # def to(self, *args, **kwargs): + # """ + # Due to the signature of to() method in class BaseApproach + # is inconsistent with the signature of the method + # in the base class torch.nn.Module, + # this intermediate overridden method is necessary to + # walk around (bypass) the inspection for + # signature of overriding method. + # :param args: positional arguments list + # :param kwargs: keyword arguments dict + # :return: self + # """ + # return super(_BaseBaseModel, self).to(*args, **kwargs) + + def forward(self, *args, **kwargs): + if self.model is not None and isinstance(self.model, torch.nn.Module): + return self.model(*args, **kwargs) + else: + raise NotImplementedError + + def __init__( + self, model: _typing.Optional[torch.nn.Module] = None, + initialize: bool = False, + device: _typing.Union[str, torch.device] = ... + ): + if type(initialize) != bool: + raise TypeError + super(_BaseBaseModel, self).__init__() + self.__device: torch.device = self.__formulate_device(device) + self._model: _typing.Optional[torch.nn.Module] = model + self.__is_initialized: bool = False + if initialize: + self.initialize() + + +class _BaseModel(_BaseBaseModel, BaseModel): + """ + The upcoming root base class for Model, i.e. BaseModel + -- Designed by ZiXin Sun + """ + # todo: Deprecate and remove the legacy class "BaseModel", + # then rename this class to "BaseModel", + # correspondingly, this class will no longer extend + # the legacy class "BaseModel" after the removal. 
+ def _initialize(self): + raise NotImplementedError + + def to(self, device: torch.device): + self.device = device + if self.model is not None and isinstance(self.model, torch.nn.Module): + self.model.to(self.device) + return super().to(device) + + @property + def space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: + # todo: deprecate and remove in future major version + return self.__hyper_parameter_space + + @property + def hyper_parameter_space(self): + return self.__hyper_parameter_space + + @hyper_parameter_space.setter + def hyper_parameter_space( + self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + ): + self.__hyper_parameter_space = space + + @property + def hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: + return self.__hyper_parameter + + @hyper_parameter.setter + def hyper_parameter(self, _hyper_parameter: _typing.Dict[str, _typing.Any]): + if not isinstance(_hyper_parameter, dict): + raise TypeError + self.__hyper_parameter = _hyper_parameter + + def get_hyper_parameter(self) -> _typing.Dict[str, _typing.Any]: + """ + todo: consider deprecating this trivial getter method in the future + :return: copied hyper parameter + """ + return copy.deepcopy(self.__hyper_parameter) + + def __init__( + self, model: _typing.Optional[torch.nn.Module] = None, + initialize: bool = False, + hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., + hyper_parameter: _typing.Dict[str, _typing.Any] = ..., + device: _typing.Union[str, torch.device] = ... + ): + if type(initialize) != bool: + raise TypeError + super(_BaseModel, self).__init__(model, initialize, device) + if ( + hyper_parameter_space != Ellipsis and + isinstance(hyper_parameter_space, _typing.Sequence) + ): + self.__hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = hyper_parameter_space + else: + self.__hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] + if hyper_parameter != Ellipsis and isinstance(hyper_parameter, dict): + self.__hyper_parameter: _typing.Dict[str, _typing.Any] = hyper_parameter + else: + self.__hyper_parameter: _typing.Dict[str, _typing.Any] = {} + + def from_hyper_parameter(self, hyper_parameter: _typing.Dict[str, _typing.Any]): + raise NotImplementedError + + +class ClassificationModel(_BaseModel): + def _initialize(self): + raise NotImplementedError + + def from_hyper_parameter( + self, hyper_parameter: _typing.Dict[str, _typing.Any] + ) -> "ClassificationModel": + new_model: ClassificationModel = self.__class__( + num_features=self.num_features, + num_classes=self.num_classes, + device=self.device, + init=False + ) + _hyper_parameter = self.hyper_parameter + _hyper_parameter.update(hyper_parameter) + new_model.hyper_parameter = _hyper_parameter + new_model.initialize() + return new_model + + def __init__( + self, num_features: int = ..., num_classes: int = ..., + num_graph_features: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, **kwargs + ): + if "initialize" in kwargs: + del kwargs["initialize"] + super(ClassificationModel, self).__init__( + initialize=init, device=device, **kwargs + ) + if num_classes != Ellipsis and type(num_classes) == int: + self.__num_classes: int = num_classes if num_classes > 0 else 0 + else: + self.__num_classes: int = 0 + if num_features != Ellipsis and type(num_features) == int: + self.__num_features: int = num_features if num_features > 0 else 0 + else: + self.__num_features: int = 0 + if num_graph_features != Ellipsis and type(num_graph_features) 
== int: + if num_graph_features > 0: + self.__num_graph_features: int = num_graph_features + else: + self.__num_graph_features: int = 0 + else: + self.__num_graph_features: int = 0 + + @property + def num_classes(self) -> int: + return self.__num_classes + + @num_classes.setter + def num_classes(self, __num_classes: int): + if type(__num_classes) != int: + raise TypeError + if not __num_classes > 0: + raise ValueError + self.__num_classes = __num_classes if __num_classes > 0 else 0 + + @property + def num_features(self) -> int: + return self.__num_features + + @num_features.setter + def num_features(self, __num_features: int): + if type(__num_features) != int: + raise TypeError + if not __num_features > 0: + raise ValueError + self.__num_features = __num_features if __num_features > 0 else 0 + + def get_num_classes(self) -> int: + # todo: consider replacing with property with getter and setter + return self.__num_classes + + def set_num_classes(self, num_classes: int) -> None: + # todo: consider replacing with property with getter and setter + if type(num_classes) != int: + raise TypeError + self.__num_classes = num_classes if num_classes > 0 else 0 + + def get_num_features(self) -> int: + # todo: consider replacing with property with getter and setter + return self.__num_features + + def set_num_features(self, num_features: int): + # todo: consider replacing with property with getter and setter + if type(num_features) != int: + raise TypeError + self.__num_features = num_features if num_features > 0 else 0 + + def set_num_graph_features(self, num_graph_features: int): + # todo: consider replacing with property with getter and setter + if type(num_graph_features) != int: + raise TypeError + else: + if num_graph_features > 0: + self.__num_graph_features = num_graph_features + else: + self.__num_graph_features = 0 diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 3e6208f..4fa3594 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -1,73 +1,94 @@ import torch -import torch.nn.functional as F -from torch_geometric.nn import GCNConv +import torch.nn.functional +import torch_geometric +import typing as _typing from . import register_model -from .base import BaseModel, activate_func +from .base import activate_func, ClassificationModel from ...utils import get_logger LOGGER = get_logger("GCNModel") -def set_default(args, d): - for k, v in d.items(): - if k not in args: - args[k] = v - return args - - class GCN(torch.nn.Module): - def __init__(self, args): - super(GCN, self).__init__() - self.args = args - self.num_layer = int(self.args["num_layers"]) - - missing_keys = list( - set(["features_num", "num_class", "num_layers", "hidden", "dropout", "act"]) - - set(self.args.keys()) - ) - if len(missing_keys) > 0: - raise Exception("Missing keys: %s." 
% ",".join(missing_keys)) - - if not self.num_layer == len(self.args["hidden"]) + 1: - LOGGER.warn("Warning: layer size does not match the length of hidden units") - - self.convs = torch.nn.ModuleList() - self.convs.append(GCNConv(self.args["features_num"], self.args["hidden"][0])) - for i in range(self.num_layer - 2): - self.convs.append( - GCNConv(self.args["hidden"][i], self.args["hidden"][i + 1]) + def __init__( + self, num_features: int, num_classes: int, + hidden_features: _typing.Sequence[int], + dropout: float, activation_name: str + ): + super().__init__() + self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() + num_layers: int = len(hidden_features) + 1 + if num_layers == 1: + self.__convolution_layers.append( + torch_geometric.nn.GCNConv( + num_features, num_classes, add_self_loops=False + ) ) - self.convs.append( - GCNConv(self.args["hidden"][self.num_layer - 2], self.args["num_class"]) - ) - - def forward(self, data): - try: - x = data.x - except: - print("no x") - pass - try: - edge_index = data.edge_index - except: - print("no index") - pass - try: - edge_weight = data.edge_weight - except: - edge_weight = None - pass - - for i in range(self.num_layer): - x = self.convs[i](x, edge_index, edge_weight) - if i != self.num_layer - 1: - x = activate_func(x, self.args["act"]) - x = F.dropout(x, p=self.args["dropout"], training=self.training) - return F.log_softmax(x, dim=1) + else: + self.__convolution_layers.append(torch_geometric.nn.GCNConv( + num_features, hidden_features[0], add_self_loops=False + )) + for i in range(len(hidden_features)): + self.__convolution_layers.append( + torch_geometric.nn.GCNConv( + hidden_features[i], hidden_features[i + 1] + ) if i + 1 < len(hidden_features) + else torch_geometric.nn.GCNConv( + hidden_features[i], num_classes + ) + ) + self.__dropout: float = dropout + self.__activation_name: str = activation_name + + def __layer_wise_forward(self, data): + # todo: Implement this forward method + # in case that data.edge_indexes property is provided + # for Layer-wise and Node-wise sampled training + raise NotImplementedError + + def __basic_forward( + self, x: torch.Tensor, + edge_index: torch.Tensor, + edge_weight: _typing.Optional[torch.Tensor] = None + ) -> torch.Tensor: + for layer_index in range(len(self.__convolution_layers)): + x: torch.Tensor = self.__convolution_layers[layer_index]( + x, edge_index, edge_weight + ) + if layer_index + 1 < len(self.__convolution_layers): + x = activate_func(x, self.__activation_name) + x = torch.nn.functional.dropout(x, p=self.__dropout, training=self.training) + return torch.nn.functional.log_softmax(x, dim=1) + + def forward(self, data) -> torch.Tensor: + if ( + hasattr(data, "edge_indexes") and + getattr(data, "edge_indexes") is not None + ): + return self.__layer_wise_forward(data) + else: + if not (hasattr(data, "x") and hasattr(data, "edge_index")): + raise AttributeError + if not ( + type(getattr(data, "x")) == torch.Tensor and + type(getattr(data, "edge_index")) == torch.Tensor + ): + raise TypeError + x: torch.Tensor = getattr(data, "x") + edge_index: torch.LongTensor = getattr(data, "edge_index") + if ( + hasattr(data, "edge_weight") and + type(getattr(data, "edge_weight")) == torch.Tensor and + getattr(data, "edge_weight").size() == (edge_index.size(1),) + ): + edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight") + else: + edge_weight: _typing.Optional[torch.Tensor] = None + return self.__basic_forward(x, edge_index, edge_weight) @register_model("gcn") -class 
AutoGCN(BaseModel): +class AutoGCN(ClassificationModel): r""" AutoGCN. The model used in this automodel is GCN, i.e., the graph convolutional network from the @@ -99,66 +120,18 @@ class AutoGCN(BaseModel): """ def __init__( - self, num_features=None, num_classes=None, device=None, init=False, **args - ): - - super(AutoGCN, self).__init__() - - self.num_features = num_features if num_features is not None else 0 - self.num_classes = int(num_classes) if num_classes is not None else 0 - self.device = device if device is not None else "cpu" - self.init = True - - self.params = { - "features_num": self.num_features, - "num_class": self.num_classes, - } - self.space = [ - { - "parameterName": "num_layers", - "type": "DISCRETE", - "feasiblePoints": "2,3,4", - }, - { - "parameterName": "hidden", - "type": "NUMERICAL_LIST", - "numericalType": "INTEGER", - "length": 3, - "minValue": [8, 8, 8], - "maxValue": [128, 128, 128], - "scalingType": "LOG", - "cutPara": ("num_layers",), - "cutFunc": lambda x: x[0] - 1, - }, - { - "parameterName": "dropout", - "type": "DOUBLE", - "maxValue": 0.8, - "minValue": 0.2, - "scalingType": "LINEAR", - }, - { - "parameterName": "act", - "type": "CATEGORICAL", - "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], - }, - ] - - # initial point of hp search - self.hyperparams = { - "num_layers": 2, - "hidden": [16], - "dropout": 0.2, - "act": "leaky_relu", - } - - self.initialized = False - if init is True: - self.initialize() - - def initialize(self): - # """Initialize model.""" - if self.initialized: - return - self.initialized = True - self.model = GCN({**self.params, **self.hyperparams}).to(self.device) + self, num_features: int = ..., num_classes: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, **kwargs + ) -> None: + super(AutoGCN, self).__init__( + num_features, num_classes, device=device, init=init, **kwargs + ) + + def _initialize(self): + self.model = GCN( + self.num_features, self.num_classes, + self.hyper_parameter.get("hidden"), + self.hyper_parameter.get("dropout"), + self.hyper_parameter.get("act") + ).to(self.device) diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 90ee515..fbe8f6e 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -120,5 +120,5 @@ class AutoSAGE(BaseModel): hidden_features=self.hyperparams["hidden"], activation_name=self.hyperparams["act"], **self.hyperparams - ) + ).to(self.__device) self._initialized = True diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index e022631..7ec47f1 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -212,7 +212,7 @@ class BaseTrainer: pass def duplicate_from_hyper_parameter( - self, hp, model: _typing.Union[BaseModel, str, None] = None + self, hp, model: _typing.Optional[BaseModel] = ... 
) -> "BaseTrainer": """Create a new trainer with the given hyper parameter.""" raise NotImplementedError() diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index eed5feb..a04bf18 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -211,7 +211,7 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): """ optimizer = self.optimizer( - self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay ) # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) diff --git a/autogl/module/train/node_classification_full.py b/autogl/module/train/node_classification_full.py index 73a7e06..e5f4937 100644 --- a/autogl/module/train/node_classification_full.py +++ b/autogl/module/train/node_classification_full.py @@ -196,7 +196,7 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) mask = data.train_mask if train_mask is None else train_mask optimizer = self.optimizer( - self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay ) # scheduler = StepLR(optimizer, step_size=100, gamma=0.1) lr_scheduler_type = self.lr_scheduler_type diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 0c3e671..542b03a 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -1,12 +1,14 @@ import torch import logging import typing as _typing -from torch.nn import functional as F +import torch.nn.functional +import torch.utils.data from .. 
import register_trainer from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation from ..evaluation import get_feval, Logloss from ..sampling.sampler.neighbor_sampler import NeighborSampler +from ..sampling.sampler.graphsaint_sampler import * from ...model import BaseModel LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") @@ -71,45 +73,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) - self._valid_score = None + self._valid_score: _typing.Sequence[float] = [] - self._hyper_parameter_space: _typing.List[_typing.Dict[str, _typing.Any]] = [ - { - "parameterName": "max_epoch", - "type": "INTEGER", - "maxValue": 500, - "minValue": 10, - "scalingType": "LINEAR", - }, - { - "parameterName": "early_stopping_round", - "type": "INTEGER", - "maxValue": 30, - "minValue": 10, - "scalingType": "LINEAR", - }, - { - "parameterName": "lr", - "type": "DOUBLE", - "maxValue": 1e-1, - "minValue": 1e-4, - "scalingType": "LOG", - }, - { - "parameterName": "weight_decay", - "type": "DOUBLE", - "maxValue": 1e-2, - "minValue": 1e-4, - "scalingType": "LOG", - } - ] - - self._hyper_parameter: _typing.Dict[str, _typing.Any] = { - "max_epoch": self._max_epoch, - "early_stopping_round": self._early_stopping.patience, - "lr": self._learning_rate, - "weight_decay": self._weight_decay - } + self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] self.__initialized: bool = False if init: @@ -118,12 +84,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": if self.__initialized: return self - self._model.initialize() + self.model.initialize() self.__initialized = True return self def get_model(self) -> BaseModel: - return self._model + return self.model def __train_only( self, data @@ -135,7 +101,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): """ data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( - self._model.parameters(), + self.model.model.parameters(), lr=self._learning_rate, weight_decay=self._weight_decay ) if type(self._lr_scheduler_type) == str: @@ -169,17 +135,17 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) for current_epoch in range(self._max_epoch): - self._model.model.train() + self.model.model.train() """ epoch start """ for target_node_indexes, edge_indexes in train_sampler: optimizer.zero_grad() data.edge_indexes = edge_indexes - prediction = self._model.model(data) - if not hasattr(F, self.loss): + prediction = self.model.model(data) + if not hasattr(torch.nn.functional, self.loss): raise TypeError( "PyTorch does not support loss type {}".format(self.loss) ) - loss_function = getattr(F, self.loss) + loss_function = getattr(torch.nn.functional, self.loss) loss: torch.Tensor = loss_function( prediction[target_node_indexes], data.y[target_node_indexes] @@ -199,12 +165,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): validation_loss: float = -validation_results[0] else: validation_loss: float = validation_results[0] - self._early_stopping(validation_loss, self._model.model) + self._early_stopping(validation_loss, self.model.model) if self._early_stopping.early_stop: LOGGER.debug("Early stopping at %d", current_epoch) break if hasattr(data, "val_mask") 
and data.val_mask is not None: - self._early_stopping.load_checkpoint(self._model.model) + self._early_stopping.load_checkpoint(self.model.model) return self def __predict_only(self, data): @@ -214,9 +180,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): :return: the result of prediction on the given dataset """ data = data.to(self.device) - self._model.model.eval() + self.model.model.eval() with torch.no_grad(): - prediction = self._model.model(data) + prediction = self.model.model(data) return prediction def train(self, dataset, keep_valid_result: bool = True): @@ -282,7 +248,6 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) def get_name_with_hp(self) -> str: - # """Get the name of hyperparameter.""" name = "-".join( [ str(self._optimizer_class), @@ -294,14 +259,14 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ] ) name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) ) return name @@ -345,8 +310,8 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): def to(self, device: torch.device): self.device = device - if self._model is not None: - self._model.to(self.device) + if self.model is not None: + self.model.to(self.device) def duplicate_from_hyper_parameter( self, hp: _typing.Dict[str, _typing.Any], @@ -354,7 +319,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) -> "NodeClassificationNeighborSamplingTrainer": if model is None or not isinstance(model, BaseModel): - model = self._model + model = self.model model = model.from_hyper_parameter( dict( [ @@ -380,3 +345,388 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): @hyper_parameter_space.setter def hyper_parameter_space(self, hp_space): self._hyper_parameter_space = hp_space + + +@register_trainer("NodeClassificationGraphSAINTTrainer") +class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): + def __init__( + self, + model: _typing.Union[BaseModel], + num_features: int, + num_classes: int, + optimizer: _typing.Union[ + _typing.Type[torch.optim.Optimizer], str, None + ], + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs + ) -> None: + if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer + elif type(optimizer) == str: + if optimizer.lower() == "adam": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + elif optimizer.lower() == "adam" + "w": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.AdamW + elif optimizer.lower() == "sgd": + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.SGD + else: + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + else: + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._learning_rate: float = lr if lr > 0 else 1e-4 + 
self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type + self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 + self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 + early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 + self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) + + # Assign an empty initial hyper parameter space + self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] + + self._valid_result: torch.Tensor = torch.zeros(0) + self._valid_result_prob: torch.Tensor = torch.zeros(0) + self._valid_score: _typing.Sequence[float] = () + + super(NodeClassificationGraphSAINTTrainer, self).__init__( + model, num_features, num_classes, device, init, feval, loss + ) + + """ Set hyper parameters """ + if "num_subgraphs" not in kwargs: + raise KeyError + elif type(kwargs.get("num_subgraphs")) != int: + raise TypeError + elif not kwargs.get("num_subgraphs") > 0: + raise ValueError + else: + self.__num_subgraphs: int = kwargs.get("num_subgraphs") + if "sampling_budget" not in kwargs: + raise KeyError + elif type(kwargs.get("sampling_budget")) != int: + raise TypeError + elif not kwargs.get("sampling_budget") > 0: + raise ValueError + else: + self.__sampling_budget: int = kwargs.get("sampling_budget") + if "sampling_method" not in kwargs: + self.__sampling_method_identifier: str = "node" + elif type(kwargs.get("sampling_method")) != str: + self.__sampling_method_identifier: str = "node" + else: + self.__sampling_method_identifier: str = kwargs.get("sampling_method") + if self.__sampling_method_identifier.lower() not in ("node", "edge"): + self.__sampling_method_identifier: str = "node" + + self.__is_initialized: bool = False + if init: + self.initialize() + + def initialize(self): + if self.__is_initialized: + return self + self.model.initialize() + self.__is_initialized = True + return self + + def to(self, device: torch.device): + self.device = device + if self.model is not None: + self.model.to(self.device) + + def get_model(self): + return self.model + + def __train_only(self, data): + """ + The function of training on the given dataset and mask. 
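+
+        Each epoch draws ``num_subgraphs`` sub-graphs with the configured
+        sampler ("node" or "edge" budget), re-weights edges by ``1 / alpha``
+        and the per-node training loss by ``1 / lambda`` following the
+        GraphSAINT normalization, and steps the optimizer once per sub-graph.
+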
+        :param data: data of a specific graph
+        :return: self
+        """
+        data = data.to(self.device)
+        optimizer: torch.optim.Optimizer = self._optimizer_class(
+            self.model.model.parameters(),
+            lr=self._learning_rate, weight_decay=self._weight_decay
+        )
+        if type(self._lr_scheduler_type) == str:
+            if self._lr_scheduler_type.lower() == "step" + "lr":
+                lr_scheduler: torch.optim.lr_scheduler.StepLR = \
+                    torch.optim.lr_scheduler.StepLR(
+                        optimizer, step_size=100, gamma=0.1
+                    )
+            elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr":
+                lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = \
+                    torch.optim.lr_scheduler.MultiStepLR(
+                        optimizer, milestones=[30, 80], gamma=0.1
+                    )
+            elif self._lr_scheduler_type.lower() == "exponential" + "lr":
+                lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = \
+                    torch.optim.lr_scheduler.ExponentialLR(
+                        optimizer, gamma=0.1
+                    )
+            elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower():
+                lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = \
+                    torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min")
+            else:
+                lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \
+                    torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0)
+        else:
+            lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \
+                torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0)
+
+        if self.__sampling_method_identifier.lower() == "edge":
+            sub_graph_sampler = GraphSAINTRandomEdgeSampler(
+                self.__sampling_budget, self.__num_subgraphs
+            )
+        else:
+            sub_graph_sampler = GraphSAINTRandomNodeSampler(
+                self.__sampling_budget, self.__num_subgraphs
+            )
+
+        for current_epoch in range(self._max_epoch):
+            self.model.model.train()
+            """ epoch start """
+            """ Sample sub-graphs """
+            sub_graph_set = sub_graph_sampler.sample(data)
+            sub_graphs_loader: torch.utils.data.DataLoader = \
+                torch.utils.data.DataLoader(sub_graph_set)
+            integral_alpha: torch.Tensor = getattr(sub_graph_set, "alpha")
+            integral_lambda: torch.Tensor = getattr(sub_graph_set, "lambda")
+            """ iterate sub-graphs """
+            for sub_graph_data in sub_graphs_loader:
+                optimizer.zero_grad()
+                sampled_edge_indexes: torch.Tensor = \
+                    sub_graph_data.sampled_edge_indexes
+                sampled_node_indexes: torch.Tensor = \
+                    sub_graph_data.sampled_node_indexes
+                sampled_train_mask: torch.Tensor = \
+                    sub_graph_data.train_mask
+
+                sampled_alpha = integral_alpha[sampled_edge_indexes]
+                sub_graph_data.edge_weight = 1 / sampled_alpha
+
+                prediction: torch.Tensor = self.model.model(sub_graph_data)
+
+                if not hasattr(torch.nn.functional, self.loss):
+                    raise TypeError(
+                        f"PyTorch does not support loss type {self.loss}"
+                    )
+                loss_func = getattr(torch.nn.functional, self.loss)
+                unreduced_loss: torch.Tensor = loss_func(
+                    prediction[sampled_train_mask],
+                    data.y[sampled_node_indexes][sampled_train_mask],
+                    reduction="none"
+                )
+
+                sampled_lambda: torch.Tensor = integral_lambda[sampled_node_indexes]
+                sampled_train_lambda: torch.Tensor = sampled_lambda[sampled_train_mask]
+                assert unreduced_loss.size() == sampled_train_lambda.size()
+                loss_weighted_sum: torch.Tensor = \
+                    torch.sum(unreduced_loss / sampled_train_lambda)
+                loss_weighted_sum.backward()
+                optimizer.step()
+
+            if lr_scheduler is not None:
+                lr_scheduler.step()
+
+            """ Validate performance """
+            if (
+                    hasattr(data, "val_mask") and
+                    type(getattr(data, "val_mask")) == torch.Tensor
+            ):
+                validation_results: _typing.Sequence[float] = \
+                    self.evaluate((data,), "val", [self.feval[0]])
+                if self.feval[0].is_higher_better():
+                    validation_loss: float = -validation_results[0]
+                else:
+                    validation_loss: float =
validation_results[0] + self._early_stopping(validation_loss, self.model.model) + if self._early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", current_epoch) + break + if hasattr(data, "val_mask") and data.val_mask is not None: + self._early_stopping.load_checkpoint(self.model.model) + return self + + def __predict_only(self, data): + """ + The function of predicting on the given data. + :param data: data of a specific graph + :return: the result of prediction on the given dataset + """ + data = data.to(self.device) + self.model.model.eval() + with torch.no_grad(): + predicted_x: torch.Tensor = self.model.model(data) + return predicted_x + + def predict_proba( + self, dataset, mask: _typing.Optional[str] = None, + in_log_format=False + ): + """ + The function of predicting the probability on the given dataset. + :param dataset: The node classification dataset used to be predicted. + :param mask: + :param in_log_format: + :return: + """ + data = dataset[0].to(self.device) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + result = self.__predict_only(data)[_mask] + return result if in_log_format else torch.exp(result) + + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: + return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] + + def evaluate( + self, dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], + _typing.Sequence[_typing.Type[Evaluation]] + ] = None + ) -> _typing.Sequence[float]: + data = dataset[0] + data = data.to(self.device) + if feval is None: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval + else: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + prediction_probability: torch.Tensor = \ + self.predict_proba(dataset, mask) + y_ground_truth: torch.Tensor = data.y[_mask] + + eval_results = [] + for f in _feval: + try: + eval_results.append( + f.evaluate(prediction_probability, y_ground_truth) + ) + except: + eval_results.append( + f.evaluate( + prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy() + ) + ) + return eval_results + + def train(self, dataset, keep_valid_result: bool = True): + """ + The function of training on the given dataset and keeping valid result. 
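+
+        A minimal usage sketch (the budget values below are illustrative;
+        ``num_subgraphs`` and ``sampling_budget`` are required keyword
+        arguments of this trainer's constructor):
+
+            trainer = NodeClassificationGraphSAINTTrainer(
+                model, num_features, num_classes, "adam",
+                num_subgraphs=10, sampling_budget=512,
+            )
+            trainer.train(dataset)
+            score, higher_better = trainer.get_valid_score()
+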
+ :param dataset: + :param keep_valid_result: Whether to save the validation result after training + """ + data = dataset[0] + self.__train_only(data) + if keep_valid_result: + prediction: torch.Tensor = self.__predict_only(data) + self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] + self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") + + def get_valid_predict(self) -> torch.Tensor: + return self._valid_result + + def get_valid_predict_proba(self) -> torch.Tensor: + return self._valid_result_prob + + def get_valid_score(self, return_major: bool = True) -> _typing.Tuple[ + _typing.Union[float, _typing.Sequence[float]], + _typing.Union[bool, _typing.Sequence[bool]] + ]: + if return_major: + return self._valid_score[0], self.feval[0].is_higher_better() + else: + return ( + self._valid_score, [f.is_higher_better() for f in self.feval] + ) + + @property + def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: + return self._hyper_parameter_space + + @hyper_parameter_space.setter + def hyper_parameter_space( + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + ) -> None: + if not isinstance(hp_space, _typing.Sequence): + raise TypeError + self._hyper_parameter_space = hp_space + + def get_name_with_hp(self) -> str: + name = "-".join( + [ + str(self._optimizer_class), + str(self._learning_rate), + str(self._max_epoch), + str(self._early_stopping.patience), + str(self.model), + str(self.device), + ] + ) + name = ( + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) + ) + return name + + def duplicate_from_hyper_parameter( + self, hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None + ) -> "NodeClassificationGraphSAINTTrainer": + if model is None or not isinstance(model, BaseModel): + model: BaseModel = self.model + model = model.from_hyper_parameter( + dict( + [ + x for x in hp.items() + if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] + ] + ) + ) + return NodeClassificationGraphSAINTTrainer( + model, self.num_features, self.num_classes, + self._optimizer_class, + device=self.device, init=True, + feval=self.feval, loss=self.loss, + lr_scheduler_type=self._lr_scheduler_type, + **hp + ) diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py new file mode 100644 index 0000000..64972fc --- /dev/null +++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py @@ -0,0 +1,105 @@ +import copy +import typing as _typing +import torch.utils.data +import torch_geometric + + +class _SubGraphSet(torch.utils.data.Dataset[_typing.Any]): + def __init__(self, datalist: _typing.Sequence[_typing.Any], *args, **kwargs): + self.__graphs: _typing.Sequence[_typing.Any] = datalist + self.__remaining_args: _typing.Sequence[_typing.Any] = args + for key, value in kwargs.items(): + setattr(self, key, value) + + def __len__(self) -> int: + return len(self.__graphs) + + def __getitem__(self, index: int) -> _typing.Any: + if not 0 <= index < len(self.__graphs): + raise IndexError + return self.__graphs[index] + + +class _GraphSAINTSubGraphSampler: + def __init__( + self, sampler_class: _typing.Type[torch_geometric.data.GraphSAINTSampler], + budget: int, num_graphs: int = 1, walk_length: int = 1, num_workers: int = 0 + ): + """ + :param sampler_class: class of 
torch_geometric.data.GraphSAINTSampler
+        :param budget: general budget
+        :param num_graphs: number of sub-graphs to sample, i.e. N in the paper
+        :param walk_length: walk length for RandomWalk Sampler
+        :param num_workers: how many sub-processes to use for data loading.
+            0 means that the data will be loaded in the main process.
+        """
+        self.__sampler_class: _typing.Type[
+            torch_geometric.data.GraphSAINTSampler
+        ] = sampler_class
+        self.__budget: int = budget
+        self.__num_graphs: int = num_graphs
+        self.__walk_length: int = walk_length
+        self.__num_workers: int = num_workers if num_workers > 0 else 0
+
+    def sample(self, _integral_data) -> _SubGraphSet:
+        """
+        :param _integral_data: conventional data for an integral graph
+        :return: instance of _SubGraphSet
+        """
+        data = copy.copy(_integral_data)
+        data.sampled_node_indexes = torch.arange(data.num_nodes, dtype=torch.int64)
+        data.sampled_edge_indexes = torch.arange(data.num_edges, dtype=torch.int64)
+        if self.__sampler_class == torch_geometric.data.GraphSAINTRandomWalkSampler:
+            _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = \
+                torch_geometric.data.GraphSAINTRandomWalkSampler(
+                    data, self.__budget, self.__walk_length, self.__num_graphs,
+                    num_workers=self.__num_workers
+                )
+        else:
+            _sampler: torch_geometric.data.GraphSAINTSampler = \
+                self.__sampler_class(
+                    data, self.__budget, self.__num_graphs,
+                    num_workers=self.__num_workers
+                )
+        """ Sample sub-graphs """
+        datalist: list = [d for d in _sampler]
+        """ Compute the normalization """
+        node_sampled_count = torch.zeros(data.num_nodes, dtype=torch.int64)
+        edge_sampled_count = torch.zeros(data.num_edges, dtype=torch.int64)
+        concatenated_sampled_nodes: torch.Tensor = torch.cat(
+            [sub_graph.sampled_node_indexes for sub_graph in datalist]
+        )
+        concatenated_sampled_edges: torch.Tensor = torch.cat(
+            [sub_graph.sampled_edge_indexes for sub_graph in datalist]
+        )
+        for current_sampled_node_index in concatenated_sampled_nodes.unique():
+            node_sampled_count[current_sampled_node_index] = \
+                torch.where(concatenated_sampled_nodes == current_sampled_node_index)[0].size(0)
+        for current_sampled_edge_index in concatenated_sampled_edges.unique():
+            edge_sampled_count[current_sampled_edge_index] = \
+                torch.where(concatenated_sampled_edges == current_sampled_edge_index)[0].size(0)
+        _alpha: torch.Tensor = edge_sampled_count / node_sampled_count[data.edge_index[1]]
+        _alpha[torch.isnan(_alpha) | torch.isinf(_alpha)] = 0
+        _lambda: torch.Tensor = node_sampled_count / self.__num_graphs
+        return _SubGraphSet(datalist, **{"alpha": _alpha, "lambda": _lambda})
+
+
+class GraphSAINTRandomNodeSampler(_GraphSAINTSubGraphSampler):
+    def __init__(self, node_budget: int, num_graphs: int = 1):
+        super(GraphSAINTRandomNodeSampler, self).__init__(
+            torch_geometric.data.GraphSAINTNodeSampler, node_budget, num_graphs
+        )
+
+
+class GraphSAINTRandomEdgeSampler(_GraphSAINTSubGraphSampler):
+    def __init__(self, edge_budget: int, num_graphs: int = 1):
+        super(GraphSAINTRandomEdgeSampler, self).__init__(
+            torch_geometric.data.GraphSAINTEdgeSampler, edge_budget, num_graphs
+        )
+
+
+class GraphSAINTRandomWalkSampler(_GraphSAINTSubGraphSampler):
+    def __init__(self, edge_budget: int, num_graphs: int = 1, walk_length: int = 4):
+        super(GraphSAINTRandomWalkSampler, self).__init__(
+            torch_geometric.data.GraphSAINTRandomWalkSampler, edge_budget, num_graphs, walk_length
+        )

From a4831e7db52d24b960958bbcb7ac25a3697aaa70 Mon Sep 17 00:00:00 2001
From: cluster32
Date: Thu, 15 Apr 2021 10:10:38 +0800
Subject: [PATCH 043/144] adjust nas --- autogl/module/hpo/__init__.py | 2 - autogl/module/hpo/nas.py | 61 ------------------- autogl/module/hpo/test.py | 40 ------------- autogl/module/{hpo => nas}/darts.py | 15 +++-- autogl/module/nas/nas.py | 45 ++++++++++++++ autogl/module/nas/space.py | 92 +++++++++++++++++++++++++++++ autogl/module/{hpo => nas}/utils.py | 0 examples/test_nas.py | 31 ++-------- 8 files changed, 152 insertions(+), 134 deletions(-) delete mode 100644 autogl/module/hpo/nas.py delete mode 100644 autogl/module/hpo/test.py rename autogl/module/{hpo => nas}/darts.py (93%) create mode 100644 autogl/module/nas/nas.py create mode 100644 autogl/module/nas/space.py rename autogl/module/{hpo => nas}/utils.py (100%) diff --git a/autogl/module/hpo/__init__.py b/autogl/module/hpo/__init__.py index cd4bbba..e8fe41a 100644 --- a/autogl/module/hpo/__init__.py +++ b/autogl/module/hpo/__init__.py @@ -28,7 +28,6 @@ from .mocmaes_advisorchoco import MocmaesAdvisorChoco from .quasi_advisorchoco import QuasiAdvisorChoco from .rand_advisor import RandAdvisor from .tpe_advisorhpo import TpeAdvisorHPO -from .test import TestHPO def build_hpo_from_name(name: str) -> BaseHPOptimizer: @@ -63,6 +62,5 @@ __all__ = [ "QuasiAdvisorChoco", "RandAdvisor", "TpeAdvisorHPO", - "TestHPO", "build_hpo_from_name", ] diff --git a/autogl/module/hpo/nas.py b/autogl/module/hpo/nas.py deleted file mode 100644 index c36932f..0000000 --- a/autogl/module/hpo/nas.py +++ /dev/null @@ -1,61 +0,0 @@ -from torch_geometric.nn import GCNConv, SAGEConv -from nni.nas.pytorch import mutables -import torch.nn as nn - - -class BaseNAS: - def search(self, space, dset, trainer): - """ - The main process of NAS. - Parameters - ---------- - space : BaseArchitectureSpace - No implementation yet - dataset : ...datasets - Dataset to train and evaluate. - trainer : ..train.BaseTrainer - Including model, giving HP space and using for training - - Returns - ------- - model: ..train.BaseTrainer - The trainer including the best trained model - """ - -class GraphSpace(nn.Module): - def __init__(self, *arg, **kwargs): - super().__init__() - - def instantiate(self, input_dim, hidden_dim, output_dim, ops, *arg, **kwargs): - """self.op1 = mutables.LayerChoice([GCNConv(input_dim, hidden_dim),SAGEConv(input_dim, hidden_dim)]) - self.op2 = mutables.LayerChoice([ - GCNConv(hidden_dim, output_dim), - SAGEConv(hidden_dim, output_dim) - ], key = "2")""" - self.input_dim = input_dim - self.hidden_dim = hidden_dim - self.output_dim = output_dim - self.ops = ops - self.op1 = mutables.LayerChoice([op(input_dim, hidden_dim) for op in ops], key = "1") - self.op2 = mutables.LayerChoice([op(hidden_dim, output_dim) for op in ops], key = "2") - - def reinstantiate(self): - self.instantiate(self.input_dim, self.hidden_dim, self.output_dim, self.ops) - - def forward(self, data): - x = self.op1(data.x, data.edge_index) - x = self.op2(x, data.edge_index) - return x - -class BaseEstimator: - def infer(self, model, dataset): - pass - -class DartsNodeClfEstimator(BaseEstimator): - def infer(self, model, dataset): - dset = dataset[0] - pred = model(dset)[dset.train_mask] - y = dset.y[dset.train_mask] - loss_func = nn.CrossEntropyLoss() - loss = loss_func(pred, y) - return loss, loss diff --git a/autogl/module/hpo/test.py b/autogl/module/hpo/test.py deleted file mode 100644 index 778e5f7..0000000 --- a/autogl/module/hpo/test.py +++ /dev/null @@ -1,40 +0,0 @@ -import hyperopt - -from torch_geometric.nn import GCNConv, SAGEConv -from . 
import register_hpo -from .nas import BaseEstimator, GraphSpace -from .darts import Darts -from .base import BaseHPOptimizer, TimeTooLimitedError - - -@register_hpo("test") -class TestHPO(BaseHPOptimizer): - """ - Random search algorithm in `advisor` package - See https://github.com/tobegit3hub/advisor for the package - See .advisorbase.AdvisorBaseHPOptimizer for more information - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def optimize(self, trainer, dataset, time_limit=None, memory_limit=None): - num_features = dataset[0].x.shape[1] - num_classes = dataset.num_classes - - op1 = lambda x, y: GCNConv(x, y) - op2 = lambda x, y: SAGEConv(x, y) - ops = [op1, op2] - model = GraphSpace() - model.instantiate(num_features, 64, num_classes, ops) - tr = BaseEstimator() - nas = Darts() - a = nas.search(model, dataset, tr) - print(a) - print(type(a)) - return 1, 2 - - @classmethod - def build_hpo_from_args(cls, args): - """Build a new hpo instance.""" - return cls(args) diff --git a/autogl/module/hpo/darts.py b/autogl/module/nas/darts.py similarity index 93% rename from autogl/module/hpo/darts.py rename to autogl/module/nas/darts.py index 797c877..2a9aad4 100644 --- a/autogl/module/hpo/darts.py +++ b/autogl/module/nas/darts.py @@ -8,6 +8,7 @@ import torch.nn as nn import torch.nn.functional as F from .nas import BaseNAS +from .space import SpaceModel from .utils import AverageMeterGroup, replace_layer_choice, replace_input_choice from nni.nas.pytorch.fixed import apply_fixed_architecture @@ -110,7 +111,9 @@ class Darts(BaseNAS): device=None, log_frequency=None, arc_learning_rate=3.0E-4, unrolled=False):""" - def __init__(self, *args, **kwargs): + def __init__(self, device="cuda", *args, **kwargs): + super().__init__(*args, **kwargs) + self.device = device self.num_epochs = kwargs.get("num_epochs", 5) self.workers = 4 self.device = "cuda" @@ -126,7 +129,7 @@ class Darts(BaseNAS): main process """ self.model = space - self.dataset = dset + self.dataset = dset#.to(self.device) self.trainer = trainer self.model_optim = torch.optim.SGD( self.model.parameters(), lr=0.01, weight_decay=3e-4 @@ -135,6 +138,7 @@ class Darts(BaseNAS): self.nas_modules = [] replace_layer_choice(self.model, DartsLayerChoice, self.nas_modules) replace_input_choice(self.model, DartsInputChoice, self.nas_modules) + self.model = self.model.to(self.device) ctrl_params = {} for _, m in self.nas_modules: @@ -163,9 +167,10 @@ class Darts(BaseNAS): ) selection = self.export() - space.reinstantiate() - apply_fixed_architecture(space, selection) - return space + return SpaceModel(space, selection, self.device) + #space.reinstantiate() + #apply_fixed_architecture(space, selection) + #return space #return self.export() def _train_one_epoch(self, epoch): diff --git a/autogl/module/nas/nas.py b/autogl/module/nas/nas.py new file mode 100644 index 0000000..7b71909 --- /dev/null +++ b/autogl/module/nas/nas.py @@ -0,0 +1,45 @@ +from torch_geometric.nn import GCNConv, SAGEConv +from nni.nas.pytorch import mutables +import torch.nn as nn + + +class BaseNAS: + def to(self, device): + """ + change the device of the whole process + """ + self.device = device + + def search(self, space, dset, trainer): + """ + The main process of NAS. + Parameters + ---------- + space : BaseArchitectureSpace + No implementation yet + dataset : ...datasets + Dataset to train and evaluate. 
+    trainer : ..train.BaseTrainer
+        The trainer containing the model, providing the HP space, and used for training
+
+    Returns
+    -------
+    model: ..train.BaseTrainer
+        The trainer containing the best trained model
+    """
+
+class BaseEstimator:
+    def __init__(self, device="cuda"):
+        self.device = device
+
+    def infer(self, model, dataset):
+        pass
+
+class DartsNodeClfEstimator(BaseEstimator):
+    def infer(self, model, dataset):
+        dset = dataset[0].to(self.device)
+        pred = model(dset)[dset.train_mask]
+        y = dset.y[dset.train_mask]
+        loss_func = nn.CrossEntropyLoss()
+        loss = loss_func(pred, y)
+        return loss, loss
diff --git a/autogl/module/nas/space.py b/autogl/module/nas/space.py
new file mode 100644
index 0000000..0351ee9
--- /dev/null
+++ b/autogl/module/nas/space.py
@@ -0,0 +1,92 @@
+from nni.nas.pytorch import mutables
+from autogl.module.model import BaseModel
+import torch.nn as nn
+import torch
+from autogl.utils import get_logger
+from nni.nas.pytorch.fixed import apply_fixed_architecture
+from copy import deepcopy
+
+class BaseSpace(nn.Module):
+    def __init__(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None, init=False):
+        super().__init__()
+        self.input_dim = input_dim
+        self.hidden_dim = hidden_dim
+        self.output_dim = output_dim
+        self.ops = ops
+        self._initialized = False
+        if init:
+            self.instantiate()
+
+    def instantiate(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None):
+        """
+        instantiate modules in the space
+        """
+        self.input_dim = input_dim or self.input_dim
+        self.hidden_dim = hidden_dim or self.hidden_dim
+        self.output_dim = output_dim or self.output_dim
+        self.ops = ops or self.ops
+        self._initialized = True
+
+    def forward(self, data):
+        pass
+
+class SpaceModel(BaseModel):
+    _logger = get_logger('space model')
+    def __init__(self, space_model: BaseSpace, selection, device=torch.device('cuda')):
+        super().__init__(init=True)
+        space_model.instantiate()
+        self.init = True
+        self.space = []
+        self.hyperparams = {}
+        self._model = space_model.to(device)
+        self.num_features = self._model.input_dim
+        self.num_classes = self._model.output_dim
+        self.selection = selection
+        apply_fixed_architecture(self._model, selection, verbose=False)
+        self.params = {
+            "num_class": self.num_classes,
+            "features_num": self.num_features
+        }
+        self.device = device
+
+    def to(self, device):
+        if isinstance(device, (str, torch.device)):
+            self.device = device
+        return super().to(device)
+
+    def forward(self, *args, **kwargs):
+        return self._model.forward(*args, **kwargs)
+
+    def from_hyper_parameter(self, hp):
+        """
+        Receives no hp; simply copies self and resets the learnable parameters.
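+
+        The architecture ``selection`` is kept fixed: the copied space is
+        instantiated anew and ``apply_fixed_architecture`` is applied again,
+        so only the learnable weights are re-initialized.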
+ """ + + """self._model.instantiate() + apply_fixed_architecture(self._model, self.selection, verbose=False) + self.to(self.device) + return self""" + + ret_self = deepcopy(self) + ret_self._model.instantiate() + apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False) + ret_self.to(self.device) + return ret_self + + @property + def model(self): + return self._model + +class GraphSpace(BaseSpace): + def __init__(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None, init=False): + super().__init__(input_dim, hidden_dim, output_dim, ops, init) + + def instantiate(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None, *args, **kwargs): + super().instantiate(input_dim, hidden_dim, output_dim, ops) + self.op1 = mutables.LayerChoice([op(self.input_dim, self.hidden_dim) for op in self.ops], key = "1") + self.op2 = mutables.LayerChoice([op(self.hidden_dim, self.output_dim) for op in self.ops], key = "2") + + def forward(self, data): + x = self.op1(data.x, data.edge_index) + x = self.op2(x, data.edge_index) + return x diff --git a/autogl/module/hpo/utils.py b/autogl/module/nas/utils.py similarity index 100% rename from autogl/module/hpo/utils.py rename to autogl/module/nas/utils.py diff --git a/examples/test_nas.py b/examples/test_nas.py index fe039ca..876f456 100644 --- a/examples/test_nas.py +++ b/examples/test_nas.py @@ -6,10 +6,11 @@ from torch_geometric.nn.conv.gcn_conv import GCNConv sys.path.append('../') import torch from autogl.solver import AutoNodeClassifier -from autogl.module.hpo.nas import GraphSpace, DartsNodeClfEstimator +from autogl.module.nas.nas import DartsNodeClfEstimator +from autogl.module.nas.space import GraphSpace from autogl.datasets import build_dataset_from_name from autogl.module.model import BaseModel -from autogl.module.hpo.darts import Darts +from autogl.module.nas.darts import Darts from autogl.utils import get_logger class MyGraphSpace(GraphSpace): @@ -37,7 +38,6 @@ class MyGraphSpace(GraphSpace): self._initialized = True class SpaceModel(BaseModel): - _logger = get_logger('space model') def __init__(self, space_model: MyGraphSpace, selection, device=torch.device('cuda')): super().__init__(init=True) space_model.reinstantiate() @@ -77,27 +77,6 @@ class SpaceModel(BaseModel): def model(self): return self._model - def set_num_classes(self, num_classes): - """ - TODO: if we can support resetting this (suggested), please implement - """ - self._logger.error("Cannot reset num classes for nas model!") - raise ValueError("Cannot reset num classes for nas model!") - - def set_num_features(self, num_features): - """ - TODO: if we can support resetting this (suggested), please implement - """ - self._logger.error("Cannot reset num features for nas model!") - raise ValueError("Cannot reset num features for nas model!") - - def set_num_graph_features(self, num_graph_features): - """ - TODO: if we can support resetting this (suggested), please implement - """ - self._logger.error("Cannot reset num graph features for nas model!") - raise ValueError("Cannot reset num graph features for nas model!") - class MyDarts(Darts): def __init__(self, device="cuda", *args, **kwargs): super().__init__(*args, **kwargs) @@ -126,8 +105,8 @@ if __name__ == '__main__': hpo_module="random", max_evals=10, ensemble_module=None, - nas_algorithms=[MyDarts()], - nas_spaces=[MyGraphSpace(hidden_dim=64, ops=[GATConv, GCNConv])], + nas_algorithms=[Darts()], + nas_spaces=[GraphSpace(hidden_dim=64, ops=[GATConv, GCNConv])], 
nas_estimators=[DartsNodeClfEstimator()] ) solver.fit(dataset) From 7106622ec1fc361cae62466eb177da73ad3c732c Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Fri, 16 Apr 2021 09:37:47 +0000 Subject: [PATCH 044/144] change url of paper --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3485b42..dc0fe10 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Feel free to open issues ## News! -- 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://openreview.net/forum?id=0yHwpLeInDn) are accepted in _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper following methods [here](#Cite). +- 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://arxiv.org/abs/2104.04987) are accepted in _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper following methods [here](#Cite). ## Introduction @@ -111,7 +111,7 @@ The documentation will be automatically generated under `docs/_build/html` ## Cite -You can cite [our paper](https://openreview.net/forum?id=0yHwpLeInDn) as follows if you use this code in your own work: +You can cite [our paper](https://arxiv.org/abs/2104.04987) as follows if you use this code in your own work: ``` @inproceedings{ guan2021autogl, From 13106d41d18dd301adc27c1b8b11681cacb035c5 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 21 Apr 2021 02:08:47 +0000 Subject: [PATCH 045/144] add stratify support, fix bugs --- autogl/datasets/utils.py | 9 ++++++--- .../module/train/sampling/sampler/graphsaint_sampler.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index 4afe00a..adc3923 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -2,7 +2,7 @@ from pdb import set_trace import torch import numpy as np from torch_geometric.data import DataLoader -from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import StratifiedKFold, KFold def get_label_number(dataset): @@ -179,7 +179,7 @@ def random_splits_mask_class( return dataset -def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42): +def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False): r"""Cross validation for graph classification data, returning one fold with specific idx in autogl.datasets or pyg.Dataloader(default) Parameters @@ -196,7 +196,10 @@ def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42): random_seed : int random_state for sklearn.model_selection.StratifiedKFold """ - skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) + if stratify: + skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) + else: + skf = KFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed) idx_list = [] # BUG: from pytorch_geometric, not sure whether it is a bug. 
The dataset.data will return diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py index 64972fc..9c5c978 100644 --- a/autogl/module/train/sampling/sampler/graphsaint_sampler.py +++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py @@ -4,7 +4,7 @@ import torch.utils.data import torch_geometric -class _SubGraphSet(torch.utils.data.Dataset[_typing.Any]): +class _SubGraphSet(torch.utils.data.Dataset): def __init__(self, datalist: _typing.Sequence[_typing.Any], *args, **kwargs): self.__graphs: _typing.Sequence[_typing.Any] = datalist self.__remaining_args: _typing.Sequence[_typing.Any] = args From 8c20795a1cfa5099d50a01226bf9ed4cdc043264 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 22 Apr 2021 01:27:52 +0000 Subject: [PATCH 046/144] rebase nas framework --- .pylintrc | 3 +- autogl/module/nas/__init__.py | 3 + autogl/module/nas/algorithm/__init__.py | 8 ++ autogl/module/nas/algorithm/base.py | 52 ++++++++ autogl/module/nas/{ => algorithm}/darts.py | 141 +++++++++----------- autogl/module/nas/estimator/__init__.py | 8 ++ autogl/module/nas/estimator/base.py | 42 ++++++ autogl/module/nas/estimator/one_shot.py | 21 +++ autogl/module/nas/nas.py | 45 ------- autogl/module/nas/space.py | 92 ------------- autogl/module/nas/space/__init__.py | 4 + autogl/module/nas/space/base.py | 55 ++++++++ autogl/module/nas/space/single_path.py | 116 ++++++++++++++++ autogl/solver/classifier/node_classifier.py | 38 ++++-- autogl/utils/__init__.py | 3 +- autogl/utils/device.py | 7 + examples/test_nas.py | 125 ++++------------- 17 files changed, 433 insertions(+), 330 deletions(-) create mode 100644 autogl/module/nas/__init__.py create mode 100644 autogl/module/nas/algorithm/__init__.py create mode 100644 autogl/module/nas/algorithm/base.py rename autogl/module/nas/{ => algorithm}/darts.py (56%) create mode 100644 autogl/module/nas/estimator/__init__.py create mode 100644 autogl/module/nas/estimator/base.py create mode 100644 autogl/module/nas/estimator/one_shot.py delete mode 100644 autogl/module/nas/nas.py delete mode 100644 autogl/module/nas/space.py create mode 100644 autogl/module/nas/space/__init__.py create mode 100644 autogl/module/nas/space/base.py create mode 100644 autogl/module/nas/space/single_path.py create mode 100644 autogl/utils/device.py diff --git a/.pylintrc b/.pylintrc index c3f9e8d..0ad8b3f 100644 --- a/.pylintrc +++ b/.pylintrc @@ -143,7 +143,8 @@ disable=print-statement, too-many-arguments, too-many-branches, too-many-statements, - too-many-locals + too-many-locals, + relative-beyond-top-level # Enable the message, report, category or checker with the given id(s). 
You can # either give multiple identifier separated by comma (,) or put this option diff --git a/autogl/module/nas/__init__.py b/autogl/module/nas/__init__.py new file mode 100644 index 0000000..b31ae1f --- /dev/null +++ b/autogl/module/nas/__init__.py @@ -0,0 +1,3 @@ +from .algorithm import * +from .estimator import * +from .space import * diff --git a/autogl/module/nas/algorithm/__init__.py b/autogl/module/nas/algorithm/__init__.py new file mode 100644 index 0000000..612102a --- /dev/null +++ b/autogl/module/nas/algorithm/__init__.py @@ -0,0 +1,8 @@ +""" +NAS algorithms +""" + +from .base import BaseNAS +from .darts import Darts + +__all__ = ["BaseNAS", "Darts"] diff --git a/autogl/module/nas/algorithm/base.py b/autogl/module/nas/algorithm/base.py new file mode 100644 index 0000000..f8c4f87 --- /dev/null +++ b/autogl/module/nas/algorithm/base.py @@ -0,0 +1,52 @@ +""" +Base class for algorithm +""" +from ...model import BaseModel +import torch +from abc import abstractmethod +from ....utils import get_device + + +class BaseNAS: + """ + Base NAS algorithm class + + Parameters + ---------- + device: str or torch.device + The device of the whole process + """ + + def __init__(self, device="auto") -> None: + self.device = get_device(device) + + def to(self, device): + """ + Change the device of the whole NAS search process + + Parameters + ---------- + device: str or torch.device + """ + self.device = get_device(device) + + @abstractmethod + def search(self, space, dataset, estimator) -> BaseModel: + """ + The search process of NAS. + + Parameters + ---------- + space : autogl.module.nas.space.BaseSpace + The search space. Constructed following nni. + dataset : autogl.datasets + Dataset to perform search on. + estimator : autogl.module.nas.estimator.BaseEstimator + The estimator to compute loss & metrics. + + Returns + ------- + model: autogl.module.model.BaseModel + The searched model. + """ + raise NotImplementedError() diff --git a/autogl/module/nas/darts.py b/autogl/module/nas/algorithm/darts.py similarity index 56% rename from autogl/module/nas/darts.py rename to autogl/module/nas/algorithm/darts.py index 2a9aad4..00ac3a5 100644 --- a/autogl/module/nas/darts.py +++ b/autogl/module/nas/algorithm/darts.py @@ -1,16 +1,17 @@ # Modified from NNI -import copy import logging import torch +import torch.optim import torch.nn as nn import torch.nn.functional as F -from .nas import BaseNAS -from .space import SpaceModel -from .utils import AverageMeterGroup, replace_layer_choice, replace_input_choice -from nni.nas.pytorch.fixed import apply_fixed_architecture +from .base import BaseNAS +from ..estimator.base import BaseEstimator +from ..space import BaseSpace +from ..utils import replace_layer_choice, replace_input_choice +from ...model.base import BaseModel _logger = logging.getLogger(__name__) @@ -95,8 +96,8 @@ class Darts(BaseNAS): Batch size. workers : int Workers for data loading. - device : torch.device - ``torch.device("cpu")`` or ``torch.device("cuda")``. + device : str or torch.device + The device of the whole process log_frequency : int Step count per logging. arc_learning_rate : float @@ -105,43 +106,31 @@ class Darts(BaseNAS): ``True`` if using second order optimization, else first order optimization. 
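For intuition, the `DartsLayerChoice` this trainer relies on follows the standard DARTS relaxation: every candidate operation is evaluated and the outputs are mixed with softmax weights, so the architecture choice stays differentiable. A minimal sketch of that idea (`SoftmaxMixedOp` is an illustrative class, not the one in this file):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class SoftmaxMixedOp(nn.Module):
        def __init__(self, ops):
            super().__init__()
            self.ops = nn.ModuleList(ops)
            # one architecture logit per candidate operation
            self.alpha = nn.Parameter(torch.randn(len(ops)) * 1e-3)

        def forward(self, *args, **kwargs):
            weights = F.softmax(self.alpha, dim=-1)
            # the weighted sum keeps the choice differentiable w.r.t. alpha
            return sum(w * op(*args, **kwargs) for w, op in zip(weights, self.ops))

    mixed = SoftmaxMixedOp([nn.Linear(8, 8), nn.Identity()])
    out = mixed(torch.randn(4, 8))  # all candidates run, outputs are blended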
""" - """def __init__(self, model, loss, metrics, optimizer, - num_epochs, dataset, grad_clip=5., - learning_rate=2.5E-3, batch_size=64, workers=4, - device=None, log_frequency=None, - arc_learning_rate=3.0E-4, unrolled=False):""" - - def __init__(self, device="cuda", *args, **kwargs): - super().__init__(*args, **kwargs) - self.device = device - self.num_epochs = kwargs.get("num_epochs", 5) + def __init__(self, num_epochs=5, device="cuda"): + super().__init__(device=device) + self.num_epochs = num_epochs self.workers = 4 - self.device = "cuda" self.log_frequency = None - - # for _, module in self.nas_modules: - # module.to(self.device) - - # use the same architecture weight for modules with duplicated names - - def search(self, space, dset, trainer): - """ - main process - """ - self.model = space - self.dataset = dset#.to(self.device) - self.trainer = trainer - self.model_optim = torch.optim.SGD( - self.model.parameters(), lr=0.01, weight_decay=3e-4 + self.gradient_clip = 5.0 + self.model_optimizer = torch.optim.Adam + self.arch_optimizer = torch.optim.Adam + self.model_lr = 0.001 + self.model_wd = 5e-4 + self.arch_lr = 3e-4 + self.arch_wd = 1e-3 + + def search(self, space: BaseSpace, dataset, estimator): + model_optim = self.model_optimizer( + space.parameters(), self.model_lr, weight_decay=self.model_wd ) - self.nas_modules = [] - replace_layer_choice(self.model, DartsLayerChoice, self.nas_modules) - replace_input_choice(self.model, DartsInputChoice, self.nas_modules) - self.model = self.model.to(self.device) + nas_modules = [] + replace_layer_choice(space, DartsLayerChoice, nas_modules) + replace_input_choice(space, DartsInputChoice, nas_modules) + space = space.to(self.device) ctrl_params = {} - for _, m in self.nas_modules: + for _, m in nas_modules: if m.name in ctrl_params: assert ( m.alpha.size() == ctrl_params[m.name].size() @@ -149,59 +138,53 @@ class Darts(BaseNAS): m.alpha = ctrl_params[m.name] else: ctrl_params[m.name] = m.alpha - self.ctrl_optim = torch.optim.Adam( - list(ctrl_params.values()), 3e-4, betas=(0.5, 0.999), weight_decay=1.0e-3 + arch_optim = self.arch_optimizer( + list(ctrl_params.values()), self.arch_lr, weight_decay=self.arch_wd ) - self.grad_clip = 5.0 - - for step in range(self.num_epochs): - self._train_one_epoch(step) - if self.log_frequency is not None and step % self.log_frequency == 0: - _logger.info( - "Epoch [%s/%s] Step [%s/%s] %s", - epoch + 1, - self.num_epochs, - step + 1, - len(self.train_loader), - meters, - ) - - selection = self.export() - return SpaceModel(space, selection, self.device) - #space.reinstantiate() - #apply_fixed_architecture(space, selection) - #return space - #return self.export() - - def _train_one_epoch(self, epoch): - self.model.train() - meters = AverageMeterGroup() + + for epoch in range(self.num_epochs): + self._train_one_epoch( + epoch, space, dataset, estimator, model_optim, arch_optim + ) + + selection = self.export(nas_modules) + return space.export(selection, self.device) + + def _train_one_epoch( + self, + epoch, + model: BaseSpace, + dataset, + estimator, + model_optim: torch.optim.Optimizer, + arch_optim: torch.optim.Optimizer, + ): + model.train() # phase 1. 
architecture step
-        self.ctrl_optim.zero_grad()
+        arch_optim.zero_grad()
         # only the first-order (non-unrolled) approximation is implemented here
-        _, loss = self._infer()
+        _, loss = self._infer(model, dataset, estimator, "val")
         loss.backward()
-        self.ctrl_optim.step()
+        arch_optim.step()

         # phase 2: child network step
-        self.model_optim.zero_grad()
-        metric, loss = self._infer()
+        model_optim.zero_grad()
+        metric, loss = self._infer(model, dataset, estimator, "train")
         loss.backward()
-        if self.grad_clip > 0:
-            nn.utils.clip_grad_norm_(
-                self.model.parameters(), self.grad_clip
-            )  # gradient clipping
-        self.model_optim.step()
-
-    def _infer(self):
-        metric, loss = self.trainer.infer(self.model, self.dataset)
+        # gradient clipping
+        if self.gradient_clip > 0:
+            nn.utils.clip_grad_norm_(model.parameters(), self.gradient_clip)
+        model_optim.step()
+
+    def _infer(self, model: BaseModel, dataset, estimator: BaseEstimator, mask="train"):
+        metric, loss = estimator.infer(model, dataset, mask=mask)
         return metric, loss

     @torch.no_grad()
-    def export(self):
+    def export(self, nas_modules) -> dict:
         result = dict()
-        for name, module in self.nas_modules:
+        for name, module in nas_modules:
             if name not in result:
                 result[name] = module.export()
         return result
diff --git a/autogl/module/nas/estimator/__init__.py b/autogl/module/nas/estimator/__init__.py
new file mode 100644
index 0000000..9184f64
--- /dev/null
+++ b/autogl/module/nas/estimator/__init__.py
@@ -0,0 +1,8 @@
+"""
+NAS Estimator
+"""
+
+from .base import BaseEstimator
+from .one_shot import OneShotEstimator
+
+__all__ = ["BaseEstimator", "OneShotEstimator"]
diff --git a/autogl/module/nas/estimator/base.py b/autogl/module/nas/estimator/base.py
new file mode 100644
index 0000000..388a951
--- /dev/null
+++ b/autogl/module/nas/estimator/base.py
@@ -0,0 +1,42 @@
+"""
+Base estimator of NAS
+"""
+
+from abc import abstractmethod
+from ..space import BaseSpace
+from typing import Tuple
+import torch
+
+
+class BaseEstimator:
+    """
+    The estimator of NAS model.
+    """
+
+    @abstractmethod
+    def infer(
+        self, model: BaseSpace, dataset, mask="train"
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Calculate the loss and metrics of the given model on the given dataset
+        using the specified mask.
+
+        Parameters
+        ----------
+        model: autogl.module.nas.space.BaseSpace
+            The model in space.
+
+        dataset: autogl.dataset
+            The dataset to perform inference on.
+
+        mask: str
+            The mask to evaluate on the dataset.
+
+        Return
+        ------
+        metric: torch.Tensor
+            the metric on given datasets.
+        loss: torch.Tensor
+            the loss on given datasets. Note that loss should be differentiable.
+        """
+        raise NotImplementedError()
diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py
new file mode 100644
index 0000000..e887138
--- /dev/null
+++ b/autogl/module/nas/estimator/one_shot.py
@@ -0,0 +1,21 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..space import BaseSpace
+from .base import BaseEstimator
+
+
+class OneShotEstimator(BaseEstimator):
+    """
+    One shot estimator.
+
+    Use model directly to get estimations.
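Note that the `infer` implementation that follows returns the training loss in both positions (metric and loss). A hedged sketch of a variant that also reports accuracy, assuming the same PyG-style data layout (`data.y` plus boolean `{mask}_mask` attributes); `infer_with_acc` is an illustrative helper, not part of the patch:

    import torch.nn.functional as F

    def infer_with_acc(model, dataset, mask="train"):
        device = next(model.parameters()).device
        data = dataset[0].to(device)
        mask_tensor = getattr(data, f"{mask}_mask")
        pred = model(data)[mask_tensor]   # log-probabilities from the space model
        y = data.y[mask_tensor]
        loss = F.nll_loss(pred, y)        # differentiable, drives the backward pass
        acc = (pred.argmax(dim=1) == y).float().mean()  # non-differentiable metric
        return acc, loss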
+ """ + + def infer(self, model: BaseSpace, dataset, mask="train"): + device = next(model.parameters()).device + dset = dataset[0].to(device) + pred = model(dset)[getattr(dset, f"{mask}_mask")] + y = dset.y[getattr(dset, f'{mask}_mask')] + loss = F.nll_loss(pred, y) + return loss, loss diff --git a/autogl/module/nas/nas.py b/autogl/module/nas/nas.py deleted file mode 100644 index 7b71909..0000000 --- a/autogl/module/nas/nas.py +++ /dev/null @@ -1,45 +0,0 @@ -from torch_geometric.nn import GCNConv, SAGEConv -from nni.nas.pytorch import mutables -import torch.nn as nn - - -class BaseNAS: - def to(self, device): - """ - change the device of the whole process - """ - self.device = device - - def search(self, space, dset, trainer): - """ - The main process of NAS. - Parameters - ---------- - space : BaseArchitectureSpace - No implementation yet - dataset : ...datasets - Dataset to train and evaluate. - trainer : ..train.BaseTrainer - Including model, giving HP space and using for training - - Returns - ------- - model: ..train.BaseTrainer - The trainer including the best trained model - """ - -class BaseEstimator: - def __init__(self, device="cuda"): - self.device = device - - def infer(self, model, dataset): - pass - -class DartsNodeClfEstimator(BaseEstimator): - def infer(self, model, dataset): - dset = dataset[0].to(self.device) - pred = model(dset)[dset.train_mask] - y = dset.y[dset.train_mask] - loss_func = nn.CrossEntropyLoss() - loss = loss_func(pred, y) - return loss, loss diff --git a/autogl/module/nas/space.py b/autogl/module/nas/space.py deleted file mode 100644 index 0351ee9..0000000 --- a/autogl/module/nas/space.py +++ /dev/null @@ -1,92 +0,0 @@ -from nni.nas.pytorch import mutables -from autogl.module.model import BaseModel -import torch.nn as nn -import torch -from autogl.utils import get_logger -from nni.nas.pytorch.fixed import apply_fixed_architecture -from copy import deepcopy - -class BaseSpace(nn.Module): - def __init__(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None, init=False): - super().__init__() - self.input_dim = input_dim - self.hidden_dim = hidden_dim - self.output_dim = output_dim - self.ops = ops - self._initialized = False - if init: - self.instantiate() - - def instantiate(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None): - """ - instantiate modules in the space - """ - self.input_dim = input_dim or self.input_dim - self.hidden_dim = hidden_dim or self.hidden_dim - self.output_dim = output_dim or self.output_dim - self.ops = ops or self.ops - self._initialized = True - - def forward(self, data): - pass - -class SpaceModel(BaseModel): - _logger = get_logger('space model') - def __init__(self, space_model: BaseSpace, selection, device=torch.device('cuda')): - super().__init__(init=True) - space_model.instantiate() - self.init = True - self.space = [] - self.hyperparams = {} - self._model = space_model.to(device) - self.num_features = self._model.input_dim - self.num_classes = self._model.output_dim - self.selection = selection - apply_fixed_architecture(self._model, selection, verbose=False) - self.params = { - "num_class": self.num_classes, - "features_num": self.num_features - } - self.device = device - - def to(self, device): - if isinstance(device, (str, torch.device)): - self.device = device - return super().to(device) - - def forward(self, *args, **kwargs): - return self._model.forward(*args, **kwargs) - - def from_hyper_parameter(self, hp): - """ - receive no hp, just copy self and reset the learnable parameters. 
- """ - - """self._model.instantiate() - apply_fixed_architecture(self._model, self.selection, verbose=False) - self.to(self.device) - return self""" - - ret_self = deepcopy(self) - ret_self._model.instantiate() - apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False) - ret_self.to(self.device) - return ret_self - - @property - def model(self): - return self._model - -class GraphSpace(BaseSpace): - def __init__(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None, init=False): - super().__init__(input_dim, hidden_dim, output_dim, ops, init) - - def instantiate(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None, *args, **kwargs): - super().instantiate(input_dim, hidden_dim, output_dim, ops) - self.op1 = mutables.LayerChoice([op(self.input_dim, self.hidden_dim) for op in self.ops], key = "1") - self.op2 = mutables.LayerChoice([op(self.hidden_dim, self.output_dim) for op in self.ops], key = "2") - - def forward(self, data): - x = self.op1(data.x, data.edge_index) - x = self.op2(x, data.edge_index) - return x diff --git a/autogl/module/nas/space/__init__.py b/autogl/module/nas/space/__init__.py new file mode 100644 index 0000000..b8f7aa8 --- /dev/null +++ b/autogl/module/nas/space/__init__.py @@ -0,0 +1,4 @@ +from .base import BaseSpace +from .single_path import SinglePathNodeClassificationSpace + +__all__ = ["BaseSpace", "SinglePathNodeClassificationSpace"] diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py new file mode 100644 index 0000000..1022dce --- /dev/null +++ b/autogl/module/nas/space/base.py @@ -0,0 +1,55 @@ +from abc import abstractmethod +from autogl.module.model import BaseModel +import torch.nn as nn + + +class BaseSpace(nn.Module): + """ + Base space class of NAS module. Defining space containing all models. + Please use mutables to define your whole space. Refer to + `https://nni.readthedocs.io/en/stable/NAS/WriteSearchSpace.html` + for detailed information. + + Parameters + ---------- + init: bool + Whether to initialize the whole space. Default: `False` + """ + + def __init__(self, init=False): + super().__init__() + self._initialized = False + + @abstractmethod + def instantiate(self): + """ + Instantiate modules in the space + """ + if not self._initialized: + self._initialized = True + + @abstractmethod + def forward(self, *args, **kwargs): + """ + Define the forward pass of space model + """ + raise NotImplementedError() + + @abstractmethod + def export(self, selection: dict, device) -> BaseModel: + """ + Export the searched model from space. + + Parameters + ---------- + selection: Dict + The dictionary containing all the choices of nni. + device: str or torch.device + The device to put model on. + + Return + ------ + model: autogl.module.model.BaseModel + model to be exported. 
+ """ + raise NotImplementedError() diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py new file mode 100644 index 0000000..595a42f --- /dev/null +++ b/autogl/module/nas/space/single_path.py @@ -0,0 +1,116 @@ +from copy import deepcopy +import typing as _typ +import torch + +import torch.nn.functional as F +from nni.nas.pytorch import mutables +from nni.nas.pytorch.fixed import apply_fixed_architecture +from .base import BaseSpace +from ...model import BaseModel +from ....utils import get_logger + + +class FixedNodeClassificationModel(BaseModel): + _logger = get_logger("space model") + + def __init__(self, space_model: BaseSpace, selection, device=torch.device("cuda")): + super().__init__(init=True) + space_model.instantiate() + self.init = True + self.space = [] + self.hyperparams = {} + self._model = space_model.to(device) + self.num_features = self._model.input_dim + self.num_classes = self._model.output_dim + self.selection = selection + apply_fixed_architecture(self._model, selection, verbose=False) + self.params = {"num_class": self.num_classes, "features_num": self.num_features} + self.device = device + + def to(self, device): + if isinstance(device, (str, torch.device)): + self.device = device + return super().to(device) + + def forward(self, *args, **kwargs): + return self._model.forward(*args, **kwargs) + + def from_hyper_parameter(self, hp): + """ + receive no hp, just copy self and reset the learnable parameters. + """ + + ret_self = deepcopy(self) + ret_self._model.instantiate() + apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False) + ret_self.to(self.device) + return ret_self + + @property + def model(self): + return self._model + + +class SinglePathNodeClassificationSpace(BaseSpace): + def __init__( + self, + hidden_dim: _typ.Optional[int] = 64, + layer_number: _typ.Optional[int] = 2, + dropout: _typ.Optional[float] = 0.6, + input_dim: _typ.Optional[int] = None, + output_dim: _typ.Optional[int] = None, + ops: _typ.Tuple = None, + init: bool = False, + ): + super().__init__() + self.layer_number = layer_number + self.hidden_dim = hidden_dim + self.input_dim = input_dim + self.output_dim = output_dim + self.ops = ops + self.dropout = dropout + + def instantiate( + self, + hidden_dim: _typ.Optional[int] = None, + layer_number: _typ.Optional[int] = None, + input_dim: _typ.Optional[int] = None, + output_dim: _typ.Optional[int] = None, + ops: _typ.Tuple = None, + dropout = None + ): + self.hidden_dim = hidden_dim or self.hidden_dim + self.layer_number = layer_number or self.layer_number + self.input_dim = input_dim or self.input_dim + self.output_dim = output_dim or self.output_dim + self.ops = ops or self.ops + for layer in range(self.layer_number): + setattr( + self, + f"op_{layer}", + mutables.LayerChoice( + [ + op( + self.input_dim if layer == 0 else self.hidden_dim, + self.output_dim + if layer == self.layer_number - 1 + else self.hidden_dim, + ) + for op in self.ops + ], + key=f"{layer}", + ), + ) + self._initialized = True + + def forward(self, data): + x, edges = data.x, data.edge_index + for layer in range(self.layer_number): + x = getattr(self, f"op_{layer}")(x, edges) + if layer != self.layer_number - 1: + x = F.relu(x) + x = F.dropout(x, p=self.dropout) + return F.log_softmax(x, dim=1) + + def export(self, selection, device) -> BaseModel: + return FixedNodeClassificationModel(self, selection, device) diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py 
index 81c047e..abc79c0 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -352,7 +352,7 @@ class AutoNodeClassifier(BaseClassifier): loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, ) - assert isinstance(self._default_trainer, str) or len(self.nas_algorithms) == len(self._default_trainer) - len(self.graph_model_list), "length of default trainer should match total graph models and nas models passed" + assert not isinstance(self._default_trainer, list) or len(self.nas_algorithms) == len(self._default_trainer) - len(self.graph_model_list), "length of default trainer should match total graph models and nas models passed" # perform nas and add them to model list idx_trainer = len(self.graph_model_list) @@ -361,20 +361,32 @@ class AutoNodeClassifier(BaseClassifier): ): model = algo.search(space, self.dataset, estimator) # insert model into default trainer - if isinstance(self._default_trainer, str): - train_name = self._default_trainer - else: + if isinstance(self._default_trainer, list): train_name = self._default_trainer[idx_trainer] idx_trainer += 1 - trainer = TRAINER_DICT[train_name]( - model=model, - num_features=self.dataset[0].x.shape[1], - num_classes=self.dataset.num_classes, - loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, - feval=evaluator_list, - device=self.runtime_device, - init=False, - ) + else: + train_name = self._default_trainer + if isinstance(train_name, str): + trainer = TRAINER_DICT[train_name]( + model=model, + num_features=self.dataset[0].x.shape[1], + num_classes=self.dataset.num_classes, + loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, + feval=evaluator_list, + device=self.runtime_device, + init=False, + ) + else: + trainer = train_name + trainer.model = model + trainer.update_parameters( + num_classes=self.dataset.num_classes, + num_features=self.dataset[0].x.shape[1], + loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss, + feval=evaluator_list, + device=self.runtime_device, + + ) self.graph_model_list.append(trainer) # train the models and tune hpo diff --git a/autogl/utils/__init__.py b/autogl/utils/__init__.py index 280cf73..521df32 100644 --- a/autogl/utils/__init__.py +++ b/autogl/utils/__init__.py @@ -3,5 +3,6 @@ Some utils used by AutoGL """ from .log import get_logger +from .device import get_device -__all__ = ["get_logger"] +__all__ = ["get_logger", "get_device"] diff --git a/autogl/utils/device.py b/autogl/utils/device.py new file mode 100644 index 0000000..a316839 --- /dev/null +++ b/autogl/utils/device.py @@ -0,0 +1,7 @@ +import torch + +def get_device(device): + assert isinstance(device, (str, torch.device)), "Only support device of str or torch.device, get {} instead".format(device) + if device == 'auto': + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + return torch.device(device) diff --git a/examples/test_nas.py b/examples/test_nas.py index 876f456..22aa03c 100644 --- a/examples/test_nas.py +++ b/examples/test_nas.py @@ -1,113 +1,40 @@ -from copy import deepcopy import sys -from nni.nas.pytorch.fixed import apply_fixed_architecture -from torch_geometric.nn.conv.gat_conv import GATConv -from torch_geometric.nn.conv.gcn_conv import GCNConv sys.path.append('../') +from torch_geometric.nn import GCNConv, GATConv import torch -from autogl.solver import AutoNodeClassifier -from autogl.module.nas.nas import DartsNodeClfEstimator -from autogl.module.nas.space import GraphSpace from 
autogl.datasets import build_dataset_from_name +from autogl.solver import AutoNodeClassifier from autogl.module.model import BaseModel -from autogl.module.nas.darts import Darts -from autogl.utils import get_logger - -class MyGraphSpace(GraphSpace): - def __init__(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None, init=False): - super().__init__() - self.input_dim = input_dim - self.hidden_dim = hidden_dim - self.output_dim = output_dim - self.ops = ops - self._initialized = False - if init: - self.instantiate() - - def instantiate(self, input_dim=None, hidden_dim=None, output_dim=None, ops=None): - self.input_dim = input_dim or self.input_dim - self.hidden_dim = hidden_dim or self.hidden_dim - self.output_dim = output_dim or self.output_dim - self.ops = ops or self.ops - super().instantiate( - input_dim=self.input_dim, - hidden_dim=self.hidden_dim, - output_dim=self.output_dim, - ops=self.ops - ) - self._initialized = True - -class SpaceModel(BaseModel): - def __init__(self, space_model: MyGraphSpace, selection, device=torch.device('cuda')): - super().__init__(init=True) - space_model.reinstantiate() - self.init = True - self.space = [] - self.hyperparams = {} - self._model = space_model.to(device) - self.num_features = self._model.input_dim - self.num_classes = self._model.output_dim - self.selection = selection - apply_fixed_architecture(self._model, selection, verbose=False) - self.params = { - "num_class": self.num_classes, - "features_num": self.num_features - } - self.device = device - - def to(self, device): - if isinstance(device, (str, torch.device)): - self.device = device - return super().to(device) - - def forward(self, *args, **kwargs): - return self._model.forward(*args, **kwargs) - - def from_hyper_parameter(self, hp): - """ - receive no hp, just copy self and reset the learnable parameters. - """ - ret_self = deepcopy(self) - ret_self._model.reinstantiate() - apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False) - ret_self.to(self.device) - return ret_self - - @property - def model(self): - return self._model - -class MyDarts(Darts): - def __init__(self, device="cuda", *args, **kwargs): - super().__init__(*args, **kwargs) - self.device = device - - def to(self, device): - """ - change the device of the whole process - """ - self.device = device - - def search(self, space, dset, trainer): - """ - TODO: please manage device when training - current device of search seems to be forced on CPU. 
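The `device="auto"` strings used in the rewritten example below are resolved by the new `autogl.utils.get_device` helper introduced above; its contract, in short:

    import torch
    from autogl.utils import get_device

    print(get_device("cpu"))                  # -> device(type='cpu')
    print(get_device("auto"))                 # cuda if torch.cuda.is_available() else cpu
    print(get_device(torch.device("cuda")))   # torch.device inputs pass through
    # anything else (e.g. an int) fails the isinstance assertion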
- """ - res = super().search(space, dset, trainer) - selection = self.export() - return SpaceModel(res, selection, self.device) +from autogl.module.train import NodeClassificationFullTrainer +from autogl.module.nas import Darts, OneShotEstimator, SinglePathNodeClassificationSpace +from autogl.module.train import Acc if __name__ == '__main__': dataset = build_dataset_from_name('cora') solver = AutoNodeClassifier( feature_module=None, graph_models=[], - hpo_module="random", - max_evals=10, + hpo_module=None, ensemble_module=None, - nas_algorithms=[Darts()], - nas_spaces=[GraphSpace(hidden_dim=64, ops=[GATConv, GCNConv])], - nas_estimators=[DartsNodeClfEstimator()] + default_trainer=NodeClassificationFullTrainer( + BaseModel(), + None, + None, + optimizer=torch.optim.Adam, + lr=0.01, + max_epoch=200, + early_stopping_round=200, + weight_decay=5e-4, + device="auto", + init=False, + feval=['acc'], + loss="nll_loss", + lr_scheduler_type=None,), + nas_algorithms=[Darts(num_epochs=1)], + nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv])], + nas_estimators=[OneShotEstimator()] ) solver.fit(dataset) - out = solver.predict(dataset) \ No newline at end of file + solver.get_leaderboard().show() + out = solver.predict_proba() + print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) From d47dc1209d5a0dc99bea88ff3db15dd2ac20b91f Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Thu, 22 Apr 2021 06:17:38 +0000 Subject: [PATCH 047/144] add enas --- autogl/module/nas/enas.py | 368 ++++++++++++++++++++++++++++++++++++++ examples/test_enas.py | 29 +++ 2 files changed, 397 insertions(+) create mode 100644 autogl/module/nas/enas.py create mode 100644 examples/test_enas.py diff --git a/autogl/module/nas/enas.py b/autogl/module/nas/enas.py new file mode 100644 index 0000000..2cad798 --- /dev/null +++ b/autogl/module/nas/enas.py @@ -0,0 +1,368 @@ +# codes in this file are reproduced from https://github.com/microsoft/nni with some changes. +import copy +import logging + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .nas import BaseNAS +from .space import SpaceModel +from .utils import AverageMeterGroup, replace_layer_choice, replace_input_choice +from nni.nas.pytorch.fixed import apply_fixed_architecture +_logger = logging.getLogger(__name__) +def _get_mask(sampled, total): + multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)] + return torch.tensor(multihot, dtype=torch.bool) # pylint: disable=not-callable + +class PathSamplingLayerChoice(nn.Module): + """ + Mixed module, in which fprop is decided by exactly one or multiple (sampled) module. + If multiple module is selected, the result will be sumed and returned. + + Attributes + ---------- + sampled : int or list of int + Sampled module indices. + mask : tensor + A multi-hot bool 1D-tensor representing the sampled mask. + """ + + def __init__(self, layer_choice): + super(PathSamplingLayerChoice, self).__init__() + self.op_names = [] + for name, module in layer_choice.named_children(): + self.add_module(name, module) + self.op_names.append(name) + assert self.op_names, 'There has to be at least one op to choose from.' + self.sampled = None # sampled can be either a list of indices or an index + + def forward(self, *args, **kwargs): + assert self.sampled is not None, 'At least one path needs to be sampled before fprop.' 
+ if isinstance(self.sampled, list): + return sum([getattr(self, self.op_names[i])(*args, **kwargs) for i in self.sampled]) # pylint: disable=not-an-iterable + else: + return getattr(self, self.op_names[self.sampled])(*args, **kwargs) # pylint: disable=invalid-sequence-index + + def __len__(self): + return len(self.op_names) + + @property + def mask(self): + return _get_mask(self.sampled, len(self)) + + +class PathSamplingInputChoice(nn.Module): + """ + Mixed input. Take a list of tensor as input, select some of them and return the sum. + + Attributes + ---------- + sampled : int or list of int + Sampled module indices. + mask : tensor + A multi-hot bool 1D-tensor representing the sampled mask. + """ + + def __init__(self, input_choice): + super(PathSamplingInputChoice, self).__init__() + self.n_candidates = input_choice.n_candidates + self.n_chosen = input_choice.n_chosen + self.sampled = None + + def forward(self, input_tensors): + if isinstance(self.sampled, list): + return sum([input_tensors[t] for t in self.sampled]) # pylint: disable=not-an-iterable + else: + return input_tensors[self.sampled] + + def __len__(self): + return self.n_candidates + + @property + def mask(self): + return _get_mask(self.sampled, len(self)) + + +class StackedLSTMCell(nn.Module): + def __init__(self, layers, size, bias): + super().__init__() + self.lstm_num_layers = layers + self.lstm_modules = nn.ModuleList([nn.LSTMCell(size, size, bias=bias) + for _ in range(self.lstm_num_layers)]) + + def forward(self, inputs, hidden): + prev_h, prev_c = hidden + next_h, next_c = [], [] + for i, m in enumerate(self.lstm_modules): + curr_h, curr_c = m(inputs, (prev_h[i], prev_c[i])) + next_c.append(curr_c) + next_h.append(curr_h) + # current implementation only supports batch size equals 1, + # but the algorithm does not necessarily have this limitation + inputs = curr_h[-1].view(1, -1) + return next_h, next_c + + +class ReinforceField: + """ + A field with ``name``, with ``total`` choices. ``choose_one`` is true if one and only one is meant to be + selected. Otherwise, any number of choices can be chosen. + """ + + def __init__(self, name, total, choose_one): + self.name = name + self.total = total + self.choose_one = choose_one + + def __repr__(self): + return f'ReinforceField(name={self.name}, total={self.total}, choose_one={self.choose_one})' + + +class ReinforceController(nn.Module): + """ + A controller that mutates the graph with RL. + + Parameters + ---------- + fields : list of ReinforceField + List of fields to choose. + lstm_size : int + Controller LSTM hidden units. + lstm_num_layers : int + Number of layers for stacked LSTM. + tanh_constant : float + Logits will be equal to ``tanh_constant * tanh(logits)``. Don't use ``tanh`` if this value is ``None``. + skip_target : float + Target probability that skipconnect will appear. + temperature : float + Temperature constant that divides the logits. + entropy_reduction : str + Can be one of ``sum`` and ``mean``. How the entropy of multi-input-choice is reduced. 
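In contrast to the softmax mixture used by DARTS, the `PathSamplingLayerChoice` above executes only the sampled candidate(s). A small self-contained sketch of its contract (`holder` is a stand-in for an nni `LayerChoice`, whose candidates the constructor reads via `named_children()`):

    import torch
    import torch.nn as nn

    # candidate ops registered as named children, as the constructor expects
    holder = nn.Module()
    holder.add_module("0", nn.Linear(4, 4))
    holder.add_module("1", nn.Linear(4, 4))

    wrapped = PathSamplingLayerChoice(holder)
    x = torch.randn(2, 4)
    wrapped.sampled = 0            # the controller picks a single path
    print(wrapped(x).shape)        # only op "0" runs
    wrapped.sampled = [0, 1]       # with a list of picks, outputs are summed
    print(wrapped(x).shape)
    print(wrapped.mask)            # multi-hot bool mask over the candidates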
+ """ + + def __init__(self, fields, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, + skip_target=0.4, temperature=None, entropy_reduction='sum'): + super(ReinforceController, self).__init__() + self.fields = fields + self.lstm_size = lstm_size + self.lstm_num_layers = lstm_num_layers + self.tanh_constant = tanh_constant + self.temperature = temperature + self.skip_target = skip_target + + self.lstm = StackedLSTMCell(self.lstm_num_layers, self.lstm_size, False) + self.attn_anchor = nn.Linear(self.lstm_size, self.lstm_size, bias=False) + self.attn_query = nn.Linear(self.lstm_size, self.lstm_size, bias=False) + self.v_attn = nn.Linear(self.lstm_size, 1, bias=False) + self.g_emb = nn.Parameter(torch.randn(1, self.lstm_size) * 0.1) + self.skip_targets = nn.Parameter(torch.tensor([1.0 - self.skip_target, self.skip_target]), # pylint: disable=not-callable + requires_grad=False) + assert entropy_reduction in ['sum', 'mean'], 'Entropy reduction must be one of sum and mean.' + self.entropy_reduction = torch.sum if entropy_reduction == 'sum' else torch.mean + self.cross_entropy_loss = nn.CrossEntropyLoss(reduction='none') + self.soft = nn.ModuleDict({ + field.name: nn.Linear(self.lstm_size, field.total, bias=False) for field in fields + }) + self.embedding = nn.ModuleDict({ + field.name: nn.Embedding(field.total, self.lstm_size) for field in fields + }) + + def resample(self): + self._initialize() + result = dict() + for field in self.fields: + result[field.name] = self._sample_single(field) + return result + + def _initialize(self): + self._inputs = self.g_emb.data + self._c = [torch.zeros((1, self.lstm_size), + dtype=self._inputs.dtype, + device=self._inputs.device) for _ in range(self.lstm_num_layers)] + self._h = [torch.zeros((1, self.lstm_size), + dtype=self._inputs.dtype, + device=self._inputs.device) for _ in range(self.lstm_num_layers)] + self.sample_log_prob = 0 + self.sample_entropy = 0 + self.sample_skip_penalty = 0 + + def _lstm_next_step(self): + self._h, self._c = self.lstm(self._inputs, (self._h, self._c)) + + def _sample_single(self, field): + self._lstm_next_step() + logit = self.soft[field.name](self._h[-1]) + if self.temperature is not None: + logit /= self.temperature + if self.tanh_constant is not None: + logit = self.tanh_constant * torch.tanh(logit) + if field.choose_one: + sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1) + log_prob = self.cross_entropy_loss(logit, sampled) + self._inputs = self.embedding[field.name](sampled) + else: + logit = logit.view(-1, 1) + logit = torch.cat([-logit, logit], 1) # pylint: disable=invalid-unary-operand-type + sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1) + skip_prob = torch.sigmoid(logit) + kl = torch.sum(skip_prob * torch.log(skip_prob / self.skip_targets)) + self.sample_skip_penalty += kl + log_prob = self.cross_entropy_loss(logit, sampled) + sampled = sampled.nonzero().view(-1) + if sampled.sum().item(): + self._inputs = (torch.sum(self.embedding[field.name](sampled.view(-1)), 0) / (1. + torch.sum(sampled))).unsqueeze(0) + else: + self._inputs = torch.zeros(1, self.lstm_size, device=self.embedding[field.name].weight.device) + + sampled = sampled.detach().numpy().tolist() + self.sample_log_prob += self.entropy_reduction(log_prob) + entropy = (log_prob * torch.exp(-log_prob)).detach() # pylint: disable=invalid-unary-operand-type + self.sample_entropy += self.entropy_reduction(entropy) + if len(sampled) == 1: + sampled = sampled[0] + return sampled + + +class Enas(BaseNAS): + """ + ENAS trainer. 
+ + Parameters + ---------- + model : nn.Module + PyTorch model to be trained. + loss : callable + Receives logits and ground truth label, return a loss tensor. + metrics : callable + Receives logits and ground truth label, return a dict of metrics. + reward_function : callable + Receives logits and ground truth label, return a tensor, which will be feeded to RL controller as reward. + optimizer : Optimizer + The optimizer used for optimizing the model. + num_epochs : int + Number of epochs planned for training. + dataset : Dataset + Dataset for training. Will be split for training weights and architecture weights. + batch_size : int + Batch size. + workers : int + Workers for data loading. + device : torch.device + ``torch.device("cpu")`` or ``torch.device("cuda")``. + log_frequency : int + Step count per logging. + grad_clip : float + Gradient clipping. Set to 0 to disable. Default: 5. + entropy_weight : float + Weight of sample entropy loss. + skip_weight : float + Weight of skip penalty loss. + baseline_decay : float + Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``. + ctrl_lr : float + Learning rate for RL controller. + ctrl_steps_aggregate : int + Number of steps that will be aggregated into one mini-batch for RL controller. + ctrl_steps : int + Number of mini-batches for each epoch of RL controller learning. + ctrl_kwargs : dict + Optional kwargs that will be passed to :class:`ReinforceController`. + """ + + def __init__(self, device='cuda', workers=4,log_frequency=None, + grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999, + ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,*args,**kwargs): + super().__init__(*args,**kwargs) + self.device=device + self.num_epochs = kwargs.get("num_epochs", 5) + self.workers = workers + self.log_frequency = log_frequency + self.entropy_weight = entropy_weight + self.skip_weight = skip_weight + self.baseline_decay = baseline_decay + self.baseline = 0. 
+        self.ctrl_steps_aggregate = ctrl_steps_aggregate
+        self.grad_clip = grad_clip
+        self.ctrl_kwargs = ctrl_kwargs
+        self.ctrl_lr = ctrl_lr
+
+    def search(self, space, dset, trainer):
+        self.model = space
+        self.dataset = dset  # .to(self.device)
+        self.trainer = trainer
+        self.model_optim = torch.optim.SGD(
+            self.model.parameters(), lr=0.01, weight_decay=3e-4
+        )
+        # replace choice
+        self.nas_modules = []
+        replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules)
+        replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules)
+        # to device
+        self.model = self.model.to(self.device)
+        # fields
+        self.nas_fields = [ReinforceField(name, len(module),
+                                          isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1)
+                           for name, module in self.nas_modules]
+        self.controller = ReinforceController(self.nas_fields, **(self.ctrl_kwargs or {}))
+        self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr)
+        # train
+        for i in range(self.num_epochs):
+            self._train_model(i)
+            self._train_controller(i)
+
+        selection = self.export()
+        return SpaceModel(space, selection, self.device)
+
+    def _train_model(self, epoch):
+        self.model.train()
+        self.controller.eval()
+        self.model_optim.zero_grad()
+        self._resample()
+        metric, loss = self._infer()
+        loss.backward()
+        if self.grad_clip > 0:
+            nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)
+        self.model_optim.step()
+
+    def _train_controller(self, epoch):
+        self.model.eval()
+        self.controller.train()
+        self.ctrl_optim.zero_grad()
+        for ctrl_step in range(self.ctrl_steps_aggregate):
+            self._resample()
+            with torch.no_grad():
+                metric, loss = self._infer()
+            reward = -metric  # TODO: the metric is currently the loss itself
+            if self.entropy_weight:
+                reward += self.entropy_weight * self.controller.sample_entropy.item()
+            self.baseline = self.baseline * self.baseline_decay + reward * (1 - self.baseline_decay)
+            loss = self.controller.sample_log_prob * (reward - self.baseline)
+            if self.skip_weight:
+                loss += self.skip_weight * self.controller.sample_skip_penalty
+            loss /= self.ctrl_steps_aggregate
+            loss.backward()
+
+            if (ctrl_step + 1) % self.ctrl_steps_aggregate == 0:
+                if self.grad_clip > 0:
+                    nn.utils.clip_grad_norm_(self.controller.parameters(), self.grad_clip)
+                self.ctrl_optim.step()
+                self.ctrl_optim.zero_grad()
+
+            if self.log_frequency is not None and ctrl_step % self.log_frequency == 0:
+                _logger.info('RL Epoch [%d/%d] Step [%d/%d]', epoch + 1, self.num_epochs,
+                             ctrl_step + 1, self.ctrl_steps_aggregate)
+
+    def _resample(self):
+        result = self.controller.resample()
+        for name, module in self.nas_modules:
+            module.sampled = result[name]
+
+    def export(self):
+        self.controller.eval()
+        with torch.no_grad():
+            return self.controller.resample()
+
+    def _infer(self):
+        metric, loss = self.trainer.infer(self.model, self.dataset)
+        return metric, loss
diff --git a/examples/test_enas.py b/examples/test_enas.py
new file mode 100644
index 0000000..ade6e50
--- /dev/null
+++ b/examples/test_enas.py
@@ -0,0 +1,29 @@
+from copy import deepcopy
+import sys
+from nni.nas.pytorch.fixed import apply_fixed_architecture
+from torch_geometric.nn.conv.gat_conv import GATConv
+from torch_geometric.nn.conv.gcn_conv import GCNConv
+sys.path.append('../')
+import torch
+from autogl.solver import AutoNodeClassifier
+from autogl.module.nas.nas import DartsNodeClfEstimator
+from autogl.module.nas.space import GraphSpace
+from autogl.datasets import build_dataset_from_name
+from autogl.module.model import BaseModel
+# 
from autogl.module.nas.darts import Darts +from autogl.utils import get_logger +from autogl.module.nas.enas import Enas +if __name__ == '__main__': + dataset = build_dataset_from_name('cora') + solver = AutoNodeClassifier( + feature_module=None, + graph_models=[], + hpo_module="random", + max_evals=10, + ensemble_module=None, + nas_algorithms=[Enas()], + nas_spaces=[GraphSpace(hidden_dim=64, ops=[GATConv, GCNConv])], + nas_estimators=[DartsNodeClfEstimator()] + ) + solver.fit(dataset) + out = solver.predict(dataset) \ No newline at end of file From 5b4c9189750c144e1a39618e7efaae89ffac7f82 Mon Sep 17 00:00:00 2001 From: lihy96 Date: Thu, 22 Apr 2021 15:36:23 +0800 Subject: [PATCH 048/144] link prediction --- autogl/module/model/gcn.py | 34 +- autogl/module/train/__init__.py | 2 + autogl/module/train/evaluate.py | 5 +- autogl/module/train/link_prediction.py | 534 +++++++++++++++++++++++++ 4 files changed, 570 insertions(+), 5 deletions(-) create mode 100644 autogl/module/train/link_prediction.py diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 63e1bc4..44528a0 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -62,6 +62,25 @@ class GCN(torch.nn.Module): x = F.dropout(x, p=self.args["dropout"], training=self.training) return F.log_softmax(x, dim=1) + def encode(self, data): + x = data.x + for i in range(self.num_layer - 1): + x = self.convs[i](x, data.train_pos_edge_index) + if i != self.num_layer - 2: + x = activate_func(x, self.args["act"]) + # x = F.dropout(x, p=self.args["dropout"], training=self.training) + return x + + def decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + return logits + + def decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + + @register_model("gcn") class AutoGCN(BaseModel): @@ -142,11 +161,18 @@ class AutoGCN(BaseModel): ] # initial point of hp search + # self.hyperparams = { + # "num_layers": 2, + # "hidden": [16], + # "dropout": 0.2, + # "act": "leaky_relu", + # } + self.hyperparams = { - "num_layers": 2, - "hidden": [16], - "dropout": 0.2, - "act": "leaky_relu", + "num_layers": 3, + "hidden": [128, 64], + "dropout": 0, + "act": "relu", } self.initialized = False diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 36fd434..8c090c2 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -48,12 +48,14 @@ def get_feval(feval): from .graph_classification import GraphClassificationTrainer from .node_classification import NodeClassificationTrainer +from .link_prediction import LinkPredictionTrainer from .evaluate import Acc, Auc, Logloss __all__ = [ "BaseTrainer", "GraphClassificationTrainer", "NodeClassificationTrainer", + "LinkPredictionTrainer", "Evaluation", "Acc", "Auc", diff --git a/autogl/module/train/evaluate.py b/autogl/module/train/evaluate.py index 290989e..ecd2413 100644 --- a/autogl/module/train/evaluate.py +++ b/autogl/module/train/evaluate.py @@ -47,7 +47,10 @@ class Auc(Evaluation): """ Should return: the evaluation result (float) """ - pos_predict = predict[:, 1] + if len(predict.shape) == 1: + pos_predict = predict + else: + pos_predict = predict[:, 1] return roc_auc_score(label, pos_predict) diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py new file mode 100644 index 0000000..fdc7844 --- /dev/null +++ 
b/autogl/module/train/link_prediction.py @@ -0,0 +1,534 @@ +from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping +import torch +from torch.optim.lr_scheduler import StepLR +import torch.nn.functional as F +from ..model import MODEL_DICT, BaseModel +from .evaluate import Logloss, Acc, Auc +from typing import Union +from copy import deepcopy +from torch_geometric.utils import negative_sampling +from torch_geometric.utils import train_test_split_edges + +from ...utils import get_logger + +LOGGER = get_logger("link prediction trainer") + +def get_feval(feval): + if isinstance(feval, str): + return EVALUATE_DICT[feval] + if isinstance(feval, type) and issubclass(feval, Evaluation): + return feval + if isinstance(feval, list): + return [get_feval(f) for f in feval] + raise ValueError("feval argument of type", type(feval), "is not supported!") + + +@register_trainer("LinkPrediction") +class LinkPredictionTrainer(BaseTrainer): + """ + The link prediction trainer. + + Used to automatically train the link prediction problem. + + Parameters + ---------- + model: ``BaseModel`` or ``str`` + The (name of) model used to train and predict. + + optimizer: ``Optimizer`` of ``str`` + The (name of) optimizer used to train and predict. + + lr: ``float`` + The learning rate of link prediction task. + + max_epoch: ``int`` + The max number of epochs in training. + + early_stopping_round: ``int`` + The round of early stop. + + device: ``torch.device`` or ``str`` + The device where model will be running on. + + init: ``bool`` + If True(False), the model will (not) be initialized. + """ + + space = None + + def __init__( + self, + model: Union[BaseModel, str], + num_features, + num_classes, + optimizer=None, + lr=None, + max_epoch=None, + early_stopping_round=None, + weight_decay=1e-4, + device=None, + init=True, + feval=[Auc], + loss="binary_cross_entropy_with_logits", + *args, + **kwargs + ): + super(LinkPredictionTrainer, self).__init__(model) + + self.loss_type = loss + + if device is None: + device = "cpu" + + # init model + if isinstance(model, str): + assert model in MODEL_DICT, "Cannot parse model name " + model + self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) + elif isinstance(model, BaseModel): + self.model = model + + if type(optimizer) == str and optimizer.lower() == "adam": + self.optimizer = torch.optim.Adam + elif type(optimizer) == str and optimizer.lower() == "sgd": + self.optimizer = torch.optim.SGD + else: + self.optimizer = torch.optim.Adam + + self.num_features = num_features + self.num_classes = num_classes + self.lr = lr if lr is not None else 1e-4 + self.max_epoch = max_epoch if max_epoch is not None else 100 + self.early_stopping_round = ( + early_stopping_round if early_stopping_round is not None else 100 + ) + self.device = device + self.args = args + self.kwargs = kwargs + + self.feval = get_feval(feval) + + self.weight_decay = weight_decay + + self.early_stopping = EarlyStopping( + patience=early_stopping_round, verbose=False + ) + + self.valid_result = None + self.valid_result_prob = None + self.valid_score = None + + self.initialized = False + self.num_features = num_features + self.num_classes = num_classes + self.device = device + + self.space = [ + { + "parameterName": "max_epoch", + "type": "INTEGER", + "maxValue": 500, + "minValue": 10, + "scalingType": "LINEAR", + }, + { + "parameterName": "early_stopping_round", + "type": "INTEGER", + "maxValue": 30, + "minValue": 10, + "scalingType": "LINEAR", + }, + { + 
"parameterName": "lr", + "type": "DOUBLE", + "maxValue": 1e-1, + "minValue": 1e-4, + "scalingType": "LOG", + }, + { + "parameterName": "weight_decay", + "type": "DOUBLE", + "maxValue": 1e-2, + "minValue": 1e-4, + "scalingType": "LOG", + }, + ] + self.space += self.model.space + LinkPredictionTrainer.space = self.space + + self.hyperparams = { + "max_epoch": self.max_epoch, + "early_stopping_round": self.early_stopping_round, + "lr": self.lr, + "weight_decay": self.weight_decay, + } + self.hyperparams = {**self.hyperparams, **self.model.get_hyper_parameter()} + + if init is True: + self.initialize() + + def initialize(self): + # Initialize the auto model in trainer. + if self.initialized is True: + return + self.initialized = True + self.model.initialize() + + def get_model(self): + # Get auto model used in trainer. + return self.model + + @classmethod + def get_task_name(cls): + # Get task name, i.e., `LinkPrediction`. + return "LinkPrediction" + + def train_only(self, data, train_mask=None): + """ + The function of training on the given dataset and mask. + + Parameters + ---------- + data: The link prediction dataset used to be trained. It should consist of masks, including train_mask, and etc. + train_mask: The mask used in training stage. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A reference of current trainer. + + """ + + # data.train_mask = data.val_mask = data.test_mask = data.y = None + # data = train_test_split_edges(data) + data = data.to(self.device) + # mask = data.train_mask if train_mask is None else train_mask + optimizer = self.optimizer( + self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + ) + scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + for epoch in range(1, self.max_epoch): + self.model.model.train() + + neg_edge_index = negative_sampling( + edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes, + num_neg_samples=data.train_pos_edge_index.size(1)) + + optimizer.zero_grad() + # res = self.model.model.forward(data) + z = self.model.model.encode(data) + link_logits = self.model.model.decode(z, data.train_pos_edge_index, neg_edge_index) + link_labels = self.get_link_labels(data.train_pos_edge_index, neg_edge_index) + # loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) + if hasattr(F, self.loss_type): + loss = getattr(F, self.loss_type)(link_logits, link_labels) + else: + raise TypeError("PyTorch does not support loss type {}".format(self.loss_type)) + + loss.backward() + optimizer.step() + scheduler.step() + + if type(self.feval) is list: + feval = self.feval[0] + else: + feval = self.feval + val_loss = self.evaluate([data], mask='val', feval=feval) + if feval.is_higher_better() is True: + val_loss = -val_loss + self.early_stopping(val_loss, self.model.model) + if self.early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", epoch) + self.early_stopping.load_checkpoint(self.model.model) + break + + def predict_only(self, data, test_mask=None): + """ + The function of predicting on the given dataset and mask. + + Parameters + ---------- + data: The link prediction dataset used to be predicted. + train_mask: The mask used in training stage. + + Returns + ------- + res: The result of predicting on the given dataset. + + """ + data = data.to(self.device) + self.model.model.eval() + with torch.no_grad(): + z = self.model.model.encode(data) + return z + + def train(self, dataset, keep_valid_result=True): + """ + The function of training on the given dataset and keeping valid result. 
+ + Parameters + ---------- + dataset: The link prediction dataset used to be trained. + + keep_valid_result: ``bool`` + If True(False), save the validation result after training. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A reference of current trainer. + + """ + data = dataset[0] + self.train_only(data) + if keep_valid_result: + self.valid_result = self.predict_only(data) + self.valid_result_prob = self.predict_proba(dataset, 'val') + self.valid_score = self.evaluate( + dataset, mask='val', feval=self.feval + ) + + def predict(self, dataset, mask=None): + """ + The function of predicting on the given dataset. + + Parameters + ---------- + dataset: The link prediction dataset used to be predicted. + + mask: ``train``, ``val``, or ``test``. + The dataset mask. + + Returns + ------- + The prediction result of ``predict_proba``. + """ + return self.predict_proba(dataset, mask=mask, in_log_format=False) + + def predict_proba(self, dataset, mask=None, in_log_format=False): + """ + The function of predicting the probability on the given dataset. + + Parameters + ---------- + dataset: The link prediction dataset used to be predicted. + + mask: ``train``, ``val``, or ``test``. + The dataset mask. + + in_log_format: ``bool``. + If True(False), the probability will (not) be log format. + + Returns + ------- + The prediction result. + """ + data = dataset[0] + data = data.to(self.device) + if mask in ["train", "val", "test"]: + pos_edge_index = data[f'{mask}_pos_edge_index'] + neg_edge_index = data[f'{mask}_neg_edge_index'] + else: + pos_edge_index = data[f'test_pos_edge_index'] + neg_edge_index = data[f'test_neg_edge_index'] + + self.model.model.eval() + with torch.no_grad(): + z = self.predict_only(data) + link_logits = self.model.model.decode(z, pos_edge_index, neg_edge_index) + link_probs = link_logits.sigmoid() + + return link_probs + + def get_valid_predict(self): + # """Get the valid result.""" + return self.valid_result + + def get_valid_predict_proba(self): + # """Get the valid result (prediction probability).""" + return self.valid_result_prob + + def get_valid_score(self, return_major=True): + """ + The function of getting the valid score. + + Parameters + ---------- + return_major: ``bool``. + If True, the return only consists of the major result. + If False, the return consists of the all results. + + Returns + ------- + result: The valid score in training stage. + """ + if isinstance(self.feval, list): + if return_major: + return self.valid_score[0], self.feval[0].is_higher_better() + else: + return self.valid_score, [f.is_higher_better() for f in self.feval] + else: + return self.valid_score, self.feval.is_higher_better() + + def get_name_with_hp(self): + # """Get the name of hyperparameter.""" + name = "-".join( + [ + str(self.optimizer), + str(self.lr), + str(self.max_epoch), + str(self.early_stopping_round), + str(self.model), + str(self.device), + ] + ) + name = ( + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) + ) + return name + + def evaluate(self, dataset, mask=None, feval=None): + """ + The function of training on the given dataset and keeping valid result. + + Parameters + ---------- + dataset: The link prediction dataset used to be evaluated. + + mask: ``train``, ``val``, or ``test``. + The dataset mask. + + feval: ``str``. + The evaluation method used in this function. + + Returns + ------- + res: The evaluation result on the given dataset. 
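Together with the 1-D handling added to `Auc` earlier in this patch, scoring the sigmoided link logits reduces to a single ROC-AUC call; for example, given the `link_probs` and `link_labels` produced in `evaluate` below:

    from sklearn.metrics import roc_auc_score

    # link_probs: 1-D tensor of sigmoid scores; link_labels: 1-D 0/1 tensor
    auc = roc_auc_score(link_labels.cpu().numpy(), link_probs.cpu().numpy())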
+ + """ + data = dataset[0] + data = data.to(self.device) + test_mask = mask + if feval is None: + feval = self.feval + else: + feval = get_feval(feval) + + if mask in ["train", "val", "test"]: + pos_edge_index = data[f'{mask}_pos_edge_index'] + neg_edge_index = data[f'{mask}_neg_edge_index'] + else: + pos_edge_index = data[f'test_pos_edge_index'] + neg_edge_index = data[f'test_neg_edge_index'] + + self.model.model.eval() + with torch.no_grad(): + link_probs = self.predict_proba(dataset, mask) + link_labels = self.get_link_labels(pos_edge_index, neg_edge_index) + + if not isinstance(feval, list): + feval = [feval] + return_signle = True + else: + return_signle = False + + res = [] + for f in feval: + try: + res.append(f.evaluate(link_probs, link_labels)) + except: + res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy())) + if return_signle: + return res[0] + return res + + def to(self, new_device): + assert isinstance(new_device, torch.device) + self.device = new_device + if self.model is not None: + self.model.to(self.device) + + def duplicate_from_hyper_parameter(self, hp: dict, model=None, restricted=True): + """ + The function of duplicating a new instance from the given hyperparameter. + + Parameters + ---------- + hp: ``dict``. + The hyperparameter used in the new instance. + + model: The model used in the new instance of trainer. + + restricted: ``bool``. + If False(True), the hyperparameter should (not) be updated from origin hyperparameter. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A new instance of trainer. + + """ + if not restricted: + origin_hp = deepcopy(self.hyperparams) + origin_hp.update(hp) + hp = origin_hp + if model is None: + model = self.model + model = model.from_hyper_parameter( + dict( + [ + x + for x in hp.items() + if x[0] in [y["parameterName"] for y in model.space] + ] + ) + ) + + ret = self.__class__( + model=model, + num_features=self.num_features, + num_classes=self.num_classes, + optimizer=self.optimizer, + lr=hp["lr"], + max_epoch=hp["max_epoch"], + early_stopping_round=hp["early_stopping_round"], + device=self.device, + weight_decay=hp["weight_decay"], + feval=self.feval, + init=True, + *self.args, + **self.kwargs + ) + + return ret + + def set_feval(self, feval): + # """Set the evaluation metrics.""" + self.feval = get_feval(feval) + + @property + def hyper_parameter_space(self): + # """Get the space of hyperparameter.""" + return self.space + + @hyper_parameter_space.setter + def hyper_parameter_space(self, space): + # """Set the space of hyperparameter.""" + self.space = space + LinkPredictionTrainer.space = space + + def get_hyper_parameter(self): + # """Get the hyperparameter in this trainer.""" + return self.hyperparams + + def get_link_labels(self, pos_edge_index, neg_edge_index): + E = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(E, dtype=torch.float, device=self.device) + link_labels[:pos_edge_index.size(1)] = 1. 
From 1577b5d5bccff896a81d5dd635f9fef184ae728a Mon Sep 17 00:00:00 2001
From: cluster32
Date: Fri, 23 Apr 2021 11:11:40 +0800
Subject: [PATCH 049/144] tune performance

---
 autogl/module/nas/space/single_path.py | 8 +++++---
 examples/test_nas.py                   | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py
index 595a42f..8bbe8b3 100644
--- a/autogl/module/nas/space/single_path.py
+++ b/autogl/module/nas/space/single_path.py
@@ -9,6 +9,7 @@ from .base import BaseSpace
 from ...model import BaseModel
 from ....utils import get_logger
+from ...model import AutoGCN
 
 
 class FixedNodeClassificationModel(BaseModel):
     _logger = get_logger("space model")
@@ -56,7 +57,7 @@ class SinglePathNodeClassificationSpace(BaseSpace):
         self,
         hidden_dim: _typ.Optional[int] = 64,
         layer_number: _typ.Optional[int] = 2,
-        dropout: _typ.Optional[float] = 0.6,
+        dropout: _typ.Optional[float] = 0.2,
         input_dim: _typ.Optional[int] = None,
         output_dim: _typ.Optional[int] = None,
         ops: _typ.Tuple = None,
@@ -108,9 +109,10 @@ class SinglePathNodeClassificationSpace(BaseSpace):
         for layer in range(self.layer_number):
             x = getattr(self, f"op_{layer}")(x, edges)
             if layer != self.layer_number - 1:
-                x = F.relu(x)
-                x = F.dropout(x, p=self.dropout)
+                x = F.leaky_relu(x)
+                x = F.dropout(x, p=self.dropout, training = self.training)
         return F.log_softmax(x, dim=1)
 
     def export(self, selection, device) -> BaseModel:
+        #return AutoGCN(self.input_dim, self.output_dim, device)
         return FixedNodeClassificationModel(self, selection, device)
diff --git a/examples/test_nas.py b/examples/test_nas.py
index 22aa03c..4fc34dc 100644
--- a/examples/test_nas.py
+++ b/examples/test_nas.py
@@ -31,7 +31,7 @@ if __name__ == '__main__':
             loss="nll_loss",
             lr_scheduler_type=None,),
         nas_algorithms=[Darts(num_epochs=1)],
-        nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv])],
+        nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GATConv])],
         nas_estimators=[OneShotEstimator()]
     )
     solver.fit(dataset)
From 0f795131948ea07cfdfef9432cf94e853adc490d Mon Sep 17 00:00:00 2001
From: cluster32
Date: Fri, 23 Apr 2021 11:28:44 +0800
Subject: [PATCH 050/144] merge enas

---
 autogl/module/nas/algorithm/__init__.py   |  3 ++-
 autogl/module/nas/{ => algorithm}/enas.py | 15 ++++++++-------
 examples/test_nas.py                      |  2 ++
 3 files changed, 12 insertions(+), 8 deletions(-)
 rename autogl/module/nas/{ => algorithm}/enas.py (97%)

diff --git a/autogl/module/nas/algorithm/__init__.py b/autogl/module/nas/algorithm/__init__.py
index 612102a..537f223 100644
--- a/autogl/module/nas/algorithm/__init__.py
+++ b/autogl/module/nas/algorithm/__init__.py
@@ -4,5 +4,6 @@ NAS algorithms
 
 from .base import BaseNAS
 from .darts import Darts
+from .enas import Enas
 
-__all__ = ["BaseNAS", "Darts"]
+__all__ = ["BaseNAS", "Darts", "Enas"]
diff --git a/autogl/module/nas/enas.py b/autogl/module/nas/algorithm/enas.py
similarity index 97%
rename from autogl/module/nas/enas.py
rename to autogl/module/nas/algorithm/enas.py
index 2cad798..3d175aa 100644
--- a/autogl/module/nas/enas.py
+++ b/autogl/module/nas/algorithm/enas.py
@@ -6,9 +6,9 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
-from .nas import BaseNAS
-from .space import SpaceModel
-from .utils import AverageMeterGroup, replace_layer_choice, replace_input_choice
+from .base import BaseNAS
+from ..space import BaseSpace
+from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice
 from nni.nas.pytorch.fixed import apply_fixed_architecture
 
 _logger = logging.getLogger(__name__)
 
 def _get_mask(sampled, total):
@@ -288,10 +288,10 @@ class Enas(BaseNAS):
         self.ctrl_kwargs=ctrl_kwargs
         self.ctrl_lr=ctrl_lr
 
-    def search(self, space, dset, trainer):
+    def search(self, space: BaseSpace, dset, estimator):
         self.model = space
         self.dataset = dset#.to(self.device)
-        self.trainer = trainer
+        self.estimator = estimator
         self.model_optim = torch.optim.SGD(
             self.model.parameters(), lr=0.01, weight_decay=3e-4
         )
@@ -313,7 +313,8 @@ class Enas(BaseNAS):
             self._train_controller(i)
 
         selection=self.export()
-        return SpaceModel(space,selection,self.device)
+        return space.export(selection,self.device)
+
     def _train_model(self, epoch):
         self.model.train()
         self.controller.eval()
@@ -364,5 +365,5 @@ class Enas(BaseNAS):
         return self.controller.resample()
 
     def _infer(self):
-        metric, loss = self.trainer.infer(self.model, self.dataset)
+        metric, loss = self.estimator.infer(self.model, self.dataset)
         return metric, loss
diff --git a/examples/test_nas.py b/examples/test_nas.py
index 22aa03c..693cd10 100644
--- a/examples/test_nas.py
+++ b/examples/test_nas.py
@@ -8,6 +8,7 @@ from autogl.module.model import BaseModel
 from autogl.module.train import NodeClassificationFullTrainer
 from autogl.module.nas import Darts, OneShotEstimator, SinglePathNodeClassificationSpace
 from autogl.module.train import Acc
+from autogl.module.nas.algorithm.enas import Enas
 
 if __name__ == '__main__':
     dataset = build_dataset_from_name('cora')
@@ -30,6 +31,7 @@ if __name__ == '__main__':
             feval=['acc'],
             loss="nll_loss",
             lr_scheduler_type=None,),
+        #nas_algorithms=[Enas()],
         nas_algorithms=[Darts(num_epochs=1)],
         nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv])],
         nas_estimators=[OneShotEstimator()]
     )
     solver.fit(dataset)
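For context (not part of the patch): after this move, Enas sits beside Darts under nas/algorithm and follows the same search(space, dataset, estimator) protocol, returning a concrete model through space.export(). A sketch of the wiring, mirroring the commented-out option added to examples/test_nas.py; the Enas() constructor defaults are assumed, and in practice the solver fills in the space's input and output dimensions before searching.

from autogl.datasets import build_dataset_from_name
from autogl.module.nas import OneShotEstimator, SinglePathNodeClassificationSpace
from autogl.module.nas.algorithm.enas import Enas

dataset = build_dataset_from_name('cora')
space = SinglePathNodeClassificationSpace(hidden_dim=16)  # dims assumed preset
algo = Enas()  # assumed: defaults suffice, as in the commented example line
# Alternately trains shared weights and the controller, then exports the
# selected architecture as a BaseModel:
model = algo.search(space, dataset, OneShotEstimator())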
From a94341e93839b9d8637ee776681dffa104ddb73d Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Fri, 23 Apr 2021 13:21:18 +0000
Subject: [PATCH 051/144] fix bugs in nas

---
 autogl/module/nas/space/single_path.py          |  5 +++--
 autogl/module/train/base.py                     |  2 ++
 autogl/module/train/node_classification_full.py |  6 +++---
 examples/test_nas.py                            | 10 +++-------
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py
index 595a42f..42da228 100644
--- a/autogl/module/nas/space/single_path.py
+++ b/autogl/module/nas/space/single_path.py
@@ -33,7 +33,7 @@ class FixedNodeClassificationModel(BaseModel):
         return super().to(device)
 
     def forward(self, *args, **kwargs):
-        return self._model.forward(*args, **kwargs)
+        return self._model(*args, **kwargs)
 
     def from_hyper_parameter(self, hp):
         """
@@ -84,6 +84,7 @@ class SinglePathNodeClassificationSpace(BaseSpace):
         self.input_dim = input_dim or self.input_dim
         self.output_dim = output_dim or self.output_dim
         self.ops = ops or self.ops
+        self.dropout = dropout or self.dropout
         for layer in range(self.layer_number):
             setattr(
                 self,
@@ -109,7 +110,7 @@ class SinglePathNodeClassificationSpace(BaseSpace):
             x = getattr(self, f"op_{layer}")(x, edges)
             if layer != self.layer_number - 1:
                 x = F.relu(x)
-                x = F.dropout(x, p=self.dropout)
+                x = F.dropout(x, p=self.dropout, training=self.training)
         return F.log_softmax(x, dim=1)
 
     def export(self, selection, device) -> BaseModel:
diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py
index 16227e9..7d2284d 100644
--- a/autogl/module/train/base.py
+++ b/autogl/module/train/base.py
@@ -357,6 +357,8 @@ class _BaseClassificationTrainer(BaseTrainer):
             )
         elif isinstance(model, BaseModel):
             _model: BaseModel = model
+        elif model is None:
+            _model = None
         else:
             raise TypeError(
                 f"Model argument only support str or BaseModel, got {model}."
diff --git a/autogl/module/train/node_classification_full.py b/autogl/module/train/node_classification_full.py
index 480cb1e..b219903 100644
--- a/autogl/module/train/node_classification_full.py
+++ b/autogl/module/train/node_classification_full.py
@@ -56,9 +56,9 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer):
 
     def __init__(
         self,
-        model: Union[BaseModel, str],
-        num_features,
-        num_classes,
+        model: Union[BaseModel, str]=None,
+        num_features=None,
+        num_classes=None,
         optimizer=None,
         lr=None,
         max_epoch=None,
diff --git a/examples/test_nas.py b/examples/test_nas.py
index 22aa03c..b1a956a 100644
--- a/examples/test_nas.py
+++ b/examples/test_nas.py
@@ -1,10 +1,9 @@
 import sys
 sys.path.append('../')
-from torch_geometric.nn import GCNConv, GATConv
+from torch_geometric.nn import GCNConv
 import torch
 from autogl.datasets import build_dataset_from_name
 from autogl.solver import AutoNodeClassifier
-from autogl.module.model import BaseModel
 from autogl.module.train import NodeClassificationFullTrainer
 from autogl.module.nas import Darts, OneShotEstimator, SinglePathNodeClassificationSpace
 from autogl.module.train import Acc
@@ -12,14 +11,11 @@ from autogl.module.train import Acc
 if __name__ == '__main__':
     dataset = build_dataset_from_name('cora')
     solver = AutoNodeClassifier(
-        feature_module=None,
+        feature_module='PYGNormalizeFeatures',
         graph_models=[],
         hpo_module=None,
         ensemble_module=None,
         default_trainer=NodeClassificationFullTrainer(
-            BaseModel(),
-            None,
-            None,
             optimizer=torch.optim.Adam,
             lr=0.01,
             max_epoch=200,
@@ -30,7 +26,7 @@ if __name__ == '__main__':
             feval=['acc'],
             loss="nll_loss",
             lr_scheduler_type=None,),
-        nas_algorithms=[Darts(num_epochs=1)],
+        nas_algorithms=[Darts(num_epochs=200)],
         nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv])],
         nas_estimators=[OneShotEstimator()]
     )
     solver.fit(dataset)
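A consequence of the signature changes above, sketched for clarity (not part of the patch): the trainer can now be constructed without a model, so NAS can inject one later. This mirrors the updated examples/test_nas.py.

import torch
from autogl.module.train import NodeClassificationFullTrainer

# model, num_features and num_classes now default to None and are supplied
# later by the NAS pipeline; remaining values follow the example script.
trainer = NodeClassificationFullTrainer(
    optimizer=torch.optim.Adam,
    lr=0.01,
    max_epoch=200,
    feval=['acc'],
    loss="nll_loss",
)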
From 07d2329476aee23dc7343fe300af0cf3e2c73db1 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Mon, 26 Apr 2021 08:06:43 +0000
Subject: [PATCH 052/144] black style

---
 autogl/datasets/utils.py                      |  18 +-
 autogl/module/model/_model_registry.py        |   2 +-
 autogl/module/model/base.py                   | 115 ++--
 autogl/module/model/gcn.py                    |  74 +--
 autogl/module/model/graph_sage.py             |  60 ++-
 autogl/module/train/base.py                   | 138 ++---
 autogl/module/train/evaluation.py             |  23 +-
 .../module/train/graph_classification_full.py |  18 +-
 .../node_classification_sampled_trainer.py    | 503 +++++++++---------
 .../sampling/sampler/graphsaint_sampler.py    |  56 +-
 .../sampling/sampler/neighbor_sampler.py      |  84 +--
 autogl/solver/classifier/node_classifier.py   |   4 +-
 examples/graph_cv.py                          |   2 +-
 13 files changed, 608 insertions(+), 489 deletions(-)

diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py
index adc3923..b0708db 100644
--- a/autogl/datasets/utils.py
+++ b/autogl/datasets/utils.py
@@ -179,7 +179,9 @@ def random_splits_mask_class(
     return dataset
 
 
-def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False):
+def graph_cross_validation(
+    dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False
+):
     r"""Cross validation for graph classification data, returning one fold with
     specific idx in autogl.datasets or pyg.Dataloader(default)
 
     Parameters
@@ -197,7 +199,9 @@ def graph_cross_validation(dataset, n_splits=10, shuffle=True, random_seed=42, stratify=False):
         random_state for sklearn.model_selection.StratifiedKFold
     """
     if stratify:
-        skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed)
+        skf = StratifiedKFold(
+            n_splits=n_splits, shuffle=shuffle, random_state=random_seed
+        )
     else:
         skf = KFold(n_splits=n_splits, shuffle=shuffle, random_state=random_seed)
     idx_list = []
@@ -318,7 +322,9 @@ def graph_random_splits(dataset, train_ratio=0.2, val_ratio=0.4, seed=None):
     return dataset
 
 
-def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128, num_workers = 0):
+def graph_get_split(
+    dataset, mask="train", is_loader=True, batch_size=128, num_workers=0
+):
     r"""Get train/test dataset/dataloader after cross validation.
 
     Parameters
@@ -340,7 +346,11 @@ def graph_get_split(dataset, mask="train", is_loader=True, batch_size=128, num_w
         dataset, "%s_split" % (mask)
     ), "Given dataset do not have %s split" % (mask)
     if is_loader:
-        return DataLoader(getattr(dataset, "%s_split" % (mask)), batch_size=batch_size, num_workers = num_workers)
+        return DataLoader(
+            getattr(dataset, "%s_split" % (mask)),
+            batch_size=batch_size,
+            num_workers=num_workers,
+        )
     else:
         return getattr(dataset, "%s_split" % (mask))
diff --git a/autogl/module/model/_model_registry.py b/autogl/module/model/_model_registry.py
index d8270eb..14aa2d9 100644
--- a/autogl/module/model/_model_registry.py
+++ b/autogl/module/model/_model_registry.py
@@ -14,7 +14,7 @@ def register_model(name):
             )
         MODEL_DICT[name] = cls
         return cls
-    
+
     return register_model_cls
diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py
index 33cc5a9..965c306 100644
--- a/autogl/module/model/base.py
+++ b/autogl/module/model/base.py
@@ -9,6 +9,7 @@
 import typing as _typing
 import torch
 import torch.nn.functional as F
 from copy import deepcopy
+
 base_approach_logger: logging.Logger = logging.getLogger("BaseModel")
@@ -49,7 +50,11 @@ class BaseModel:
     def to(self, device):
         if isinstance(device, (str, torch.device)):
             self.device = device
-        if hasattr(self, "model") and self.model is not None and isinstance(self.model, torch.nn.Module):
+        if (
+            hasattr(self, "model")
+            and self.model is not None
+            and isinstance(self.model, torch.nn.Module)
+        ):
             self.model.to(self.device)
         return self
@@ -95,28 +100,28 @@ class _BaseBaseModel:
     designed to implement some basic functionality of BaseModel.
     -- Designed by ZiXin Sun
     """
+
     @classmethod
     def __formulate_device(
-            cls, device: _typing.Union[str, torch.device] = ...
+        cls, device: _typing.Union[str, torch.device] = ...
     ) -> torch.device:
-        if (
-            type(device) == torch.device or
-            (type(device) == str and device.strip().lower() != "auto")
+        if type(device) == torch.device or (
+            type(device) == str and device.strip().lower() != "auto"
         ):
             return torch.device(device)
         elif torch.cuda.is_available() and torch.cuda.device_count() > 0:
             return torch.device("cuda")
         else:
             return torch.device("cpu")
-    
+
     @property
     def device(self) -> torch.device:
         return self.__device
-    
+
    @device.setter
     def device(self, __device: _typing.Union[str, torch.device, None]):
         self.__device: torch.device = self.__formulate_device(__device)
-    
+
     @property
     def model(self) -> _typing.Optional[torch.nn.Module]:
         if self._model is None:
@@ -124,19 +129,18 @@ class _BaseBaseModel:
                 "property of model NOT initialized before accessing"
             )
         return self._model
-    
+
     @model.setter
     def model(self, _model: torch.nn.Module) -> None:
         if not isinstance(_model, torch.nn.Module):
             raise TypeError(
-                "the property of model MUST be an instance of "
-                "torch.nn.Module"
+                "the property of model MUST be an instance of " "torch.nn.Module"
             )
         self._model = _model
-    
+
     def _initialize(self):
         raise NotImplementedError
-    
+
     def initialize(self) -> bool:
         """
         Initialize the model in case that the model has NOT been initialized
@@ -147,7 +151,7 @@ class _BaseBaseModel:
             self.__is_initialized = True
             return True
         return False
-    
+
     # def to(self, *args, **kwargs):
     #     """
     #     Due to the signature of to() method in class BaseApproach
@@ -161,17 +165,18 @@ class _BaseBaseModel:
     #     :return: self
     #     """
    #     return super(_BaseBaseModel, self).to(*args, **kwargs)
-    
+
     def forward(self, *args, **kwargs):
         if self.model is not None and isinstance(self.model, torch.nn.Module):
             return self.model(*args, **kwargs)
         else:
             raise NotImplementedError
-    
+
     def __init__(
-            self, model: _typing.Optional[torch.nn.Module] = None,
-            initialize: bool = False,
-            device: _typing.Union[str, torch.device] = ...
+        self,
+        model: _typing.Optional[torch.nn.Module] = None,
+        initialize: bool = False,
+        device: _typing.Union[str, torch.device] = ...,
     ):
         if type(initialize) != bool:
             raise TypeError
@@ -188,64 +193,65 @@ class _BaseModel(_BaseBaseModel, BaseModel):
     The upcoming root base class for Model, i.e. BaseModel
     -- Designed by ZiXin Sun
     """
+
     # todo: Deprecate and remove the legacy class "BaseModel",
     # then rename this class to "BaseModel",
     # correspondingly, this class will no longer extend
     # the legacy class "BaseModel" after the removal.
     def _initialize(self):
         raise NotImplementedError
-    
+
     def to(self, device: torch.device):
         self.device = device
         if self.model is not None and isinstance(self.model, torch.nn.Module):
             self.model.to(self.device)
         return super().to(device)
-    
+
     @property
     def space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]:
         # todo: deprecate and remove in future major version
         return self.__hyper_parameter_space
-    
+
     @property
     def hyper_parameter_space(self):
         return self.__hyper_parameter_space
-    
+
     @hyper_parameter_space.setter
     def hyper_parameter_space(
-            self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]]
+        self, space: _typing.Sequence[_typing.Dict[str, _typing.Any]]
     ):
         self.__hyper_parameter_space = space
-    
+
     @property
     def hyper_parameter(self) -> _typing.Dict[str, _typing.Any]:
         return self.__hyper_parameter
-    
+
     @hyper_parameter.setter
     def hyper_parameter(self, _hyper_parameter: _typing.Dict[str, _typing.Any]):
         if not isinstance(_hyper_parameter, dict):
             raise TypeError
         self.__hyper_parameter = _hyper_parameter
-    
+
     def get_hyper_parameter(self) -> _typing.Dict[str, _typing.Any]:
         """
         todo: consider deprecating this trivial getter method in the future
         :return: copied hyper parameter
         """
         return copy.deepcopy(self.__hyper_parameter)
-    
+
     def __init__(
-            self, model: _typing.Optional[torch.nn.Module] = None,
-            initialize: bool = False,
-            hyper_parameter_space: _typing.Sequence[_typing.Any] = ...,
-            hyper_parameter: _typing.Dict[str, _typing.Any] = ...,
-            device: _typing.Union[str, torch.device] = ...
+        self,
+        model: _typing.Optional[torch.nn.Module] = None,
+        initialize: bool = False,
+        hyper_parameter_space: _typing.Sequence[_typing.Any] = ...,
+        hyper_parameter: _typing.Dict[str, _typing.Any] = ...,
+        device: _typing.Union[str, torch.device] = ...,
     ):
         if type(initialize) != bool:
             raise TypeError
         super(_BaseModel, self).__init__(model, initialize, device)
-        if (
-            hyper_parameter_space != Ellipsis and
-            isinstance(hyper_parameter_space, _typing.Sequence)
+        if hyper_parameter_space != Ellipsis and isinstance(
+            hyper_parameter_space, _typing.Sequence
         ):
             self.__hyper_parameter_space: _typing.Sequence[
                 _typing.Dict[str, _typing.Any]
@@ -266,27 +272,30 @@ class _BaseModel(_BaseBaseModel, BaseModel):
 class ClassificationModel(_BaseModel):
     def _initialize(self):
         raise NotImplementedError
-    
+
     def from_hyper_parameter(
-            self, hyper_parameter: _typing.Dict[str, _typing.Any]
+        self, hyper_parameter: _typing.Dict[str, _typing.Any]
     ) -> "ClassificationModel":
         new_model: ClassificationModel = self.__class__(
             num_features=self.num_features,
             num_classes=self.num_classes,
             device=self.device,
-            init=False
+            init=False,
         )
         _hyper_parameter = self.hyper_parameter
         _hyper_parameter.update(hyper_parameter)
         new_model.hyper_parameter = _hyper_parameter
         new_model.initialize()
         return new_model
-    
+
     def __init__(
-            self, num_features: int = ..., num_classes: int = ...,
-            num_graph_features: int = ...,
-            device: _typing.Union[str, torch.device] = ...,
-            init: bool = False, **kwargs
+        self,
+        num_features: int = ...,
+        num_classes: int = ...,
+        num_graph_features: int = ...,
+        device: _typing.Union[str, torch.device] = ...,
+        init: bool = False,
+        **kwargs
     ):
         if "initialize" in kwargs:
             del kwargs["initialize"]
@@ -308,11 +317,11 @@ class ClassificationModel(_BaseModel):
             self.__num_graph_features: int = 0
         else:
             self.__num_graph_features: int = 0
-    
+
     @property
     def num_classes(self) -> int:
         return self.__num_classes
-    
+
     @num_classes.setter
     def num_classes(self, __num_classes: int):
         if type(__num_classes) != int:
@@ -320,11 +329,11 @@ class ClassificationModel(_BaseModel):
         if not __num_classes > 0:
             raise ValueError
         self.__num_classes = __num_classes if __num_classes > 0 else 0
-    
+
     @property
     def num_features(self) -> int:
         return self.__num_features
-    
+
     @num_features.setter
     def num_features(self, __num_features: int):
         if type(__num_features) != int:
@@ -332,27 +341,27 @@ class ClassificationModel(_BaseModel):
         if not __num_features > 0:
             raise ValueError
         self.__num_features = __num_features if __num_features > 0 else 0
-    
+
     def get_num_classes(self) -> int:
         # todo: consider replacing with property with getter and setter
         return self.__num_classes
-    
+
     def set_num_classes(self, num_classes: int) -> None:
         # todo: consider replacing with property with getter and setter
         if type(num_classes) != int:
             raise TypeError
         self.__num_classes = num_classes if num_classes > 0 else 0
-    
+
     def get_num_features(self) -> int:
         # todo: consider replacing with property with getter and setter
         return self.__num_features
-    
+
     def set_num_features(self, num_features: int):
         # todo: consider replacing with property with getter and setter
         if type(num_features) != int:
             raise TypeError
         self.__num_features = num_features if num_features > 0 else 0
-    
+
     def set_num_graph_features(self, num_graph_features: int):
         # todo: consider replacing with property with getter and setter
         if type(num_graph_features) != int:
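For orientation (not part of the patch): the branch reformatted above implements the device rule shared across the model base classes. A standalone sketch of the same rule:

import torch

def resolve_device(device="auto"):
    # Explicit devices pass through; "auto" prefers CUDA when present.
    if isinstance(device, torch.device) or (
        isinstance(device, str) and device.strip().lower() != "auto"
    ):
        return torch.device(device)
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
        return torch.device("cuda")
    return torch.device("cpu")

assert resolve_device("cpu") == torch.device("cpu")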
diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py
index 4fa3594..73b91e0 100644
--- a/autogl/module/model/gcn.py
+++ b/autogl/module/model/gcn.py
@@ -11,9 +11,12 @@ LOGGER = get_logger("GCNModel")
 
 class GCN(torch.nn.Module):
     def __init__(
-        self, num_features: int, num_classes: int,
-        hidden_features: _typing.Sequence[int],
-        dropout: float, activation_name: str
+        self,
+        num_features: int,
+        num_classes: int,
+        hidden_features: _typing.Sequence[int],
+        dropout: float,
+        activation_name: str,
     ):
         super().__init__()
         self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList()
@@ -25,31 +28,33 @@ class GCN(torch.nn.Module):
                 )
             )
         else:
-            self.__convolution_layers.append(torch_geometric.nn.GCNConv(
-                num_features, hidden_features[0], add_self_loops=False
-            ))
+            self.__convolution_layers.append(
+                torch_geometric.nn.GCNConv(
+                    num_features, hidden_features[0], add_self_loops=False
+                )
+            )
             for i in range(len(hidden_features)):
                 self.__convolution_layers.append(
                     torch_geometric.nn.GCNConv(
                         hidden_features[i], hidden_features[i + 1]
-                    ) if i + 1 < len(hidden_features)
-                    else torch_geometric.nn.GCNConv(
-                        hidden_features[i], num_classes
                     )
+                    if i + 1 < len(hidden_features)
+                    else torch_geometric.nn.GCNConv(hidden_features[i], num_classes)
                 )
         self.__dropout: float = dropout
         self.__activation_name: str = activation_name
-    
+
     def __layer_wise_forward(self, data):
         # todo: Implement this forward method
         # in case that data.edge_indexes property is provided
         # for Layer-wise and Node-wise sampled training
         raise NotImplementedError
-    
+
     def __basic_forward(
-        self, x: torch.Tensor,
-        edge_index: torch.Tensor,
-        edge_weight: _typing.Optional[torch.Tensor] = None
+        self,
+        x: torch.Tensor,
+        edge_index: torch.Tensor,
+        edge_weight: _typing.Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         for layer_index in range(len(self.__convolution_layers)):
             x: torch.Tensor = self.__convolution_layers[layer_index](
@@ -57,31 +62,32 @@ class GCN(torch.nn.Module):
             )
             if layer_index + 1 < len(self.__convolution_layers):
                 x = activate_func(x, self.__activation_name)
-                x = torch.nn.functional.dropout(x, p=self.__dropout, training=self.training)
+                x = torch.nn.functional.dropout(
+                    x, p=self.__dropout, training=self.training
+                )
         return torch.nn.functional.log_softmax(x, dim=1)
-    
+
     def forward(self, data) -> torch.Tensor:
-        if (
-            hasattr(data, "edge_indexes") and
-            getattr(data, "edge_indexes") is not None
-        ):
+        if hasattr(data, "edge_indexes") and getattr(data, "edge_indexes") is not None:
             return self.__layer_wise_forward(data)
         else:
             if not (hasattr(data, "x") and hasattr(data, "edge_index")):
                 raise AttributeError
             if not (
-                type(getattr(data, "x")) == torch.Tensor and
-                type(getattr(data, "edge_index")) == torch.Tensor
+                type(getattr(data, "x")) == torch.Tensor
+                and type(getattr(data, "edge_index")) == torch.Tensor
             ):
                 raise TypeError
             x: torch.Tensor = getattr(data, "x")
             edge_index: torch.LongTensor = getattr(data, "edge_index")
             if (
-                hasattr(data, "edge_weight") and
-                type(getattr(data, "edge_weight")) == torch.Tensor and
-                getattr(data, "edge_weight").size() == (edge_index.size(1),)
+                hasattr(data, "edge_weight")
+                and type(getattr(data, "edge_weight")) == torch.Tensor
+                and getattr(data, "edge_weight").size() == (edge_index.size(1),)
             ):
-                edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight")
+                edge_weight: _typing.Optional[torch.Tensor] = getattr(
+                    data, "edge_weight"
+                )
             else:
                 edge_weight: _typing.Optional[torch.Tensor] = None
             return self.__basic_forward(x, edge_index, edge_weight)
@@ -120,18 +126,22 @@ class AutoGCN(ClassificationModel):
     """
 
     def __init__(
-        self, num_features: int = ..., num_classes: int = ...,
-        device: _typing.Union[str, torch.device] = ...,
-        init: bool = False, **kwargs
+        self,
+        num_features: int = ...,
+        num_classes: int = ...,
+        device: _typing.Union[str, torch.device] = ...,
+        init: bool = False,
+        **kwargs
     ) -> None:
         super(AutoGCN, self).__init__(
             num_features, num_classes, device=device, init=init, **kwargs
         )
-    
+
     def _initialize(self):
         self.model = GCN(
-            self.num_features, self.num_classes,
+            self.num_features,
+            self.num_classes,
             self.hyper_parameter.get("hidden"),
             self.hyper_parameter.get("dropout"),
-            self.hyper_parameter.get("act")
+            self.hyper_parameter.get("act"),
         ).to(self.device)
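A usage sketch for the GCN module whose layout changed above (not part of the patch); it assumes torch_geometric is installed and that "relu" is an activation name accepted by activate_func. The toy dimensions are illustrative.

import torch
from torch_geometric.data import Data
from autogl.module.model.gcn import GCN

model = GCN(
    num_features=8, num_classes=3,
    hidden_features=[16], dropout=0.5, activation_name="relu",
)
data = Data(x=torch.randn(10, 8), edge_index=torch.randint(0, 10, (2, 40)))
log_probs = model(data)  # log-softmax class scores, shape [10, 3]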
diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py
index fbe8f6e..2fe0450 100644
--- a/autogl/module/model/graph_sage.py
+++ b/autogl/module/model/graph_sage.py
@@ -9,19 +9,23 @@ from .base import BaseModel, activate_func
 
 class GraphSAGE(torch.nn.Module):
     def __init__(
-        self, num_features: int, num_classes: int,
-        hidden_features: _typing.Sequence[int],
-        dropout: float, activation_name: str,
-        aggr: str = "mean", **kwargs
+        self,
+        num_features: int,
+        num_classes: int,
+        hidden_features: _typing.Sequence[int],
+        dropout: float,
+        activation_name: str,
+        aggr: str = "mean",
+        **kwargs
     ):
         super(GraphSAGE, self).__init__()
         if type(aggr) != str:
             raise TypeError
         if aggr not in ("add", "max", "mean"):
             aggr = "mean"
-    
+
         self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList()
-    
+
         num_layers: int = len(hidden_features) + 1
         if num_layers == 1:
             self.__convolution_layers.append(
@@ -42,7 +46,7 @@ class GraphSAGE(torch.nn.Module):
             )
         self.__dropout: float = dropout
         self.__activation_name: str = activation_name
-    
+
     def __full_forward(self, data):
         x: torch.Tensor = getattr(data, "x")
         edge_index: torch.Tensor = getattr(data, "edge_index")
@@ -52,24 +56,26 @@ class GraphSAGE(torch.nn.Module):
             x = activate_func(x, self.__activation_name)
             x = F.dropout(x, p=self.__dropout, training=self.training)
         return F.log_softmax(x, dim=1)
-    
+
     def __distributed_forward(self, data):
         x: torch.Tensor = getattr(data, "x")
         edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes")
         if len(edge_indexes) != len(self.__convolution_layers):
             raise AttributeError
         for layer_index in range(len(self.__convolution_layers)):
-            x: torch.Tensor = self.__convolution_layers[layer_index](x, edge_indexes[layer_index])
+            x: torch.Tensor = self.__convolution_layers[layer_index](
+                x, edge_indexes[layer_index]
+            )
             if layer_index + 1 < len(self.__convolution_layers):
                 x = activate_func(x, self.__activation_name)
                 x = F.dropout(x, p=self.__dropout, training=self.training)
         return F.log_softmax(x, dim=1)
-    
+
     def forward(self, data):
         if (
-            hasattr(data, "edge_indexes") and
-            isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and
-            len(getattr(data, "edge_indexes")) == len(self.__convolution_layers)
+            hasattr(data, "edge_indexes")
+            and isinstance(getattr(data, "edge_indexes"), _typing.Sequence)
+            and len(getattr(data, "edge_indexes")) == len(self.__convolution_layers)
         ):
             return self.__distributed_forward(data)
         else:
@@ -79,15 +85,20 @@ class GraphSAGE(torch.nn.Module):
 @register_model("sage")
 class AutoSAGE(BaseModel):
     def __init__(
-        self, num_features: int = 1, num_classes: int = 1,
-        device: _typing.Optional[torch.device] = torch.device("cpu"),
-        init: bool = False, **kwargs
+        self,
+        num_features: int = 1,
+        num_classes: int = 1,
+        device: _typing.Optional[torch.device] = torch.device("cpu"),
+        init: bool = False,
+        **kwargs
     ):
         super(AutoSAGE, self).__init__(init)
         self.__num_features: int = num_features
         self.__num_classes: int = num_classes
-        self.__device: torch.device = device if device is not None else torch.device("cpu")
-    
+        self.__device: torch.device = (
+            device if device is not None else torch.device("cpu")
+        )
+
         self.hyperparams = {
             "num_layers": 3,
             "hidden": [64, 32],
@@ -97,26 +108,27 @@ class AutoSAGE(BaseModel):
         }
         self.params = {
             "num_features": self.__num_features,
-            "num_classes": self.__num_classes
+            "num_classes": self.__num_classes,
         }
-    
+
         self._model: GraphSAGE = GraphSAGE(
             self.__num_features, self.__num_classes, [64, 32], 0.5, "relu"
         )
-    
+
         self._initialized: bool = False
         if init:
             self.initialize()
-    
+
     @property
     def model(self) -> GraphSAGE:
         return self._model
-    
+
     def initialize(self):
         """ Initialize model """
         if not self._initialized:
             self._model: GraphSAGE = GraphSAGE(
-                self.__num_features, self.__num_classes,
+                self.__num_features,
+                self.__num_classes,
                 hidden_features=self.hyperparams["hidden"],
                 activation_name=self.hyperparams["act"],
                 **self.hyperparams
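Similarly for the AutoSAGE wrapper (a sketch, not part of the patch): init defaults to False, and per the __init__ shown above a default GraphSAGE with [64, 32] hidden units is already built at construction time. The toy dimensions are illustrative.

from autogl.module.model.graph_sage import AutoSAGE

auto_sage = AutoSAGE(num_features=8, num_classes=3)
sage_module = auto_sage.model  # default GraphSAGE([64, 32], dropout 0.5, relu)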
diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py
index 7ec47f1..af26fa4 100644
--- a/autogl/module/train/base.py
+++ b/autogl/module/train/base.py
@@ -83,15 +83,14 @@ class EarlyStopping:
 
 class BaseTrainer:
     def __init__(
-            self,
-            model: BaseModel,
-            device: _typing.Union[torch.device, str],
-            init: bool = True,
-            feval: _typing.Union[
-                _typing.Sequence[str],
-                _typing.Sequence[_typing.Type[Evaluation]]
-            ] = (Acc,),
-            loss: str = "nll_loss",
+        self,
+        model: BaseModel,
+        device: _typing.Union[torch.device, str],
+        init: bool = True,
+        feval: _typing.Union[
+            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
+        ] = (Acc,),
+        loss: str = "nll_loss",
     ):
         """
         The basic trainer.
@@ -108,47 +107,50 @@ class BaseTrainer:
         """
         super().__init__()
         self.model: BaseModel = model
-        if (
-            type(device) == torch.device or
-            (type(device) == str and device.lower() != "auto")
+        if type(device) == torch.device or (
+            type(device) == str and device.lower() != "auto"
         ):
             self.__device: torch.device = torch.device(device)
         else:
             self.__device: torch.device = torch.device(
-                "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu"
+                "cuda"
+                if torch.cuda.is_available() and torch.cuda.device_count() > 0
+                else "cpu"
             )
         self.init: bool = init
         self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(feval)
         self.loss: str = loss
-    
+
     @property
     def device(self) -> torch.device:
         return self.__device
-    
+
     @device.setter
     def device(self, __device: _typing.Union[torch.device, str]):
-        if (
-            type(__device) == torch.device or
-            (type(__device) == str and __device.lower() != "auto")
+        if type(__device) == torch.device or (
+            type(__device) == str and __device.lower() != "auto"
         ):
             self.__device: torch.device = torch.device(__device)
         else:
             self.__device: torch.device = torch.device(
-                "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu"
+                "cuda"
+                if torch.cuda.is_available() and torch.cuda.device_count() > 0
+                else "cpu"
            )
-    
+
     @property
     def feval(self) -> _typing.Sequence[_typing.Type[Evaluation]]:
         return self.__feval
-    
+
     @feval.setter
     def feval(
-            self, _feval: _typing.Union[
-                _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
-            ]
+        self,
+        _feval: _typing.Union[
+            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
+        ],
     ):
         self.__feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(_feval)
-    
+
     def to(self, device: torch.device):
         """
         Transfer the trainer to another device
@@ -168,7 +170,9 @@ class BaseTrainer:
         """Get auto model used in trainer."""
         raise NotImplementedError()
 
-    def get_feval(self, return_major: bool = False) -> _typing.Union[
+    def get_feval(
+        self, return_major: bool = False
+    ) -> _typing.Union[
         _typing.Type[Evaluation], _typing.Sequence[_typing.Type[Evaluation]]
     ]:
         """
@@ -212,7 +216,7 @@ class BaseTrainer:
         pass
 
     def duplicate_from_hyper_parameter(
-            self, hp, model: _typing.Optional[BaseModel] = ...
+        self, hp, model: _typing.Optional[BaseModel] = ...
     ) -> "BaseTrainer":
         """Create a new trainer with the given hyper parameter."""
         raise NotImplementedError()
@@ -322,30 +326,30 @@ class BaseTrainer:
 
 class _BaseClassificationTrainer(BaseTrainer):
     """ Base class of trainer for classification tasks """
-    
+
     def __init__(
-            self,
-            model: _typing.Union[BaseModel, str],
-            num_features: int,
-            num_classes: int,
-            device: _typing.Union[torch.device, str, None] = "auto",
-            init: bool = True,
-            feval: _typing.Union[
-                _typing.Sequence[str],
-                _typing.Sequence[_typing.Type[Evaluation]]
-            ] = (Acc,),
-            loss: str = "nll_loss",
+        self,
+        model: _typing.Union[BaseModel, str],
+        num_features: int,
+        num_classes: int,
+        device: _typing.Union[torch.device, str, None] = "auto",
+        init: bool = True,
+        feval: _typing.Union[
+            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
+        ] = (Acc,),
+        loss: str = "nll_loss",
     ):
         self.num_features: int = num_features
         self.num_classes: int = num_classes
-        if (
-            type(device) == torch.device or
-            (type(device) == str and device.lower() != "auto")
+        if type(device) == torch.device or (
+            type(device) == str and device.lower() != "auto"
        ):
             __device: torch.device = torch.device(device)
         else:
             __device: torch.device = torch.device(
-                "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu"
+                "cuda"
+                if torch.cuda.is_available() and torch.cuda.device_count() > 0
+                else "cpu"
             )
         if type(model) == str:
             _model: BaseModel = ModelUniversalRegistry.get_model(model)(
@@ -357,22 +361,23 @@ class _BaseClassificationTrainer(BaseTrainer):
             raise TypeError(
                 f"Model argument only support str or BaseModel, got ${model}."
             )
-        super(_BaseClassificationTrainer, self).__init__(_model, __device, init, feval, loss)
+        super(_BaseClassificationTrainer, self).__init__(
+            _model, __device, init, feval, loss
+        )
 
 
 class BaseNodeClassificationTrainer(_BaseClassificationTrainer):
     def __init__(
-            self,
-            model: _typing.Union[BaseModel, str],
-            num_features: int,
-            num_classes: int,
-            device: _typing.Union[torch.device, str, None] = None,
-            init: bool = True,
-            feval: _typing.Union[
-                _typing.Sequence[str],
-                _typing.Sequence[_typing.Type[Evaluation]]
-            ] = (Acc,),
-            loss: str = "nll_loss",
+        self,
+        model: _typing.Union[BaseModel, str],
+        num_features: int,
+        num_classes: int,
+        device: _typing.Union[torch.device, str, None] = None,
+        init: bool = True,
+        feval: _typing.Union[
+            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
+        ] = (Acc,),
+        loss: str = "nll_loss",
     ):
         super(BaseNodeClassificationTrainer, self).__init__(
             model, num_features, num_classes, device, init, feval, loss
@@ -381,18 +386,17 @@ class BaseNodeClassificationTrainer(_BaseClassificationTrainer):
 
 class BaseGraphClassificationTrainer(_BaseClassificationTrainer):
     def __init__(
-            self,
-            model: _typing.Union[BaseModel, str],
-            num_features: int,
-            num_classes: int,
-            num_graph_features: int = 0,
-            device: _typing.Union[torch.device, str, None] = None,
-            init: bool = True,
-            feval: _typing.Union[
-                _typing.Sequence[str],
-                _typing.Sequence[_typing.Type[Evaluation]]
-            ] = (Acc,),
-            loss: str = "nll_loss",
+        self,
+        model: _typing.Union[BaseModel, str],
+        num_features: int,
+        num_classes: int,
+        num_graph_features: int = 0,
+        device: _typing.Union[torch.device, str, None] = None,
+        init: bool = True,
+        feval: _typing.Union[
+            _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
+        ] = (Acc,),
+        loss: str = "nll_loss",
     ):
         self.num_graph_features: int = num_graph_features
         super(BaseGraphClassificationTrainer, self).__init__(
diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py
index 63fd81a..c3ed320 100644
--- a/autogl/module/train/evaluation.py
+++ b/autogl/module/train/evaluation.py
@@ -13,12 +13,12 @@ class Evaluation:
     def get_eval_name() -> str:
         """ Expected to return the name of this evaluation method """
         raise NotImplementedError
-    
+
     @staticmethod
     def is_higher_better() -> bool:
         """ Expected to return whether this evaluation method is higher better (bool) """
         return True
-    
+
     @staticmethod
     def evaluate(predict, label) -> float:
         """ Expected to return the evaluation result (float) """
@@ -39,6 +39,7 @@ def register_evaluate(*name):
                 )
             EVALUATE_DICT[n] = cls
         return cls
+
     return register_evaluate_cls
 
 
@@ -54,22 +55,26 @@ def get_feval(feval):
 
 class EvaluationUniversalRegistry:
     @classmethod
-    def register_evaluation(cls, *names) -> _typing.Callable[
-        [_typing.Type[Evaluation]], _typing.Type[Evaluation]
-    ]:
+    def register_evaluation(
+        cls, *names
+    ) -> _typing.Callable[[_typing.Type[Evaluation]], _typing.Type[Evaluation]]:
         def _register_evaluation(
-            _class: _typing.Type[Evaluation]
+            _class: _typing.Type[Evaluation],
         ) -> _typing.Type[Evaluation]:
             for n in names:
                 if n in EVALUATE_DICT:
-                    raise ValueError("Cannot register duplicate evaluator ({})".format(n))
+                    raise ValueError(
+                        "Cannot register duplicate evaluator ({})".format(n)
+                    )
                 if not issubclass(_class, Evaluation):
                     raise ValueError(
-                        "Evaluator ({}: {}) must extend Evaluation".format(n, cls.__name__)
+                        "Evaluator ({}: {}) must extend Evaluation".format(
+                            n, cls.__name__
+                        )
                     )
                 EVALUATE_DICT[n] = _class
             return _class
-        
+
         return _register_evaluation
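A sketch of the decorator API reformatted above (not part of the patch): registering a metric under a name lets trainers refer to it through feval. Array-style inputs are assumed for the toy metric.

import numpy as np
from autogl.module.train.evaluation import Evaluation, register_evaluate

@register_evaluate("toy_acc")
class ToyAcc(Evaluation):
    @staticmethod
    def get_eval_name() -> str:
        return "toy_acc"

    @staticmethod
    def is_higher_better() -> bool:
        return True

    @staticmethod
    def evaluate(predict, label) -> float:
        # predict: per-class probabilities; compare argmax to integer labels
        return float((np.argmax(predict, axis=1) == label).mean())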
""" - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._predict_proba(loader, in_log_format=True).max(1)[1] def predict_proba(self, dataset, mask="test", in_log_format=False): @@ -353,7 +355,9 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): ------- The prediction result. """ - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._predict_proba(loader, in_log_format) def _predict_proba(self, loader, in_log_format=False): @@ -436,7 +440,9 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): res: The evaluation result on the given dataset. """ - loader = utils.graph_get_split(dataset, mask, batch_size=self.batch_size, num_workers = self.num_workers) + loader = utils.graph_get_split( + dataset, mask, batch_size=self.batch_size, num_workers=self.num_workers + ) return self._evaluate(loader, feval) def _evaluate(self, loader, feval=None): diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 542b03a..cf91fc6 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -21,79 +21,90 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): for automatically training the node classification tasks with neighbour sampling """ - + def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - optimizer: _typing.Union[ - _typing.Type[torch.optim.Optimizer], str, None - ] = None, - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = None, + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, ) -> None: if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer elif type(optimizer) == str: if optimizer.lower() == "adam": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam elif optimizer.lower() == "adam" + "w": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.AdamW + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = 
torch.optim.AdamW elif optimizer.lower() == "sgd": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.SGD + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.SGD else: - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam else: - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam - + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + self._learning_rate: float = lr if lr > 0 else 1e-4 self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 - + self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") - + self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 - early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 - self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) + early_stopping_round: int = ( + early_stopping_round if early_stopping_round > 0 else 1e2 + ) + self._early_stopping = EarlyStopping( + patience=early_stopping_round, verbose=False + ) super(NodeClassificationNeighborSamplingTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) - + self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = [] - - self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] - + + self._hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] + self.__initialized: bool = False if init: self.initialize() - + def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": if self.__initialized: return self self.model.initialize() self.__initialized = True return self - + def get_model(self) -> BaseModel: return self.model - - def __train_only( - self, data - ) -> "NodeClassificationNeighborSamplingTrainer": + + def __train_only(self, data) -> "NodeClassificationNeighborSamplingTrainer": """ The function of training on the given dataset and mask. 
:param data: data of a specific graph @@ -102,38 +113,41 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.model.parameters(), - lr=self._learning_rate, weight_decay=self._weight_decay + lr=self._learning_rate, + weight_decay=self._weight_decay, ) if type(self._lr_scheduler_type) == str: if self._lr_scheduler_type.lower() == "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.StepLR = \ - torch.optim.lr_scheduler.StepLR( - optimizer, step_size=100, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.StepLR = ( + torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = \ + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=[30, 80], gamma=0.1 ) + ) elif self._lr_scheduler_type.lower() == "exponential" + "lr": - lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = \ - torch.optim.lr_scheduler.ExponentialLR( - optimizer, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( + torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): - lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = \ + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) - + ) + train_sampler: NeighborSampler = NeighborSampler( data, self.__sampling_sizes, batch_size=20 ) - + for current_epoch in range(self._max_epoch): self.model.model.train() """ epoch start """ @@ -147,20 +161,20 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) loss_function = getattr(torch.nn.functional, self.loss) loss: torch.Tensor = loss_function( - prediction[target_node_indexes], - data.y[target_node_indexes] + prediction[target_node_indexes], data.y[target_node_indexes] ) loss.backward() optimizer.step() - + if lr_scheduler is not None: lr_scheduler.step() - + """ Validate performance """ if hasattr(data, "val_mask") and getattr(data, "val_mask") is not None: - validation_results: _typing.Sequence[float] = \ - self.evaluate((data,), "val", [self.feval[0]]) - + validation_results: _typing.Sequence[float] = self.evaluate( + (data,), "val", [self.feval[0]] + ) + if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] else: @@ -172,7 +186,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): if hasattr(data, "val_mask") and data.val_mask is not None: self._early_stopping.load_checkpoint(self.model.model) return self - + def __predict_only(self, data): """ The function of predicting on the given data. 
@@ -184,7 +198,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): with torch.no_grad(): prediction = self.model.model(data) return prediction - + def train(self, dataset, keep_valid_result: bool = True): """ The function of training on the given dataset and keeping valid result. @@ -198,10 +212,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] self._valid_result_prob: torch.Tensor = prediction[data.val_mask] self._valid_score = self.evaluate(dataset, "val") - + def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, - in_log_format: bool = False + self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False ) -> torch.Tensor: """ The function of predicting the probability on the given dataset. @@ -224,29 +237,22 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): _mask = data.test_mask result = self.__predict_only(data)[_mask] return result if in_log_format else torch.exp(result) - + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: - return self.predict_proba( - dataset, mask, in_log_format=True - ).max(1)[1] - + return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] + def get_valid_predict(self) -> torch.Tensor: return self._valid_result - + def get_valid_predict_proba(self) -> torch.Tensor: return self._valid_result_prob - + def get_valid_score(self, return_major: bool = True): if return_major: - return ( - self._valid_score[0], - self.feval[0].is_higher_better() - ) + return (self._valid_score[0], self.feval[0].is_higher_better()) else: - return ( - self._valid_score, [f.is_higher_better() for f in self.feval] - ) - + return (self._valid_score, [f.is_higher_better() for f in self.feval]) + def get_name_with_hp(self) -> str: name = "-".join( [ @@ -259,25 +265,24 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ] ) name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) ) return name - + def evaluate( - self, - dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = None + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: data = dataset[0] data = data.to(self.device) @@ -295,53 +300,60 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): _mask = data.test_mask prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) y_ground_truth = data.y[_mask] - + results = [] for f in _feval: try: - results.append( - f.evaluate(prediction_probability, y_ground_truth) - ) + results.append(f.evaluate(prediction_probability, y_ground_truth)) except: results.append( - f.evaluate(prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy()) + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), + ) ) return results - + def to(self, device: torch.device): self.device = device if self.model is not None: self.model.to(self.device) - + def duplicate_from_hyper_parameter( - self, hp: _typing.Dict[str, _typing.Any], - 
model: _typing.Union[BaseModel, str, None] = None + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Union[BaseModel, str, None] = None, ) -> "NodeClassificationNeighborSamplingTrainer": - + if model is None or not isinstance(model, BaseModel): model = self.model model = model.from_hyper_parameter( dict( [ - x for x in hp.items() + x + for x in hp.items() if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] ] ) ) - + return NodeClassificationNeighborSamplingTrainer( - model, self.num_features, self.num_classes, + model, + self.num_features, + self.num_classes, self._optimizer_class, - device=self.device, init=True, - feval=self.feval, loss=self.loss, + device=self.device, + init=True, + feval=self.feval, + loss=self.loss, lr_scheduler_type=self._lr_scheduler_type, - **hp + **hp, ) - + @property def hyper_parameter_space(self): return self._hyper_parameter_space - + @hyper_parameter_space.setter def hyper_parameter_space(self, hp_space): self._hyper_parameter_space = hp_space @@ -350,50 +362,63 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): @register_trainer("NodeClassificationGraphSAINTTrainer") class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): def __init__( - self, - model: _typing.Union[BaseModel], - num_features: int, - num_classes: int, - optimizer: _typing.Union[ - _typing.Type[torch.optim.Optimizer], str, None - ], - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs + self, + model: _typing.Union[BaseModel], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None], + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, ) -> None: if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer elif type(optimizer) == str: if optimizer.lower() == "adam": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam elif optimizer.lower() == "adam" + "w": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.AdamW + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.AdamW elif optimizer.lower() == "sgd": - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.SGD + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.SGD else: - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam else: - self._optimizer_class: _typing.Type[torch.optim.Optimizer] = torch.optim.Adam + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam self._learning_rate: float = lr if lr > 0 else 1e-4 
self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 - early_stopping_round: int = early_stopping_round if early_stopping_round > 0 else 1e2 - self._early_stopping = EarlyStopping(patience=early_stopping_round, verbose=False) - + early_stopping_round: int = ( + early_stopping_round if early_stopping_round > 0 else 1e2 + ) + self._early_stopping = EarlyStopping( + patience=early_stopping_round, verbose=False + ) + # Assign an empty initial hyper parameter space - self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] - + self._hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] + self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () @@ -401,7 +426,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): super(NodeClassificationGraphSAINTTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) - + """ Set hyper parameters """ if "num_subgraphs" not in kwargs: raise KeyError @@ -427,23 +452,23 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): self.__sampling_method_identifier: str = kwargs.get("sampling_method") if self.__sampling_method_identifier.lower() not in ("node", "edge"): self.__sampling_method_identifier: str = "node" - + self.__is_initialized: bool = False if init: self.initialize() - + def initialize(self): if self.__is_initialized: return self self.model.initialize() self.__is_initialized = True return self - + def to(self, device: torch.device): self.device = device if self.model is not None: self.model.to(self.device) - + def get_model(self): return self.model @@ -456,34 +481,37 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.parameters(), - lr=self._learning_rate, weight_decay=self._weight_decay + lr=self._learning_rate, + weight_decay=self._weight_decay, ) if type(self._lr_scheduler_type) == str: if self._lr_scheduler_type.lower() == "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.StepLR = \ - torch.optim.lr_scheduler.StepLR( - optimizer, step_size=100, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.StepLR = ( + torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": - lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = \ + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=[30, 80], gamma=0.1 ) + ) elif self._lr_scheduler_type.lower() == "exponential" + "lr": - lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = \ - torch.optim.lr_scheduler.ExponentialLR( - optimizer, gamma=0.1 - ) + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( + torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) + ) elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): - lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = \ + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( 
torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) else: - lr_scheduler: torch.optim.lr_scheduler.LambdaLR = \ + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) - + ) + if self.__sampling_method_identifier.lower() == "edge": sub_graph_sampler = GraphSAINTRandomEdgeSampler( self.__sampling_budget, self.__num_subgraphs @@ -492,60 +520,58 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): sub_graph_sampler = GraphSAINTRandomNodeSampler( self.__sampling_budget, self.__num_subgraphs ) - + for current_epoch in range(self._max_epoch): self.model.model.train() """ epoch start """ """ Sample sub-graphs """ sub_graph_set = sub_graph_sampler.sample(data) - sub_graphs_loader: torch.utils.data.DataLoader = \ + sub_graphs_loader: torch.utils.data.DataLoader = ( torch.utils.data.DataLoader(sub_graph_set) + ) integral_alpha: torch.Tensor = getattr(sub_graph_set, "alpha") integral_lambda: torch.Tensor = getattr(sub_graph_set, "lambda") """ iterate sub-graphs """ for sub_graph_data in sub_graphs_loader: optimizer.zero_grad() - sampled_edge_indexes: torch.Tensor = \ - sub_graph_data.sampled_edge_indexes - sampled_node_indexes: torch.Tensor = \ - sub_graph_data.sampled_node_indexes - sampled_train_mask: torch.Tensor = \ - sub_graph_data.train_mask - + sampled_edge_indexes: torch.Tensor = sub_graph_data.sampled_edge_indexes + sampled_node_indexes: torch.Tensor = sub_graph_data.sampled_node_indexes + sampled_train_mask: torch.Tensor = sub_graph_data.train_mask + sampled_alpha = integral_alpha[sampled_edge_indexes] sub_graph_data.edge_weight = 1 / sampled_alpha prediction: torch.Tensor = self.model.model(sub_graph_data) - + if not hasattr(torch.nn.functional, self.loss): - raise TypeError( - f"PyTorch does not support loss type {self.loss}" - ) + raise TypeError(f"PyTorch does not support loss type {self.loss}") loss_func = getattr(torch.nn.functional, self.loss) unreduced_loss: torch.Tensor = loss_func( prediction[sampled_train_mask], data.y[sampled_train_mask], - reduction="none" + reduction="none", ) - + sampled_lambda: torch.Tensor = integral_lambda[sampled_node_indexes] sampled_train_lambda: torch.Tensor = sampled_lambda[sampled_train_mask] assert unreduced_loss.size() == sampled_train_lambda.size() - loss_weighted_sum: torch.Tensor = \ - torch.sum(unreduced_loss / sampled_train_lambda) + loss_weighted_sum: torch.Tensor = torch.sum( + unreduced_loss / sampled_train_lambda + ) loss_weighted_sum.backward() optimizer.step() - + if lr_scheduler is not None: lr_scheduler.step() - + """ Validate performance """ if ( - hasattr(data, "val_mask") and - type(getattr(data, "val_mask")) == torch.Tensor + hasattr(data, "val_mask") + and type(getattr(data, "val_mask")) == torch.Tensor ): - validation_results: _typing.Sequence[float] = \ - self.evaluate((data,), "val", [self.feval[0]]) + validation_results: _typing.Sequence[float] = self.evaluate( + (data,), "val", [self.feval[0]] + ) if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] else: @@ -557,7 +583,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): if hasattr(data, "val_mask") and data.val_mask is not None: self._early_stopping.load_checkpoint(self.model.model) return self - + def __predict_only(self, data): """ The function of predicting on the given data. 
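The epoch loop above implements GraphSAINT's importance normalization: sampled edges are re-weighted by 1 / alpha, and each node's loss is divided by its inclusion estimate lambda before summation. A self-contained sketch of that loss step, with made-up inputs:

    import torch
    import torch.nn.functional as F

    def saint_weighted_loss(log_probs, targets, node_lambda, train_mask):
        # Per-node NLL, normalized by each node's inclusion estimate (lambda).
        unreduced = F.nll_loss(log_probs[train_mask], targets[train_mask],
                               reduction="none")
        return torch.sum(unreduced / node_lambda[train_mask])

    log_probs = torch.log_softmax(torch.randn(4, 3), dim=1)
    targets = torch.tensor([0, 2, 1, 0])
    node_lambda = torch.tensor([1.0, 2.0, 1.0, 0.5])
    train_mask = torch.tensor([True, True, False, True])
    loss = saint_weighted_loss(log_probs, targets, node_lambda, train_mask)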
@@ -569,10 +595,9 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): with torch.no_grad(): predicted_x: torch.Tensor = self.model.model(data) return predicted_x - + def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, - in_log_format=False + self, dataset, mask: _typing.Optional[str] = None, in_log_format=False ): """ The function of predicting the probability on the given dataset. @@ -595,17 +620,17 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): _mask: torch.Tensor = data.test_mask result = self.__predict_only(data)[_mask] return result if in_log_format else torch.exp(result) - + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] - + def evaluate( - self, dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], - _typing.Sequence[_typing.Type[Evaluation]] - ] = None + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: data = dataset[0] data = data.to(self.device) @@ -624,24 +649,22 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): _mask: torch.Tensor = data.test_mask else: _mask: torch.Tensor = data.test_mask - prediction_probability: torch.Tensor = \ - self.predict_proba(dataset, mask) + prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) y_ground_truth: torch.Tensor = data.y[_mask] - + eval_results = [] for f in _feval: try: - eval_results.append( - f.evaluate(prediction_probability, y_ground_truth) - ) + eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) except: eval_results.append( f.evaluate( - prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy() + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), ) ) return eval_results - + def train(self, dataset, keep_valid_result: bool = True): """ The function of training on the given dataset and keeping valid result. 
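The evaluate() method above (like its neighbor-sampling counterpart) first hands torch tensors to each metric and only converts to numpy when that raises; the pattern in isolation, where metric_fn stands in for a feval's evaluate:

    import torch

    def safe_evaluate(metric_fn, prediction, ground_truth):
        # Try tensors first; fall back to numpy arrays for metrics
        # (e.g. scikit-learn scorers) that cannot consume tensors.
        try:
            return metric_fn(prediction, ground_truth)
        except Exception:
            return metric_fn(prediction.cpu().numpy(),
                             ground_truth.cpu().numpy())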
@@ -655,36 +678,36 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] self._valid_result_prob: torch.Tensor = prediction[data.val_mask] self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") - + def get_valid_predict(self) -> torch.Tensor: return self._valid_result - + def get_valid_predict_proba(self) -> torch.Tensor: return self._valid_result_prob - - def get_valid_score(self, return_major: bool = True) -> _typing.Tuple[ + + def get_valid_score( + self, return_major: bool = True + ) -> _typing.Tuple[ _typing.Union[float, _typing.Sequence[float]], - _typing.Union[bool, _typing.Sequence[bool]] + _typing.Union[bool, _typing.Sequence[bool]], ]: if return_major: return self._valid_score[0], self.feval[0].is_higher_better() else: - return ( - self._valid_score, [f.is_higher_better() for f in self.feval] - ) - + return (self._valid_score, [f.is_higher_better() for f in self.feval]) + @property def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: return self._hyper_parameter_space - + @hyper_parameter_space.setter def hyper_parameter_space( - self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] ) -> None: if not isinstance(hp_space, _typing.Sequence): raise TypeError self._hyper_parameter_space = hp_space - + def get_name_with_hp(self) -> str: name = "-".join( [ @@ -697,36 +720,42 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ] ) name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) ) return name - + def duplicate_from_hyper_parameter( - self, hp: _typing.Dict[str, _typing.Any], - model: _typing.Optional[BaseModel] = None + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None, ) -> "NodeClassificationGraphSAINTTrainer": if model is None or not isinstance(model, BaseModel): model: BaseModel = self.model model = model.from_hyper_parameter( dict( [ - x for x in hp.items() + x + for x in hp.items() if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] ] ) ) return NodeClassificationGraphSAINTTrainer( - model, self.num_features, self.num_classes, + model, + self.num_features, + self.num_classes, self._optimizer_class, - device=self.device, init=True, - feval=self.feval, loss=self.loss, + device=self.device, + init=True, + feval=self.feval, + loss=self.loss, lr_scheduler_type=self._lr_scheduler_type, - **hp + **hp, ) diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py index 9c5c978..27434e2 100644 --- a/autogl/module/train/sampling/sampler/graphsaint_sampler.py +++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py @@ -10,10 +10,10 @@ class _SubGraphSet(torch.utils.data.Dataset): self.__remaining_args: _typing.Sequence[_typing.Any] = args for key, value in kwargs.items(): setattr(self, key, value) - + def __len__(self) -> int: return len(self.__graphs) - + def __getitem__(self, index: int) -> _typing.Any: if not 0 <= index < len(self.__graphs): raise IndexError @@ -22,8 +22,12 @@ class _SubGraphSet(torch.utils.data.Dataset): class _GraphSAINTSubGraphSampler: def __init__( - self, sampler_class: 
_typing.Type[torch_geometric.data.GraphSAINTSampler], - budget: int, num_graphs: int = 1, walk_length: int = 1, num_workers: int = 0 + self, + sampler_class: _typing.Type[torch_geometric.data.GraphSAINTSampler], + budget: int, + num_graphs: int = 1, + walk_length: int = 1, + num_workers: int = 0, ): """ :param sampler_class: class of torch_geometric.data.GraphSAINTSampler @@ -40,7 +44,7 @@ class _GraphSAINTSubGraphSampler: self.__num_graphs: int = num_graphs self.__walk_length: int = walk_length self.__num_workers: int = num_workers if num_workers > 0 else 0 - + def sample(self, _integral_data) -> _SubGraphSet: """ :param _integral_data: conventional data for an integral graph @@ -49,18 +53,23 @@ class _GraphSAINTSubGraphSampler: data = copy.copy(_integral_data) data.sampled_node_indexes = torch.arange(data.num_nodes, dtype=torch.int64) data.sampled_edge_indexes = torch.arange(data.num_edges, dtype=torch.int64) - if type(self.__sampler_class) == torch_geometric.data.GraphSAINTRandomWalkSampler: - _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = \ + if ( + type(self.__sampler_class) + == torch_geometric.data.GraphSAINTRandomWalkSampler + ): + _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = ( torch_geometric.data.GraphSAINTRandomWalkSampler( - data, self.__budget, self.__walk_length, self.__num_graphs, - num_workers=self.__num_workers + data, + self.__budget, + self.__walk_length, + self.__num_graphs, + num_workers=self.__num_workers, ) + ) else: - _sampler: torch_geometric.data.GraphSAINTSampler = \ - self.__sampler_class( - data, self.__budget, self.__num_graphs, - num_workers=self.__num_workers - ) + _sampler: torch_geometric.data.GraphSAINTSampler = self.__sampler_class( + data, self.__budget, self.__num_graphs, num_workers=self.__num_workers + ) """ Sample sub-graphs """ datalist: list = [d for d in _sampler] """ Compute the normalization """ @@ -73,12 +82,16 @@ class _GraphSAINTSubGraphSampler: [sub_graph.sampled_edge_indexes for sub_graph in datalist] ) for current_sampled_node_index in concatenated_sampled_nodes.unique(): - node_sampled_count[current_sampled_node_index] = \ - torch.where(concatenated_sampled_nodes == current_sampled_node_index)[0].size(0) + node_sampled_count[current_sampled_node_index] = torch.where( + concatenated_sampled_nodes == current_sampled_node_index + )[0].size(0) for current_sampled_edge_index in concatenated_sampled_edges.unique(): - edge_sampled_count[current_sampled_edge_index] = \ - torch.where(concatenated_sampled_edges == current_sampled_edge_index)[0].size(0) - _alpha: torch.Tensor = edge_sampled_count / node_sampled_count[data.edge_index[1]] + edge_sampled_count[current_sampled_edge_index] = torch.where( + concatenated_sampled_edges == current_sampled_edge_index + )[0].size(0) + _alpha: torch.Tensor = ( + edge_sampled_count / node_sampled_count[data.edge_index[1]] + ) _alpha[torch.isnan(_alpha) | torch.isinf(_alpha)] = 0 _lambda: torch.Tensor = node_sampled_count / self.__num_graphs return _SubGraphSet(datalist, **{"alpha": _alpha, "lambda": _lambda}) @@ -101,5 +114,8 @@ class GraphSAINTRandomEdgeSampler(_GraphSAINTSubGraphSampler): class GraphSAINTRandomWalkSampler(_GraphSAINTSubGraphSampler): def __init__(self, edge_budget: int, num_graphs: int = 1, walk_length: int = 4): super(GraphSAINTRandomWalkSampler, self).__init__( - torch_geometric.data.GraphSAINTRandomWalkSampler, edge_budget, num_graphs, walk_length + torch_geometric.data.GraphSAINTRandomWalkSampler, + edge_budget, + num_graphs, + walk_length, ) diff --git 
a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index 0e62a74..53a6b7c 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -9,37 +9,41 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): class _NodeIndexesDataset(torch.utils.data.Dataset): def __init__(self, node_indexes): self.__node_indexes: _typing.Sequence[int] = node_indexes - + def __getitem__(self, index) -> int: if not 0 <= index < len(self.__node_indexes): raise IndexError("Index out of range") else: return self.__node_indexes[index] - + def __len__(self) -> int: return len(self.__node_indexes) - + def __init__( - self, data, - sampling_sizes: _typing.Sequence[int], - target_node_indexes: _typing.Optional[_typing.Sequence[int]] = None, - batch_size: _typing.Optional[int] = 1, - *args, **kwargs + self, + data, + sampling_sizes: _typing.Sequence[int], + target_node_indexes: _typing.Optional[_typing.Sequence[int]] = None, + batch_size: _typing.Optional[int] = 1, + *args, + **kwargs ): self._data = data self.__sampling_sizes: _typing.Sequence[int] = sampling_sizes - + if not ( - target_node_indexes is not None and - isinstance(target_node_indexes, _typing.Sequence) + target_node_indexes is not None + and isinstance(target_node_indexes, _typing.Sequence) ): if hasattr(data, "train_mask"): - target_node_indexes: _typing.Sequence[int] = \ - torch.where(getattr(data, "train_mask"))[0] + target_node_indexes: _typing.Sequence[int] = torch.where( + getattr(data, "train_mask") + )[0] else: - target_node_indexes: _typing.Sequence[int] = \ - list(np.arange(0, data.x.shape[0])) - + target_node_indexes: _typing.Sequence[int] = list( + np.arange(0, data.x.shape[0]) + ) + self.__edge_index_map: _typing.Dict[ int, _typing.Union[torch.Tensor, _typing.Sequence[int]] ] = {} @@ -47,9 +51,11 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): super(NeighborSampler, self).__init__( self._NodeIndexesDataset(target_node_indexes), batch_size=batch_size if batch_size > 0 else 1, - collate_fn=self.__sample, *args, **kwargs + collate_fn=self.__sample, + *args, + **kwargs ) - + def __init_edge_index_map(self): self.__edge_index_map.clear() all_edge_index: torch.Tensor = getattr(self._data, "edge_index") @@ -58,12 +64,12 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): self.__edge_index_map[target_node_index] = torch.where( all_edge_index[1] == target_node_index )[0] - + def __iter__(self): return super(NeighborSampler, self).__iter__() - + def __sample( - self, target_nodes_indexes: _typing.List[int] + self, target_nodes_indexes: _typing.List[int] ) -> _typing.Tuple[torch.Tensor, _typing.List[torch.Tensor]]: """ Sample a sub-graph with neighborhood sampling @@ -71,14 +77,15 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): """ original_edge_index: torch.Tensor = self._data.edge_index edges_indexes: _typing.List[torch.Tensor] = [] - + current_target_nodes_indexes: _typing.List[int] = target_nodes_indexes for current_sampling_size in self.__sampling_sizes: current_edge_index: _typing.Optional[torch.Tensor] = None for current_target_node_index in current_target_nodes_indexes: if current_target_node_index in self.__edge_index_map: - all_indexes: torch.Tensor = \ - self.__edge_index_map.get(current_target_node_index) + all_indexes: torch.Tensor = self.__edge_index_map.get( + current_target_node_index + ) else: 
all_indexes: torch.Tensor = torch.where( original_edge_index[1] == current_target_node_index @@ -89,25 +96,38 @@ class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): ) if current_edge_index is not None: current_edge_index: torch.Tensor = torch.cat( - [current_edge_index, original_edge_index[:, sampled_indexes]], dim=1 + [ + current_edge_index, + original_edge_index[:, sampled_indexes], + ], + dim=1, ) else: - current_edge_index: torch.Tensor = original_edge_index[:, sampled_indexes] + current_edge_index: torch.Tensor = original_edge_index[ + :, sampled_indexes + ] else: all_indexes_list = all_indexes.tolist() random.shuffle(all_indexes_list) - shuffled_indexes_list: _typing.List[int] = \ - all_indexes_list[0: current_sampling_size] + shuffled_indexes_list: _typing.List[int] = all_indexes_list[ + 0:current_sampling_size + ] if current_edge_index is not None: current_edge_index: torch.Tensor = torch.cat( - [current_edge_index, original_edge_index[:, shuffled_indexes_list]], dim=1 + [ + current_edge_index, + original_edge_index[:, shuffled_indexes_list], + ], + dim=1, ) else: - current_edge_index: torch.Tensor = original_edge_index[:, shuffled_indexes_list] + current_edge_index: torch.Tensor = original_edge_index[ + :, shuffled_indexes_list + ] edges_indexes.append(current_edge_index) - + if len(edges_indexes) < len(self.__sampling_sizes): next_target_nodes_indexes: torch.Tensor = current_edge_index[0].unique() current_target_nodes_indexes = next_target_nodes_indexes.tolist() - + return torch.tensor(target_nodes_indexes), edges_indexes[::-1] diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 1d41d1a..cd0ed86 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -70,12 +70,10 @@ class AutoNodeClassifier(BaseClassifier): Default ``auto``. 
""" - # pylint: disable=W0102 - def __init__( self, feature_module=None, - graph_models=["gat", "gcn"], + graph_models=("gat", "gcn"), hpo_module="anneal", ensemble_module="voting", max_evals=50, diff --git a/examples/graph_cv.py b/examples/graph_cv.py index 49e409a..2dd938c 100644 --- a/examples/graph_cv.py +++ b/examples/graph_cv.py @@ -27,7 +27,7 @@ if __name__ == "__main__": choices=["mutag", "imdb-b", "imdb-m", "proteins", "collab"], ) parser.add_argument( - "--configs", default="../configs/graph_classification.yaml", help="config files" + "--configs", default="../configs/graphclf_full.yml", help="config files" ) parser.add_argument("--device", type=int, default=0, help="device to run on") parser.add_argument("--seed", type=int, default=0, help="random seed") From e3a11d5d5ec05f71aac9deebb205caf153b017eb Mon Sep 17 00:00:00 2001 From: cluster32 Date: Wed, 28 Apr 2021 11:47:25 +0800 Subject: [PATCH 053/144] ordered fields in space --- autogl/module/nas/algorithm/darts.py | 55 +------------------------- autogl/module/nas/algorithm/enas.py | 6 ++- autogl/module/nas/space/base.py | 47 ++++++++++++++++++++-- autogl/module/nas/space/single_path.py | 6 +-- autogl/module/nas/utils.py | 17 +++++++- examples/test_nas.py | 6 +-- 6 files changed, 72 insertions(+), 65 deletions(-) diff --git a/autogl/module/nas/algorithm/darts.py b/autogl/module/nas/algorithm/darts.py index 00ac3a5..c7510c7 100644 --- a/autogl/module/nas/algorithm/darts.py +++ b/autogl/module/nas/algorithm/darts.py @@ -12,64 +12,11 @@ from ..estimator.base import BaseEstimator from ..space import BaseSpace from ..utils import replace_layer_choice, replace_input_choice from ...model.base import BaseModel +from nni.retiarii.oneshot.pytorch.darts import DartsLayerChoice, DartsInputChoice _logger = logging.getLogger(__name__) -class DartsLayerChoice(nn.Module): - def __init__(self, layer_choice): - super(DartsLayerChoice, self).__init__() - self.name = layer_choice.key - self.op_choices = nn.ModuleDict(layer_choice.named_children()) - self.alpha = nn.Parameter(torch.randn(len(self.op_choices)) * 1e-3) - - def forward(self, *args, **kwargs): - op_results = torch.stack( - [op(*args, **kwargs) for op in self.op_choices.values()] - ) - alpha_shape = [-1] + [1] * (len(op_results.size()) - 1) - return torch.sum(op_results * F.softmax(self.alpha, -1).view(*alpha_shape), 0) - - def parameters(self): - for _, p in self.named_parameters(): - yield p - - def named_parameters(self): - for name, p in super(DartsLayerChoice, self).named_parameters(): - if name == "alpha": - continue - yield name, p - - def export(self): - return torch.argmax(self.alpha).item() - - -class DartsInputChoice(nn.Module): - def __init__(self, input_choice): - super(DartsInputChoice, self).__init__() - self.name = input_choice.key - self.alpha = nn.Parameter(torch.randn(input_choice.n_candidates) * 1e-3) - self.n_chosen = input_choice.n_chosen or 1 - - def forward(self, inputs): - inputs = torch.stack(inputs) - alpha_shape = [-1] + [1] * (len(inputs.size()) - 1) - return torch.sum(inputs * F.softmax(self.alpha, -1).view(*alpha_shape), 0) - - def parameters(self): - for _, p in self.named_parameters(): - yield p - - def named_parameters(self): - for name, p in super(DartsInputChoice, self).named_parameters(): - if name == "alpha": - continue - yield name, p - - def export(self): - return torch.argsort(-self.alpha).cpu().numpy().tolist()[: self.n_chosen] - - class Darts(BaseNAS): """ DARTS trainer. 
diff --git a/autogl/module/nas/algorithm/enas.py b/autogl/module/nas/algorithm/enas.py index 3d175aa..57ca42b 100644 --- a/autogl/module/nas/algorithm/enas.py +++ b/autogl/module/nas/algorithm/enas.py @@ -8,7 +8,7 @@ import torch.nn.functional as F from .base import BaseNAS from ..space import BaseSpace -from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice +from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module from nni.nas.pytorch.fixed import apply_fixed_architecture _logger = logging.getLogger(__name__) def _get_mask(sampled, total): @@ -297,8 +297,12 @@ class Enas(BaseNAS): ) # replace choice self.nas_modules = [] + + k2o = get_module_order(self.model) replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules) replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules) + self.nas_modules = sort_replaced_module(k2o, self.nas_modules) + # to device self.model = self.model.to(self.device) # fields diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py index 1022dce..4c38584 100644 --- a/autogl/module/nas/space/base.py +++ b/autogl/module/nas/space/base.py @@ -1,7 +1,21 @@ from abc import abstractmethod from autogl.module.model import BaseModel import torch.nn as nn +from nni.nas.pytorch import mutables +class OrderedMutable(): + def __init__(self, order): + self.order = order + +class OrderedLayerChoice(OrderedMutable, mutables.LayerChoice): + def __init__(self, order, *args, **kwargs): + OrderedMutable.__init__(self, order) + mutables.LayerChoice.__init__(self, *args, **kwargs) + +class OrderedInputChoice(OrderedMutable, mutables.InputChoice): + def __init__(self, order, *args, **kwargs): + OrderedMutable.__init__(self, order) + mutables.InputChoice.__init__(self, *args, **kwargs) class BaseSpace(nn.Module): """ @@ -21,12 +35,11 @@ class BaseSpace(nn.Module): self._initialized = False @abstractmethod - def instantiate(self): + def _instantiate(self): """ Instantiate modules in the space """ - if not self._initialized: - self._initialized = True + raise NotImplementedError() @abstractmethod def forward(self, *args, **kwargs): @@ -53,3 +66,31 @@ class BaseSpace(nn.Module): model to be exported. 
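Taken together, these ordering changes record an integer order on every mutable and sort the replaced (key, module) pairs by it before ENAS consumes them, so the controller sees choices in construction order. A toy illustration of the sort (keys and values are hypothetical):

    # sort_replaced_module re-orders (key, module) pairs by the order
    # recorded on each mutable when the space was instantiated.
    key2order = {"in_2": 0, "op_2": 1, "op_3": 2}
    nas_modules = [("op_3", "m3"), ("in_2", "m1"), ("op_2", "m2")]
    nas_modules = sorted(nas_modules, key=lambda pair: key2order[pair[0]])
    # keys now come out as ["in_2", "op_2", "op_3"]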
""" raise NotImplementedError() + + def instantiate(self, *args, **kwargs): + self._default_key = 0 + self._instantiate(*args, **kwargs) + if not self._initialized: + self._initialized = True + + def setLayerChoice(self, *args, **kwargs): + """ + Give a unique key if not given + """ + if len(args) < 5 and not "key" in kwargs: + key = f"default_key_{self._default_key}" + self._default_key += 1 + kwargs["key"] = key + layer = OrderedLayerChoice(*args, **kwargs) + return layer + + def setInputChoice(self, *args, **kwargs): + """ + Give a unique key if not given + """ + if len(args) < 7 and not "key" in kwargs: + key = f"default_key_{self._default_key}" + self._default_key += 1 + kwargs["key"] = key + layer = OrderedInputChoice(*args, **kwargs) + return layer diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py index 8bbe8b3..c1a2a60 100644 --- a/autogl/module/nas/space/single_path.py +++ b/autogl/module/nas/space/single_path.py @@ -71,7 +71,7 @@ class SinglePathNodeClassificationSpace(BaseSpace): self.ops = ops self.dropout = dropout - def instantiate( + def _instantiate( self, hidden_dim: _typ.Optional[int] = None, layer_number: _typ.Optional[int] = None, @@ -89,7 +89,8 @@ class SinglePathNodeClassificationSpace(BaseSpace): setattr( self, f"op_{layer}", - mutables.LayerChoice( + self.setLayerChoice( + layer, [ op( self.input_dim if layer == 0 else self.hidden_dim, @@ -99,7 +100,6 @@ class SinglePathNodeClassificationSpace(BaseSpace): ) for op in self.ops ], - key=f"{layer}", ), ) self._initialized = True diff --git a/autogl/module/nas/utils.py b/autogl/module/nas/utils.py index 4b76d5b..2504cfc 100644 --- a/autogl/module/nas/utils.py +++ b/autogl/module/nas/utils.py @@ -7,7 +7,7 @@ from collections import OrderedDict import numpy as np import torch import nni.retiarii.nn.pytorch as nn -from nni.nas.pytorch.mutables import InputChoice, LayerChoice +from nni.nas.pytorch.mutables import Mutable, InputChoice, LayerChoice _logger = logging.getLogger(__name__) @@ -123,6 +123,21 @@ class AverageMeter: fmtstr = "{name}: {avg" + self.fmt + "}" return fmtstr.format(**self.__dict__) +def get_module_order(root_module): + key2order = {} + def apply(m): + for name, child in m.named_children(): + if isinstance(child, Mutable): + key2order[child.key] = child.order + else: + apply(child) + + apply(root_module) + return key2order + +def sort_replaced_module(k2o, modules): + modules = sorted(modules, key = lambda x:k2o[x[0]]) + return modules def _replace_module_with_type(root_module, init_fn, type_name, modules): if modules is None: diff --git a/examples/test_nas.py b/examples/test_nas.py index 5e053b4..6d27c26 100644 --- a/examples/test_nas.py +++ b/examples/test_nas.py @@ -31,9 +31,9 @@ if __name__ == '__main__': feval=['acc'], loss="nll_loss", lr_scheduler_type=None,), - #nas_algorithms=[Enas()], - nas_algorithms=[Darts(num_epochs=1)], - nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GATConv])], + nas_algorithms=[Enas()], + #nas_algorithms=[Darts(num_epochs=1)], + nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv])], nas_estimators=[OneShotEstimator()] ) solver.fit(dataset) From ee6e58e802c351b80eb37abec7648cb1d9646616 Mon Sep 17 00:00:00 2001 From: lihy96 Date: Thu, 29 Apr 2021 14:18:18 +0800 Subject: [PATCH 054/144] roc auc (gcn on cora) ~0.911 --- examples/link_prediction.py | 57 +++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 examples/link_prediction.py diff 
--git a/examples/link_prediction.py b/examples/link_prediction.py new file mode 100644 index 0000000..65236a4 --- /dev/null +++ b/examples/link_prediction.py @@ -0,0 +1,57 @@ +import os.path as osp +import sys +sys.path.insert(0, '../') +import torch +from torch_geometric.datasets import Planetoid +import torch_geometric.transforms as T +from sklearn.metrics import accuracy_score as acc +from sklearn.metrics import roc_auc_score +from autogl.module.train import LinkPredictionTrainer +import numpy as np +from torch_geometric.utils import train_test_split_edges + +dataset = 'Cora' +path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) +dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures()) + +print('len', len(dataset)) +print('num_class', dataset.num_classes) +print('num_node_features', dataset.num_node_features) + +a = [] +for _ in range(10): + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + data = dataset[0] + + data = data.to(device) + data.train_mask = data.val_mask = data.test_mask = data.y = None + data = train_test_split_edges(data) + + clf = LinkPredictionTrainer( + 'gcn', + num_features=dataset.num_node_features, + num_classes=dataset.num_classes, + max_epoch=100, + early_stopping_round=101, + feval=['auc'], + lr=0.01, + weight_decay=0, + lr_scheduler_type=None, + ) + clf.train([data], keep_valid_result=True) + print(clf.valid_score, end=',') + y = clf.predict([data], 'test') + y_ = y.cpu().numpy() + # acc_ = y.eq(data.y[data.test_mask]).sum().item() / data.test_mask.sum().item() + # print(acc_, end=',') + + pos_edge_index = data[f'test_pos_edge_index'] + neg_edge_index = data[f'test_neg_edge_index'] + link_labels = clf.get_link_labels(pos_edge_index, neg_edge_index) + label = link_labels.cpu().numpy() + ret = roc_auc_score(label, y_) + print(ret) + a.append(ret) +print(np.mean(a), np.std(a)) + + From 9cbd8af51aee2ea2647f572fdcc3e9b15355622c Mon Sep 17 00:00:00 2001 From: lihy96 Date: Thu, 29 Apr 2021 14:42:16 +0800 Subject: [PATCH 055/144] add encode and decode in gat and graphsage --- autogl/module/model/gat.py | 18 ++++++++++++++++++ autogl/module/model/graphsage.py | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/autogl/module/model/gat.py b/autogl/module/model/gat.py index 4a5a3f2..9f20530 100644 --- a/autogl/module/model/gat.py +++ b/autogl/module/model/gat.py @@ -82,6 +82,24 @@ class GAT(torch.nn.Module): return F.log_softmax(x, dim=1) + def encode(self, data): + x = data.x + for i in range(self.num_layer - 1): + x = self.convs[i](x, data.train_pos_edge_index) + if i != self.num_layer - 2: + x = activate_func(x, self.args["act"]) + # x = F.dropout(x, p=self.args["dropout"], training=self.training) + return x + + def decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + return logits + + def decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + @register_model("gat") class AutoGAT(BaseModel): diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py index 6c492a5..3472896 100644 --- a/autogl/module/model/graphsage.py +++ b/autogl/module/model/graphsage.py @@ -159,6 +159,24 @@ class GraphSAGE(torch.nn.Module): return F.log_softmax(x, dim=1) + def encode(self, data): + x = data.x + for i in range(self.num_layer - 1): + x = self.convs[i](x, data.train_pos_edge_index) + if i != self.num_layer - 2: + x = 
activate_func(x, self.args["act"]) + # x = F.dropout(x, p=self.args["dropout"], training=self.training) + return x + + def decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + return logits + + def decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + @register_model("sage") class AutoSAGE(BaseModel): From a69f60750db945ab3eae69248cd99d2ea70d5cb1 Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Thu, 29 Apr 2021 08:43:23 +0000 Subject: [PATCH 056/144] add graph nas space. test ok for enas --- autogl/module/nas/algorithm/enas.py | 2 +- autogl/module/nas/space/graph_nas.py | 225 +++++++++++++++++++++++++++ examples/test_graph_nas.py | 39 +++++ 3 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 autogl/module/nas/space/graph_nas.py create mode 100644 examples/test_graph_nas.py diff --git a/autogl/module/nas/algorithm/enas.py b/autogl/module/nas/algorithm/enas.py index 57ca42b..715139a 100644 --- a/autogl/module/nas/algorithm/enas.py +++ b/autogl/module/nas/algorithm/enas.py @@ -273,7 +273,7 @@ class Enas(BaseNAS): def __init__(self, device='cuda', workers=4,log_frequency=None, grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999, ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,*args,**kwargs): - super().__init__(*args,**kwargs) + super().__init__(device) self.device=device self.num_epochs = kwargs.get("num_epochs", 5) self.workers = workers diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py new file mode 100644 index 0000000..5966303 --- /dev/null +++ b/autogl/module/nas/space/graph_nas.py @@ -0,0 +1,225 @@ +from copy import deepcopy +import typing as _typ +import torch + +import torch.nn.functional as F +from nni.nas.pytorch import mutables +from nni.nas.pytorch.fixed import apply_fixed_architecture +from .base import BaseSpace +from ...model import BaseModel +from ....utils import get_logger + +from ...model import AutoGCN +from .single_path import FixedNodeClassificationModel +from .base import OrderedLayerChoice,OrderedInputChoice +from torch import nn + +from torch_geometric.nn.conv import * +from pdb import set_trace +gnn_list = [ + "gat_8", # GAT with 8 heads + "gat_6", # GAT with 6 heads + "gat_4", # GAT with 4 heads + "gat_2", # GAT with 2 heads + "gat_1", # GAT with 1 heads + "gcn", # GCN + "cheb", # chebnet + "sage", # sage + "arma", + "sg", # simplifying gcn + "linear", # skip connection + "zero", # skip connection +] +act_list = [ + # "sigmoid", "tanh", "relu", "linear", + # "softplus", "leaky_relu", "relu6", "elu" + "sigmoid", "tanh", "relu", "linear", "elu" +] + +class LambdaModule(nn.Module): + def __init__(self, lambd): + super().__init__() + self.lambd = lambd + + def forward(self, x): + return self.lambd(x) +class StrModule(nn.Module): + def __init__(self, lambd): + super().__init__() + self.str = lambd + + def forward(self, *args,**kwargs): + return self.str +def act_map(act): + if act == "linear": + return lambda x: x + elif act == "elu": + return F.elu + elif act == "sigmoid": + return torch.sigmoid + elif act == "tanh": + return torch.tanh + elif act == "relu": + return torch.nn.functional.relu + elif act == "relu6": + return torch.nn.functional.relu6 + elif act == "softplus": + return torch.nn.functional.softplus + elif act == "leaky_relu": + return torch.nn.functional.leaky_relu + else: + raise Exception("wrong 
activate function") +def act_map_nn(act): + return LambdaModule(act_map(act)) +def map_nn(l): + return [StrModule(x) for x in l] + +def gnn_map(gnn_name, in_dim, out_dim, concat=False, bias=True) -> nn.Module: + ''' + + :param gnn_name: + :param in_dim: + :param out_dim: + :param concat: for gat, concat multi-head output or not + :return: GNN model + ''' + if gnn_name == "gat_8": + return GATConv(in_dim, out_dim, 8, concat=concat, bias=bias) + elif gnn_name == "gat_6": + return GATConv(in_dim, out_dim, 6, concat=concat, bias=bias) + elif gnn_name == "gat_4": + return GATConv(in_dim, out_dim, 4, concat=concat, bias=bias) + elif gnn_name == "gat_2": + return GATConv(in_dim, out_dim, 2, concat=concat, bias=bias) + elif gnn_name in ["gat_1", "gat"]: + return GATConv(in_dim, out_dim, 1, concat=concat, bias=bias) + elif gnn_name == "gcn": + return GCNConv(in_dim, out_dim) + elif gnn_name == "cheb": + return ChebConv(in_dim, out_dim, K=2, bias=bias) + elif gnn_name == "sage": + return SAGEConv(in_dim, out_dim, bias=bias) + elif gnn_name == "gated": + return GatedGraphConv(in_dim, out_dim, bias=bias) + elif gnn_name == "arma": + return ARMAConv(in_dim, out_dim, bias=bias) + elif gnn_name == "sg": + return SGConv(in_dim, out_dim, bias=bias) + elif gnn_name == "linear": + return LinearConv(in_dim, out_dim, bias=bias) + elif gnn_name == "zero": + return ZeroConv(in_dim, out_dim, bias=bias) +class LinearConv(nn.Module): + def __init__(self, + in_channels, + out_channels, + bias=True): + super(LinearConv, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.linear = torch.nn.Linear(in_channels, out_channels, bias) + + def forward(self, x, edge_index, edge_weight=None): + return self.linear(x) + + def __repr__(self): + return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, + self.out_channels) + + +class ZeroConv(nn.Module): + def __init__(self, + in_channels, + out_channels, + bias=True): + super(ZeroConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.out_dim = out_channels + + + def forward(self, x, edge_index, edge_weight=None): + return torch.zeros([x.size(0), self.out_dim]).to(x.device) + + def __repr__(self): + return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, + self.out_channels) + +class GraphNasNodeClassificationSpace(BaseSpace): + def __init__( + self, + hidden_dim: _typ.Optional[int] = 64, + layer_number: _typ.Optional[int] = 2, + dropout: _typ.Optional[float] = 0.2, + input_dim: _typ.Optional[int] = None, + output_dim: _typ.Optional[int] = None, + ops: _typ.Tuple = None, + init: bool = False, + ): + super().__init__() + self.layer_number = layer_number + self.hidden_dim = hidden_dim + self.input_dim = input_dim + self.output_dim = output_dim + self.ops = ops + self.dropout = dropout + + def _instantiate( + self, + hidden_dim: _typ.Optional[int] = None, + layer_number: _typ.Optional[int] = None, + input_dim: _typ.Optional[int] = None, + output_dim: _typ.Optional[int] = None, + ops: _typ.Tuple = None, + dropout = None + ): + self.hidden_dim = hidden_dim or self.hidden_dim + self.layer_number = layer_number or self.layer_number + self.input_dim = input_dim or self.input_dim + self.output_dim = output_dim or self.output_dim + self.ops = ops or self.ops + self.dropout = dropout or self.dropout + self.preproc0 = nn.Linear(self.input_dim, self.hidden_dim) + self.preproc1 = nn.Linear(self.input_dim, self.hidden_dim) + node_labels = [mutables.InputChoice.NO_KEY, 
mutables.InputChoice.NO_KEY] + for layer in range(2,self.layer_number+2): + node_labels.append(f"op_{layer}") + setattr(self,f"in_{layer}",self.setInputChoice(layer,choose_from=node_labels[:-1], n_chosen=1, return_mask=False,key=f"in_{layer}")) + setattr(self,f"op_{layer}",self.setLayerChoice(layer,[gnn_map(op,self.hidden_dim,self.hidden_dim)for op in gnn_list],key=f"op_{layer}")) + # setattr(self,f"act",self.setLayerChoice(2*layer,[act_map_nn(a)for a in act_list],key=f"act")) + # setattr(self,f"concat",self.setLayerChoice(2*layer+1,map_nn(["add", "product", "concat"]) ,key=f"concat")) + self._initialized = True + + def forward(self, data): + x, edges = data.x, data.edge_index # x [2708,1433] ,[2, 10556] + pprev_, prev_ = self.preproc0(x), self.preproc1(x) + prev_nodes_out = [pprev_,prev_] + for layer in range(2,self.layer_number+2): + node_in = getattr(self, f"in_{layer}")(prev_nodes_out) + node_out= getattr(self, f"op_{layer}")(node_in,edges) + prev_nodes_out.append(node_out) + x = torch.cat(prev_nodes_out[2:],dim=1) + x = F.leaky_relu(x) + # x = F.dropout(x, p=self.dropout, training = self.training) + if False: + act=getattr(self, f"act") + con=getattr(self, f"concat")() + states=prev_nodes_out + if con == "concat": + x=torch.cat(states[2:], dim=1) + else: + tmp = states[2] + for i in range(2,len(states)): + if con == "add": + tmp = torch.add(tmp, states[i]) + elif con == "product": + tmp = torch.mul(tmp, states[i]) + x=tmp + x = act(x) + x = F.dropout(x, p=self.dropout, training = self.training) + return F.log_softmax(x, dim=1) + + def export(self, selection, device) -> BaseModel: + #return AutoGCN(self.input_dim, self.output_dim, device) + return FixedNodeClassificationModel(self, selection, device) \ No newline at end of file diff --git a/examples/test_graph_nas.py b/examples/test_graph_nas.py new file mode 100644 index 0000000..194c739 --- /dev/null +++ b/examples/test_graph_nas.py @@ -0,0 +1,39 @@ +import sys +sys.path.append('../') +from torch_geometric.nn import GCNConv +import torch +from autogl.datasets import build_dataset_from_name +from autogl.solver import AutoNodeClassifier +from autogl.module.train import NodeClassificationFullTrainer +from autogl.module.nas import Darts, OneShotEstimator +from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace +from autogl.module.train import Acc +from autogl.module.nas.algorithm.enas import Enas + +if __name__ == '__main__': + dataset = build_dataset_from_name('cora') + solver = AutoNodeClassifier( + feature_module='PYGNormalizeFeatures', + graph_models=[], + hpo_module=None, + ensemble_module=None, + default_trainer=NodeClassificationFullTrainer( + optimizer=torch.optim.Adam, + lr=0.01, + max_epoch=200, + early_stopping_round=200, + weight_decay=5e-4, + device="auto", + init=False, + feval=['acc'], + loss="nll_loss", + lr_scheduler_type=None,), + nas_algorithms=[Enas(num_epochs=10)], + #nas_algorithms=[Darts(num_epochs=200)], + nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv])], + nas_estimators=[OneShotEstimator()] + ) + solver.fit(dataset) + solver.get_leaderboard().show() + out = solver.predict_proba() + print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) From bb41925da4bb990c5266e8ddaefea87935e9b40b Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Thu, 29 Apr 2021 08:52:46 +0000 Subject: [PATCH 057/144] add act&concat search --- examples/test_graph_nas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/examples/test_graph_nas.py b/examples/test_graph_nas.py index 194c739..d1e297c 100644 --- a/examples/test_graph_nas.py +++ b/examples/test_graph_nas.py @@ -30,7 +30,7 @@ if __name__ == '__main__': lr_scheduler_type=None,), nas_algorithms=[Enas(num_epochs=10)], #nas_algorithms=[Darts(num_epochs=200)], - nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv])], + nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv],search_act_con=True)], nas_estimators=[OneShotEstimator()] ) solver.fit(dataset) From 3e9a11fa36be299dd3a9d4629687f0d5ff32ccbb Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 29 Apr 2021 11:25:17 +0000 Subject: [PATCH 058/144] change the default num_workers to 0 --- autogl/module/train/graph_classification_full.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index f6b32ae..1f4bb4a 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -98,7 +98,7 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): self.lr = lr if lr is not None else 1e-4 self.max_epoch = max_epoch if max_epoch is not None else 100 self.batch_size = batch_size if batch_size is not None else 64 - self.num_workers = num_workers if num_workers is not None else 4 + self.num_workers = num_workers if num_workers is not None else 0 if self.num_workers > 0: mp.set_start_method("fork", force=True) self.early_stopping_round = ( From ab0f8aec5cdb62c88eebfcdeedcc60815d411c8b Mon Sep 17 00:00:00 2001 From: null Date: Sun, 2 May 2021 18:23:00 +0800 Subject: [PATCH 059/144] Reproduce LADIES, a layer-wise sampling approach Reproduce LADIES, a layer-wise sampling approach assign default hyper parameter space for model fix bug for configs Planning major refactorings for upcoming minor unstable version.
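For context, LADIES (layer-dependent importance sampling, Zou et al., 2019) chooses each layer's nodes with probability proportional to the squared column norms of the normalized adjacency restricted to the rows already sampled for the layer above. A conceptual numpy sketch of one layer step, not the code this patch adds:

    import numpy as np
    import scipy.sparse as sp

    def ladies_layer(norm_adj, prev_nodes, budget):
        # Restrict to rows of the upper layer's nodes, then sample columns
        # by squared-norm importance (assumes budget <= nonzero columns).
        rows = norm_adj[prev_nodes, :]
        prob = np.asarray(rows.multiply(rows).sum(axis=0)).ravel()
        prob = prob / prob.sum()
        chosen = np.random.choice(norm_adj.shape[1], size=budget,
                                  replace=False, p=prob)
        return chosen, prob[chosen]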
--- autogl/module/model/base.py | 4 +- autogl/module/model/gcn.py | 101 ++++- autogl/module/model/graph_sage.py | 222 +++++++--- autogl/module/model/graphsage.py | 270 ------------ .../node_classification_sampled_trainer.py | 415 ++++++++++++++++-- .../layer_dependent_importance_sampler.py | 215 +++++++++ configs/nodeclf_ladies_gcn.yml | 65 +++ configs/nodeclf_sage_benchmark_large.yml | 6 +- configs/nodeclf_sage_benchmark_small.yml | 6 +- 9 files changed, 922 insertions(+), 382 deletions(-) delete mode 100644 autogl/module/model/graphsage.py create mode 100644 autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py create mode 100644 configs/nodeclf_ladies_gcn.yml diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index 965c306..9695eb5 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -294,13 +294,15 @@ class ClassificationModel(_BaseModel): num_classes: int = ..., num_graph_features: int = ..., device: _typing.Union[str, torch.device] = ..., + hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., init: bool = False, **kwargs ): if "initialize" in kwargs: del kwargs["initialize"] super(ClassificationModel, self).__init__( - initialize=init, device=device, **kwargs + initialize=init, hyper_parameter_space=hyper_parameter_space, + device=device, **kwargs ) if num_classes != Ellipsis and type(num_classes) == int: self.__num_classes: int = num_classes if num_classes > 0 else 0 diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 73b91e0..dbe0f8a 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -17,6 +17,7 @@ class GCN(torch.nn.Module): hidden_features: _typing.Sequence[int], dropout: float, activation_name: str, + add_self_loops: bool = True ): super().__init__() self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() @@ -24,13 +25,13 @@ class GCN(torch.nn.Module): if num_layers == 1: self.__convolution_layers.append( torch_geometric.nn.GCNConv( - num_features, num_classes, add_self_loops=False + num_features, num_classes, add_self_loops=add_self_loops ) ) else: self.__convolution_layers.append( torch_geometric.nn.GCNConv( - num_features, hidden_features[0], add_self_loops=False + num_features, hidden_features[0], add_self_loops=add_self_loops ) ) for i in range(len(hidden_features)): @@ -44,11 +45,31 @@ class GCN(torch.nn.Module): self.__dropout: float = dropout self.__activation_name: str = activation_name - def __layer_wise_forward(self, data): - # todo: Implement this forward method - # in case that data.edge_indexes property is provided - # for Layer-wise and Node-wise sampled training - raise NotImplementedError + def __layer_wise_forward( + self, x: torch.Tensor, + edge_indexes: _typing.Sequence[torch.Tensor], + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] + ) -> torch.Tensor: + assert len(edge_indexes) == len(edge_weights) == len(self.__convolution_layers) + for edge_index in edge_indexes: + if type(edge_index) != torch.Tensor: + raise TypeError + if edge_index.size(0) != 2: + raise ValueError + for edge_weight in edge_weights: + if not (edge_weight is None or type(edge_weight) == torch.Tensor): + raise TypeError + + for layer_index in range(len(self.__convolution_layers)): + x: torch.Tensor = self.__convolution_layers[layer_index]( + x, edge_indexes[layer_index], edge_weights[layer_index] + ) + if layer_index + 1 < len(self.__convolution_layers): + x = activate_func(x, self.__activation_name) + x = torch.nn.functional.dropout( + x, 
p=self.__dropout, training=self.training + ) + return torch.nn.functional.log_softmax(x, dim=1) def __basic_forward( self, @@ -68,8 +89,27 @@ class GCN(torch.nn.Module): return torch.nn.functional.log_softmax(x, dim=1) def forward(self, data) -> torch.Tensor: - if hasattr(data, "edge_indexes") and getattr(data, "edge_indexes") is not None: - return self.__layer_wise_forward(data) + if ( + hasattr(data, "edge_indexes") and + isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and + len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) + ): + edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") + if ( + hasattr(data, "edge_weights") and + isinstance(getattr(data, "edge_weights"), _typing.Sequence) and + len(getattr(data, "edge_weights")) == len(self.__convolution_layers) + ): + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = ( + getattr(data, "edge_weights") + ) + else: + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = [ + None for _ in range(len(self.__convolution_layers)) + ] + return self.__layer_wise_forward( + getattr(data, "x"), edge_indexes, edge_weights + ) else: if not (hasattr(data, "x") and hasattr(data, "edge_index")): raise AttributeError @@ -133,8 +173,45 @@ class AutoGCN(ClassificationModel): init: bool = False, **kwargs ) -> None: + default_hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [ + { + "parameterName": "add_self_loops", + "type": "CATEGORICAL", + "feasiblePoints": [1], + }, + { + "parameterName": "num_layers", + "type": "DISCRETE", + "feasiblePoints": "2,3,4", + }, + { + "parameterName": "hidden", + "type": "NUMERICAL_LIST", + "numericalType": "INTEGER", + "length": 3, + "minValue": [8, 8, 8], + "maxValue": [128, 128, 128], + "scalingType": "LOG", + "cutPara": ("num_layers",), + "cutFunc": lambda x: x[0] - 1, + }, + { + "parameterName": "dropout", + "type": "DOUBLE", + "maxValue": 0.8, + "minValue": 0.2, + "scalingType": "LINEAR", + }, + { + "parameterName": "act", + "type": "CATEGORICAL", + "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], + }, + ] + super(AutoGCN, self).__init__( - num_features, num_classes, device=device, init=init, **kwargs + num_features, num_classes, device=device, + hyper_parameter_space=default_hp_space, init=init, **kwargs ) def _initialize(self): @@ -144,4 +221,8 @@ class AutoGCN(ClassificationModel): self.hyper_parameter.get("hidden"), self.hyper_parameter.get("dropout"), self.hyper_parameter.get("act"), + add_self_loops=( + "add_self_loops" in self.hyper_parameter + and self.hyper_parameter.get("add_self_loops") + ) ).to(self.device) diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 2fe0450..1405b20 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -1,10 +1,10 @@ import typing as _typing import torch -import torch.nn.functional as F +import torch.nn.functional from torch_geometric.nn.conv import SAGEConv from . 
import register_model -from .base import BaseModel, activate_func +from .base import ClassificationModel, activate_func class GraphSAGE(torch.nn.Module): @@ -15,8 +15,7 @@ class GraphSAGE(torch.nn.Module): hidden_features: _typing.Sequence[int], dropout: float, activation_name: str, - aggr: str = "mean", - **kwargs + aggr: str = "mean" ): super(GraphSAGE, self).__init__() if type(aggr) != str: @@ -47,90 +46,173 @@ class GraphSAGE(torch.nn.Module): self.__dropout: float = dropout self.__activation_name: str = activation_name - def __full_forward(self, data): - x: torch.Tensor = getattr(data, "x") - edge_index: torch.Tensor = getattr(data, "edge_index") + def __basic_forward( + self, + x: torch.Tensor, + edge_index: torch.Tensor, + edge_weight: _typing.Optional[torch.Tensor] = None, + ) -> torch.Tensor: for layer_index in range(len(self.__convolution_layers)): - x: torch.Tensor = self.__convolution_layers[layer_index](x, edge_index) + x: torch.Tensor = self.__convolution_layers[layer_index]( + x, edge_index, edge_weight + ) if layer_index + 1 < len(self.__convolution_layers): x = activate_func(x, self.__activation_name) - x = F.dropout(x, p=self.__dropout, training=self.training) - return F.log_softmax(x, dim=1) - - def __distributed_forward(self, data): - x: torch.Tensor = getattr(data, "x") - edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") - if len(edge_indexes) != len(self.__convolution_layers): - raise AttributeError + x = torch.nn.functional.dropout( + x, p=self.__dropout, training=self.training + ) + return torch.nn.functional.log_softmax(x, dim=1) + + def __layer_wise_forward( + self, x: torch.Tensor, + edge_indexes: _typing.Sequence[torch.Tensor], + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] + ) -> torch.Tensor: + assert len(edge_indexes) == len(edge_weights) == len(self.__convolution_layers) + for edge_index in edge_indexes: + if type(edge_index) != torch.Tensor: + raise TypeError + if edge_index.size(0) != 2: + raise ValueError + for edge_weight in edge_weights: + if not (edge_weight is None or type(edge_weight) == torch.Tensor): + raise TypeError + for layer_index in range(len(self.__convolution_layers)): x: torch.Tensor = self.__convolution_layers[layer_index]( x, edge_indexes[layer_index] ) if layer_index + 1 < len(self.__convolution_layers): x = activate_func(x, self.__activation_name) - x = F.dropout(x, p=self.__dropout, training=self.training) - return F.log_softmax(x, dim=1) + x = torch.nn.functional.dropout(x, p=self.__dropout, training=self.training) + return torch.nn.functional.log_softmax(x, dim=1) - def forward(self, data): + def forward(self, data) -> torch.Tensor: if ( - hasattr(data, "edge_indexes") - and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) - and len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) + hasattr(data, "edge_indexes") and + isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and + len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) ): - return self.__distributed_forward(data) + edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") + if ( + hasattr(data, "edge_weights") and + isinstance(getattr(data, "edge_weights"), _typing.Sequence) and + len(getattr(data, "edge_weights")) == len(self.__convolution_layers) + ): + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = ( + getattr(data, "edge_weights") + ) + else: + edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = [ + None for _ in 
range(len(self.__convolution_layers)) + ] + return self.__layer_wise_forward( + getattr(data, "x"), edge_indexes, edge_weights + ) else: - return self.__full_forward(data) + if not (hasattr(data, "x") and hasattr(data, "edge_index")): + raise AttributeError + if not ( + type(getattr(data, "x")) == torch.Tensor + and type(getattr(data, "edge_index")) == torch.Tensor + ): + raise TypeError + x: torch.Tensor = getattr(data, "x") + edge_index: torch.LongTensor = getattr(data, "edge_index") + if ( + hasattr(data, "edge_weight") + and type(getattr(data, "edge_weight")) == torch.Tensor + and getattr(data, "edge_weight").size() == (edge_index.size(1),) + ): + edge_weight: _typing.Optional[torch.Tensor] = getattr( + data, "edge_weight" + ) + else: + edge_weight: _typing.Optional[torch.Tensor] = None + return self.__basic_forward(x, edge_index, edge_weight) @register_model("sage") -class AutoSAGE(BaseModel): - def __init__( - self, - num_features: int = 1, - num_classes: int = 1, - device: _typing.Optional[torch.device] = torch.device("cpu"), - init: bool = False, - **kwargs - ): - super(AutoSAGE, self).__init__(init) - self.__num_features: int = num_features - self.__num_classes: int = num_classes - self.__device: torch.device = ( - device if device is not None else torch.device("cpu") - ) +class AutoSAGE(ClassificationModel): + r""" + AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on + Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper. The layer is - self.hyperparams = { - "num_layers": 3, - "hidden": [64, 32], - "dropout": 0.5, - "act": "relu", - "aggr": "mean", - } - self.params = { - "num_features": self.__num_features, - "num_classes": self.__num_classes, - } - - self._model: GraphSAGE = GraphSAGE( - self.__num_features, self.__num_classes, [64, 32], 0.5, "relu" - ) + .. math:: + + \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot + \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j + + Parameters + ---------- + num_features: `int`. + The dimension of features. + + num_classes: `int`. + The number of classes. - self._initialized: bool = False - if init: - self.initialize() + device: `torch.device` or `str` + The device where model will be running on. - @property - def model(self) -> GraphSAGE: - return self._model + init: `bool`. + If True(False), the model will (not) be initialized.
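+
+    Example
+    -------
+    A minimal usage sketch (illustrative only; ``data`` is assumed to be a
+    ``torch_geometric.data.Data`` object with fields ``x`` and ``edge_index``,
+    and the feature/class counts below are placeholders):
+
+    .. code-block:: python
+
+        # build the automodel and its underlying GraphSAGE module
+        model = AutoSAGE(num_features=1433, num_classes=7, device="cpu", init=True)
+        log_probs = model.model(data)  # log-softmax scores, [num_nodes, num_classes]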
+ """ + + def __init__( + self, + num_features: int = ..., + num_classes: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, + **kwargs + ): + default_hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [ + { + "parameterName": "num_layers", + "type": "DISCRETE", + "feasiblePoints": "2,3,4", + }, + { + "parameterName": "hidden", + "type": "NUMERICAL_LIST", + "numericalType": "INTEGER", + "length": 3, + "minValue": [8, 8, 8], + "maxValue": [128, 128, 128], + "scalingType": "LOG", + "cutPara": ("num_layers",), + "cutFunc": lambda x: x[0] - 1, + }, + { + "parameterName": "dropout", + "type": "DOUBLE", + "maxValue": 0.8, + "minValue": 0.2, + "scalingType": "LINEAR", + }, + { + "parameterName": "act", + "type": "CATEGORICAL", + "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], + }, + { + "parameterName": "aggr", + "type": "CATEGORICAL", + "feasiblePoints": ["mean", "add", "max"], + }, + ] + super(AutoSAGE, self).__init__( + num_features, num_classes, device=device, + hyper_parameter_space=default_hp_space, init=init, **kwargs + ) - def initialize(self): + def _initialize(self): """ Initialize model """ - if not self._initialized: - self._model: GraphSAGE = GraphSAGE( - self.__num_features, - self.__num_classes, - hidden_features=self.hyperparams["hidden"], - activation_name=self.hyperparams["act"], - **self.hyperparams - ).to(self.__device) - self._initialized = True + self.model = GraphSAGE( + self.num_features, + self.num_classes, + self.hyper_parameter.get("hidden"), + self.hyper_parameter.get("dropout"), + self.hyper_parameter.get("act"), + self.hyper_parameter.get("aggr") + ).to(self.device) diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py deleted file mode 100644 index ac541b8..0000000 --- a/autogl/module/model/graphsage.py +++ /dev/null @@ -1,270 +0,0 @@ -import torch -from . import register_model -from .base import BaseModel, activate_func - -from typing import Union, Tuple -from torch_geometric.typing import OptPairTensor, Adj, Size - -from torch import Tensor -from torch.nn import Linear -import torch.nn.functional as F -from torch_sparse import SparseTensor, matmul -from torch_geometric.nn.conv import MessagePassing -from ...utils import get_logger - -LOGGER = get_logger("SAGEModel") - - -class SAGEConv(MessagePassing): - r"""Modified from SAGEConv in Pytorch Geometric - The GraphSAGE operator from the `"Inductive Representation Learning on - Large Graphs" `_ paper - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot - \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j - Args: - in_channels (int or tuple): Size of each input sample. A tuple - corresponds to the sizes of source and target dimensionalities. - out_channels (int): Size of each output sample. - normalize (bool, optional): If set to :obj:`True`, output features - will be :math:`\ell_2`-normalized, *i.e.*, - :math:`\frac{\mathbf{x}^{\prime}_i} - {\| \mathbf{x}^{\prime}_i \|_2}`. - (default: :obj:`False`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - **kwargs (optional): Additional arguments of - :class:`torch_geometric.nn.conv.MessagePassing`. 
- """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - normalize: bool = False, - bias: bool = True, - aggr: str = "mean", - **kwargs - ): - super(SAGEConv, self).__init__(aggr=aggr, **kwargs) - - self.in_channels = in_channels - self.out_channels = out_channels - self.normalize = normalize - - if isinstance(in_channels, int): - in_channels = (in_channels, in_channels) - - self.lin_l = Linear(in_channels[0], out_channels, bias=bias) - self.lin_r = Linear(in_channels[1], out_channels, bias=False) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_l.reset_parameters() - self.lin_r.reset_parameters() - - def forward( - self, x: Union[Tensor, OptPairTensor], edge_index: Adj, size: Size = None - ) -> Tensor: - """""" - if isinstance(x, Tensor): - x: OptPairTensor = (x, x) - - # propagate_type: (x: OptPairTensor) - out = self.propagate(edge_index, x=x, size=size) - out = self.lin_l(out) - - x_r = x[1] - if x_r is not None: - out += self.lin_r(x_r) - - if self.normalize: - out = F.normalize(out, p=2.0, dim=-1) - - return out - - def message(self, x_j: Tensor) -> Tensor: - return x_j - - def message_and_aggregate(self, adj_t: SparseTensor, x: OptPairTensor) -> Tensor: - adj_t = adj_t.set_value(None, layout=None) - return matmul(adj_t, x[0], reduce=self.aggr) - - def __repr__(self): - return "{}({}, {})".format( - self.__class__.__name__, self.in_channels, self.out_channels - ) - - -def set_default(args, d): - for k, v in d.items(): - if k not in args: - args[k] = v - return args - - -class GraphSAGE(torch.nn.Module): - def __init__(self, args): - super(GraphSAGE, self).__init__() - self.args = args - agg = self.args["agg"] - self.num_layer = int(self.args["num_layers"]) - if not self.num_layer == len(self.args["hidden"]) + 1: - LOGGER.warn("Warning: layer size does not match the length of hidden units") - - missing_keys = list( - set( - [ - "features_num", - "num_class", - "num_layers", - "hidden", - "dropout", - "act", - "agg", - ] - ) - - set(self.args.keys()) - ) - if len(missing_keys) > 0: - raise Exception("Missing keys: %s." % ",".join(missing_keys)) - - self.convs = torch.nn.ModuleList() - self.convs.append( - SAGEConv(self.args["features_num"], self.args["hidden"][0], aggr=agg) - ) - for i in range(self.num_layer - 2): - self.convs.append( - SAGEConv(self.args["hidden"][i], self.args["hidden"][i + 1], aggr=agg) - ) - self.convs.append( - SAGEConv( - self.args["hidden"][self.num_layer - 2], - self.args["num_class"], - aggr=agg, - ) - ) - - def forward(self, data): - try: - x = data.x - except: - print("no x") - pass - try: - edge_index = data.edge_index - except: - print("no index") - pass - try: - edge_weight = data.edge_weight - except: - edge_weight = None - pass - - for i in range(self.num_layer): - x = self.convs[i](x, edge_index, edge_weight) - if i != self.num_layer - 1: - x = activate_func(x, self.args["act"]) - x = F.dropout(x, p=self.args["dropout"], training=self.training) - - return F.log_softmax(x, dim=1) - - -# @register_model("sage") -class AutoSAGE(BaseModel): - r""" - AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on - Large Graphs" `_ paper. The layer is - - .. math:: - - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W_2} \cdot - \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j - - Parameters - ---------- - num_features: `int`. - The dimension of features. - - num_classes: `int`. - The number of classes. 
- - device: `torch.device` or `str` - The device where model will be running on. - - init: `bool`. - If True(False), the model will (not) be initialized. - - """ - - def __init__( - self, num_features=None, num_classes=None, device=None, init=False, **args - ): - - super(AutoSAGE, self).__init__() - - self.num_features = num_features if num_features is not None else 0 - self.num_classes = int(num_classes) if num_classes is not None else 0 - self.device = device if device is not None else "cpu" - self.init = True - - self.params = { - "features_num": self.num_features, - "num_class": self.num_classes, - } - self.space = [ - { - "parameterName": "num_layers", - "type": "DISCRETE", - "feasiblePoints": "2,3,4", - }, - { - "parameterName": "hidden", - "type": "NUMERICAL_LIST", - "numericalType": "INTEGER", - "length": 3, - "minValue": [8, 8, 8], - "maxValue": [128, 128, 128], - "scalingType": "LOG", - "cutPara": ("num_layers",), - "cutFunc": lambda x: x[0] - 1, - }, - { - "parameterName": "dropout", - "type": "DOUBLE", - "maxValue": 0.8, - "minValue": 0.2, - "scalingType": "LINEAR", - }, - { - "parameterName": "act", - "type": "CATEGORICAL", - "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], - }, - { - "parameterName": "agg", - "type": "CATEGORICAL", - "feasiblePoints": ["mean", "add", "max"], - }, - ] - - self.hyperparams = { - "num_layers": 3, - "hidden": [64, 32], - "dropout": 0.5, - "act": "relu", - "agg": "mean", - } - - self.initialized = False - if init is True: - self.initialize() - - def initialize(self): - # """Initialize model.""" - if self.initialized: - return - self.initialized = True - self.model = GraphSAGE({**self.params, **self.hyperparams}).to(self.device) diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index cf91fc6..8f727f9 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -9,6 +9,9 @@ from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation from ..evaluation import get_feval, Logloss from ..sampling.sampler.neighbor_sampler import NeighborSampler from ..sampling.sampler.graphsaint_sampler import * +from ..sampling.sampler.layer_dependent_importance_sampler import ( + LayerDependentImportanceSampler +) from ...model import BaseModel LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") @@ -366,7 +369,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): model: _typing.Union[BaseModel], num_features: int, num_classes: int, - optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None], + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., lr: float = 1e-4, max_epoch: int = 100, early_stopping_round: int = 100, @@ -428,30 +431,16 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ) """ Set hyper parameters """ - if "num_subgraphs" not in kwargs: - raise KeyError - elif type(kwargs.get("num_subgraphs")) != int: - raise TypeError - elif not kwargs.get("num_subgraphs") > 0: - raise ValueError - else: - self.__num_subgraphs: int = kwargs.get("num_subgraphs") - if "sampling_budget" not in kwargs: - raise KeyError - elif type(kwargs.get("sampling_budget")) != int: - raise TypeError - elif not kwargs.get("sampling_budget") > 0: - raise ValueError + 
self.__num_subgraphs: int = kwargs.get("num_subgraphs") + self.__sampling_budget: int = kwargs.get("sampling_budget") + if ( + kwargs.get("sampling_method") is not None + and type(kwargs.get("sampling_method")) == str + and kwargs.get("sampling_method") in ("node", "edge") + ): + self.__sampling_method_identifier: str = kwargs.get("sampling_method") else: - self.__sampling_budget: int = kwargs.get("sampling_budget") - if "sampling_method" not in kwargs: - self.__sampling_method_identifier: str = "node" - elif type(kwargs.get("sampling_method")) != str: self.__sampling_method_identifier: str = "node" - else: - self.__sampling_method_identifier: str = kwargs.get("sampling_method") - if self.__sampling_method_identifier.lower() not in ("node", "edge"): - self.__sampling_method_identifier: str = "node" self.__is_initialized: bool = False if init: @@ -480,7 +469,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): """ data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( - self.model.parameters(), + self.model.model.parameters(), lr=self._learning_rate, weight_decay=self._weight_decay, ) @@ -694,7 +683,9 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): if return_major: return self._valid_score[0], self.feval[0].is_higher_better() else: - return (self._valid_score, [f.is_higher_better() for f in self.feval]) + return ( + self._valid_score, [f.is_higher_better() for f in self.feval] + ) @property def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: @@ -759,3 +750,377 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): lr_scheduler_type=self._lr_scheduler_type, **hp, ) + + +@register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer") +class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer): + def __init__( + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, + ) -> None: + if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): + self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer + elif type(optimizer) == str: + if optimizer.lower() == "adam": + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + elif optimizer.lower() == "adam" + "w": + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.AdamW + elif optimizer.lower() == "sgd": + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.SGD + else: + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + else: + self._optimizer_class: _typing.Type[ + torch.optim.Optimizer + ] = torch.optim.Adam + self._learning_rate: float = lr if lr > 0 else 1e-4 + self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type + self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 + self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 + self._early_stopping = EarlyStopping( + 
patience=early_stopping_round if early_stopping_round > 0 else 1e2, + verbose=False + ) + """ Assign an empty initial hyper parameter space """ + self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] + + self._valid_result: torch.Tensor = torch.zeros(0) + self._valid_result_prob: torch.Tensor = torch.zeros(0) + self._valid_score: _typing.Sequence[float] = () + + super(NodeClassificationLayerDependentImportanceSamplingTrainer, self).__init__( + model, num_features, num_classes, device, init, feval, loss + ) + + """ Set hyper parameters """ + " Configure num_layers " + self.__num_layers: int = kwargs.get("num_layers") + " Configure sampled_node_size_budget " + self.__sampled_node_size_budget: int = ( + kwargs.get("sampled_node_size_budget") + ) + + self.__is_initialized: bool = False + if init: + self.initialize() + + def initialize(self): + if self.__is_initialized: + return self + self.model.initialize() + self.__is_initialized = True + return self + + def to(self, device: torch.device): + self.device = device + if self.model is not None: + self.model.to(self.device) + + def get_model(self): + return self.model + + def __train_only(self, data): + """ + The function of training on the given dataset and mask. + :param data: data of a specific graph + :return: self + """ + optimizer: torch.optim.Optimizer = self._optimizer_class( + self.model.model.parameters(), + lr=self._learning_rate, + weight_decay=self._weight_decay + ) + + if type(self._lr_scheduler_type) == str: + if self._lr_scheduler_type.lower() == "step" + "lr": + lr_scheduler: torch.optim.lr_scheduler.StepLR = ( + torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) + ) + elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": + lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( + torch.optim.lr_scheduler.MultiStepLR( + optimizer, milestones=[30, 80], gamma=0.1 + ) + ) + elif self._lr_scheduler_type.lower() == "exponential" + "lr": + lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( + torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) + ) + elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): + lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( + torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") + ) + else: + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( + torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) + else: + lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( + torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) + ) + + sampled_node_size_budget: int = self.__sampled_node_size_budget + num_layers: int = self.__num_layers + + __layer_dependent_importance_sampler: LayerDependentImportanceSampler = ( + LayerDependentImportanceSampler(data.edge_index) + ) + __top_layer_target_nodes_indexes: torch.LongTensor = ( + torch.where(data.train_mask)[0].unique() + ) + for current_epoch in range(self._max_epoch): + self.model.model.train() + optimizer.zero_grad() + """ epoch start """ + " sample graphs " + __layers: _typing.Sequence[ + _typing.Tuple[torch.Tensor, torch.Tensor] + ] = __layer_dependent_importance_sampler.sample( + __top_layer_target_nodes_indexes, + [sampled_node_size_budget for _ in range(num_layers)] + ) + data.edge_indexes = [layer[0] for layer in __layers] + data.edge_weights = [layer[1] for layer in __layers] + data = data.to(self.device) + + result: torch.Tensor = self.model.model.forward(data) + if hasattr(torch.nn.functional, self.loss): + loss_function = getattr( + 
torch.nn.functional, self.loss + ) + loss_value: torch.Tensor = loss_function( + result[data.train_mask], + data.y[data.train_mask] + ) + else: + raise TypeError( + f"PyTorch does not support loss type {self.loss}" + ) + + loss_value.backward() + optimizer.step() + if self._lr_scheduler_type: + lr_scheduler.step() + + if ( + hasattr(data, "val_mask") and + getattr(data, "val_mask") is not None and + type(getattr(data, "val_mask")) == torch.Tensor + ): + validation_results: _typing.Sequence[float] = self.evaluate( + (data,), "val", [self.feval[0]] + ) + if self.feval[0].is_higher_better(): + validation_loss: float = -validation_results[0] + else: + validation_loss: float = validation_results[0] + self._early_stopping(validation_loss, self.model.model) + if self._early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", current_epoch) + break + if ( + hasattr(data, "val_mask") and + getattr(data, "val_mask") is not None and + type(getattr(data, "val_mask")) == torch.Tensor + ): + self._early_stopping.load_checkpoint(self.model.model) + + def __predict_only(self, data) -> torch.Tensor: + """ + The function of predicting on the given data. + :param data: data of a specific graph + :return: the result of prediction on the given dataset + """ + data = data.to(self.device) + self.model.model.eval() + with torch.no_grad(): + predicted_x: torch.Tensor = self.model.model(data) + return predicted_x + + def predict_proba( + self, dataset, mask: _typing.Optional[str]=None, + in_log_format: bool=False + ): + """ + The function of predicting the probability on the given dataset. + :param dataset: The node classification dataset used to be predicted. + :param mask: + :param in_log_format: + :return: + """ + data = dataset[0].to(self.device) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + result = self.__predict_only(data)[_mask] + return result if in_log_format else torch.exp(result) + + def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: + return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] + + def evaluate( + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, + ) -> _typing.Sequence[float]: + data = dataset[0] + data = data.to(self.device) + if feval is None: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval + else: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) + y_ground_truth: torch.Tensor = data.y[_mask] + + eval_results = [] + for f in _feval: + try: + eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) + except: + eval_results.append( + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), + ) + ) + return eval_results + + def 
train(self, dataset, keep_valid_result: bool = True): + """ + The function of training on the given dataset and keeping valid result. + :param dataset: + :param keep_valid_result: Whether to save the validation result after training + """ + data = dataset[0] + self.__train_only(data) + if keep_valid_result: + prediction: torch.Tensor = self.__predict_only(data) + self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] + self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") + + def get_valid_predict(self) -> torch.Tensor: + return self._valid_result + + def get_valid_predict_proba(self) -> torch.Tensor: + return self._valid_result_prob + + def get_valid_score( + self, return_major: bool = True + ) -> _typing.Union[ + _typing.Tuple[float, bool], + _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] + ]: + if return_major: + return self._valid_score[0], self.feval[0].is_higher_better() + else: + return self._valid_score, [f.is_higher_better() for f in self.feval] + + @property + def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: + return self._hyper_parameter_space + + @hyper_parameter_space.setter + def hyper_parameter_space( + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + ) -> None: + if not isinstance(hp_space, _typing.Sequence): + raise TypeError + self._hyper_parameter_space = hp_space + + def get_name_with_hp(self) -> str: + name = "-".join( + [ + str(self._optimizer_class), + str(self._learning_rate), + str(self._max_epoch), + str(self._early_stopping.patience), + str(self.model), + str(self.device), + ] + ) + name = ( + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) + ) + return name + + def duplicate_from_hyper_parameter( + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None, + ) -> "NodeClassificationLayerDependentImportanceSamplingTrainer": + if model is None or not isinstance(model, BaseModel): + model: BaseModel = self.model + model = model.from_hyper_parameter( + dict( + [ + x + for x in hp.items() + if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] + ] + ) + ) + return NodeClassificationLayerDependentImportanceSamplingTrainer( + model, + self.num_features, + self.num_classes, + self._optimizer_class, + device=self.device, + init=True, + feval=self.feval, + loss=self.loss, + lr_scheduler_type=self._lr_scheduler_type, + **hp, + ) diff --git a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py new file mode 100644 index 0000000..a46ba56 --- /dev/null +++ b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py @@ -0,0 +1,215 @@ +import numpy as np +import torch +import torch.utils.data +import typing as _typing +import torch_geometric + + +class LayerDependentImportanceSampler: + class _Utility: + @classmethod + def compute_edge_weights(cls, __all_edge_index_with_self_loops: torch.LongTensor) -> torch.Tensor: + __out_degree: torch.Tensor = \ + torch_geometric.utils.degree(__all_edge_index_with_self_loops[0]) + __in_degree: torch.Tensor = \ + torch_geometric.utils.degree(__all_edge_index_with_self_loops[1]) + + # temp_tensor: torch.Tensor = torch.zeros_like(__all_edge_index_with_self_loops) + # temp_tensor[0] = __out_degree[__all_edge_index_with_self_loops[0]] + # 
temp_tensor[1] = __in_degree[__all_edge_index_with_self_loops[1]] + temp_tensor: torch.Tensor = torch.stack( + [ + __out_degree[__all_edge_index_with_self_loops[0]], + __in_degree[__all_edge_index_with_self_loops[1]] + ] + ) + temp_tensor: torch.Tensor = 1.0 / temp_tensor + temp_tensor[torch.isinf(temp_tensor)] = 0.0 + return temp_tensor[0] * temp_tensor[1] + + @classmethod + def get_candidate_source_nodes_probabilities( + cls, all_candidate_edge_indexes: torch.Tensor, + all_edge_index_with_self_loops: torch.Tensor, + all_edge_weights: torch.Tensor + ) -> _typing.Tuple[torch.LongTensor, torch.Tensor]: + """ + :param all_candidate_edge_indexes: + :param all_edge_index_with_self_loops: integral edge index with self-loops + :param all_edge_weights: + :return: (all_source_nodes_indexes, all_source_nodes_probabilities) + """ + _all_candidate_edges: torch.Tensor = \ + all_edge_index_with_self_loops[:, all_candidate_edge_indexes] + _all_candidate_edges_weights: torch.Tensor = \ + all_edge_weights[all_candidate_edge_indexes] + + all_candidate_source_nodes_indexes: torch.LongTensor = _all_candidate_edges[0].unique() + all_candidate_source_nodes_probabilities: torch.Tensor = torch.tensor( + [ + torch.sum( + _all_candidate_edges_weights[_all_candidate_edges[0] == _current_source_node_index] + ).item() / torch.sum(_all_candidate_edges_weights).item() + for _current_source_node_index in all_candidate_source_nodes_indexes.tolist() + ] + ) + assert ( + all_candidate_source_nodes_indexes.size() == + all_candidate_source_nodes_probabilities.size() + ) + return all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities + + @classmethod + def filter_selected_edges_by_source_nodes_and_target_nodes( + cls, all_edges_with_self_loops: torch.Tensor, + selected_source_node_indexes: torch.LongTensor, + selected_target_node_indexes: torch.LongTensor + ) -> torch.Tensor: + """ + :param all_edges_with_self_loops: all edges with self loops + :param selected_source_node_indexes: selected source node indexes + :param selected_target_node_indexes: selected target node indexes + :return: filtered edge indexes + """ + selected_edges_mask_for_source_nodes: torch.Tensor = torch.zeros( + all_edges_with_self_loops.size(1), dtype=torch.bool + ) + selected_edges_mask_for_source_nodes[ + torch.cat([ + torch.where(all_edges_with_self_loops[0] == __current_selected_source_node_index)[0] + for __current_selected_source_node_index in selected_source_node_indexes.unique().tolist() + ]).unique() + ] = True + selected_edges_mask_for_target_nodes: torch.Tensor = torch.zeros( + all_edges_with_self_loops.size(1), dtype=torch.bool + ) + selected_edges_mask_for_target_nodes[ + torch.cat([ + torch.where(all_edges_with_self_loops[1] == __current_selected_target_node_index)[0] + for __current_selected_target_node_index in selected_target_node_indexes.unique().tolist() + ]) + ] = True + return torch.where( + selected_edges_mask_for_source_nodes & selected_edges_mask_for_target_nodes + )[0] + + def __init__(self, all_edge_index: torch.LongTensor): + self.__all_edge_index_with_self_loops: torch.LongTensor = \ + torch_geometric.utils.add_remaining_self_loops(all_edge_index)[0] + self.__all_edge_weights: torch.Tensor = \ + self._Utility.compute_edge_weights(self.__all_edge_index_with_self_loops) + + def __sample_layer( + self, target_nodes_indexes: torch.LongTensor, + sampled_node_size_budget: int + ) -> _typing.Tuple[torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor]: + """ + :param target_nodes_indexes: + node indexes 
for target nodes in the top layer or nodes sampled in upper layer + :param sampled_node_size_budget: + :return: (Tensor, Tensor, LongTensor, LongTensor) + """ + all_candidate_edge_indexes: torch.LongTensor = torch.cat( + [ + torch.where(self.__all_edge_index_with_self_loops[1] == current_target_node_index)[0] + for current_target_node_index in target_nodes_indexes.unique().tolist() + ] + ).unique() + __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ + self._Utility.get_candidate_source_nodes_probabilities( + all_candidate_edge_indexes, + self.__all_edge_index_with_self_loops, + self.__all_edge_weights + ) + assert __all_candidate_source_nodes_indexes.size() == all_candidate_source_nodes_probabilities.size() + + """ Sampling """ + if sampled_node_size_budget < __all_candidate_source_nodes_indexes.numel(): + selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes[ + torch.from_numpy( + np.unique(np.random.choice( + np.arange(__all_candidate_source_nodes_indexes.numel()), sampled_node_size_budget, + p=all_candidate_source_nodes_probabilities.numpy() + )) + ).unique() + ].unique() + else: + selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes + + __selected_edges_indexes: torch.LongTensor = ( + self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes( + self.__all_edge_index_with_self_loops, + selected_source_node_indexes, target_nodes_indexes + ) + ).unique() + + non_normalized_selected_edges_weight: torch.Tensor = ( + self.__all_edge_weights[__selected_edges_indexes] / ( + selected_source_node_indexes.numel() * torch.tensor( + [ + all_candidate_source_nodes_probabilities[ + __all_candidate_source_nodes_indexes == current_source_node_index + ].item() + for current_source_node_index + in self.__all_edge_index_with_self_loops[0, __selected_edges_indexes].tolist() + ] + ) + ) + ) + + def __normalize_edges_weight_by_target_nodes( + __edge_index: torch.Tensor, __edge_weight: torch.Tensor + ) -> torch.Tensor: + if __edge_index.size(1) != __edge_weight.numel(): + raise ValueError + for current_target_node_index in __edge_index[1].unique().tolist(): + __current_mask_for_edges: torch.BoolTensor = ( + __edge_index[1] == current_target_node_index + ) + __edge_weight[__current_mask_for_edges] = ( + __edge_weight[__current_mask_for_edges] / ( + torch.sum(__edge_weight[__current_mask_for_edges]) + ) + ) + return __edge_weight + + normalized_selected_edges_weight: torch.Tensor = __normalize_edges_weight_by_target_nodes( + self.__all_edge_index_with_self_loops[:, __selected_edges_indexes], + non_normalized_selected_edges_weight + ) + return ( + self.__all_edge_index_with_self_loops[:, __selected_edges_indexes], + normalized_selected_edges_weight, + selected_source_node_indexes, + __selected_edges_indexes + ) + + def sample( + self, __top_layer_target_nodes_indexes: torch.LongTensor, + sampling_node_size_budgets: _typing.Sequence[int] + ) -> _typing.Sequence[_typing.Tuple[torch.Tensor, torch.Tensor]]: + """ + :param __top_layer_target_nodes_indexes: indexes of target nodes for the top layer + :param sampling_node_size_budgets: + :return: + """ + if type(__top_layer_target_nodes_indexes) != torch.Tensor: + raise TypeError + if not isinstance(sampling_node_size_budgets, _typing.Sequence): + raise TypeError + if len(sampling_node_size_budgets) == 0: + raise ValueError + + layers: _typing.List[_typing.Tuple[torch.Tensor, torch.Tensor]] = [] + upper_layer_sampled_node_indexes: torch.LongTensor = 
__top_layer_target_nodes_indexes + for current_sampled_node_size_budget in sampling_node_size_budgets[::-1]: + _sampling_result: _typing.Tuple[ + torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor + ] = self.__sample_layer(upper_layer_sampled_node_indexes, current_sampled_node_size_budget) + current_layer_edge_index: torch.Tensor = _sampling_result[0] + current_layer_edge_weight: torch.Tensor = _sampling_result[1] + layers.append((current_layer_edge_index, current_layer_edge_weight)) + + upper_layer_sampled_node_indexes: torch.LongTensor = _sampling_result[2] + + return layers[::-1] diff --git a/configs/nodeclf_ladies_gcn.yml b/configs/nodeclf_ladies_gcn.yml new file mode 100644 index 0000000..83c7e6f --- /dev/null +++ b/configs/nodeclf_ladies_gcn.yml @@ -0,0 +1,65 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- hp_space: + - feasiblePoints: + - 0 + parameterName: add_self_loops + type: CATEGORICAL + - feasiblePoints: 5,5 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 4 + maxValue: 256 + minValue: 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.8 + minValue: 0.2 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn +trainer: + name: NodeClassificationLayerDependentImportanceSamplingTrainer + hp_space: + - feasiblePoints: 128,256,512 + parameterName: sampled_node_size_budget + type: DISCRETE + - maxValue: 300 + minValue: 100 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 30 + minValue: 10 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.01 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 0.0005 + minValue: 0.0001 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/nodeclf_sage_benchmark_large.yml b/configs/nodeclf_sage_benchmark_large.yml index 8b7c2a5..2cdf556 100644 --- a/configs/nodeclf_sage_benchmark_large.yml +++ b/configs/nodeclf_sage_benchmark_large.yml @@ -29,10 +29,10 @@ models: parameterName: dropout scalingType: LINEAR type: DOUBLE - - feasiblePoints": + - feasiblePoints: - mean - parameterName: aggr, - type: CATEGORICAL, + parameterName: aggr + type: CATEGORICAL - feasiblePoints: - leaky_relu - relu diff --git a/configs/nodeclf_sage_benchmark_small.yml b/configs/nodeclf_sage_benchmark_small.yml index 2bd0ffe..9bd7aaa 100644 --- a/configs/nodeclf_sage_benchmark_small.yml +++ b/configs/nodeclf_sage_benchmark_small.yml @@ -29,12 +29,12 @@ models: parameterName: dropout scalingType: LINEAR type: DOUBLE - - feasiblePoints": + - feasiblePoints: - mean - add - max - parameterName: agg, - type: CATEGORICAL, + parameterName: aggr + type: CATEGORICAL - feasiblePoints: - leaky_relu - relu From 25572e045e56d2bc1c99570e9b7e4d7ceba00575 Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Thu, 6 May 2021 01:45:30 +0000 Subject: [PATCH 060/144] add act concat code --- autogl/module/nas/space/graph_nas.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 5966303..447ccc9 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -156,6 +156,7 @@ class
GraphNasNodeClassificationSpace(BaseSpace): output_dim: _typ.Optional[int] = None, ops: _typ.Tuple = None, init: bool = False, + search_act_con=False ): super().__init__() self.layer_number = layer_number @@ -164,6 +165,7 @@ class GraphNasNodeClassificationSpace(BaseSpace): self.output_dim = output_dim self.ops = ops self.dropout = dropout + self.search_act_con=search_act_con def _instantiate( self, @@ -187,8 +189,9 @@ class GraphNasNodeClassificationSpace(BaseSpace): node_labels.append(f"op_{layer}") setattr(self,f"in_{layer}",self.setInputChoice(layer,choose_from=node_labels[:-1], n_chosen=1, return_mask=False,key=f"in_{layer}")) setattr(self,f"op_{layer}",self.setLayerChoice(layer,[gnn_map(op,self.hidden_dim,self.hidden_dim)for op in gnn_list],key=f"op_{layer}")) - # setattr(self,f"act",self.setLayerChoice(2*layer,[act_map_nn(a)for a in act_list],key=f"act")) - # setattr(self,f"concat",self.setLayerChoice(2*layer+1,map_nn(["add", "product", "concat"]) ,key=f"concat")) + if self.search_act_con: + setattr(self,f"act",self.setLayerChoice(2*layer,[act_map_nn(a)for a in act_list],key=f"act")) + setattr(self,f"concat",self.setLayerChoice(2*layer+1,map_nn(["add", "product", "concat"]) ,key=f"concat")) self._initialized = True def forward(self, data): @@ -199,10 +202,11 @@ class GraphNasNodeClassificationSpace(BaseSpace): node_in = getattr(self, f"in_{layer}")(prev_nodes_out) node_out= getattr(self, f"op_{layer}")(node_in,edges) prev_nodes_out.append(node_out) - x = torch.cat(prev_nodes_out[2:],dim=1) - x = F.leaky_relu(x) - # x = F.dropout(x, p=self.dropout, training = self.training) - if False: + if self.search_act_con: + x = torch.cat(prev_nodes_out[2:],dim=1) + x = F.leaky_relu(x) + x = F.dropout(x, p=self.dropout, training = self.training) + else: act=getattr(self, f"act") con=getattr(self, f"concat")() states=prev_nodes_out From 3cd0367a236db9ec6db493fa2cf361ea1295947b Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Thu, 6 May 2021 03:42:15 +0000 Subject: [PATCH 061/144] fix zeroconv bug --- autogl/module/nas/algorithm/enas.py | 23 ++++++++++++++++++++--- autogl/module/nas/space/graph_nas.py | 20 +++++++++++++++++--- autogl/module/nas/space/single_path.py | 2 ++ examples/test_graph_nas.py | 4 ++-- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/autogl/module/nas/algorithm/enas.py b/autogl/module/nas/algorithm/enas.py index 715139a..8b66f5e 100644 --- a/autogl/module/nas/algorithm/enas.py +++ b/autogl/module/nas/algorithm/enas.py @@ -10,6 +10,7 @@ from .base import BaseNAS from ..space import BaseSpace from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module from nni.nas.pytorch.fixed import apply_fixed_architecture +from tqdm import tqdm _logger = logging.getLogger(__name__) def _get_mask(sampled, total): multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)] @@ -312,9 +313,21 @@ class Enas(BaseNAS): self.controller = ReinforceController(self.nas_fields, **(self.ctrl_kwargs or {})) self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr) # train - for i in range(self.num_epochs): - self._train_model(i) - self._train_controller(i) + with tqdm(range(self.num_epochs)) as bar: + for i in bar: + try: + l1=self._train_model(i) + l2=self._train_controller(i) + except Exception as e: + print(e) + nm=self.nas_modules + for i in range(len(nm)): + print(nm[i][1].sampled) + import pdb + pdb.set_trace() + + + 
bar.set_postfix(loss_model=l1,reward_controller=l2) selection=self.export() return space.export(selection,self.device) @@ -329,16 +342,19 @@ class Enas(BaseNAS): if self.grad_clip > 0: nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip) self.model_optim.step() + return loss.item() def _train_controller(self, epoch): self.model.eval() self.controller.train() self.ctrl_optim.zero_grad() + rewards=[] for ctrl_step in range(self.ctrl_steps_aggregate): self._resample() with torch.no_grad(): metric,loss=self._infer() reward =-metric # todo : now metric is loss + rewards.append(reward) if self.entropy_weight: reward += self.entropy_weight * self.controller.sample_entropy.item() self.baseline = self.baseline * self.baseline_decay + reward * (1 - self.baseline_decay) @@ -357,6 +373,7 @@ class Enas(BaseNAS): if self.log_frequency is not None and ctrl_step % self.log_frequency == 0: _logger.info('RL Epoch [%d/%d] Step [%d/%d] %s', epoch + 1, self.num_epochs, ctrl_step + 1, self.ctrl_steps_aggregate) + return (sum(rewards)/len(rewards)).item() def _resample(self): result = self.controller.resample() diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 447ccc9..9e02169 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -43,6 +43,9 @@ class LambdaModule(nn.Module): def forward(self, x): return self.lambd(x) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__,self.lambd) class StrModule(nn.Module): def __init__(self, lambd): super().__init__() @@ -50,6 +53,9 @@ class StrModule(nn.Module): def forward(self, *args,**kwargs): return self.str + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__,self.str) def act_map(act): if act == "linear": return lambda x: x @@ -128,6 +134,15 @@ class LinearConv(nn.Module): self.out_channels) +from torch.autograd import Function +class ZeroConvFunc(Function): + @staticmethod + def forward(ctx,x): + return x + + @staticmethod + def backward(ctx, grad_output): + return grad_output class ZeroConv(nn.Module): def __init__(self, in_channels, @@ -138,9 +153,8 @@ class ZeroConv(nn.Module): self.out_channels = out_channels self.out_dim = out_channels - def forward(self, x, edge_index, edge_weight=None): - return torch.zeros([x.size(0), self.out_dim]).to(x.device) + return ZeroConvFunc.apply(torch.zeros([x.size(0), self.out_dim]).to(x.device)) def __repr__(self): return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, @@ -202,7 +216,7 @@ class GraphNasNodeClassificationSpace(BaseSpace): node_in = getattr(self, f"in_{layer}")(prev_nodes_out) node_out= getattr(self, f"op_{layer}")(node_in,edges) prev_nodes_out.append(node_out) - if self.search_act_con: + if not self.search_act_con: x = torch.cat(prev_nodes_out[2:],dim=1) x = F.leaky_relu(x) x = F.dropout(x, p=self.dropout, training = self.training) diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py index fcbffa2..75c9746 100644 --- a/autogl/module/nas/space/single_path.py +++ b/autogl/module/nas/space/single_path.py @@ -27,6 +27,8 @@ class FixedNodeClassificationModel(BaseModel): apply_fixed_architecture(self._model, selection, verbose=False) self.params = {"num_class": self.num_classes, "features_num": self.num_features} self.device = device + print(self._model) + print(selection) def to(self, device): if isinstance(device, (str, torch.device)): diff --git a/examples/test_graph_nas.py b/examples/test_graph_nas.py index d1e297c..49207d8 
100644 --- a/examples/test_graph_nas.py +++ b/examples/test_graph_nas.py @@ -28,9 +28,9 @@ if __name__ == '__main__': feval=['acc'], loss="nll_loss", lr_scheduler_type=None,), - nas_algorithms=[Enas(num_epochs=10)], + nas_algorithms=[Enas(num_epochs=100)], #nas_algorithms=[Darts(num_epochs=200)], - nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv],search_act_con=True)], + nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=16,search_act_con=False)], nas_estimators=[OneShotEstimator()] ) solver.fit(dataset) From 0dad422b67f01c18234b87e991c5ffb808616a0b Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Thu, 6 May 2021 03:48:02 +0000 Subject: [PATCH 062/144] fix zeroconv --- autogl/module/nas/space/graph_nas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 9e02169..95c290f 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -142,7 +142,7 @@ class ZeroConvFunc(Function): @staticmethod def backward(ctx, grad_output): - return grad_output + return 0 class ZeroConv(nn.Module): def __init__(self, in_channels, From a0bee2ca3525de784666b68292dad22b163f3453 Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Fri, 7 May 2021 06:15:55 +0000 Subject: [PATCH 063/144] fix classifier; fix zero conv --- autogl/module/nas/estimator/one_shot.py | 5 +++-- autogl/module/nas/space/graph_nas.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py index e887138..e43695c 100644 --- a/autogl/module/nas/estimator/one_shot.py +++ b/autogl/module/nas/estimator/one_shot.py @@ -3,7 +3,7 @@ import torch.nn.functional as F from ..space import BaseSpace from .base import BaseEstimator - +import torch class OneShotEstimator(BaseEstimator): """ @@ -18,4 +18,5 @@ class OneShotEstimator(BaseEstimator): pred = model(dset)[getattr(dset, f"{mask}_mask")] y = dset.y[getattr(dset, f'{mask}_mask')] loss = F.nll_loss(pred, y) - return loss, loss + acc=sum(pred.max(1)[1]==y).item()/y.size(0) + return acc, loss diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 95c290f..8e392d0 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -114,7 +114,11 @@ def gnn_map(gnn_name, in_dim, out_dim, concat=False, bias=True) -> nn.Module: elif gnn_name == "linear": return LinearConv(in_dim, out_dim, bias=bias) elif gnn_name == "zero": - return ZeroConv(in_dim, out_dim, bias=bias) + # return ZeroConv(in_dim, out_dim, bias=bias) + return Identity() +class Identity(nn.Module): + def forward(self, x, edge_index, edge_weight=None): + return x class LinearConv(nn.Module): def __init__(self, in_channels, @@ -207,6 +211,8 @@ class GraphNasNodeClassificationSpace(BaseSpace): setattr(self,f"act",self.setLayerChoice(2*layer,[act_map_nn(a)for a in act_list],key=f"act")) setattr(self,f"concat",self.setLayerChoice(2*layer+1,map_nn(["add", "product", "concat"]) ,key=f"concat")) self._initialized = True + self.classifier1 = nn.Linear(self.hidden_dim*self.layer_number, self.output_dim) + self.classifier2 = nn.Linear(self.hidden_dim, self.output_dim) def forward(self, data): x, edges = data.x, data.edge_index # x [2708,1433] ,[2, 10556] @@ -220,6 +226,7 @@ class GraphNasNodeClassificationSpace(BaseSpace): x = torch.cat(prev_nodes_out[2:],dim=1) x = F.leaky_relu(x) x = F.dropout(x, p=self.dropout, 
training = self.training) + x = self.classifier1(x) else: act=getattr(self, f"act") con=getattr(self, f"concat")() @@ -236,6 +243,11 @@ class GraphNasNodeClassificationSpace(BaseSpace): x=tmp x = act(x) x = F.dropout(x, p=self.dropout, training = self.training) + if con=='concat': + x=self.classifier1(x) + else: + x=self.classifier2(x) + # set_trace() return F.log_softmax(x, dim=1) def export(self, selection, device) -> BaseModel: From 71c60ac0bfb5d8f0648b74a4d0b32b603687a9cc Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Fri, 7 May 2021 06:52:48 +0000 Subject: [PATCH 064/144] fix enas --- autogl/module/nas/algorithm/enas.py | 39 ++++++++++++++++++---------- autogl/module/nas/space/graph_nas.py | 1 - examples/test_graph_nas.py | 4 +-- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/autogl/module/nas/algorithm/enas.py b/autogl/module/nas/algorithm/enas.py index 8b66f5e..2760d24 100644 --- a/autogl/module/nas/algorithm/enas.py +++ b/autogl/module/nas/algorithm/enas.py @@ -273,7 +273,7 @@ class Enas(BaseNAS): def __init__(self, device='cuda', workers=4,log_frequency=None, grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999, - ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,*args,**kwargs): + ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,*args,**kwargs): super().__init__(device) self.device=device self.num_epochs = kwargs.get("num_epochs", 5) @@ -288,14 +288,13 @@ class Enas(BaseNAS): self.workers = workers self.ctrl_kwargs=ctrl_kwargs self.ctrl_lr=ctrl_lr - + self.n_warmup=n_warmup + self.model_lr = model_lr + self.model_wd = model_wd def search(self, space: BaseSpace, dset, estimator): self.model = space self.dataset = dset#.to(self.device) - self.estimator = estimator - self.model_optim = torch.optim.SGD( - self.model.parameters(), lr=0.01, weight_decay=3e-4 - ) + self.estimator = estimator # replace choice self.nas_modules = [] @@ -306,12 +305,24 @@ class Enas(BaseNAS): # to device self.model = self.model.to(self.device) + self.model_optim = torch.optim.Adam( + self.model.parameters(), lr=self.model_lr, weight_decay=self.model_wd + ) # fields self.nas_fields = [ReinforceField(name, len(module), isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1) for name, module in self.nas_modules] self.controller = ReinforceController(self.nas_fields, **(self.ctrl_kwargs or {})) self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr) + self._resample() + + # warm up supernet + with tqdm(range(self.n_warmup)) as bar: + for i in bar: + acc,l1=self._train_model(i) + with torch.no_grad(): + val_acc,val_loss=self._infer('val') + bar.set_postfix(loss=l1,acc=acc,val_acc=val_acc,val_loss=val_loss) # train with tqdm(range(self.num_epochs)) as bar: for i in bar: @@ -330,8 +341,9 @@ class Enas(BaseNAS): bar.set_postfix(loss_model=l1,reward_controller=l2) selection=self.export() + print(selection) return space.export(selection,self.device) - + def _train_model(self, epoch): self.model.train() self.controller.eval() @@ -342,7 +354,8 @@ class Enas(BaseNAS): if self.grad_clip > 0: nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip) self.model_optim.step() - return loss.item() + + return metric,loss.item() def _train_controller(self, epoch): self.model.eval() @@ -352,8 +365,8 @@ class Enas(BaseNAS): for ctrl_step in range(self.ctrl_steps_aggregate): self._resample() with torch.no_grad(): - metric,loss=self._infer() - reward =-metric # todo : now metric is 
loss + metric,loss=self._infer(mask='val') + reward =metric rewards.append(reward) if self.entropy_weight: reward += self.entropy_weight * self.controller.sample_entropy.item() @@ -373,7 +386,7 @@ class Enas(BaseNAS): if self.log_frequency is not None and ctrl_step % self.log_frequency == 0: _logger.info('RL Epoch [%d/%d] Step [%d/%d] %s', epoch + 1, self.num_epochs, ctrl_step + 1, self.ctrl_steps_aggregate) - return (sum(rewards)/len(rewards)).item() + return sum(rewards)/len(rewards) def _resample(self): result = self.controller.resample() @@ -385,6 +398,6 @@ class Enas(BaseNAS): with torch.no_grad(): return self.controller.resample() - def _infer(self): - metric, loss = self.estimator.infer(self.model, self.dataset) + def _infer(self,mask='train'): + metric, loss = self.estimator.infer(self.model, self.dataset,mask=mask) return metric, loss diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 8e392d0..5d44591 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -247,7 +247,6 @@ class GraphNasNodeClassificationSpace(BaseSpace): x=self.classifier1(x) else: x=self.classifier2(x) - # set_trace() return F.log_softmax(x, dim=1) def export(self, selection, device) -> BaseModel: diff --git a/examples/test_graph_nas.py b/examples/test_graph_nas.py index 49207d8..4f6740f 100644 --- a/examples/test_graph_nas.py +++ b/examples/test_graph_nas.py @@ -28,9 +28,9 @@ if __name__ == '__main__': feval=['acc'], loss="nll_loss", lr_scheduler_type=None,), - nas_algorithms=[Enas(num_epochs=100)], + nas_algorithms=[Enas(num_epochs=400,n_warmup=250)], #nas_algorithms=[Darts(num_epochs=200)], - nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=16,search_act_con=False)], + nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=32,search_act_con=False,layer_number=2)], nas_estimators=[OneShotEstimator()] ) solver.fit(dataset) From c13df920e3245dfb2526500070ae8926bc01666e Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Fri, 7 May 2021 08:34:53 +0000 Subject: [PATCH 065/144] add pure rl for graphnas --- autogl/module/nas/algorithm/enas.py | 216 +------------ autogl/module/nas/algorithm/rl.py | 383 ++++++++++++++++++++++++ autogl/module/nas/estimator/one_shot.py | 23 ++ autogl/module/nas/space/single_path.py | 2 - examples/test_graph_nas_rl.py | 42 +++ 5 files changed, 450 insertions(+), 216 deletions(-) create mode 100644 autogl/module/nas/algorithm/rl.py create mode 100644 examples/test_graph_nas_rl.py diff --git a/autogl/module/nas/algorithm/enas.py b/autogl/module/nas/algorithm/enas.py index 2760d24..f9962fb 100644 --- a/autogl/module/nas/algorithm/enas.py +++ b/autogl/module/nas/algorithm/enas.py @@ -12,218 +12,7 @@ from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choic from nni.nas.pytorch.fixed import apply_fixed_architecture from tqdm import tqdm _logger = logging.getLogger(__name__) -def _get_mask(sampled, total): - multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)] - return torch.tensor(multihot, dtype=torch.bool) # pylint: disable=not-callable - -class PathSamplingLayerChoice(nn.Module): - """ - Mixed module, in which fprop is decided by exactly one or multiple (sampled) module. - If multiple module is selected, the result will be sumed and returned. - - Attributes - ---------- - sampled : int or list of int - Sampled module indices. - mask : tensor - A multi-hot bool 1D-tensor representing the sampled mask. 
- """ - - def __init__(self, layer_choice): - super(PathSamplingLayerChoice, self).__init__() - self.op_names = [] - for name, module in layer_choice.named_children(): - self.add_module(name, module) - self.op_names.append(name) - assert self.op_names, 'There has to be at least one op to choose from.' - self.sampled = None # sampled can be either a list of indices or an index - - def forward(self, *args, **kwargs): - assert self.sampled is not None, 'At least one path needs to be sampled before fprop.' - if isinstance(self.sampled, list): - return sum([getattr(self, self.op_names[i])(*args, **kwargs) for i in self.sampled]) # pylint: disable=not-an-iterable - else: - return getattr(self, self.op_names[self.sampled])(*args, **kwargs) # pylint: disable=invalid-sequence-index - - def __len__(self): - return len(self.op_names) - - @property - def mask(self): - return _get_mask(self.sampled, len(self)) - - -class PathSamplingInputChoice(nn.Module): - """ - Mixed input. Take a list of tensor as input, select some of them and return the sum. - - Attributes - ---------- - sampled : int or list of int - Sampled module indices. - mask : tensor - A multi-hot bool 1D-tensor representing the sampled mask. - """ - - def __init__(self, input_choice): - super(PathSamplingInputChoice, self).__init__() - self.n_candidates = input_choice.n_candidates - self.n_chosen = input_choice.n_chosen - self.sampled = None - - def forward(self, input_tensors): - if isinstance(self.sampled, list): - return sum([input_tensors[t] for t in self.sampled]) # pylint: disable=not-an-iterable - else: - return input_tensors[self.sampled] - - def __len__(self): - return self.n_candidates - - @property - def mask(self): - return _get_mask(self.sampled, len(self)) - - -class StackedLSTMCell(nn.Module): - def __init__(self, layers, size, bias): - super().__init__() - self.lstm_num_layers = layers - self.lstm_modules = nn.ModuleList([nn.LSTMCell(size, size, bias=bias) - for _ in range(self.lstm_num_layers)]) - - def forward(self, inputs, hidden): - prev_h, prev_c = hidden - next_h, next_c = [], [] - for i, m in enumerate(self.lstm_modules): - curr_h, curr_c = m(inputs, (prev_h[i], prev_c[i])) - next_c.append(curr_c) - next_h.append(curr_h) - # current implementation only supports batch size equals 1, - # but the algorithm does not necessarily have this limitation - inputs = curr_h[-1].view(1, -1) - return next_h, next_c - - -class ReinforceField: - """ - A field with ``name``, with ``total`` choices. ``choose_one`` is true if one and only one is meant to be - selected. Otherwise, any number of choices can be chosen. - """ - - def __init__(self, name, total, choose_one): - self.name = name - self.total = total - self.choose_one = choose_one - - def __repr__(self): - return f'ReinforceField(name={self.name}, total={self.total}, choose_one={self.choose_one})' - - -class ReinforceController(nn.Module): - """ - A controller that mutates the graph with RL. - - Parameters - ---------- - fields : list of ReinforceField - List of fields to choose. - lstm_size : int - Controller LSTM hidden units. - lstm_num_layers : int - Number of layers for stacked LSTM. - tanh_constant : float - Logits will be equal to ``tanh_constant * tanh(logits)``. Don't use ``tanh`` if this value is ``None``. - skip_target : float - Target probability that skipconnect will appear. - temperature : float - Temperature constant that divides the logits. - entropy_reduction : str - Can be one of ``sum`` and ``mean``. How the entropy of multi-input-choice is reduced. 
- """ - - def __init__(self, fields, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, - skip_target=0.4, temperature=None, entropy_reduction='sum'): - super(ReinforceController, self).__init__() - self.fields = fields - self.lstm_size = lstm_size - self.lstm_num_layers = lstm_num_layers - self.tanh_constant = tanh_constant - self.temperature = temperature - self.skip_target = skip_target - - self.lstm = StackedLSTMCell(self.lstm_num_layers, self.lstm_size, False) - self.attn_anchor = nn.Linear(self.lstm_size, self.lstm_size, bias=False) - self.attn_query = nn.Linear(self.lstm_size, self.lstm_size, bias=False) - self.v_attn = nn.Linear(self.lstm_size, 1, bias=False) - self.g_emb = nn.Parameter(torch.randn(1, self.lstm_size) * 0.1) - self.skip_targets = nn.Parameter(torch.tensor([1.0 - self.skip_target, self.skip_target]), # pylint: disable=not-callable - requires_grad=False) - assert entropy_reduction in ['sum', 'mean'], 'Entropy reduction must be one of sum and mean.' - self.entropy_reduction = torch.sum if entropy_reduction == 'sum' else torch.mean - self.cross_entropy_loss = nn.CrossEntropyLoss(reduction='none') - self.soft = nn.ModuleDict({ - field.name: nn.Linear(self.lstm_size, field.total, bias=False) for field in fields - }) - self.embedding = nn.ModuleDict({ - field.name: nn.Embedding(field.total, self.lstm_size) for field in fields - }) - - def resample(self): - self._initialize() - result = dict() - for field in self.fields: - result[field.name] = self._sample_single(field) - return result - - def _initialize(self): - self._inputs = self.g_emb.data - self._c = [torch.zeros((1, self.lstm_size), - dtype=self._inputs.dtype, - device=self._inputs.device) for _ in range(self.lstm_num_layers)] - self._h = [torch.zeros((1, self.lstm_size), - dtype=self._inputs.dtype, - device=self._inputs.device) for _ in range(self.lstm_num_layers)] - self.sample_log_prob = 0 - self.sample_entropy = 0 - self.sample_skip_penalty = 0 - - def _lstm_next_step(self): - self._h, self._c = self.lstm(self._inputs, (self._h, self._c)) - - def _sample_single(self, field): - self._lstm_next_step() - logit = self.soft[field.name](self._h[-1]) - if self.temperature is not None: - logit /= self.temperature - if self.tanh_constant is not None: - logit = self.tanh_constant * torch.tanh(logit) - if field.choose_one: - sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1) - log_prob = self.cross_entropy_loss(logit, sampled) - self._inputs = self.embedding[field.name](sampled) - else: - logit = logit.view(-1, 1) - logit = torch.cat([-logit, logit], 1) # pylint: disable=invalid-unary-operand-type - sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1) - skip_prob = torch.sigmoid(logit) - kl = torch.sum(skip_prob * torch.log(skip_prob / self.skip_targets)) - self.sample_skip_penalty += kl - log_prob = self.cross_entropy_loss(logit, sampled) - sampled = sampled.nonzero().view(-1) - if sampled.sum().item(): - self._inputs = (torch.sum(self.embedding[field.name](sampled.view(-1)), 0) / (1. 
+ torch.sum(sampled))).unsqueeze(0) - else: - self._inputs = torch.zeros(1, self.lstm_size, device=self.embedding[field.name].weight.device) - - sampled = sampled.detach().numpy().tolist() - self.sample_log_prob += self.entropy_reduction(log_prob) - entropy = (log_prob * torch.exp(-log_prob)).detach() # pylint: disable=invalid-unary-operand-type - self.sample_entropy += self.entropy_reduction(entropy) - if len(sampled) == 1: - sampled = sampled[0] - return sampled - +from .rl import PathSamplingLayerChoice,PathSamplingInputChoice,ReinforceField,ReinforceController class Enas(BaseNAS): """ @@ -314,8 +103,7 @@ class Enas(BaseNAS): for name, module in self.nas_modules] self.controller = ReinforceController(self.nas_fields, **(self.ctrl_kwargs or {})) self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr) - self._resample() - + # warm up supernet with tqdm(range(self.n_warmup)) as bar: for i in bar: diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py new file mode 100644 index 0000000..18f92ad --- /dev/null +++ b/autogl/module/nas/algorithm/rl.py @@ -0,0 +1,383 @@ +# codes in this file are reproduced from https://github.com/microsoft/nni with some changes. +import copy +import logging + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .base import BaseNAS +from ..space import BaseSpace +from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module +from nni.nas.pytorch.fixed import apply_fixed_architecture +from tqdm import tqdm +_logger = logging.getLogger(__name__) +def _get_mask(sampled, total): + multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)] + return torch.tensor(multihot, dtype=torch.bool) # pylint: disable=not-callable + +class PathSamplingLayerChoice(nn.Module): + """ + Mixed module, in which fprop is decided by exactly one or multiple (sampled) module. + If multiple module is selected, the result will be sumed and returned. + + Attributes + ---------- + sampled : int or list of int + Sampled module indices. + mask : tensor + A multi-hot bool 1D-tensor representing the sampled mask. + """ + + def __init__(self, layer_choice): + super(PathSamplingLayerChoice, self).__init__() + self.op_names = [] + for name, module in layer_choice.named_children(): + self.add_module(name, module) + self.op_names.append(name) + assert self.op_names, 'There has to be at least one op to choose from.' + self.sampled = None # sampled can be either a list of indices or an index + + def forward(self, *args, **kwargs): + assert self.sampled is not None, 'At least one path needs to be sampled before fprop.' + if isinstance(self.sampled, list): + return sum([getattr(self, self.op_names[i])(*args, **kwargs) for i in self.sampled]) # pylint: disable=not-an-iterable + else: + return getattr(self, self.op_names[self.sampled])(*args, **kwargs) # pylint: disable=invalid-sequence-index + + def __len__(self): + return len(self.op_names) + + @property + def mask(self): + return _get_mask(self.sampled, len(self)) + + +class PathSamplingInputChoice(nn.Module): + """ + Mixed input. Take a list of tensor as input, select some of them and return the sum. + + Attributes + ---------- + sampled : int or list of int + Sampled module indices. + mask : tensor + A multi-hot bool 1D-tensor representing the sampled mask. 
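# --- Illustrative sketch (editor's addition, not part of the patch) --------
# Dispatch rule shared by the two path-sampling wrappers above: an int in
# `sampled` runs exactly one candidate op, while a list of ints sums the
# outputs of the selected ops. `TinyPathChoice` is a hypothetical stand-in,
# not the library class.
import torch
import torch.nn as nn

class TinyPathChoice(nn.Module):
    def __init__(self, ops):
        super().__init__()
        self.ops = nn.ModuleList(ops)
        self.sampled = None  # int or list of int, set by the search algorithm

    def forward(self, x):
        if isinstance(self.sampled, list):
            return sum(self.ops[i](x) for i in self.sampled)
        return self.ops[self.sampled](x)

choice = TinyPathChoice([nn.Identity(), nn.Linear(8, 8), nn.Linear(8, 8)])
x = torch.randn(2, 8)
choice.sampled = 0
y_single = choice(x)       # exactly one path (identity)
choice.sampled = [1, 2]
y_summed = choice(x)       # sum of the two linear paths
# ----------------------------------------------------------------------------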
+ """ + + def __init__(self, input_choice): + super(PathSamplingInputChoice, self).__init__() + self.n_candidates = input_choice.n_candidates + self.n_chosen = input_choice.n_chosen + self.sampled = None + + def forward(self, input_tensors): + if isinstance(self.sampled, list): + return sum([input_tensors[t] for t in self.sampled]) # pylint: disable=not-an-iterable + else: + return input_tensors[self.sampled] + + def __len__(self): + return self.n_candidates + + @property + def mask(self): + return _get_mask(self.sampled, len(self)) + + def __repr__(self): + return f'PathSamplingInputChoice(n_candidates={self.n_candidates}, chosen={self.sampled})' + +class StackedLSTMCell(nn.Module): + def __init__(self, layers, size, bias): + super().__init__() + self.lstm_num_layers = layers + self.lstm_modules = nn.ModuleList([nn.LSTMCell(size, size, bias=bias) + for _ in range(self.lstm_num_layers)]) + + def forward(self, inputs, hidden): + prev_h, prev_c = hidden + next_h, next_c = [], [] + for i, m in enumerate(self.lstm_modules): + curr_h, curr_c = m(inputs, (prev_h[i], prev_c[i])) + next_c.append(curr_c) + next_h.append(curr_h) + # current implementation only supports batch size equals 1, + # but the algorithm does not necessarily have this limitation + inputs = curr_h[-1].view(1, -1) + return next_h, next_c + + +class ReinforceField: + """ + A field with ``name``, with ``total`` choices. ``choose_one`` is true if one and only one is meant to be + selected. Otherwise, any number of choices can be chosen. + """ + + def __init__(self, name, total, choose_one): + self.name = name + self.total = total + self.choose_one = choose_one + + def __repr__(self): + return f'ReinforceField(name={self.name}, total={self.total}, choose_one={self.choose_one})' + + +class ReinforceController(nn.Module): + """ + A controller that mutates the graph with RL. + + Parameters + ---------- + fields : list of ReinforceField + List of fields to choose. + lstm_size : int + Controller LSTM hidden units. + lstm_num_layers : int + Number of layers for stacked LSTM. + tanh_constant : float + Logits will be equal to ``tanh_constant * tanh(logits)``. Don't use ``tanh`` if this value is ``None``. + skip_target : float + Target probability that skipconnect will appear. + temperature : float + Temperature constant that divides the logits. + entropy_reduction : str + Can be one of ``sum`` and ``mean``. How the entropy of multi-input-choice is reduced. + """ + + def __init__(self, fields, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, + skip_target=0.4, temperature=None, entropy_reduction='sum'): + super(ReinforceController, self).__init__() + self.fields = fields + self.lstm_size = lstm_size + self.lstm_num_layers = lstm_num_layers + self.tanh_constant = tanh_constant + self.temperature = temperature + self.skip_target = skip_target + + self.lstm = StackedLSTMCell(self.lstm_num_layers, self.lstm_size, False) + self.attn_anchor = nn.Linear(self.lstm_size, self.lstm_size, bias=False) + self.attn_query = nn.Linear(self.lstm_size, self.lstm_size, bias=False) + self.v_attn = nn.Linear(self.lstm_size, 1, bias=False) + self.g_emb = nn.Parameter(torch.randn(1, self.lstm_size) * 0.1) + self.skip_targets = nn.Parameter(torch.tensor([1.0 - self.skip_target, self.skip_target]), # pylint: disable=not-callable + requires_grad=False) + assert entropy_reduction in ['sum', 'mean'], 'Entropy reduction must be one of sum and mean.' 
+ self.entropy_reduction = torch.sum if entropy_reduction == 'sum' else torch.mean + self.cross_entropy_loss = nn.CrossEntropyLoss(reduction='none') + self.soft = nn.ModuleDict({ + field.name: nn.Linear(self.lstm_size, field.total, bias=False) for field in fields + }) + self.embedding = nn.ModuleDict({ + field.name: nn.Embedding(field.total, self.lstm_size) for field in fields + }) + + def resample(self): + self._initialize() + result = dict() + for field in self.fields: + result[field.name] = self._sample_single(field) + return result + + def _initialize(self): + self._inputs = self.g_emb.data + self._c = [torch.zeros((1, self.lstm_size), + dtype=self._inputs.dtype, + device=self._inputs.device) for _ in range(self.lstm_num_layers)] + self._h = [torch.zeros((1, self.lstm_size), + dtype=self._inputs.dtype, + device=self._inputs.device) for _ in range(self.lstm_num_layers)] + self.sample_log_prob = 0 + self.sample_entropy = 0 + self.sample_skip_penalty = 0 + + def _lstm_next_step(self): + self._h, self._c = self.lstm(self._inputs, (self._h, self._c)) + + def _sample_single(self, field): + self._lstm_next_step() + logit = self.soft[field.name](self._h[-1]) + if self.temperature is not None: + logit /= self.temperature + if self.tanh_constant is not None: + logit = self.tanh_constant * torch.tanh(logit) + if field.choose_one: + sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1) + log_prob = self.cross_entropy_loss(logit, sampled) + self._inputs = self.embedding[field.name](sampled) + else: + logit = logit.view(-1, 1) + logit = torch.cat([-logit, logit], 1) # pylint: disable=invalid-unary-operand-type + sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1) + skip_prob = torch.sigmoid(logit) + kl = torch.sum(skip_prob * torch.log(skip_prob / self.skip_targets)) + self.sample_skip_penalty += kl + log_prob = self.cross_entropy_loss(logit, sampled) + sampled = sampled.nonzero().view(-1) + if sampled.sum().item(): + self._inputs = (torch.sum(self.embedding[field.name](sampled.view(-1)), 0) / (1. + torch.sum(sampled))).unsqueeze(0) + else: + self._inputs = torch.zeros(1, self.lstm_size, device=self.embedding[field.name].weight.device) + + sampled = sampled.detach().numpy().tolist() + self.sample_log_prob += self.entropy_reduction(log_prob) + entropy = (log_prob * torch.exp(-log_prob)).detach() # pylint: disable=invalid-unary-operand-type + self.sample_entropy += self.entropy_reduction(entropy) + if len(sampled) == 1: + sampled = sampled[0] + return sampled + + +class RL(BaseNAS): + """ + ENAS trainer. + + Parameters + ---------- + model : nn.Module + PyTorch model to be trained. + loss : callable + Receives logits and ground truth label, return a loss tensor. + metrics : callable + Receives logits and ground truth label, return a dict of metrics. + reward_function : callable + Receives logits and ground truth label, return a tensor, which will be feeded to RL controller as reward. + optimizer : Optimizer + The optimizer used for optimizing the model. + num_epochs : int + Number of epochs planned for training. + dataset : Dataset + Dataset for training. Will be split for training weights and architecture weights. + batch_size : int + Batch size. + workers : int + Workers for data loading. + device : torch.device + ``torch.device("cpu")`` or ``torch.device("cuda")``. + log_frequency : int + Step count per logging. + grad_clip : float + Gradient clipping. Set to 0 to disable. Default: 5. + entropy_weight : float + Weight of sample entropy loss. 
+ skip_weight : float + Weight of skip penalty loss. + baseline_decay : float + Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``. + ctrl_lr : float + Learning rate for RL controller. + ctrl_steps_aggregate : int + Number of steps that will be aggregated into one mini-batch for RL controller. + ctrl_steps : int + Number of mini-batches for each epoch of RL controller learning. + ctrl_kwargs : dict + Optional kwargs that will be passed to :class:`ReinforceController`. + """ + + def __init__(self, device='cuda', workers=4,log_frequency=None, + grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999, + ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,*args,**kwargs): + super().__init__(device) + self.device=device + self.num_epochs = kwargs.get("num_epochs", 5) + self.workers = workers + self.log_frequency = log_frequency + self.entropy_weight = entropy_weight + self.skip_weight = skip_weight + self.baseline_decay = baseline_decay + self.baseline = 0. + self.ctrl_steps_aggregate = ctrl_steps_aggregate + self.grad_clip = grad_clip + self.workers = workers + self.ctrl_kwargs=ctrl_kwargs + self.ctrl_lr=ctrl_lr + self.n_warmup=n_warmup + self.model_lr = model_lr + self.model_wd = model_wd + self.log=open('log.txt','w') + def search(self, space: BaseSpace, dset, estimator): + self.model = space + self.dataset = dset#.to(self.device) + self.estimator = estimator + # replace choice + self.nas_modules = [] + + k2o = get_module_order(self.model) + replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules) + replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules) + self.nas_modules = sort_replaced_module(k2o, self.nas_modules) + + # to device + self.model = self.model.to(self.device) + # fields + self.nas_fields = [ReinforceField(name, len(module), + isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1) + for name, module in self.nas_modules] + self.controller = ReinforceController(self.nas_fields, **(self.ctrl_kwargs or {})) + self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr) + # train + with tqdm(range(self.num_epochs)) as bar: + for i in bar: + l2=self._train_controller(i) + + # try: + # l2=self._train_controller(i) + # except Exception as e: + # print(e) + # nm=self.nas_modules + # for i in range(len(nm)): + # print(nm[i][1].sampled) + # # import pdb + # # pdb.set_trace() + bar.set_postfix(reward_controller=l2) + + selection=self.export() + arch=space.export(selection,self.device) + print(selection,arch) + return arch + + def _train_controller(self, epoch): + self.model.eval() + self.controller.train() + self.ctrl_optim.zero_grad() + rewards=[] + with tqdm(range(self.ctrl_steps_aggregate)) as bar: + for ctrl_step in bar: + self._resample() + metric,loss=self._infer(mask='val') + bar.set_postfix(acc=metric,loss=loss.item()) + self.log.write(f'{self.arch}\n{self.selection}\n{metric},{loss}\n') + self.log.flush() + reward =metric + rewards.append(reward) + if self.entropy_weight: + reward += self.entropy_weight * self.controller.sample_entropy.item() + self.baseline = self.baseline * self.baseline_decay + reward * (1 - self.baseline_decay) + loss = self.controller.sample_log_prob * (reward - self.baseline) + if self.skip_weight: + loss += self.skip_weight * self.controller.sample_skip_penalty + loss /= self.ctrl_steps_aggregate + loss.backward() + + if (ctrl_step + 1) % 
self.ctrl_steps_aggregate == 0: + if self.grad_clip > 0: + nn.utils.clip_grad_norm_(self.controller.parameters(), self.grad_clip) + self.ctrl_optim.step() + self.ctrl_optim.zero_grad() + + if self.log_frequency is not None and ctrl_step % self.log_frequency == 0: + _logger.info('RL Epoch [%d/%d] Step [%d/%d] %s', epoch + 1, self.num_epochs, + ctrl_step + 1, self.ctrl_steps_aggregate) + return sum(rewards)/len(rewards) + + def _resample(self): + result = self.controller.resample() + self.arch=self.model.export(result,device=self.device) + self.selection=result + def export(self): + self.controller.eval() + with torch.no_grad(): + return self.controller.resample() + + def _infer(self,mask='train'): + metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask) + return metric, loss diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py index e43695c..4964947 100644 --- a/autogl/module/nas/estimator/one_shot.py +++ b/autogl/module/nas/estimator/one_shot.py @@ -20,3 +20,26 @@ class OneShotEstimator(BaseEstimator): loss = F.nll_loss(pred, y) acc=sum(pred.max(1)[1]==y).item()/y.size(0) return acc, loss + +from autogl.module.train import NodeClassificationFullTrainer +class TrainEstimator(BaseEstimator): + def __init__(self): + self.estimator=OneShotEstimator() + def infer(self,model: BaseSpace, dataset, mask="train"): + # self.trainer.model=model + # self.trainer.device=model.device + self.trainer=NodeClassificationFullTrainer( + model=model, + optimizer=torch.optim.Adam, + lr=0.01, + max_epoch=200, + early_stopping_round=200, + weight_decay=5e-4, + device="auto", + init=False, + feval=['acc'], + loss="nll_loss", + lr_scheduler_type=None) + self.trainer.train(dataset) + with torch.no_grad(): + return self.estimator.infer(model,dataset,mask) diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py index 75c9746..fcbffa2 100644 --- a/autogl/module/nas/space/single_path.py +++ b/autogl/module/nas/space/single_path.py @@ -27,8 +27,6 @@ class FixedNodeClassificationModel(BaseModel): apply_fixed_architecture(self._model, selection, verbose=False) self.params = {"num_class": self.num_classes, "features_num": self.num_features} self.device = device - print(self._model) - print(selection) def to(self, device): if isinstance(device, (str, torch.device)): diff --git a/examples/test_graph_nas_rl.py b/examples/test_graph_nas_rl.py new file mode 100644 index 0000000..5ec5480 --- /dev/null +++ b/examples/test_graph_nas_rl.py @@ -0,0 +1,42 @@ +import sys +sys.path.append('../') +from torch_geometric.nn import GCNConv +import torch +from autogl.datasets import build_dataset_from_name +from autogl.solver import AutoNodeClassifier +from autogl.module.train import NodeClassificationFullTrainer +from autogl.module.nas import Darts, OneShotEstimator +from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace +from autogl.module.train import Acc +from autogl.module.nas.algorithm.enas import Enas +from autogl.module.nas.algorithm.rl import RL +from autogl.module.nas.estimator.one_shot import TrainEstimator +import logging +if __name__ == '__main__': + logging.getLogger().setLevel(logging.WARNING) + dataset = build_dataset_from_name('cora') + solver = AutoNodeClassifier( + feature_module='PYGNormalizeFeatures', + graph_models=[], + hpo_module=None, + ensemble_module=None, + default_trainer=NodeClassificationFullTrainer( + optimizer=torch.optim.Adam, + lr=0.01, + max_epoch=200, + early_stopping_round=200, + 
weight_decay=5e-4, + device="auto", + init=False, + feval=['acc'], + loss="nll_loss", + lr_scheduler_type=None,), + nas_algorithms=[RL(num_epochs=400)], + #nas_algorithms=[Darts(num_epochs=200)], + nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=16,search_act_con=True,layer_number=2)], + nas_estimators=[TrainEstimator()] + ) + solver.fit(dataset) + solver.get_leaderboard().show() + out = solver.predict_proba() + print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) From edecb3149cc67da31fbc6cc659b778ac794d308f Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Fri, 7 May 2021 09:17:36 +0000 Subject: [PATCH 066/144] add graphnas space test --- examples/test_graph_nas_space.py | 73 ++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 examples/test_graph_nas_space.py diff --git a/examples/test_graph_nas_space.py b/examples/test_graph_nas_space.py new file mode 100644 index 0000000..0d60f3c --- /dev/null +++ b/examples/test_graph_nas_space.py @@ -0,0 +1,73 @@ +import sys +from nni.nas.pytorch.mutables import Mutable +sys.path.append('../') +from torch_geometric.nn import GCNConv +import torch +from autogl.datasets import build_dataset_from_name +from autogl.solver import AutoNodeClassifier +from autogl.module.train import NodeClassificationFullTrainer +from autogl.module.nas import Darts, OneShotEstimator +from autogl.module.nas.space.graph_nas import * +from autogl.module.train import Acc +from autogl.module.nas.algorithm.enas import Enas +from autogl.module.nas.algorithm.rl import * +from autogl.module.nas.estimator.one_shot import TrainEstimator +import logging +import numpy as np +from tqdm import tqdm +if __name__ == '__main__': + logging.getLogger().setLevel(logging.WARNING) + dataset = build_dataset_from_name('cora') + space=GraphNasNodeClassificationSpace(hidden_dim=16,search_act_con=True,layer_number=2) + space.instantiate(input_dim=dataset[0].x.shape[1], + output_dim=dataset.num_classes,) + estim=TrainEstimator() + # solver.fit(dataset) + # solver.get_leaderboard().show() + # out = solver.predict_proba() + + # print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) + class Tmp: + def __init__(self,space): + self.model = space + self.nas_modules = [] + k2o = get_module_order(self.model) + replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules) + replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules) + self.nas_modules = sort_replaced_module(k2o, self.nas_modules) + + t=Tmp(space) + print(t.nas_modules) + nm=t.nas_modules + selection_range={} + for k,v in nm: + selection_range[k]=len(v) + ks=list(selection_range.keys()) + selections=[] + def dfs(selection,d): + if d>=len(ks): + selections.append(selection.copy()) + return + k=ks[d] + r=selection_range[k] + for i in range(r): + selection[k]=i + dfs(selection,d+1) + dfs({},0) + print(f'#selections {len(selections)}') + device=torch.device('cuda:0') + accs=[] + from datetime import datetime + timestamp=datetime.now().strftime('%m%d-%H-%M-%S') + log=open(f'acclog{timestamp}.txt','w') + with tqdm(selections) as bar: + for selection in bar: + arch=space.export(selection,device) + m,l=estim.infer(arch,dataset,'test') + bar.set_postfix(m=m,l=l.item()) + log.write(f'{arch}\n{selection}\n{m},{l}\n') + log.flush() + accs.append(m) + + np.save(f'space_acc{timestamp}',np.array(accs)) + print(f'max acc {np.max(accs)}') \ No newline at end of file From 022bc607ce1604750793856ffe39afc8d40b41a0 Mon 
Sep 17 00:00:00 2001
From: null
Date: Thu, 13 May 2021 01:00:00 +0800
Subject: [PATCH 067/144] Attempt to design an experimental abstract
 TargetDependantSampler

Attempt to design an experimental abstract TargetDependantSampler
interface for Node-wise Sampling and Layer-wise Sampling.

Refactor the Neighbor Sampler and Layer Dependant Importance Sampling
(LADIES) to be implementations of the TargetDependantSampler interface,
and meanwhile fix a sampling issue: previously, samplers and the
corresponding trainers only sampled edge_index, without sampling the
involved sub-graph and remapping the node indexes for the sampled
sub-graph. Now both the Neighbor Sampler and the LADIES Sampler sample
a sub-graph composed of all the nodes involved in the sampled edges for
all layers, according to the sampled set of edge_index.

TODO: The LADIES Sampler MUST be fixed soon; Zixin plans to do it.
TODO: The unified abstraction of autogl.data.Data and autogl.data.Dataset
is urgently necessary for the long-term development of the AutoGL
library. Schedule the abstraction for the next major version.
---
 autogl/module/train/evaluation.py             |   61 +-
 .../node_classification_sampled_trainer.py    | 1027 ++++++++++++-----
 .../layer_dependent_importance_sampler.py     |  184 ++-
 .../sampling/sampler/neighbor_sampler.py      |  160 +--
 .../sampler/target_dependant_sampler.py       |  262 +++++
 5 files changed, 1202 insertions(+), 492 deletions(-)
 create mode 100644 autogl/module/train/sampling/sampler/target_dependant_sampler.py

diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py
index c3ed320..b0d25cb 100644
--- a/autogl/module/train/evaluation.py
+++ b/autogl/module/train/evaluation.py
@@ -17,7 +17,7 @@ class Evaluation:
     @staticmethod
     def is_higher_better() -> bool:
         """ Expected to return whether this evaluation method is higher better (bool) """
-        return True
+        raise NotImplementedError

     @staticmethod
     def evaluate(predict, label) -> float:
@@ -25,6 +25,65 @@ class Evaluation:
         raise NotImplementedError


+class EvaluatorUtility:
+    class PredictionBatchCumulativeBuilder:
+        """ Batch-cumulative builder for prediction """
+        def __init__(self):
+            self.__indexes_in_integral_data: _typing.Optional[np.ndarray] = None
+            self.__prediction: _typing.Optional[np.ndarray] = None
+
+        def clear_batches(
+                self, *__args, **__kwargs
+        ) -> 'EvaluatorUtility.PredictionBatchCumulativeBuilder':
+            self.__indexes_in_integral_data = None
+            self.__prediction = None
+            return self
+
+        def add_batch(
+                self, indexes_in_integral_data: np.ndarray,
+                batch_prediction: np.ndarray
+        ) -> 'EvaluatorUtility.PredictionBatchCumulativeBuilder':
+            if not(
+                    isinstance(indexes_in_integral_data, np.ndarray)
+                    and isinstance(batch_prediction, np.ndarray)
+                    and len(indexes_in_integral_data.shape) == 1
+            ):
+                raise TypeError
+            elif indexes_in_integral_data.shape[0] != batch_prediction.shape[0]:
+                raise ValueError
+
+            if self.__indexes_in_integral_data is None:
+                if indexes_in_integral_data.shape != np.unique(indexes_in_integral_data).shape:
+                    raise ValueError(
+                        f"There exists a duplicate index "
+                        f"in the argument indexes_in_integral_data {indexes_in_integral_data}"
+                    )
+                else:
+                    self.__indexes_in_integral_data: np.ndarray = np.unique(indexes_in_integral_data)
+            else:
+                __indexes_in_integral_data = np.concatenate(
+                    (self.__indexes_in_integral_data, indexes_in_integral_data)
+                )
+                if __indexes_in_integral_data.shape != np.unique(__indexes_in_integral_data).shape:
+                    raise ValueError
+                else:
+                    self.__indexes_in_integral_data: np.ndarray
= __indexes_in_integral_data + + if self.__prediction is None: + self.__prediction: np.ndarray = batch_prediction + else: + self.__prediction: np.ndarray = np.concatenate((self.__prediction, batch_prediction)) + + return self + + def compose(self, __sorted: bool = True, **__kwargs) -> _typing.Tuple[np.ndarray, np.ndarray]: + if __sorted: + sorted_index = np.argsort(self.__indexes_in_integral_data) + return self.__indexes_in_integral_data[sorted_index], self.__prediction[sorted_index] + else: + return self.__indexes_in_integral_data, self.__prediction + + EVALUATE_DICT: _typing.Dict[str, _typing.Type[Evaluation]] = {} diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 8f727f9..6894d42 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -1,12 +1,16 @@ +import os import torch import logging import typing as _typing import torch.nn.functional import torch.utils.data +import tqdm +import autogl.data from .. import register_trainer from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation -from ..evaluation import get_feval, Logloss +from ..evaluation import get_feval, Logloss, EvaluatorUtility +from ..sampling.sampler.target_dependant_sampler import TargetDependantSampledData from ..sampling.sampler.neighbor_sampler import NeighborSampler from ..sampling.sampler.graphsaint_sampler import * from ..sampling.sampler.layer_dependent_importance_sampler import ( @@ -17,20 +21,359 @@ from ...model import BaseModel LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") -@register_trainer("NodeClassificationNeighborSampling") -class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): - """ - The node classification trainer - for automatically training the node classification tasks - with neighbour sampling - """ +# @register_trainer("NodeClassificationNeighborSampling") +# class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): +# """ +# The node classification trainer +# for automatically training the node classification tasks +# with neighbour sampling +# """ +# +# def __init__( +# self, +# model: _typing.Union[BaseModel, str], +# num_features: int, +# num_classes: int, +# optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = None, +# lr: float = 1e-4, +# max_epoch: int = 100, +# early_stopping_round: int = 100, +# weight_decay: float = 1e-4, +# device: _typing.Optional[torch.device] = None, +# init: bool = True, +# feval: _typing.Union[ +# _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] +# ] = (Logloss,), +# loss: str = "nll_loss", +# lr_scheduler_type: _typing.Optional[str] = None, +# **kwargs, +# ) -> None: +# if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): +# self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer +# elif type(optimizer) == str: +# if optimizer.lower() == "adam": +# self._optimizer_class: _typing.Type[ +# torch.optim.Optimizer +# ] = torch.optim.Adam +# elif optimizer.lower() == "adam" + "w": +# self._optimizer_class: _typing.Type[ +# torch.optim.Optimizer +# ] = torch.optim.AdamW +# elif optimizer.lower() == "sgd": +# self._optimizer_class: _typing.Type[ +# torch.optim.Optimizer +# ] = torch.optim.SGD +# else: +# 
self._optimizer_class: _typing.Type[ +# torch.optim.Optimizer +# ] = torch.optim.Adam +# else: +# self._optimizer_class: _typing.Type[ +# torch.optim.Optimizer +# ] = torch.optim.Adam +# +# self._learning_rate: float = lr if lr > 0 else 1e-4 +# self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type +# self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 +# +# self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") +# +# self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 +# early_stopping_round: int = ( +# early_stopping_round if early_stopping_round > 0 else 1e2 +# ) +# self._early_stopping = EarlyStopping( +# patience=early_stopping_round, verbose=False +# ) +# super(NodeClassificationNeighborSamplingTrainer, self).__init__( +# model, num_features, num_classes, device, init, feval, loss +# ) +# +# self._valid_result: torch.Tensor = torch.zeros(0) +# self._valid_result_prob: torch.Tensor = torch.zeros(0) +# self._valid_score: _typing.Sequence[float] = [] +# +# self._hyper_parameter_space: _typing.Sequence[ +# _typing.Dict[str, _typing.Any] +# ] = [] +# +# self.__initialized: bool = False +# if init: +# self.initialize() +# +# def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": +# if self.__initialized: +# return self +# self.model.initialize() +# self.__initialized = True +# return self +# +# def get_model(self) -> BaseModel: +# return self.model +# +# def __train_only(self, data) -> "NodeClassificationNeighborSamplingTrainer": +# """ +# The function of training on the given dataset and mask. +# :param data: data of a specific graph +# :return: self +# """ +# data = data.to(self.device) +# optimizer: torch.optim.Optimizer = self._optimizer_class( +# self.model.model.parameters(), +# lr=self._learning_rate, +# weight_decay=self._weight_decay, +# ) +# if type(self._lr_scheduler_type) == str: +# if self._lr_scheduler_type.lower() == "step" + "lr": +# lr_scheduler: torch.optim.lr_scheduler.StepLR = ( +# torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) +# ) +# elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": +# lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( +# torch.optim.lr_scheduler.MultiStepLR( +# optimizer, milestones=[30, 80], gamma=0.1 +# ) +# ) +# elif self._lr_scheduler_type.lower() == "exponential" + "lr": +# lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( +# torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) +# ) +# elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): +# lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( +# torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") +# ) +# else: +# lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( +# torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) +# ) +# else: +# lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( +# torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) +# ) +# +# train_sampler: NeighborSampler = NeighborSampler( +# data, self.__sampling_sizes, batch_size=20 +# ) +# +# for current_epoch in range(self._max_epoch): +# self.model.model.train() +# """ epoch start """ +# for target_node_indexes, edge_indexes in train_sampler: +# optimizer.zero_grad() +# data.edge_indexes = edge_indexes +# prediction = self.model.model(data) +# if not hasattr(torch.nn.functional, self.loss): +# raise TypeError( +# "PyTorch does not support loss type {}".format(self.loss) +# ) +# loss_function = getattr(torch.nn.functional, self.loss) +# loss: torch.Tensor = 
loss_function( +# prediction[target_node_indexes], data.y[target_node_indexes] +# ) +# loss.backward() +# optimizer.step() +# +# if lr_scheduler is not None: +# lr_scheduler.step() +# +# """ Validate performance """ +# if hasattr(data, "val_mask") and getattr(data, "val_mask") is not None: +# validation_results: _typing.Sequence[float] = self.evaluate( +# (data,), "val", [self.feval[0]] +# ) +# +# if self.feval[0].is_higher_better(): +# validation_loss: float = -validation_results[0] +# else: +# validation_loss: float = validation_results[0] +# self._early_stopping(validation_loss, self.model.model) +# if self._early_stopping.early_stop: +# LOGGER.debug("Early stopping at %d", current_epoch) +# break +# if hasattr(data, "val_mask") and data.val_mask is not None: +# self._early_stopping.load_checkpoint(self.model.model) +# return self +# +# def __predict_only(self, data): +# """ +# The function of predicting on the given data. +# :param data: data of a specific graph +# :return: the result of prediction on the given dataset +# """ +# data = data.to(self.device) +# self.model.model.eval() +# with torch.no_grad(): +# prediction = self.model.model(data) +# return prediction +# +# def train(self, dataset, keep_valid_result: bool = True): +# """ +# The function of training on the given dataset and keeping valid result. +# :param dataset: +# :param keep_valid_result: Whether to save the validation result after training +# """ +# data = dataset[0] +# self.__train_only(data) +# if keep_valid_result: +# prediction: torch.Tensor = self.__predict_only(data) +# self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] +# self._valid_result_prob: torch.Tensor = prediction[data.val_mask] +# self._valid_score = self.evaluate(dataset, "val") +# +# def predict_proba( +# self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False +# ) -> torch.Tensor: +# """ +# The function of predicting the probability on the given dataset. +# :param dataset: The node classification dataset used to be predicted. 
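# --- Illustrative sketch (editor's addition, not part of the patch) --------
# Intended use of the PredictionBatchCumulativeBuilder added to evaluation.py
# earlier in this patch: accumulate per-batch predictions keyed by node
# indexes in the integral graph, then compose an index-sorted pair of arrays.
# Note that the first batch is passed through np.unique (which sorts), so
# indexes should be supplied in ascending order to keep rows aligned.
import numpy as np
from autogl.module.train.evaluation import EvaluatorUtility

builder = EvaluatorUtility.PredictionBatchCumulativeBuilder()
builder.add_batch(np.array([2, 4]), np.array([[0.1, 0.9], [0.8, 0.2]]))
builder.add_batch(np.array([7]), np.array([[0.5, 0.5]]))
indexes, prediction = builder.compose()  # indexes == [2, 4, 7], rows aligned
builder.clear_batches()                  # reset before the next evaluation
# ----------------------------------------------------------------------------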
+# :param mask: +# :param in_log_format: +# :return: +# """ +# data = dataset[0].to(self.device) +# if mask is not None and type(mask) == str: +# if mask.lower() == "train": +# _mask = data.train_mask +# elif mask.lower() == "test": +# _mask = data.test_mask +# elif mask.lower() == "val": +# _mask = data.val_mask +# else: +# _mask = data.test_mask +# else: +# _mask = data.test_mask +# result = self.__predict_only(data)[_mask] +# return result if in_log_format else torch.exp(result) +# +# def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: +# return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] +# +# def get_valid_predict(self) -> torch.Tensor: +# return self._valid_result +# +# def get_valid_predict_proba(self) -> torch.Tensor: +# return self._valid_result_prob +# +# def get_valid_score(self, return_major: bool = True): +# if return_major: +# return (self._valid_score[0], self.feval[0].is_higher_better()) +# else: +# return (self._valid_score, [f.is_higher_better() for f in self.feval]) +# +# def get_name_with_hp(self) -> str: +# name = "-".join( +# [ +# str(self._optimizer_class), +# str(self._learning_rate), +# str(self._max_epoch), +# str(self._early_stopping.patience), +# str(self.model), +# str(self.device), +# ] +# ) +# name = ( +# name +# + "|" +# + "-".join( +# [ +# str(x[0]) + "-" + str(x[1]) +# for x in self.model.get_hyper_parameter().items() +# ] +# ) +# ) +# return name +# +# def evaluate( +# self, +# dataset, +# mask: _typing.Optional[str] = None, +# feval: _typing.Union[ +# None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] +# ] = None, +# ) -> _typing.Sequence[float]: +# data = dataset[0] +# data = data.to(self.device) +# if feval is None: +# _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval +# else: +# _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) +# if mask.lower() == "train": +# _mask = data.train_mask +# elif mask.lower() == "test": +# _mask = data.test_mask +# elif mask.lower() == "val": +# _mask = data.val_mask +# else: +# _mask = data.test_mask +# prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) +# y_ground_truth = data.y[_mask] +# +# results = [] +# for f in _feval: +# try: +# results.append(f.evaluate(prediction_probability, y_ground_truth)) +# except: +# results.append( +# f.evaluate( +# prediction_probability.cpu().numpy(), +# y_ground_truth.cpu().numpy(), +# ) +# ) +# return results +# +# def to(self, device: torch.device): +# self.device = device +# if self.model is not None: +# self.model.to(self.device) +# +# def duplicate_from_hyper_parameter( +# self, +# hp: _typing.Dict[str, _typing.Any], +# model: _typing.Union[BaseModel, str, None] = None, +# ) -> "NodeClassificationNeighborSamplingTrainer": +# +# if model is None or not isinstance(model, BaseModel): +# model = self.model +# model = model.from_hyper_parameter( +# dict( +# [ +# x +# for x in hp.items() +# if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] +# ] +# ) +# ) +# +# return NodeClassificationNeighborSamplingTrainer( +# model, +# self.num_features, +# self.num_classes, +# self._optimizer_class, +# device=self.device, +# init=True, +# feval=self.feval, +# loss=self.loss, +# lr_scheduler_type=self._lr_scheduler_type, +# **hp, +# ) +# +# @property +# def hyper_parameter_space(self): +# return self._hyper_parameter_space +# +# @hyper_parameter_space.setter +# def hyper_parameter_space(self, hp_space): +# self._hyper_parameter_space = hp_space + 
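# --- Illustrative sketch (editor's addition, not part of the patch) --------
# The trainers below (GraphSAINT, LADIES, NeighborSampling) each resolve
# their `optimizer` argument with the same if/elif chain; the convention
# compresses to: accept an Optimizer subclass as-is, map the strings
# "adam"/"adamw"/"sgd", and fall back to Adam for anything else.
import torch

def resolve_optimizer(optimizer):
    if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer):
        return optimizer
    table = {"adam": torch.optim.Adam, "adamw": torch.optim.AdamW,
             "sgd": torch.optim.SGD}
    return table.get(str(optimizer).lower(), torch.optim.Adam)

assert resolve_optimizer("AdamW") is torch.optim.AdamW
assert resolve_optimizer(None) is torch.optim.Adam
# ----------------------------------------------------------------------------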
+@register_trainer("NodeClassificationGraphSAINTTrainer") +class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): def __init__( self, - model: _typing.Union[BaseModel, str], + model: _typing.Union[BaseModel], num_features: int, num_classes: int, - optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = None, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., lr: float = 1e-4, max_epoch: int = 100, early_stopping_round: int = 100, @@ -67,13 +410,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._optimizer_class: _typing.Type[ torch.optim.Optimizer ] = torch.optim.Adam - self._learning_rate: float = lr if lr > 0 else 1e-4 self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 - - self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") - self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 early_stopping_round: int = ( early_stopping_round if early_stopping_round > 0 else 1e2 @@ -81,33 +420,52 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._early_stopping = EarlyStopping( patience=early_stopping_round, verbose=False ) - super(NodeClassificationNeighborSamplingTrainer, self).__init__( - model, num_features, num_classes, device, init, feval, loss - ) - - self._valid_result: torch.Tensor = torch.zeros(0) - self._valid_result_prob: torch.Tensor = torch.zeros(0) - self._valid_score: _typing.Sequence[float] = [] + # Assign an empty initial hyper parameter space self._hyper_parameter_space: _typing.Sequence[ _typing.Dict[str, _typing.Any] ] = [] - self.__initialized: bool = False + self._valid_result: torch.Tensor = torch.zeros(0) + self._valid_result_prob: torch.Tensor = torch.zeros(0) + self._valid_score: _typing.Sequence[float] = () + + super(NodeClassificationGraphSAINTTrainer, self).__init__( + model, num_features, num_classes, device, init, feval, loss + ) + + """ Set hyper parameters """ + self.__num_subgraphs: int = kwargs.get("num_subgraphs") + self.__sampling_budget: int = kwargs.get("sampling_budget") + if ( + kwargs.get("sampling_method") is not None + and type(kwargs.get("sampling_method")) == str + and kwargs.get("sampling_method") in ("node", "edge") + ): + self.__sampling_method_identifier: str = kwargs.get("sampling_method") + else: + self.__sampling_method_identifier: str = "node" + + self.__is_initialized: bool = False if init: self.initialize() - def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": - if self.__initialized: + def initialize(self): + if self.__is_initialized: return self self.model.initialize() - self.__initialized = True + self.__is_initialized = True return self - def get_model(self) -> BaseModel: + def to(self, device: torch.device): + self.device = device + if self.model is not None: + self.model.to(self.device) + + def get_model(self): return self.model - def __train_only(self, data) -> "NodeClassificationNeighborSamplingTrainer": + def __train_only(self, data): """ The function of training on the given dataset and mask. 
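# --- Illustrative sketch (editor's addition, not part of the patch) --------
# How the GraphSAINT trainer's kwargs select its sub-graph sampler: "edge"
# picks the random-edge sampler, anything else (including a missing value)
# falls back to random-node sampling. Constructor arguments follow the calls
# in __train_only below; the sampler classes come from the graphsaint_sampler
# module star-imported at the top of this file.
kwargs = {"num_subgraphs": 10, "sampling_budget": 2000, "sampling_method": "edge"}
method = kwargs.get("sampling_method")
method = method if isinstance(method, str) and method in ("node", "edge") else "node"
if method == "edge":
    sampler = GraphSAINTRandomEdgeSampler(kwargs["sampling_budget"], kwargs["num_subgraphs"])
else:
    sampler = GraphSAINTRandomNodeSampler(kwargs["sampling_budget"], kwargs["num_subgraphs"])
# ----------------------------------------------------------------------------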
:param data: data of a specific graph @@ -147,37 +505,66 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) ) - train_sampler: NeighborSampler = NeighborSampler( - data, self.__sampling_sizes, batch_size=20 - ) + if self.__sampling_method_identifier.lower() == "edge": + sub_graph_sampler = GraphSAINTRandomEdgeSampler( + self.__sampling_budget, self.__num_subgraphs + ) + else: + sub_graph_sampler = GraphSAINTRandomNodeSampler( + self.__sampling_budget, self.__num_subgraphs + ) for current_epoch in range(self._max_epoch): self.model.model.train() """ epoch start """ - for target_node_indexes, edge_indexes in train_sampler: + """ Sample sub-graphs """ + sub_graph_set = sub_graph_sampler.sample(data) + sub_graphs_loader: torch.utils.data.DataLoader = ( + torch.utils.data.DataLoader(sub_graph_set) + ) + integral_alpha: torch.Tensor = getattr(sub_graph_set, "alpha") + integral_lambda: torch.Tensor = getattr(sub_graph_set, "lambda") + """ iterate sub-graphs """ + for sub_graph_data in sub_graphs_loader: optimizer.zero_grad() - data.edge_indexes = edge_indexes - prediction = self.model.model(data) + sampled_edge_indexes: torch.Tensor = sub_graph_data.sampled_edge_indexes + sampled_node_indexes: torch.Tensor = sub_graph_data.sampled_node_indexes + sampled_train_mask: torch.Tensor = sub_graph_data.train_mask + + sampled_alpha = integral_alpha[sampled_edge_indexes] + sub_graph_data.edge_weight = 1 / sampled_alpha + + prediction: torch.Tensor = self.model.model(sub_graph_data) + if not hasattr(torch.nn.functional, self.loss): - raise TypeError( - "PyTorch does not support loss type {}".format(self.loss) - ) - loss_function = getattr(torch.nn.functional, self.loss) - loss: torch.Tensor = loss_function( - prediction[target_node_indexes], data.y[target_node_indexes] + raise TypeError(f"PyTorch does not support loss type {self.loss}") + loss_func = getattr(torch.nn.functional, self.loss) + unreduced_loss: torch.Tensor = loss_func( + prediction[sampled_train_mask], + data.y[sampled_train_mask], + reduction="none", ) - loss.backward() + + sampled_lambda: torch.Tensor = integral_lambda[sampled_node_indexes] + sampled_train_lambda: torch.Tensor = sampled_lambda[sampled_train_mask] + assert unreduced_loss.size() == sampled_train_lambda.size() + loss_weighted_sum: torch.Tensor = torch.sum( + unreduced_loss / sampled_train_lambda + ) + loss_weighted_sum.backward() optimizer.step() if lr_scheduler is not None: lr_scheduler.step() """ Validate performance """ - if hasattr(data, "val_mask") and getattr(data, "val_mask") is not None: + if ( + hasattr(data, "val_mask") + and type(getattr(data, "val_mask")) == torch.Tensor + ): validation_results: _typing.Sequence[float] = self.evaluate( (data,), "val", [self.feval[0]] ) - if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] else: @@ -199,26 +586,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) self.model.model.eval() with torch.no_grad(): - prediction = self.model.model(data) - return prediction - - def train(self, dataset, keep_valid_result: bool = True): - """ - The function of training on the given dataset and keeping valid result. 
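# --- Illustrative sketch (editor's addition, not part of the patch) --------
# The GraphSAINT normalization applied in the loop above, in isolation:
# alpha estimates per-edge inclusion probability, so messages are weighted by
# 1/alpha; lambda estimates per-node inclusion probability, so each node's
# loss is weighted by 1/lambda, giving approximately unbiased full-graph
# estimates. Tensors below are toy stand-ins.
import torch

integral_alpha = torch.full((10,), 0.4)    # one entry per edge in the graph
integral_lambda = torch.full((6,), 0.5)    # one entry per node in the graph
sampled_edge_indexes = torch.tensor([0, 3, 7])
sampled_node_indexes = torch.tensor([1, 2, 5])

edge_weight = 1.0 / integral_alpha[sampled_edge_indexes]   # message weights
unreduced_loss = torch.tensor([0.9, 0.3, 0.6])             # per sampled node
loss = torch.sum(unreduced_loss / integral_lambda[sampled_node_indexes])
# ----------------------------------------------------------------------------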
- :param dataset: - :param keep_valid_result: Whether to save the validation result after training - """ - data = dataset[0] - self.__train_only(data) - if keep_valid_result: - prediction: torch.Tensor = self.__predict_only(data) - self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] - self._valid_result_prob: torch.Tensor = prediction[data.val_mask] - self._valid_score = self.evaluate(dataset, "val") + predicted_x: torch.Tensor = self.model.model(data) + return predicted_x def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False - ) -> torch.Tensor: + self, dataset, mask: _typing.Optional[str] = None, in_log_format=False + ): """ The function of predicting the probability on the given dataset. :param dataset: The node classification dataset used to be predicted. @@ -229,32 +602,106 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): data = dataset[0].to(self.device) if mask is not None and type(mask) == str: if mask.lower() == "train": - _mask = data.train_mask + _mask: torch.Tensor = data.train_mask elif mask.lower() == "test": - _mask = data.test_mask + _mask: torch.Tensor = data.test_mask elif mask.lower() == "val": - _mask = data.val_mask + _mask: torch.Tensor = data.val_mask else: - _mask = data.test_mask + _mask: torch.Tensor = data.test_mask else: - _mask = data.test_mask + _mask: torch.Tensor = data.test_mask result = self.__predict_only(data)[_mask] return result if in_log_format else torch.exp(result) def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] + def evaluate( + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, + ) -> _typing.Sequence[float]: + data = dataset[0] + data = data.to(self.device) + if feval is None: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval + else: + _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) + if mask is not None and type(mask) == str: + if mask.lower() == "train": + _mask: torch.Tensor = data.train_mask + elif mask.lower() == "test": + _mask: torch.Tensor = data.test_mask + elif mask.lower() == "val": + _mask: torch.Tensor = data.val_mask + else: + _mask: torch.Tensor = data.test_mask + else: + _mask: torch.Tensor = data.test_mask + prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) + y_ground_truth: torch.Tensor = data.y[_mask] + + eval_results = [] + for f in _feval: + try: + eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) + except: + eval_results.append( + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), + ) + ) + return eval_results + + def train(self, dataset, keep_valid_result: bool = True): + """ + The function of training on the given dataset and keeping valid result. 
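# --- Illustrative sketch (editor's addition, not part of the patch) --------
# The mask-resolution convention shared by predict_proba and evaluate across
# these trainers: "train"/"val"/"test" select the corresponding boolean mask,
# and anything unrecognised (or a non-string) defaults to the test mask.
def resolve_mask(data, mask):
    if isinstance(mask, str):
        return {"train": data.train_mask,
                "val": data.val_mask,
                "test": data.test_mask}.get(mask.lower(), data.test_mask)
    return data.test_mask
# ----------------------------------------------------------------------------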
+ :param dataset: + :param keep_valid_result: Whether to save the validation result after training + """ + data = dataset[0] + self.__train_only(data) + if keep_valid_result: + prediction: torch.Tensor = self.__predict_only(data) + self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] + self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") + def get_valid_predict(self) -> torch.Tensor: return self._valid_result def get_valid_predict_proba(self) -> torch.Tensor: return self._valid_result_prob - def get_valid_score(self, return_major: bool = True): + def get_valid_score( + self, return_major: bool = True + ) -> _typing.Tuple[ + _typing.Union[float, _typing.Sequence[float]], + _typing.Union[bool, _typing.Sequence[bool]], + ]: if return_major: - return (self._valid_score[0], self.feval[0].is_higher_better()) + return self._valid_score[0], self.feval[0].is_higher_better() else: - return (self._valid_score, [f.is_higher_better() for f in self.feval]) + return ( + self._valid_score, [f.is_higher_better() for f in self.feval] + ) + + @property + def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: + return self._hyper_parameter_space + + @hyper_parameter_space.setter + def hyper_parameter_space( + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + ) -> None: + if not isinstance(hp_space, _typing.Sequence): + raise TypeError + self._hyper_parameter_space = hp_space def get_name_with_hp(self) -> str: name = "-".join( @@ -279,57 +726,13 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) return name - def evaluate( - self, - dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = None, - ) -> _typing.Sequence[float]: - data = dataset[0] - data = data.to(self.device) - if feval is None: - _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval - else: - _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) - if mask.lower() == "train": - _mask = data.train_mask - elif mask.lower() == "test": - _mask = data.test_mask - elif mask.lower() == "val": - _mask = data.val_mask - else: - _mask = data.test_mask - prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) - y_ground_truth = data.y[_mask] - - results = [] - for f in _feval: - try: - results.append(f.evaluate(prediction_probability, y_ground_truth)) - except: - results.append( - f.evaluate( - prediction_probability.cpu().numpy(), - y_ground_truth.cpu().numpy(), - ) - ) - return results - - def to(self, device: torch.device): - self.device = device - if self.model is not None: - self.model.to(self.device) - def duplicate_from_hyper_parameter( self, hp: _typing.Dict[str, _typing.Any], - model: _typing.Union[BaseModel, str, None] = None, - ) -> "NodeClassificationNeighborSamplingTrainer": - + model: _typing.Optional[BaseModel] = None, + ) -> "NodeClassificationGraphSAINTTrainer": if model is None or not isinstance(model, BaseModel): - model = self.model + model: BaseModel = self.model model = model.from_hyper_parameter( dict( [ @@ -339,8 +742,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ] ) ) - - return NodeClassificationNeighborSamplingTrainer( + return NodeClassificationGraphSAINTTrainer( model, self.num_features, self.num_classes, @@ -353,35 +755,27 @@ class 
NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): **hp, ) - @property - def hyper_parameter_space(self): - return self._hyper_parameter_space - @hyper_parameter_space.setter - def hyper_parameter_space(self, hp_space): - self._hyper_parameter_space = hp_space - - -@register_trainer("NodeClassificationGraphSAINTTrainer") -class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): +@register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer") +class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer): def __init__( - self, - model: _typing.Union[BaseModel], - num_features: int, - num_classes: int, - optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs, + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, ) -> None: if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer @@ -410,37 +804,28 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 - early_stopping_round: int = ( - early_stopping_round if early_stopping_round > 0 else 1e2 - ) self._early_stopping = EarlyStopping( - patience=early_stopping_round, verbose=False + patience=early_stopping_round if early_stopping_round > 0 else 1e2, + verbose=False ) - - # Assign an empty initial hyper parameter space - self._hyper_parameter_space: _typing.Sequence[ - _typing.Dict[str, _typing.Any] - ] = [] + """ Assign an empty initial hyper parameter space """ + self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () - super(NodeClassificationGraphSAINTTrainer, self).__init__( + super(NodeClassificationLayerDependentImportanceSamplingTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) """ Set hyper parameters """ - self.__num_subgraphs: int = kwargs.get("num_subgraphs") - self.__sampling_budget: int = kwargs.get("sampling_budget") - if ( - kwargs.get("sampling_method") is not None - and type(kwargs.get("sampling_method")) == str - and kwargs.get("sampling_method") in ("node", "edge") - ): - self.__sampling_method_identifier: str = kwargs.get("sampling_method") - else: - self.__sampling_method_identifier: str = "node" + " Configure num_layers " + 
self.__num_layers: int = kwargs.get("num_layers") + " Configure sampled_node_size_budget " + self.__sampled_node_size_budget: int = ( + kwargs.get("sampled_node_size_budget") + ) self.__is_initialized: bool = False if init: @@ -467,12 +852,12 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): :param data: data of a specific graph :return: self """ - data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.model.parameters(), lr=self._learning_rate, - weight_decay=self._weight_decay, + weight_decay=self._weight_decay ) + if type(self._lr_scheduler_type) == str: if self._lr_scheduler_type.lower() == "step" + "lr": lr_scheduler: torch.optim.lr_scheduler.StepLR = ( @@ -501,62 +886,53 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) ) - if self.__sampling_method_identifier.lower() == "edge": - sub_graph_sampler = GraphSAINTRandomEdgeSampler( - self.__sampling_budget, self.__num_subgraphs - ) - else: - sub_graph_sampler = GraphSAINTRandomNodeSampler( - self.__sampling_budget, self.__num_subgraphs - ) + sampled_node_size_budget: int = self.__sampled_node_size_budget + num_layers: int = self.__num_layers + __layer_dependent_importance_sampler: LayerDependentImportanceSampler = ( + LayerDependentImportanceSampler(data.edge_index) + ) + __top_layer_target_nodes_indexes: torch.LongTensor = ( + torch.where(data.train_mask)[0].unique() + ) for current_epoch in range(self._max_epoch): self.model.model.train() + optimizer.zero_grad() """ epoch start """ - """ Sample sub-graphs """ - sub_graph_set = sub_graph_sampler.sample(data) - sub_graphs_loader: torch.utils.data.DataLoader = ( - torch.utils.data.DataLoader(sub_graph_set) + " sample graphs " + __layers: _typing.Sequence[ + _typing.Tuple[torch.Tensor, torch.Tensor] + ] = __layer_dependent_importance_sampler.sample( + __top_layer_target_nodes_indexes, + [sampled_node_size_budget for _ in range(num_layers)] ) - integral_alpha: torch.Tensor = getattr(sub_graph_set, "alpha") - integral_lambda: torch.Tensor = getattr(sub_graph_set, "lambda") - """ iterate sub-graphs """ - for sub_graph_data in sub_graphs_loader: - optimizer.zero_grad() - sampled_edge_indexes: torch.Tensor = sub_graph_data.sampled_edge_indexes - sampled_node_indexes: torch.Tensor = sub_graph_data.sampled_node_indexes - sampled_train_mask: torch.Tensor = sub_graph_data.train_mask - - sampled_alpha = integral_alpha[sampled_edge_indexes] - sub_graph_data.edge_weight = 1 / sampled_alpha - - prediction: torch.Tensor = self.model.model(sub_graph_data) + data.edge_indexes = [layer[0] for layer in __layers] + data.edge_weights = [layer[1] for layer in __layers] + data = data.to(self.device) - if not hasattr(torch.nn.functional, self.loss): - raise TypeError(f"PyTorch does not support loss type {self.loss}") - loss_func = getattr(torch.nn.functional, self.loss) - unreduced_loss: torch.Tensor = loss_func( - prediction[sampled_train_mask], - data.y[sampled_train_mask], - reduction="none", + result: torch.Tensor = self.model.model.forward(data) + if hasattr(torch.nn.functional, self.loss): + loss_function = getattr( + torch.nn.functional, self.loss ) - - sampled_lambda: torch.Tensor = integral_lambda[sampled_node_indexes] - sampled_train_lambda: torch.Tensor = sampled_lambda[sampled_train_mask] - assert unreduced_loss.size() == sampled_train_lambda.size() - loss_weighted_sum: torch.Tensor = torch.sum( - unreduced_loss / sampled_train_lambda + 
loss_value: torch.Tensor = loss_function( + result[data.train_mask], + data.y[data.train_mask] + ) + else: + raise TypeError( + f"PyTorch does not support loss type {self.loss}" ) - loss_weighted_sum.backward() - optimizer.step() - if lr_scheduler is not None: + loss_value.backward() + optimizer.step() + if self._lr_scheduler_type: lr_scheduler.step() - """ Validate performance """ if ( - hasattr(data, "val_mask") - and type(getattr(data, "val_mask")) == torch.Tensor + hasattr(data, "val_mask") and + getattr(data, "val_mask") is not None and + type(getattr(data, "val_mask")) == torch.Tensor ): validation_results: _typing.Sequence[float] = self.evaluate( (data,), "val", [self.feval[0]] @@ -569,11 +945,14 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): if self._early_stopping.early_stop: LOGGER.debug("Early stopping at %d", current_epoch) break - if hasattr(data, "val_mask") and data.val_mask is not None: + if ( + hasattr(data, "val_mask") and + getattr(data, "val_mask") is not None and + type(getattr(data, "val_mask")) == torch.Tensor + ): self._early_stopping.load_checkpoint(self.model.model) - return self - def __predict_only(self, data): + def __predict_only(self, data) -> torch.Tensor: """ The function of predicting on the given data. :param data: data of a specific graph @@ -586,7 +965,8 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return predicted_x def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, in_log_format=False + self, dataset, mask: _typing.Optional[str]=None, + in_log_format: bool=False ): """ The function of predicting the probability on the given dataset. @@ -614,12 +994,12 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] def evaluate( - self, - dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = None, + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: data = dataset[0] data = data.to(self.device) @@ -675,17 +1055,15 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return self._valid_result_prob def get_valid_score( - self, return_major: bool = True - ) -> _typing.Tuple[ - _typing.Union[float, _typing.Sequence[float]], - _typing.Union[bool, _typing.Sequence[bool]], + self, return_major: bool = True + ) -> _typing.Union[ + _typing.Tuple[float, bool], + _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] ]: if return_major: return self._valid_score[0], self.feval[0].is_higher_better() else: - return ( - self._valid_score, [f.is_higher_better() for f in self.feval] - ) + return self._valid_score, [f.is_higher_better() for f in self.feval] @property def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: @@ -693,7 +1071,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): @hyper_parameter_space.setter def hyper_parameter_space( - self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] ) -> None: if not isinstance(hp_space, _typing.Sequence): raise TypeError @@ -723,10 +1101,10 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return name def 
duplicate_from_hyper_parameter( - self, - hp: _typing.Dict[str, _typing.Any], - model: _typing.Optional[BaseModel] = None, - ) -> "NodeClassificationGraphSAINTTrainer": + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None, + ) -> "NodeClassificationLayerDependentImportanceSamplingTrainer": if model is None or not isinstance(model, BaseModel): model: BaseModel = self.model model = model.from_hyper_parameter( @@ -738,7 +1116,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ] ) ) - return NodeClassificationGraphSAINTTrainer( + return NodeClassificationLayerDependentImportanceSamplingTrainer( model, self.num_features, self.num_classes, @@ -752,8 +1130,8 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ) -@register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer") -class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer): +@register_trainer("NodeClassificationNeighborSampling") +class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): def __init__( self, model: _typing.Union[BaseModel, str], @@ -772,7 +1150,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi loss: str = "nll_loss", lr_scheduler_type: _typing.Optional[str] = None, **kwargs, - ) -> None: + ): if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer elif type(optimizer) == str: @@ -811,17 +1189,12 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () - super(NodeClassificationLayerDependentImportanceSamplingTrainer, self).__init__( + super(NodeClassificationNeighborSamplingTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) - """ Set hyper parameters """ - " Configure num_layers " - self.__num_layers: int = kwargs.get("num_layers") - " Configure sampled_node_size_budget " - self.__sampled_node_size_budget: int = ( - kwargs.get("sampled_node_size_budget") - ) + """ Set hyper-parameter """ + self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") self.__is_initialized: bool = False if init: @@ -842,10 +1215,10 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi def get_model(self): return self.model - def __train_only(self, data): + def __train_only(self, integral_data): """ The function of training on the given dataset and mask. 
- :param data: data of a specific graph + :param integral_data: data of the integral graph :return: self """ optimizer: torch.optim.Optimizer = self._optimizer_class( @@ -882,56 +1255,53 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) ) - sampled_node_size_budget: int = self.__sampled_node_size_budget - num_layers: int = self.__num_layers - - __layer_dependent_importance_sampler: LayerDependentImportanceSampler = ( - LayerDependentImportanceSampler(data.edge_index) - ) - __top_layer_target_nodes_indexes: torch.LongTensor = ( - torch.where(data.train_mask)[0].unique() + neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, torch.where(integral_data.train_mask)[0].unique(), + self.__sampling_sizes, batch_size=1024, + num_workers=os.cpu_count() if os.cpu_count() is not None else 0 ) - for current_epoch in range(self._max_epoch): + for current_epoch in tqdm.tqdm(range(self._max_epoch), desc="Epoch"): self.model.model.train() optimizer.zero_grad() """ epoch start """ " sample graphs " + # todo: done + for sampled_data in neighbor_sampler: + optimizer.zero_grad() + sampled_data: TargetDependantSampledData = sampled_data + # Since the current Model design only accepts a Data instance, we have to assemble a sampled Data object as the argument + sampled_graph: autogl.data.Data = autogl.data.Data( + x=integral_data.x[sampled_data.all_sampled_nodes_indexes], + y=integral_data.y[sampled_data.all_sampled_nodes_indexes] ) + sampled_graph.to(self.device) + sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ + current_layer.edge_index_for_sampled_graph.to(self.device) + for current_layer in sampled_data.sampled_edges_for_layers + ] + prediction: torch.Tensor = self.model.model(sampled_graph) + if not hasattr(torch.nn.functional, self.loss): + raise TypeError( + f"PyTorch does not support loss type {self.loss}" + ) + loss_function = getattr(torch.nn.functional, self.loss) loss_value: torch.Tensor = loss_function( + prediction[sampled_data.target_nodes_indexes.indexes_in_sampled_graph], + sampled_graph.y[sampled_data.target_nodes_indexes.indexes_in_sampled_graph] + ) + else: + raise TypeError( + f"PyTorch does not support loss type {self.loss}" ) + loss_value.backward() + optimizer.step() - loss_value.backward() - optimizer.step() if self._lr_scheduler_type: lr_scheduler.step() - """ Validate performance """ if ( - hasattr(data, "val_mask") and - type(getattr(data, "val_mask")) == torch.Tensor + hasattr(data, "val_mask") and + getattr(data, "val_mask") is not None and + type(getattr(data, "val_mask")) == torch.Tensor ): validation_results: _typing.Sequence[float] = self.evaluate( (data,), "val", [self.feval[0]] ) if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] @@ -942,27 +1312,65 @@ class 
NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi LOGGER.debug("Early stopping at %d", current_epoch) break if ( - hasattr(data, "val_mask") and - getattr(data, "val_mask") is not None and - type(getattr(data, "val_mask")) == torch.Tensor + hasattr(integral_data, "val_mask") and + getattr(integral_data, "val_mask") is not None and + type(getattr(integral_data, "val_mask")) == torch.Tensor ): self._early_stopping.load_checkpoint(self.model.model) - def __predict_only(self, data) -> torch.Tensor: + def __predict_only( + self, integral_data, + mask_or_target_nodes_indexes: _typing.Union[ + torch.BoolTensor, torch.LongTensor + ] + ) -> torch.Tensor: """ The function of predicting on the given data. - :param data: data of a specific graph + :param integral_data: data of a specific graph + :param mask_or_target_nodes_indexes: ... :return: the result of prediction on the given dataset """ - data = data.to(self.device) + if mask_or_target_nodes_indexes.dtype == torch.bool: + target_nodes_indexes: _typing.Any = ( + torch.where(mask_or_target_nodes_indexes)[0] + ) + else: + target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() + + neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, target_nodes_indexes, [-1 for _ in self.__sampling_sizes], + batch_size=1024, num_workers=0, shuffle=False + ) + + prediction_batch_cumulative_builder = ( + EvaluatorUtility.PredictionBatchCumulativeBuilder() + ) self.model.model.eval() - with torch.no_grad(): - predicted_x: torch.Tensor = self.model.model(data) - return predicted_x + for sampled_data in neighbor_sampler: + sampled_data: TargetDependantSampledData = sampled_data + sampled_graph: autogl.data.Data = autogl.data.Data( + integral_data.x[sampled_data.all_sampled_nodes_indexes], + integral_data.y[sampled_data.all_sampled_nodes_indexes] + ) + sampled_graph.to(self.device) + sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ + current_layer.edge_index_for_sampled_graph.to(self.device) + for current_layer in sampled_data.sampled_edges_for_layers + ] + + with torch.no_grad(): + prediction_batch_cumulative_builder.add_batch( + sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), + self.model.model(sampled_graph)[ + sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ].cpu().numpy() + ) + + return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) def predict_proba( - self, dataset, mask: _typing.Optional[str]=None, - in_log_format: bool=False + self, dataset, mask: _typing.Optional[str] = None, + in_log_format: bool = False ): """ The function of predicting the probability on the given dataset. 
@@ -974,16 +1382,16 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi data = dataset[0].to(self.device) if mask is not None and type(mask) == str: if mask.lower() == "train": - _mask: torch.Tensor = data.train_mask + _mask: torch.BoolTensor = data.train_mask elif mask.lower() == "test": - _mask: torch.Tensor = data.test_mask + _mask: torch.BoolTensor = data.test_mask elif mask.lower() == "val": - _mask: torch.Tensor = data.val_mask + _mask: torch.BoolTensor = data.val_mask else: - _mask: torch.Tensor = data.test_mask + _mask: torch.BoolTensor = data.test_mask else: - _mask: torch.Tensor = data.test_mask - result = self.__predict_only(data)[_mask] + _mask: torch.BoolTensor = data.test_mask + result = self.__predict_only(data, _mask) return result if in_log_format else torch.exp(result) def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: @@ -1019,15 +1427,12 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi eval_results = [] for f in _feval: - try: - eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) - except: - eval_results.append( - f.evaluate( - prediction_probability.cpu().numpy(), - y_ground_truth.cpu().numpy(), - ) + eval_results.append( + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), ) + ) return eval_results def train(self, dataset, keep_valid_result: bool = True): @@ -1039,9 +1444,9 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi data = dataset[0] self.__train_only(data) if keep_valid_result: - prediction: torch.Tensor = self.__predict_only(data) - self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] - self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + prediction: torch.Tensor = self.__predict_only(data, data.val_mask) + self._valid_result: torch.Tensor = prediction.max(1)[1] + self._valid_result_prob: torch.Tensor = prediction self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") def get_valid_predict(self) -> torch.Tensor: @@ -1100,7 +1505,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi self, hp: _typing.Dict[str, _typing.Any], model: _typing.Optional[BaseModel] = None, - ) -> "NodeClassificationLayerDependentImportanceSamplingTrainer": + ) -> "NodeClassificationNeighborSamplingTrainer": if model is None or not isinstance(model, BaseModel): model: BaseModel = self.model model = model.from_hyper_parameter( @@ -1112,7 +1517,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi ] ) ) - return NodeClassificationLayerDependentImportanceSamplingTrainer( + return NodeClassificationNeighborSamplingTrainer( model, self.num_features, self.num_classes, diff --git a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py index a46ba56..abd3433 100644 --- a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py +++ b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py @@ -3,12 +3,13 @@ import torch import torch.utils.data import typing as _typing import torch_geometric +from . 
import target_dependant_sampler -class LayerDependentImportanceSampler: +class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTargetDependantSampler): class _Utility: @classmethod - def compute_edge_weights(cls, __all_edge_index_with_self_loops: torch.LongTensor) -> torch.Tensor: + def compute_edge_weights(cls, __all_edge_index_with_self_loops: torch.Tensor) -> torch.Tensor: __out_degree: torch.Tensor = \ torch_geometric.utils.degree(__all_edge_index_with_self_loops[0]) __in_degree: torch.Tensor = \ @@ -93,32 +94,48 @@ class LayerDependentImportanceSampler: selected_edges_mask_for_source_nodes & selected_edges_mask_for_target_nodes )[0] - def __init__(self, all_edge_index: torch.LongTensor): - self.__all_edge_index_with_self_loops: torch.LongTensor = \ - torch_geometric.utils.add_remaining_self_loops(all_edge_index)[0] - self.__all_edge_weights: torch.Tensor = \ - self._Utility.compute_edge_weights(self.__all_edge_index_with_self_loops) + def __init__( + self, edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: _typing.Optional[int] = 1, num_workers: int = 0, + shuffle: bool = True, **kwargs + ): + super().__init__( + torch_geometric.utils.add_remaining_self_loops(edge_index)[0], + target_nodes_indexes, layer_wise_arguments, batch_size, num_workers, shuffle, **kwargs + ) + self.__all_edge_weights: torch.Tensor = self._Utility.compute_edge_weights(self._edge_index) - def __sample_layer( + def _sample_edges_for_layer( self, target_nodes_indexes: torch.LongTensor, - sampled_node_size_budget: int - ) -> _typing.Tuple[torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor]: + layer_argument: _typing.Any, *args, **kwargs + ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: """ - :param target_nodes_indexes: - node indexes for target nodes in the top layer or nodes sampled in upper layer - :param sampled_node_size_budget: - :return: (Tensor, Tensor, LongTensor, LongTensor) + Sample edges for one layer + :param target_nodes_indexes: indexes of target nodes + :param layer_argument: argument for current layer + :param args: remaining positional arguments + :param kwargs: remaining keyword arguments + :return: (edge_id_in_integral_graph, edge_weight) """ + if type(layer_argument) != int: + raise TypeError + elif not layer_argument > 0: + raise ValueError + else: + sampled_node_size_budget: int = layer_argument + all_candidate_edge_indexes: torch.LongTensor = torch.cat( [ - torch.where(self.__all_edge_index_with_self_loops[1] == current_target_node_index)[0] + torch.where(self._edge_index[1] == current_target_node_index)[0] for current_target_node_index in target_nodes_indexes.unique().tolist() ] ).unique() __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ self._Utility.get_candidate_source_nodes_probabilities( all_candidate_edge_indexes, - self.__all_edge_index_with_self_loops, + self._edge_index, self.__all_edge_weights ) assert __all_candidate_source_nodes_indexes.size() == all_candidate_source_nodes_probabilities.size() @@ -138,7 +155,7 @@ class LayerDependentImportanceSampler: __selected_edges_indexes: torch.LongTensor = ( self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes( - self.__all_edge_index_with_self_loops, + self._edge_index, selected_source_node_indexes, target_nodes_indexes ) ).unique() @@ -149,9 +166,9 @@ class LayerDependentImportanceSampler: [ all_candidate_source_nodes_probabilities[ 
__all_candidate_source_nodes_indexes == current_source_node_index - ].item() + ].item() for current_source_node_index - in self.__all_edge_index_with_self_loops[0, __selected_edges_indexes].tolist() + in self._edge_index[0, __selected_edges_indexes].tolist() ] ) ) @@ -167,49 +184,100 @@ class LayerDependentImportanceSampler: __edge_index[1] == current_target_node_index ) __edge_weight[__current_mask_for_edges] = ( - __edge_weight[__current_mask_for_edges] / ( - torch.sum(__edge_weight[__current_mask_for_edges]) - ) + __edge_weight[__current_mask_for_edges] / ( + torch.sum(__edge_weight[__current_mask_for_edges]) + ) ) return __edge_weight normalized_selected_edges_weight: torch.Tensor = __normalize_edges_weight_by_target_nodes( - self.__all_edge_index_with_self_loops[:, __selected_edges_indexes], + self._edge_index[:, __selected_edges_indexes], non_normalized_selected_edges_weight ) - return ( - self.__all_edge_index_with_self_loops[:, __selected_edges_indexes], - normalized_selected_edges_weight, - selected_source_node_indexes, - __selected_edges_indexes - ) - - def sample( - self, __top_layer_target_nodes_indexes: torch.LongTensor, - sampling_node_size_budgets: _typing.Sequence[int] - ) -> _typing.Sequence[_typing.Tuple[torch.Tensor, torch.Tensor]]: - """ - :param __top_layer_target_nodes_indexes: indexes of target nodes for the top layer - :param sampling_node_size_budgets: - :return: - """ - if type(__top_layer_target_nodes_indexes) != torch.Tensor: - raise TypeError - if not isinstance(sampling_node_size_budgets, _typing.Sequence): - raise TypeError - if len(sampling_node_size_budgets) == 0: - raise ValueError - - layers: _typing.List[_typing.Tuple[torch.Tensor, torch.Tensor]] = [] - upper_layer_sampled_node_indexes: torch.LongTensor = __top_layer_target_nodes_indexes - for current_sampled_node_size_budget in sampling_node_size_budgets[::-1]: - _sampling_result: _typing.Tuple[ - torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor - ] = self.__sample_layer(upper_layer_sampled_node_indexes, current_sampled_node_size_budget) - current_layer_edge_index: torch.Tensor = _sampling_result[0] - current_layer_edge_weight: torch.Tensor = _sampling_result[1] - layers.append((current_layer_edge_index, current_layer_edge_weight)) - - upper_layer_sampled_node_indexes: torch.LongTensor = _sampling_result[2] + return __selected_edges_indexes, normalized_selected_edges_weight - return layers[::-1] + # todo: Migrated to the overridden _sample_edges_for_layer method; remove in a future version + # def __sample_layer( + # self, target_nodes_indexes: torch.LongTensor, + # sampled_node_size_budget: int + # ) -> _typing.Tuple[torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor]: + # """ + # :param target_nodes_indexes: + # node indexes for target nodes in the top layer or nodes sampled in upper layer + # :param sampled_node_size_budget: + # :return: (Tensor, Tensor, LongTensor, LongTensor) + # """ + # all_candidate_edge_indexes: torch.LongTensor = torch.cat( + # [ + # torch.where(self._edge_index[1] == current_target_node_index)[0] + # for current_target_node_index in target_nodes_indexes.unique().tolist() + # ] + # ).unique() + # __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ + # self._Utility.get_candidate_source_nodes_probabilities( + # all_candidate_edge_indexes, + # self._edge_index, + # self.__all_edge_weights + # ) + # assert __all_candidate_source_nodes_indexes.size() == all_candidate_source_nodes_probabilities.size() + # + # """ Sampling """ 
+ # if sampled_node_size_budget < __all_candidate_source_nodes_indexes.numel(): + # selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes[ + # torch.from_numpy( + # np.unique(np.random.choice( + # np.arange(__all_candidate_source_nodes_indexes.numel()), sampled_node_size_budget, + # p=all_candidate_source_nodes_probabilities.numpy() + # )) + # ).unique() + # ].unique() + # else: + # selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes + # + # __selected_edges_indexes: torch.LongTensor = ( + # self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes( + # self._edge_index, + # selected_source_node_indexes, target_nodes_indexes + # ) + # ).unique() + # + # non_normalized_selected_edges_weight: torch.Tensor = ( + # self.__all_edge_weights[__selected_edges_indexes] / ( + # selected_source_node_indexes.numel() * torch.tensor( + # [ + # all_candidate_source_nodes_probabilities[ + # __all_candidate_source_nodes_indexes == current_source_node_index + # ].item() + # for current_source_node_index + # in self._edge_index[0, __selected_edges_indexes].tolist() + # ] + # ) + # ) + # ) + # + # def __normalize_edges_weight_by_target_nodes( + # __edge_index: torch.Tensor, __edge_weight: torch.Tensor + # ) -> torch.Tensor: + # if __edge_index.size(1) != __edge_weight.numel(): + # raise ValueError + # for current_target_node_index in __edge_index[1].unique().tolist(): + # __current_mask_for_edges: torch.BoolTensor = ( + # __edge_index[1] == current_target_node_index + # ) + # __edge_weight[__current_mask_for_edges] = ( + # __edge_weight[__current_mask_for_edges] / ( + # torch.sum(__edge_weight[__current_mask_for_edges]) + # ) + # ) + # return __edge_weight + # + # normalized_selected_edges_weight: torch.Tensor = __normalize_edges_weight_by_target_nodes( + # self._edge_index[:, __selected_edges_indexes], + # non_normalized_selected_edges_weight + # ) + # return ( + # self._edge_index[:, __selected_edges_indexes], + # normalized_selected_edges_weight, + # selected_source_node_indexes, + # __selected_edges_indexes + # ) diff --git a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index 53a6b7c..b1d7c39 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -1,133 +1,49 @@ -import collections -import random import typing as _typing -import numpy as np import torch.utils.data +import torch_geometric +from .target_dependant_sampler import TargetDependantSampler, TargetDependantSampledData -class NeighborSampler(torch.utils.data.DataLoader, collections.Iterable): - class _NodeIndexesDataset(torch.utils.data.Dataset): - def __init__(self, node_indexes): - self.__node_indexes: _typing.Sequence[int] = node_indexes +def _neighbor_sampler_transform( + batch_size: int, n_id: torch.LongTensor, + adj_list: _typing.Sequence[ + _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]] + ] +) -> TargetDependantSampledData: + return TargetDependantSampledData( + [(current_layer[0], current_layer[1], None)for current_layer in adj_list], + (torch.arange(batch_size), n_id[:batch_size]), n_id + ) - def __getitem__(self, index) -> int: - if not 0 <= index < len(self.__node_indexes): - raise IndexError("Index out of range") - else: - return self.__node_indexes[index] - - def __len__(self) -> int: - return len(self.__node_indexes) +class NeighborSampler(TargetDependantSampler, _typing.Iterable): 
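# This rewritten NeighborSampler is a thin adapter over PyG's own sampler:
# `_neighbor_sampler_transform` above repackages each (batch_size, n_id, adjs)
# mini-batch yielded by torch_geometric.data.NeighborSampler into a
# TargetDependantSampledData. A minimal usage sketch follows; it assumes a
# PyG-style `data` object with `edge_index` and `train_mask`, and the
# fan-outs shown are illustrative, not prescribed by this patch:
#
#     import torch
#     sampler = NeighborSampler(
#         data.edge_index,
#         torch.where(data.train_mask)[0],  # indexes of target (training) nodes
#         sampling_sizes=[10, 25],          # per-layer fan-outs; reversed internally for PyG
#         batch_size=1024, shuffle=True,
#     )
#     for sampled in sampler:               # each item is a TargetDependantSampledData
#         # rows of the sampled graph that correspond to this batch's target nodes
#         target_rows = sampled.target_nodes_indexes.indexes_in_sampled_graph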
def __init__( - self, - data, - sampling_sizes: _typing.Sequence[int], - target_node_indexes: _typing.Optional[_typing.Sequence[int]] = None, - batch_size: _typing.Optional[int] = 1, - *args, - **kwargs + self, edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + sampling_sizes: _typing.Sequence[int], + batch_size: int = 1, num_workers: int = 0, + shuffle: bool = True, **kwargs ): - self._data = data - self.__sampling_sizes: _typing.Sequence[int] = sampling_sizes - - if not ( - target_node_indexes is not None - and isinstance(target_node_indexes, _typing.Sequence) - ): - if hasattr(data, "train_mask"): - target_node_indexes: _typing.Sequence[int] = torch.where( - getattr(data, "train_mask") - )[0] - else: - target_node_indexes: _typing.Sequence[int] = list( - np.arange(0, data.x.shape[0]) - ) - - self.__edge_index_map: _typing.Dict[ - int, _typing.Union[torch.Tensor, _typing.Sequence[int]] - ] = {} - self.__init_edge_index_map() - super(NeighborSampler, self).__init__( - self._NodeIndexesDataset(target_node_indexes), - batch_size=batch_size if batch_size > 0 else 1, - collate_fn=self.__sample, - *args, - **kwargs + self.__pyg_neighbor_sampler: torch_geometric.data.NeighborSampler = ( + torch_geometric.data.NeighborSampler( + edge_index, list(sampling_sizes[::-1]), target_nodes_indexes, + transform=_neighbor_sampler_transform, batch_size=batch_size, + num_workers=num_workers, shuffle=shuffle, **kwargs + ) ) - def __init_edge_index_map(self): - self.__edge_index_map.clear() - all_edge_index: torch.Tensor = getattr(self._data, "edge_index") - target_node_indexes: torch.Tensor = all_edge_index[1] - for target_node_index in target_node_indexes.unique().tolist(): - self.__edge_index_map[target_node_index] = torch.where( - all_edge_index[1] == target_node_index - )[0] - def __iter__(self): - return super(NeighborSampler, self).__iter__() - - def __sample( - self, target_nodes_indexes: _typing.List[int] - ) -> _typing.Tuple[torch.Tensor, _typing.List[torch.Tensor]]: - """ - Sample a sub-graph with neighborhood sampling - :param target_nodes_indexes: - """ - original_edge_index: torch.Tensor = self._data.edge_index - edges_indexes: _typing.List[torch.Tensor] = [] - - current_target_nodes_indexes: _typing.List[int] = target_nodes_indexes - for current_sampling_size in self.__sampling_sizes: - current_edge_index: _typing.Optional[torch.Tensor] = None - for current_target_node_index in current_target_nodes_indexes: - if current_target_node_index in self.__edge_index_map: - all_indexes: torch.Tensor = self.__edge_index_map.get( - current_target_node_index - ) - else: - all_indexes: torch.Tensor = torch.where( - original_edge_index[1] == current_target_node_index - )[0] - if all_indexes.numel() < current_sampling_size: - sampled_indexes: np.ndarray = np.random.choice( - all_indexes.cpu().numpy(), current_sampling_size - ) - if current_edge_index is not None: - current_edge_index: torch.Tensor = torch.cat( - [ - current_edge_index, - original_edge_index[:, sampled_indexes], - ], - dim=1, - ) - else: - current_edge_index: torch.Tensor = original_edge_index[ - :, sampled_indexes - ] - else: - all_indexes_list = all_indexes.tolist() - random.shuffle(all_indexes_list) - shuffled_indexes_list: _typing.List[int] = all_indexes_list[ - 0:current_sampling_size - ] - if current_edge_index is not None: - current_edge_index: torch.Tensor = torch.cat( - [ - current_edge_index, - original_edge_index[:, shuffled_indexes_list], - ], - dim=1, - ) - else: - current_edge_index: torch.Tensor = 
original_edge_index[ - :, shuffled_indexes_list - ] - edges_indexes.append(current_edge_index) - - if len(edges_indexes) < len(self.__sampling_sizes): - next_target_nodes_indexes: torch.Tensor = current_edge_index[0].unique() - current_target_nodes_indexes = next_target_nodes_indexes.tolist() - - return torch.tensor(target_nodes_indexes), edges_indexes[::-1] + return iter(self.__pyg_neighbor_sampler) + + @classmethod + def create_basic_sampler( + cls, edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: int = 1, num_workers: int = 1, + shuffle: bool = True, *args, **kwargs + ) -> TargetDependantSampler: + return cls( + edge_index, target_nodes_indexes, layer_wise_arguments, + batch_size, num_workers, shuffle, **kwargs + ) diff --git a/autogl/module/train/sampling/sampler/target_dependant_sampler.py b/autogl/module/train/sampling/sampler/target_dependant_sampler.py new file mode 100644 index 0000000..7adc816 --- /dev/null +++ b/autogl/module/train/sampling/sampler/target_dependant_sampler.py @@ -0,0 +1,262 @@ +import torch.utils.data +import typing as _typing + + +class TargetDependantSampledData: + class _LayerSampledEdgeData: + def __init__( + self, edge_index_for_sampled_graph: torch.Tensor, + edge_id_in_integral_graph: torch.Tensor, + edge_weight: _typing.Optional[torch.Tensor] + ): + self.__edge_index_for_sampled_graph: torch.Tensor = ( + edge_index_for_sampled_graph + ) + self.__edge_id_in_integral_graph: torch.Tensor = ( + edge_id_in_integral_graph + ) + self.__edge_weight: _typing.Optional[torch.Tensor] = edge_weight + + @property + def edge_index_for_sampled_graph(self) -> torch.LongTensor: + edge_index_for_sampled_graph: _typing.Any = ( + self.__edge_index_for_sampled_graph + ) + return edge_index_for_sampled_graph + + @property + def edge_id_in_integral_graph(self) -> torch.LongTensor: + edge_id_in_integral_graph: _typing.Any = ( + self.__edge_id_in_integral_graph + ) + return edge_id_in_integral_graph + + @property + def edge_weight(self) -> _typing.Optional[torch.Tensor]: + return self.__edge_weight + + class _TargetNodes: + @property + def indexes_in_sampled_graph(self) -> torch.LongTensor: + indexes_in_sampled_graph: _typing.Any = self.__indexes_in_sampled_graph + return indexes_in_sampled_graph + + @property + def indexes_in_integral_graph(self) -> torch.LongTensor: + indexes_in_integral_graph: _typing.Any = self.__indexes_in_integral_graph + return indexes_in_integral_graph + + def __init__( + self, + indexes_in_sampled_graph: torch.Tensor, + indexes_in_integral_graph: torch.Tensor, + ): + self.__indexes_in_sampled_graph: torch.Tensor = indexes_in_sampled_graph + self.__indexes_in_integral_graph: torch.Tensor = indexes_in_integral_graph + + @property + def target_nodes_indexes(self) -> _TargetNodes: + """ indexes of target nodes in the integral graph """ + return self.__target_nodes_indexes + + @property + def all_sampled_nodes_indexes(self) -> torch.LongTensor: + """ indexes of all sampled nodes in the integral graph """ + all_sampled_nodes_indexes: _typing.Any = self.__all_sampled_nodes_indexes + return all_sampled_nodes_indexes + + @property + def sampled_edges_for_layers(self) -> _typing.Sequence[_LayerSampledEdgeData]: + return self.__sampled_edges_for_layers + + def __init__( + self, + sampled_edges_for_layers: _typing.Sequence[ + _typing.Tuple[torch.Tensor, torch.Tensor, _typing.Optional[torch.Tensor]] + ], + target_nodes_indexes: _typing.Tuple[torch.Tensor, torch.Tensor], + 
all_sampled_nodes_indexes: torch.Tensor + ): + """ + + :param sampled_edges_for_layers: Sequence of tuple ( + edge_index_for_sampled_graph, + edge_id_in_integral_graph, + optional edge_weight + ) + :param target_nodes_indexes: (indexes_in_sampled_data, indexes_in_integral_data) + :param all_sampled_nodes_indexes: torch.Tensor + """ + self.__sampled_edges_for_layers: _typing.Sequence[ + TargetDependantSampledData._LayerSampledEdgeData + ] = [ + self._LayerSampledEdgeData(item[0], item[1], item[2]) + for item in sampled_edges_for_layers + ] + self.__target_nodes_indexes: TargetDependantSampledData._TargetNodes = ( + self._TargetNodes(target_nodes_indexes[0], target_nodes_indexes[1]) + ) + self.__all_sampled_nodes_indexes: torch.Tensor = all_sampled_nodes_indexes + + +class TargetDependantSampler(torch.utils.data.DataLoader, _typing.Iterable): + @classmethod + def create_basic_sampler( + cls, edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: int = 1, num_workers: int = 0, + shuffle: bool = True, *args, **kwargs + ) -> "TargetDependantSampler": + """ + :param edge_index: edge index of integral graph + :param target_nodes_indexes: indexes of target nodes in the integral graph + :param layer_wise_arguments: layer-wise arguments for sampling + :param batch_size: batch size for target nodes, default to 1 + :param num_workers: number of workers, default to 0 + :param shuffle: flag for shuffling, default to True + :param args: remaining positional arguments + :param kwargs: remaining keyword arguments + :return: instance of TargetDependantSampler + """ + raise NotImplementedError + + def __iter__(self): + return super(TargetDependantSampler, self).__iter__() + + +class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): + def __init__( + self, edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: _typing.Optional[int] = 1, num_workers: int = 0, + shuffle: bool = True, **kwargs + ): + self._edge_index: torch.LongTensor = edge_index + self.__target_nodes_indexes: torch.LongTensor = target_nodes_indexes + self.__layer_wise_arguments: _typing.Sequence = layer_wise_arguments + if "collate_fn" in kwargs: + del kwargs["collate_fn"] + super(BasicLayerWiseTargetDependantSampler, self).__init__( + self.__target_nodes_indexes.unique().tolist(), + batch_size, shuffle, num_workers=num_workers, + collate_fn=self._collate_fn, **kwargs + ) + + @classmethod + def create_basic_sampler( + cls, edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: int = 1, num_workers: int = 0, + shuffle: bool = True, *args, **kwargs + ) -> TargetDependantSampler: + """ + :param edge_index: edge index of integral graph + :param target_nodes_indexes: indexes of target nodes in the integral graph + :param layer_wise_arguments: layer-wise arguments for sampling + :param batch_size: batch size for target nodes + :param num_workers: number of workers + :param shuffle: flag for shuffling, default to True + :param args: remaining positional arguments + :param kwargs: remaining keyword arguments + :return: instance of TargetDependantSampler + """ + return BasicLayerWiseTargetDependantSampler( + edge_index, target_nodes_indexes, layer_wise_arguments, + batch_size, num_workers, shuffle, **kwargs + ) + + def _sample_edges_for_layer( + self, target_nodes_indexes: torch.LongTensor, + layer_argument: _typing.Any, 
*args, **kwargs + ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: + """ + Sample edges for one layer + :param target_nodes_indexes: indexes of target nodes + :param layer_argument: argument for current layer + :param args: remaining positional arguments + :param kwargs: remaining keyword arguments + :return: (edge_id_in_integral_graph, edge_weight) + """ + raise NotImplementedError + + def _collate_fn( + self, top_layer_target_nodes_indexes_list: _typing.List[int] + ) -> TargetDependantSampledData: + return self.__sample_layers(top_layer_target_nodes_indexes_list) + + def __sample_layers( + self, top_layer_target_nodes_indexes_list: _typing.Sequence[int] + ) -> TargetDependantSampledData: + sampled_edges_for_layers: _typing.List[ + _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] + ] = list() + top_layer_target_nodes_indexes: torch.LongTensor = ( + torch.tensor(top_layer_target_nodes_indexes_list).unique() + ) # sorted + target_nodes_indexes: torch.LongTensor = top_layer_target_nodes_indexes + " Reverse self.__layer_wise_arguments from bottom-up to top-down " + for layer_argument in self.__layer_wise_arguments[::-1]: + current_layer_result: _typing.Tuple[ + torch.LongTensor, _typing.Optional[torch.Tensor] + ] = self._sample_edges_for_layer(target_nodes_indexes, layer_argument) + __source_nodes_indexes_for_current_layer: torch.Tensor = ( + self._edge_index[0, current_layer_result[0]] + ) + target_nodes_indexes: torch.LongTensor = ( + __source_nodes_indexes_for_current_layer.unique() + ) + sampled_edges_for_layers.append(current_layer_result) + """ Reverse sampled_edges_for_layers from top-down to bottom-up """ + sampled_edges_for_layers: _typing.Sequence[ + _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] + ] = sampled_edges_for_layers[::-1] + + sampled_nodes_in_sub_graph: torch.LongTensor = torch.cat( + [ + self._edge_index[:, current_layer_result[0]].reshape([-1]) + for current_layer_result in sampled_edges_for_layers + ] + ).unique() + __sampled_nodes_in_sub_graph_mapping: _typing.Dict[int, int] = dict(list(zip( + sampled_nodes_in_sub_graph.tolist(), + range(sampled_nodes_in_sub_graph.size(0)) + ))) + + __sampled_edge_index_for_layers_in_sub_graph: _typing.Sequence[torch.Tensor] = [ + torch.stack([ + torch.tensor( + [ + __sampled_nodes_in_sub_graph_mapping.get(node_index) + for node_index in self._edge_index[0, current_layer_result[0]].tolist() + ] + ), + torch.tensor( + [ + __sampled_nodes_in_sub_graph_mapping.get(node_index) + for node_index in self._edge_index[1, current_layer_result[0]].tolist() + ] + ), + ]) + for current_layer_result in sampled_edges_for_layers + ] + + return TargetDependantSampledData( + [ + (temp_tuple[0], temp_tuple[1][0], temp_tuple[1][1]) for temp_tuple + in zip(__sampled_edge_index_for_layers_in_sub_graph, sampled_edges_for_layers) + ], + ( + torch.tensor( + [ + __sampled_nodes_in_sub_graph_mapping.get(current_target_node_index_in_integral_data) + for current_target_node_index_in_integral_data + in top_layer_target_nodes_indexes.tolist() + ] + ).long(), # Remap + top_layer_target_nodes_indexes + ), + sampled_nodes_in_sub_graph + ) From 985b2c8eac0a82ca3c76f38560ca223dfc178418 Mon Sep 17 00:00:00 2001 From: null Date: Wed, 19 May 2021 20:24:00 +0800 Subject: [PATCH 068/144] Complete fixing Layer-wise Sampling (LADIES) and Node-wise Sampling --- autogl/module/hpo/base.py | 18 +- autogl/module/model/gcn.py | 17 +- autogl/module/train/evaluation.py | 16 + .../node_classification_sampled_trainer.py | 571 
+++++------------- .../layer_dependent_importance_sampler.py | 95 +-- .../sampling/sampler/neighbor_sampler.py | 45 +- .../sampler/target_dependant_sampler.py | 1 + configs/nodeclf_ladies_gcn.yml | 61 +- 8 files changed, 267 insertions(+), 557 deletions(-) diff --git a/autogl/module/hpo/base.py b/autogl/module/hpo/base.py index 1e666dd..fdef8a2 100644 --- a/autogl/module/hpo/base.py +++ b/autogl/module/hpo/base.py @@ -30,7 +30,7 @@ class BaseHPOptimizer: raise WrongDependedParameterError("The depended parameter does not exist.") for para in config: - if para["type"] == "NUMERICAL_LIST" and para.get("cutPara", None): + if para["type"] in ("NUMERICAL_LIST", "CATEGORICAL_LIST") and para.get("cutPara", None): self._depend_map[para["parameterName"]] = para if type(para["cutPara"]) == str: get_depended_para(para["cutPara"]) @@ -76,6 +76,18 @@ class BaseHPOptimizer: new_para["maxValue"] = y new_para["scalingType"] = para["scalingType"] fin.append(new_para) + elif para["type"] == "CATEGORICAL_LIST": + self._list_map[para["parameterName"]] = para["length"] + category = para["feasiblePoints"] + self._category_map[para["parameterName"]] = category + + cur_points = ",".join(map(lambda _x: str(_x), range(len(category)))) + for i in range(para["length"]): + new_para = dict() + new_para["parameterName"] = para["parameterName"] + "_" + str(i) + new_para["type"] = "DISCRETE" + new_para["feasiblePoints"] = cur_points + fin.append(new_para) elif para["type"] == "FIXED": self._fix_map[para["parameterName"]] = para["value"] else: @@ -92,6 +104,8 @@ class BaseHPOptimizer: for i in range(self._list_map[pname]): val.append(config[pname + "_" + str(i)]) del config[pname + "_" + str(i)] + if pname in self._category_map: + val = [self._category_map[pname][i] for i in val] fin[pname] = val # deal other para for pname in config: @@ -123,10 +137,10 @@ class BaseHPOptimizer: "maxValue": 0.9, "scalingType": "LINEAR" }]""" - config = self._decompose_list_fixed_para(config) self._category_map = {} self._discrete_map = {} self._numerical_map = {} + config = self._decompose_list_fixed_para(config) current_config = [] for para in config: diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index dbe0f8a..7d2d4e1 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -17,7 +17,8 @@ class GCN(torch.nn.Module): hidden_features: _typing.Sequence[int], dropout: float, activation_name: str, - add_self_loops: bool = True + add_self_loops: bool = True, + normalize: bool = True ): super().__init__() self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() @@ -25,13 +26,17 @@ class GCN(torch.nn.Module): if num_layers == 1: self.__convolution_layers.append( torch_geometric.nn.GCNConv( - num_features, num_classes, add_self_loops=add_self_loops + num_features, num_classes, + add_self_loops=add_self_loops, + normalize=normalize ) ) else: self.__convolution_layers.append( torch_geometric.nn.GCNConv( - num_features, hidden_features[0], add_self_loops=add_self_loops + num_features, hidden_features[0], + add_self_loops=add_self_loops, + normalize=normalize ) ) for i in range(len(hidden_features)): @@ -221,8 +226,6 @@ class AutoGCN(ClassificationModel): self.hyper_parameter.get("hidden"), self.hyper_parameter.get("dropout"), self.hyper_parameter.get("act"), - add_self_loops=( - "add_self_loops" in self.hyper_parameter - and self.hyper_parameter.get("add_self_loops") - ) + add_self_loops=bool(self.hyper_parameter.get("add_self_loops", True)), + 
normalize=bool(self.hyper_parameter.get("normalize", True)) ).to(self.device) diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py index b0d25cb..1ebb324 100644 --- a/autogl/module/train/evaluation.py +++ b/autogl/module/train/evaluation.py @@ -1,6 +1,7 @@ import numpy as np import typing as _typing from sklearn.metrics import ( + f1_score, log_loss, accuracy_score, roc_auc_score, @@ -221,3 +222,18 @@ class Mrr(Evaluation): """ pos_predict = predict[:, 1] return label_ranking_average_precision_score(label, pos_predict) + + +@register_evaluate("MicroF1") +class MicroF1(Evaluation): + @staticmethod + def get_eval_name() -> str: + return "MicroF1" + + @staticmethod + def is_higher_better() -> bool: + return True + + @staticmethod + def evaluate(predict, label) -> float: + return f1_score(label, np.argmax(predict, axis=1), average='micro') diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 6894d42..181c5b3 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -9,7 +9,7 @@ import tqdm import autogl.data from .. import register_trainer from ..base import BaseNodeClassificationTrainer, EarlyStopping, Evaluation -from ..evaluation import get_feval, Logloss, EvaluatorUtility +from ..evaluation import get_feval, EvaluatorUtility, Logloss, MicroF1 from ..sampling.sampler.target_dependant_sampler import TargetDependantSampledData from ..sampling.sampler.neighbor_sampler import NeighborSampler from ..sampling.sampler.graphsaint_sampler import * @@ -21,351 +21,6 @@ from ...model import BaseModel LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") -# @register_trainer("NodeClassificationNeighborSampling") -# class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): -# """ -# The node classification trainer -# for automatically training the node classification tasks -# with neighbour sampling -# """ -# -# def __init__( -# self, -# model: _typing.Union[BaseModel, str], -# num_features: int, -# num_classes: int, -# optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = None, -# lr: float = 1e-4, -# max_epoch: int = 100, -# early_stopping_round: int = 100, -# weight_decay: float = 1e-4, -# device: _typing.Optional[torch.device] = None, -# init: bool = True, -# feval: _typing.Union[ -# _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] -# ] = (Logloss,), -# loss: str = "nll_loss", -# lr_scheduler_type: _typing.Optional[str] = None, -# **kwargs, -# ) -> None: -# if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): -# self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer -# elif type(optimizer) == str: -# if optimizer.lower() == "adam": -# self._optimizer_class: _typing.Type[ -# torch.optim.Optimizer -# ] = torch.optim.Adam -# elif optimizer.lower() == "adam" + "w": -# self._optimizer_class: _typing.Type[ -# torch.optim.Optimizer -# ] = torch.optim.AdamW -# elif optimizer.lower() == "sgd": -# self._optimizer_class: _typing.Type[ -# torch.optim.Optimizer -# ] = torch.optim.SGD -# else: -# self._optimizer_class: _typing.Type[ -# torch.optim.Optimizer -# ] = torch.optim.Adam -# else: -# self._optimizer_class: _typing.Type[ -# torch.optim.Optimizer -# ] = torch.optim.Adam -# 
-# self._learning_rate: float = lr if lr > 0 else 1e-4 -# self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type -# self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 -# -# self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") -# -# self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 -# early_stopping_round: int = ( -# early_stopping_round if early_stopping_round > 0 else 1e2 -# ) -# self._early_stopping = EarlyStopping( -# patience=early_stopping_round, verbose=False -# ) -# super(NodeClassificationNeighborSamplingTrainer, self).__init__( -# model, num_features, num_classes, device, init, feval, loss -# ) -# -# self._valid_result: torch.Tensor = torch.zeros(0) -# self._valid_result_prob: torch.Tensor = torch.zeros(0) -# self._valid_score: _typing.Sequence[float] = [] -# -# self._hyper_parameter_space: _typing.Sequence[ -# _typing.Dict[str, _typing.Any] -# ] = [] -# -# self.__initialized: bool = False -# if init: -# self.initialize() -# -# def initialize(self) -> "NodeClassificationNeighborSamplingTrainer": -# if self.__initialized: -# return self -# self.model.initialize() -# self.__initialized = True -# return self -# -# def get_model(self) -> BaseModel: -# return self.model -# -# def __train_only(self, data) -> "NodeClassificationNeighborSamplingTrainer": -# """ -# The function of training on the given dataset and mask. -# :param data: data of a specific graph -# :return: self -# """ -# data = data.to(self.device) -# optimizer: torch.optim.Optimizer = self._optimizer_class( -# self.model.model.parameters(), -# lr=self._learning_rate, -# weight_decay=self._weight_decay, -# ) -# if type(self._lr_scheduler_type) == str: -# if self._lr_scheduler_type.lower() == "step" + "lr": -# lr_scheduler: torch.optim.lr_scheduler.StepLR = ( -# torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1) -# ) -# elif self._lr_scheduler_type.lower() == "multi" + "step" + "lr": -# lr_scheduler: torch.optim.lr_scheduler.MultiStepLR = ( -# torch.optim.lr_scheduler.MultiStepLR( -# optimizer, milestones=[30, 80], gamma=0.1 -# ) -# ) -# elif self._lr_scheduler_type.lower() == "exponential" + "lr": -# lr_scheduler: torch.optim.lr_scheduler.ExponentialLR = ( -# torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1) -# ) -# elif self._lr_scheduler_type.lower() == "ReduceLROnPlateau".lower(): -# lr_scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau = ( -# torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") -# ) -# else: -# lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( -# torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) -# ) -# else: -# lr_scheduler: torch.optim.lr_scheduler.LambdaLR = ( -# torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) -# ) -# -# train_sampler: NeighborSampler = NeighborSampler( -# data, self.__sampling_sizes, batch_size=20 -# ) -# -# for current_epoch in range(self._max_epoch): -# self.model.model.train() -# """ epoch start """ -# for target_node_indexes, edge_indexes in train_sampler: -# optimizer.zero_grad() -# data.edge_indexes = edge_indexes -# prediction = self.model.model(data) -# if not hasattr(torch.nn.functional, self.loss): -# raise TypeError( -# "PyTorch does not support loss type {}".format(self.loss) -# ) -# loss_function = getattr(torch.nn.functional, self.loss) -# loss: torch.Tensor = loss_function( -# prediction[target_node_indexes], data.y[target_node_indexes] -# ) -# loss.backward() -# optimizer.step() -# -# if lr_scheduler is not None: -# lr_scheduler.step() -# -# 
""" Validate performance """ -# if hasattr(data, "val_mask") and getattr(data, "val_mask") is not None: -# validation_results: _typing.Sequence[float] = self.evaluate( -# (data,), "val", [self.feval[0]] -# ) -# -# if self.feval[0].is_higher_better(): -# validation_loss: float = -validation_results[0] -# else: -# validation_loss: float = validation_results[0] -# self._early_stopping(validation_loss, self.model.model) -# if self._early_stopping.early_stop: -# LOGGER.debug("Early stopping at %d", current_epoch) -# break -# if hasattr(data, "val_mask") and data.val_mask is not None: -# self._early_stopping.load_checkpoint(self.model.model) -# return self -# -# def __predict_only(self, data): -# """ -# The function of predicting on the given data. -# :param data: data of a specific graph -# :return: the result of prediction on the given dataset -# """ -# data = data.to(self.device) -# self.model.model.eval() -# with torch.no_grad(): -# prediction = self.model.model(data) -# return prediction -# -# def train(self, dataset, keep_valid_result: bool = True): -# """ -# The function of training on the given dataset and keeping valid result. -# :param dataset: -# :param keep_valid_result: Whether to save the validation result after training -# """ -# data = dataset[0] -# self.__train_only(data) -# if keep_valid_result: -# prediction: torch.Tensor = self.__predict_only(data) -# self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] -# self._valid_result_prob: torch.Tensor = prediction[data.val_mask] -# self._valid_score = self.evaluate(dataset, "val") -# -# def predict_proba( -# self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False -# ) -> torch.Tensor: -# """ -# The function of predicting the probability on the given dataset. -# :param dataset: The node classification dataset used to be predicted. 
-# :param mask: -# :param in_log_format: -# :return: -# """ -# data = dataset[0].to(self.device) -# if mask is not None and type(mask) == str: -# if mask.lower() == "train": -# _mask = data.train_mask -# elif mask.lower() == "test": -# _mask = data.test_mask -# elif mask.lower() == "val": -# _mask = data.val_mask -# else: -# _mask = data.test_mask -# else: -# _mask = data.test_mask -# result = self.__predict_only(data)[_mask] -# return result if in_log_format else torch.exp(result) -# -# def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: -# return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] -# -# def get_valid_predict(self) -> torch.Tensor: -# return self._valid_result -# -# def get_valid_predict_proba(self) -> torch.Tensor: -# return self._valid_result_prob -# -# def get_valid_score(self, return_major: bool = True): -# if return_major: -# return (self._valid_score[0], self.feval[0].is_higher_better()) -# else: -# return (self._valid_score, [f.is_higher_better() for f in self.feval]) -# -# def get_name_with_hp(self) -> str: -# name = "-".join( -# [ -# str(self._optimizer_class), -# str(self._learning_rate), -# str(self._max_epoch), -# str(self._early_stopping.patience), -# str(self.model), -# str(self.device), -# ] -# ) -# name = ( -# name -# + "|" -# + "-".join( -# [ -# str(x[0]) + "-" + str(x[1]) -# for x in self.model.get_hyper_parameter().items() -# ] -# ) -# ) -# return name -# -# def evaluate( -# self, -# dataset, -# mask: _typing.Optional[str] = None, -# feval: _typing.Union[ -# None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] -# ] = None, -# ) -> _typing.Sequence[float]: -# data = dataset[0] -# data = data.to(self.device) -# if feval is None: -# _feval: _typing.Sequence[_typing.Type[Evaluation]] = self.feval -# else: -# _feval: _typing.Sequence[_typing.Type[Evaluation]] = get_feval(list(feval)) -# if mask.lower() == "train": -# _mask = data.train_mask -# elif mask.lower() == "test": -# _mask = data.test_mask -# elif mask.lower() == "val": -# _mask = data.val_mask -# else: -# _mask = data.test_mask -# prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) -# y_ground_truth = data.y[_mask] -# -# results = [] -# for f in _feval: -# try: -# results.append(f.evaluate(prediction_probability, y_ground_truth)) -# except: -# results.append( -# f.evaluate( -# prediction_probability.cpu().numpy(), -# y_ground_truth.cpu().numpy(), -# ) -# ) -# return results -# -# def to(self, device: torch.device): -# self.device = device -# if self.model is not None: -# self.model.to(self.device) -# -# def duplicate_from_hyper_parameter( -# self, -# hp: _typing.Dict[str, _typing.Any], -# model: _typing.Union[BaseModel, str, None] = None, -# ) -> "NodeClassificationNeighborSamplingTrainer": -# -# if model is None or not isinstance(model, BaseModel): -# model = self.model -# model = model.from_hyper_parameter( -# dict( -# [ -# x -# for x in hp.items() -# if x[0] in [y["parameterName"] for y in model.hyper_parameter_space] -# ] -# ) -# ) -# -# return NodeClassificationNeighborSamplingTrainer( -# model, -# self.num_features, -# self.num_classes, -# self._optimizer_class, -# device=self.device, -# init=True, -# feval=self.feval, -# loss=self.loss, -# lr_scheduler_type=self._lr_scheduler_type, -# **hp, -# ) -# -# @property -# def hyper_parameter_space(self): -# return self._hyper_parameter_space -# -# @hyper_parameter_space.setter -# def hyper_parameter_space(self, hp_space): -# self._hyper_parameter_space = hp_space - - 
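The layer-wise (LADIES) and node-wise sampled trainers below share one mini-batch pattern: the sampler yields a TargetDependantSampledData per batch of target nodes, the trainer assembles a small autogl.data.Data from the sampled node set, and the loss is taken only on the rows that correspond to the batch's target nodes. A condensed, illustrative sketch of that step (not a drop-in replacement; `sampler`, `model`, `optimizer`, `integral_data`, and `device` are assumed to be set up as in the trainers):

    import torch.nn.functional as F
    import autogl.data

    for sampled in sampler:  # TargetDependantSampledData
        optimizer.zero_grad()
        sub = autogl.data.Data(
            x=integral_data.x[sampled.all_sampled_nodes_indexes],
            y=integral_data.y[sampled.all_sampled_nodes_indexes],
        )
        sub.to(device)
        # one bipartite edge list per GNN layer, remapped to the sampled graph
        sub.edge_indexes = [
            layer.edge_index_for_sampled_graph.to(device)
            for layer in sampled.sampled_edges_for_layers
        ]
        out = model(sub)  # one log-probability row per sampled node
        target = sampled.target_nodes_indexes.indexes_in_sampled_graph
        loss = F.nll_loss(out[target], sub.y[target])  # loss on target rows only
        loss.backward()
        optimizer.step()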
@register_trainer("NodeClassificationGraphSAINTTrainer") class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): def __init__( @@ -772,7 +427,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi init: bool = True, feval: _typing.Union[ _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), + ] = (MicroF1,), loss: str = "nll_loss", lr_scheduler_type: _typing.Optional[str] = None, **kwargs, @@ -815,17 +470,31 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () + self.__training_batch_size: int = kwargs.get("training_batch_size", 1024) + if not self.__training_batch_size > 0: + self.__training_batch_size: int = 1024 + self.__predicting_batch_size: int = kwargs.get("predicting_batch_size", 1024) + if not self.__predicting_batch_size > 0: + self.__predicting_batch_size: int = 1024 + + cpu_count: int = os.cpu_count() if os.cpu_count() is not None else 0 + self.__training_sampler_num_workers: int = kwargs.get( + "training_sampler_num_workers", cpu_count + ) + if self.__training_sampler_num_workers > cpu_count: + self.__training_sampler_num_workers = cpu_count + self.__predicting_sampler_num_workers: int = kwargs.get( + "predicting_sampler_num_workers", cpu_count + ) + if self.__predicting_sampler_num_workers > cpu_count: + self.__predicting_sampler_num_workers = cpu_count + super(NodeClassificationLayerDependentImportanceSamplingTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) """ Set hyper parameters """ - " Configure num_layers " - self.__num_layers: int = kwargs.get("num_layers") - " Configure sampled_node_size_budget " - self.__sampled_node_size_budget: int = ( - kwargs.get("sampled_node_size_budget") - ) + self.__sampled_node_sizes: _typing.Sequence[int] = kwargs.get("sampled_node_sizes") self.__is_initialized: bool = False if init: @@ -846,10 +515,10 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi def get_model(self): return self.model - def __train_only(self, data): + def __train_only(self, integral_data): """ The function of training on the given dataset and mask. 
- :param data: data of a specific graph
+ :param integral_data: data of a specific graph
 :return: self
 """
 optimizer: torch.optim.Optimizer = self._optimizer_class(
@@ -886,56 +555,54 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi
 torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0)
 )
- sampled_node_size_budget: int = self.__sampled_node_size_budget
- num_layers: int = self.__num_layers
-
 __layer_dependent_importance_sampler: LayerDependentImportanceSampler = (
- LayerDependentImportanceSampler(data.edge_index)
- )
- __top_layer_target_nodes_indexes: torch.LongTensor = (
- torch.where(data.train_mask)[0].unique()
+ LayerDependentImportanceSampler(
+ integral_data.edge_index, torch.where(integral_data.train_mask)[0].unique(),
+ self.__sampled_node_sizes, batch_size=self.__training_batch_size,
+ num_workers=self.__training_sampler_num_workers
+ )
 )
 for current_epoch in range(self._max_epoch):
 self.model.model.train()
 optimizer.zero_grad()
 """ epoch start """
 " sample graphs "
- __layers: _typing.Sequence[
- _typing.Tuple[torch.Tensor, torch.Tensor]
- ] = __layer_dependent_importance_sampler.sample(
- __top_layer_target_nodes_indexes,
- [sampled_node_size_budget for _ in range(num_layers)]
- )
- data.edge_indexes = [layer[0] for layer in __layers]
- data.edge_weights = [layer[1] for layer in __layers]
- data = data.to(self.device)
-
- result: torch.Tensor = self.model.model.forward(data)
- if hasattr(torch.nn.functional, self.loss):
- loss_function = getattr(
- torch.nn.functional, self.loss
+ for sampled_data in __layer_dependent_importance_sampler:
+ optimizer.zero_grad()
+ sampled_data: TargetDependantSampledData = sampled_data
+ # Since the current Model design only accepts a Data object, a sampled Data instance has to be assembled as the argument
+ sampled_graph: autogl.data.Data = autogl.data.Data(
+ x=integral_data.x[sampled_data.all_sampled_nodes_indexes],
+ y=integral_data.y[sampled_data.all_sampled_nodes_indexes]
 )
+ sampled_graph.to(self.device)
+ sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [
+ current_layer.edge_index_for_sampled_graph.to(self.device)
+ for current_layer in sampled_data.sampled_edges_for_layers
+ ]
+ prediction: torch.Tensor = self.model.model(sampled_graph)
+ if not hasattr(torch.nn.functional, self.loss):
+ raise TypeError(
+ f"PyTorch does not support loss type {self.loss}"
+ )
+ loss_function = getattr(torch.nn.functional, self.loss)
 loss_value: torch.Tensor = loss_function(
- result[data.train_mask],
- data.y[data.train_mask]
- )
- else:
- raise TypeError(
- f"PyTorch does not support loss type {self.loss}"
+ prediction[sampled_data.target_nodes_indexes.indexes_in_sampled_graph],
+ sampled_graph.y[sampled_data.target_nodes_indexes.indexes_in_sampled_graph]
 )
+ loss_value.backward()
+ optimizer.step()
- loss_value.backward()
- optimizer.step()
 if self._lr_scheduler_type:
 lr_scheduler.step()
 if (
- hasattr(data, "val_mask") and
- getattr(data, "val_mask") is not None and
- type(getattr(data, "val_mask")) == torch.Tensor
+ hasattr(integral_data, "val_mask") and
+ getattr(integral_data, "val_mask") is not None and
+ type(getattr(integral_data, "val_mask")) == torch.Tensor
 ):
 validation_results: _typing.Sequence[float] = self.evaluate(
- (data,), "val", [self.feval[0]]
+ (integral_data,), "val", [self.feval[0]]
 )
 if self.feval[0].is_higher_better():
 validation_loss: float = -validation_results[0]
@@ -946,23 +613,68 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi
 LOGGER.debug("Early stopping at %d", current_epoch)
 break
 if (
- hasattr(data,
"val_mask") and - getattr(data, "val_mask") is not None and - type(getattr(data, "val_mask")) == torch.Tensor + hasattr(integral_data, "val_mask") and + getattr(integral_data, "val_mask") is not None and + type(getattr(integral_data, "val_mask")) == torch.Tensor ): self._early_stopping.load_checkpoint(self.model.model) - def __predict_only(self, data) -> torch.Tensor: + def __predict_only( + self, integral_data, + mask_or_target_nodes_indexes: _typing.Union[ + torch.BoolTensor, torch.LongTensor + ] + ) -> torch.Tensor: """ The function of predicting on the given data. - :param data: data of a specific graph + :param integral_data: data of a specific graph + :param mask_or_target_nodes_indexes: ... :return: the result of prediction on the given dataset """ - data = data.to(self.device) + if mask_or_target_nodes_indexes.dtype == torch.bool: + target_nodes_indexes: _typing.Any = ( + torch.where(mask_or_target_nodes_indexes)[0] + ) + else: + target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() + + neighbor_sampler: NeighborSampler = NeighborSampler( + torch_geometric.utils.add_remaining_self_loops(integral_data.edge_index)[0], + target_nodes_indexes, [-1 for _ in self.__sampled_node_sizes], + batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, + shuffle=False + ) + + prediction_batch_cumulative_builder = ( + EvaluatorUtility.PredictionBatchCumulativeBuilder() + ) self.model.model.eval() - with torch.no_grad(): - predicted_x: torch.Tensor = self.model.model(data) - return predicted_x + for sampled_data in neighbor_sampler: + sampled_data: TargetDependantSampledData = sampled_data + sampled_graph: autogl.data.Data = autogl.data.Data( + integral_data.x[sampled_data.all_sampled_nodes_indexes], + integral_data.y[sampled_data.all_sampled_nodes_indexes] + ) + sampled_graph.to(self.device) + sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ + current_layer.edge_index_for_sampled_graph.to(self.device) + for current_layer in sampled_data.sampled_edges_for_layers + ] + sampled_graph.edge_weights: _typing.Sequence[torch.FloatTensor] = [ + current_layer.edge_weight.to(self.device) + for current_layer in sampled_data.sampled_edges_for_layers + ] + + with torch.no_grad(): + prediction_batch_cumulative_builder.add_batch( + sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), + self.model.model(sampled_graph)[ + sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ].cpu().numpy() + ) + + return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) def predict_proba( self, dataset, mask: _typing.Optional[str]=None, @@ -978,16 +690,16 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi data = dataset[0].to(self.device) if mask is not None and type(mask) == str: if mask.lower() == "train": - _mask: torch.Tensor = data.train_mask + _mask: torch.BoolTensor = data.train_mask elif mask.lower() == "test": - _mask: torch.Tensor = data.test_mask + _mask: torch.BoolTensor = data.test_mask elif mask.lower() == "val": - _mask: torch.Tensor = data.val_mask + _mask: torch.BoolTensor = data.val_mask else: - _mask: torch.Tensor = data.test_mask + _mask: torch.BoolTensor = data.test_mask else: - _mask: torch.Tensor = data.test_mask - result = self.__predict_only(data)[_mask] + _mask: torch.BoolTensor = data.test_mask + result = self.__predict_only(data, _mask) return result if in_log_format else torch.exp(result) def predict(self, dataset, mask: _typing.Optional[str] = None) -> 
torch.Tensor: @@ -1021,18 +733,12 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) y_ground_truth: torch.Tensor = data.y[_mask] - eval_results = [] - for f in _feval: - try: - eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) - except: - eval_results.append( - f.evaluate( - prediction_probability.cpu().numpy(), - y_ground_truth.cpu().numpy(), - ) - ) - return eval_results + return [ + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), + ) for f in _feval + ] def train(self, dataset, keep_valid_result: bool = True): """ @@ -1043,9 +749,9 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi data = dataset[0] self.__train_only(data) if keep_valid_result: - prediction: torch.Tensor = self.__predict_only(data) - self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] - self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + prediction: torch.Tensor = self.__predict_only(data, data.val_mask) + self._valid_result: torch.Tensor = prediction.max(1)[1] + self._valid_result_prob: torch.Tensor = prediction self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") def get_valid_predict(self) -> torch.Tensor: @@ -1189,6 +895,25 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () + self.__training_batch_size: int = kwargs.get("training_batch_size", 1024) + if not self.__training_batch_size > 0: + self.__training_batch_size: int = 1024 + self.__predicting_batch_size: int = kwargs.get("predicting_batch_size", 1024) + if not self.__predicting_batch_size > 0: + self.__predicting_batch_size: int = 1024 + + cpu_count: int = os.cpu_count() if os.cpu_count() is not None else 0 + self.__training_sampler_num_workers: int = kwargs.get( + "training_sampler_num_workers", cpu_count + ) + if self.__training_sampler_num_workers > cpu_count: + self.__training_sampler_num_workers = cpu_count + self.__predicting_sampler_num_workers: int = kwargs.get( + "predicting_sampler_num_workers", cpu_count + ) + if self.__predicting_sampler_num_workers > cpu_count: + self.__predicting_sampler_num_workers = cpu_count + super(NodeClassificationNeighborSamplingTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) @@ -1257,15 +982,14 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): neighbor_sampler: NeighborSampler = NeighborSampler( integral_data.edge_index, torch.where(integral_data.train_mask)[0].unique(), - self.__sampling_sizes, batch_size=1024, - num_workers=os.cpu_count() if os.cpu_count() is not None else 0 + self.__sampling_sizes, batch_size=self.__training_batch_size, + num_workers=self.__training_sampler_num_workers ) - for current_epoch in tqdm.tqdm(range(self._max_epoch), desc="Epoch"): + for current_epoch in range(self._max_epoch): self.model.model.train() optimizer.zero_grad() """ epoch start """ " sample graphs " - # todo: Done this for sampled_data in neighbor_sampler: optimizer.zero_grad() sampled_data: TargetDependantSampledData = sampled_data @@ -1339,7 +1063,8 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): neighbor_sampler: NeighborSampler = NeighborSampler( integral_data.edge_index, target_nodes_indexes, [-1 for _ in self.__sampling_sizes], - 
batch_size=1024, num_workers=0, shuffle=False + batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, shuffle=False ) prediction_batch_cumulative_builder = ( diff --git a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py index abd3433..1c50cbe 100644 --- a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py +++ b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py @@ -24,7 +24,7 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar __in_degree[__all_edge_index_with_self_loops[1]] ] ) - temp_tensor: torch.Tensor = 1.0 / temp_tensor + temp_tensor: torch.Tensor = torch.pow(temp_tensor, -0.5) temp_tensor[torch.isinf(temp_tensor)] = 0.0 return temp_tensor[0] * temp_tensor[1] @@ -134,9 +134,8 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar ).unique() __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ self._Utility.get_candidate_source_nodes_probabilities( - all_candidate_edge_indexes, - self._edge_index, - self.__all_edge_weights + all_candidate_edge_indexes, self._edge_index, + self.__all_edge_weights * self.__all_edge_weights ) assert __all_candidate_source_nodes_indexes.size() == all_candidate_source_nodes_probabilities.size() @@ -162,7 +161,7 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar non_normalized_selected_edges_weight: torch.Tensor = ( self.__all_edge_weights[__selected_edges_indexes] / ( - selected_source_node_indexes.numel() * torch.tensor( + torch.tensor( [ all_candidate_source_nodes_probabilities[ __all_candidate_source_nodes_indexes == current_source_node_index @@ -195,89 +194,3 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar non_normalized_selected_edges_weight ) return __selected_edges_indexes, normalized_selected_edges_weight - - # todo: Migrated to the overrode _sample_edges_for_layer method, remove in the future version - # def __sample_layer( - # self, target_nodes_indexes: torch.LongTensor, - # sampled_node_size_budget: int - # ) -> _typing.Tuple[torch.Tensor, torch.Tensor, torch.LongTensor, torch.LongTensor]: - # """ - # :param target_nodes_indexes: - # node indexes for target nodes in the top layer or nodes sampled in upper layer - # :param sampled_node_size_budget: - # :return: (Tensor, Tensor, LongTensor, LongTensor) - # """ - # all_candidate_edge_indexes: torch.LongTensor = torch.cat( - # [ - # torch.where(self._edge_index[1] == current_target_node_index)[0] - # for current_target_node_index in target_nodes_indexes.unique().tolist() - # ] - # ).unique() - # __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ - # self._Utility.get_candidate_source_nodes_probabilities( - # all_candidate_edge_indexes, - # self._edge_index, - # self.__all_edge_weights - # ) - # assert __all_candidate_source_nodes_indexes.size() == all_candidate_source_nodes_probabilities.size() - # - # """ Sampling """ - # if sampled_node_size_budget < __all_candidate_source_nodes_indexes.numel(): - # selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes[ - # torch.from_numpy( - # np.unique(np.random.choice( - # np.arange(__all_candidate_source_nodes_indexes.numel()), sampled_node_size_budget, - # p=all_candidate_source_nodes_probabilities.numpy() - # )) - # ).unique() - # ].unique() - # 
else: - # selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes - # - # __selected_edges_indexes: torch.LongTensor = ( - # self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes( - # self._edge_index, - # selected_source_node_indexes, target_nodes_indexes - # ) - # ).unique() - # - # non_normalized_selected_edges_weight: torch.Tensor = ( - # self.__all_edge_weights[__selected_edges_indexes] / ( - # selected_source_node_indexes.numel() * torch.tensor( - # [ - # all_candidate_source_nodes_probabilities[ - # __all_candidate_source_nodes_indexes == current_source_node_index - # ].item() - # for current_source_node_index - # in self._edge_index[0, __selected_edges_indexes].tolist() - # ] - # ) - # ) - # ) - # - # def __normalize_edges_weight_by_target_nodes( - # __edge_index: torch.Tensor, __edge_weight: torch.Tensor - # ) -> torch.Tensor: - # if __edge_index.size(1) != __edge_weight.numel(): - # raise ValueError - # for current_target_node_index in __edge_index[1].unique().tolist(): - # __current_mask_for_edges: torch.BoolTensor = ( - # __edge_index[1] == current_target_node_index - # ) - # __edge_weight[__current_mask_for_edges] = ( - # __edge_weight[__current_mask_for_edges] / ( - # torch.sum(__edge_weight[__current_mask_for_edges]) - # ) - # ) - # return __edge_weight - # - # normalized_selected_edges_weight: torch.Tensor = __normalize_edges_weight_by_target_nodes( - # self._edge_index[:, __selected_edges_indexes], - # non_normalized_selected_edges_weight - # ) - # return ( - # self._edge_index[:, __selected_edges_indexes], - # normalized_selected_edges_weight, - # selected_source_node_indexes, - # __selected_edges_indexes - # ) diff --git a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index b1d7c39..314e264 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -4,19 +4,23 @@ import torch_geometric from .target_dependant_sampler import TargetDependantSampler, TargetDependantSampledData -def _neighbor_sampler_transform( - batch_size: int, n_id: torch.LongTensor, - adj_list: _typing.Sequence[ - _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]] - ] -) -> TargetDependantSampledData: - return TargetDependantSampledData( - [(current_layer[0], current_layer[1], None)for current_layer in adj_list], - (torch.arange(batch_size), n_id[:batch_size]), n_id - ) - - class NeighborSampler(TargetDependantSampler, _typing.Iterable): + @classmethod + def __compute_edge_weight(cls, edge_index: torch.LongTensor) -> torch.Tensor: + __num_nodes = max(int(edge_index[0].max()), int(edge_index[1].max())) + 1 + __out_degree: torch.LongTensor = torch_geometric.utils.degree( + edge_index[0], __num_nodes + ) + __in_degree: torch.LongTensor = torch_geometric.utils.degree( + edge_index[1], __num_nodes + ) + temp_tensor: torch.Tensor = torch.stack( + [__out_degree[edge_index[0]], __in_degree[edge_index[1]]] + ) + temp_tensor: torch.Tensor = torch.pow(temp_tensor, -0.5) + temp_tensor[torch.isinf(temp_tensor)] = 0.0 + return temp_tensor[0] * temp_tensor[1] + def __init__( self, edge_index: torch.LongTensor, target_nodes_indexes: torch.LongTensor, @@ -24,14 +28,29 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): batch_size: int = 1, num_workers: int = 0, shuffle: bool = True, **kwargs ): + self.__edge_weight: torch.Tensor = self.__compute_edge_weight(edge_index) 
self.__pyg_neighbor_sampler: torch_geometric.data.NeighborSampler = ( torch_geometric.data.NeighborSampler( edge_index, list(sampling_sizes[::-1]), target_nodes_indexes, - transform=_neighbor_sampler_transform, batch_size=batch_size, + transform=self._transform, batch_size=batch_size, num_workers=num_workers, shuffle=shuffle, **kwargs ) ) + def _transform( + self, batch_size: int, n_id: torch.LongTensor, + adj_list: _typing.Sequence[ + _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]] + ] + ) -> TargetDependantSampledData: + return TargetDependantSampledData( + [ + (current_layer[0], current_layer[1], self.__edge_weight[current_layer[1]]) + for current_layer in adj_list + ], + (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]), n_id + ) + def __iter__(self): return iter(self.__pyg_neighbor_sampler) diff --git a/autogl/module/train/sampling/sampler/target_dependant_sampler.py b/autogl/module/train/sampling/sampler/target_dependant_sampler.py index 7adc816..ab8620f 100644 --- a/autogl/module/train/sampling/sampler/target_dependant_sampler.py +++ b/autogl/module/train/sampling/sampler/target_dependant_sampler.py @@ -254,6 +254,7 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): __sampled_nodes_in_sub_graph_mapping.get(current_target_node_index_in_integral_data) for current_target_node_index_in_integral_data in top_layer_target_nodes_indexes.tolist() + if current_target_node_index_in_integral_data in __sampled_nodes_in_sub_graph_mapping ] ).long(), # Remap top_layer_target_nodes_indexes diff --git a/configs/nodeclf_ladies_gcn.yml b/configs/nodeclf_ladies_gcn.yml index 83c7e6f..c61ce3b 100644 --- a/configs/nodeclf_ladies_gcn.yml +++ b/configs/nodeclf_ladies_gcn.yml @@ -7,23 +7,19 @@ hpo: name: random models: - hp_space: - - feasiblePoints: - - 0 - parameterName: add_self_loops, - type: CATEGORICAL, - - feasiblePoints: 5,5 - parameterName: num_layers - type: DISCRETE - - cutFunc: lambda x:x[0] - 1 + - parameterName: num_layers + type: FIXED + value: 5 + - parameterName: hidden + type: CATEGORICAL_LIST + cutFunc: lambda x:x[0] - 1 cutPara: - num_layers length: 4 - maxValue: 256 - minValue: 64 - numericalType: INTEGER - parameterName: hidden - scalingType: LOG - type: NUMERICAL_LIST + feasiblePoints: + - 128 + - 256 + - 512 - maxValue: 0.8 minValue: 0.2 parameterName: dropout @@ -36,23 +32,46 @@ models: - tanh parameterName: act type: CATEGORICAL + - parameterName: add_self_loops + type: FIXED + value: 0 + - parameterName: normalize + type: FIXED + value: 0 name: gcn trainer: name: NodeClassificationLayerDependentImportanceSamplingTrainer hp_space: - - feasiblePoints: 128,256,512 - parameterName: sampled_node_size_budget - type: DISCRETE - - maxValue: 300 - minValue: 100 + - parameterName: sampled_node_sizes + type: CATEGORICAL_LIST + length: 5 + feasiblePoints: + - 128 + - 256 + - 512 + - 1024 + cutFunc: lambda x:x[0] + cutPara: + - num_layers + - maxValue: 128 + minValue: 64 parameterName: max_epoch scalingType: LINEAR type: INTEGER - - maxValue: 30 - minValue: 10 + - maxValue: 16 + minValue: 8 parameterName: early_stopping_round scalingType: LINEAR type: INTEGER + - parameterName: training_batch_size + type: FIXED + value: 1024 + - parameterName: predicting_batch_size + type: FIXED + value: 1024 + - parameterName: predicting_sampler_num_workers + type: FIXED + value: 0 - maxValue: 0.05 minValue: 0.01 parameterName: lr From 5602bf794e15021b5721b4a8847f7611d069bd63 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Mon, 24 May 2021 
21:45:30 +0800
Subject: [PATCH 069/144] adjust for new model

---
 autogl/module/train/link_prediction.py | 8 ++++----
 examples/link_prediction.py | 9 ++-------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py
index fdc7844..3adcb95 100644
--- a/autogl/module/train/link_prediction.py
+++ b/autogl/module/train/link_prediction.py
@@ -1,13 +1,13 @@
-from . import register_trainer, BaseTrainer, Evaluation, EVALUATE_DICT, EarlyStopping
+from . import register_trainer, BaseTrainer, Evaluation
 import torch
 from torch.optim.lr_scheduler import StepLR
 import torch.nn.functional as F
 from ..model import MODEL_DICT, BaseModel
-from .evaluate import Logloss, Acc, Auc
+from .evaluation import Auc, EVALUATE_DICT
+from .base import EarlyStopping
 from typing import Union
 from copy import deepcopy
 from torch_geometric.utils import negative_sampling
-from torch_geometric.utils import train_test_split_edges
 from ...utils import get_logger
@@ -73,7 +73,7 @@ class LinkPredictionTrainer(BaseTrainer):
 *args,
 **kwargs
 ):
- super(LinkPredictionTrainer, self).__init__(model)
+ super().__init__(model, device, init, feval, loss)
 self.loss_type = loss
diff --git a/examples/link_prediction.py b/examples/link_prediction.py
index 65236a4..a9c9825 100644
--- a/examples/link_prediction.py
+++ b/examples/link_prediction.py
@@ -2,17 +2,12 @@ import os.path as osp
 import sys
 sys.path.insert(0, '../')
 import torch
-from torch_geometric.datasets import Planetoid
-import torch_geometric.transforms as T
-from sklearn.metrics import accuracy_score as acc
-from sklearn.metrics import roc_auc_score
+from autogl.datasets import build_dataset_from_name
 from autogl.module.train import LinkPredictionTrainer
 import numpy as np
 from torch_geometric.utils import train_test_split_edges

-dataset = 'Cora'
-path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
-dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
+dataset = build_dataset_from_name('cora')
 print('len', len(dataset))
 print('num_class', dataset.num_classes)

From 09614ddc492b8d7ee194403375d6c60a8015cbfc Mon Sep 17 00:00:00 2001
From: null
Date: Tue, 25 May 2021 02:56:00 +0800
Subject: [PATCH 070/144] Implement the Layer-wise prediction for sampling
 trainer, fix bugs

1. Implement the layer-wise prediction in the Node Classification
Node-wise Sampling Trainer, inspired by the Reddit example provided by
PyTorch-Geometric. In particular, for large graphs, e.g. the Reddit data,
it is usually infeasible to run inference with all the neighbor nodes in
the 2-hop neighborhood/proximity of a specific node. Therefore, a
practical way to run inference when validating or testing the Graph
Neural Network is to predict with a layer-wise approach; see the source
code of NodeClassificationNodeWiseSamplingTrainer and the Reddit example
(examples/reddit.py) provided by PyTorch-Geometric for the idea and
details.
2. Fix a bug caused by an ambiguous behaviour of
torch_geometric.data.sampler.NeighborSampler. More specifically, when
only one sampling layer is requested, the yielded "adjs" item is a
single 3-item tuple instead of a sequence of 3-item tuples. This
ambiguous behaviour easily causes bugs in downstream code.
3. Re-add the default hyper-parameters in GCN and GraphSAGE to fix an
initialization issue caused by incorrectly removing the default
hyper-parameter space and default hyper-parameters by Zixin Sun.
Sorry.
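The layer-wise inference described in point 1 follows the pattern of PyTorch-Geometric's Reddit example. Below is a minimal sketch of the idea, assuming a list of SAGEConv layers `convs` and PyG's torch_geometric.data.NeighborSampler; the free-standing function and its names are illustrative, not the trainer's actual API:

import torch
import torch.nn.functional as F
from torch_geometric.data import NeighborSampler

@torch.no_grad()
def layer_wise_inference(convs, x_all, edge_index, device, batch_size=1024):
    # One full pass over all nodes per layer: each node only ever needs its
    # 1-hop neighborhood at a time, instead of the exponentially growing
    # multi-hop neighborhood a single end-to-end forward pass would require.
    loader = NeighborSampler(edge_index, node_idx=None, sizes=[-1],
                             batch_size=batch_size, shuffle=False)
    for i, conv in enumerate(convs):
        outputs = []
        for batch_size_, n_id, adj in loader:
            # With a single size, PyG yields one bare (edge_index, e_id, size)
            # tuple here -- exactly the ambiguity addressed in point 2.
            sub_edge_index, _, size = adj.to(device)
            x = x_all[n_id].to(device)
            x_target = x[:size[1]]  # target nodes are listed first in n_id
            x = conv((x, x_target), sub_edge_index)
            if i != len(convs) - 1:
                x = F.relu(x)
            outputs.append(x.cpu())
        x_all = torch.cat(outputs, dim=0)  # updated embeddings for all nodes
    return x_all

Memory then scales with one batch and a single 1-hop neighborhood per layer, which is what makes validation and testing on Reddit-sized graphs feasible.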
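The sampler ambiguity in point 2 can also be normalized away before any downstream unpacking; a sketch with a hypothetical helper name:

def _as_adj_list(adj_or_adjs):
    # torch_geometric.data.NeighborSampler yields a bare (edge_index, e_id,
    # size) tuple when a single layer is sampled, and a list of such tuples
    # otherwise; always hand a list to downstream code.
    return adj_or_adjs if isinstance(adj_or_adjs, list) else [adj_or_adjs]

The actual fix in this patch performs the equivalent check inside NeighborSampler._transform below.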
--- autogl/module/model/base.py | 21 +- autogl/module/model/gcn.py | 17 +- autogl/module/model/graph_sage.py | 197 +++++++++--------- .../node_classification_sampled_trainer.py | 133 +++++++++--- .../sampling/sampler/neighbor_sampler.py | 32 ++- 5 files changed, 259 insertions(+), 141 deletions(-) diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index 9695eb5..67b80c2 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -295,6 +295,7 @@ class ClassificationModel(_BaseModel): num_graph_features: int = ..., device: _typing.Union[str, torch.device] = ..., hyper_parameter_space: _typing.Sequence[_typing.Any] = ..., + hyper_parameter: _typing.Dict[str, _typing.Any] = ..., init: bool = False, **kwargs ): @@ -302,7 +303,7 @@ class ClassificationModel(_BaseModel): del kwargs["initialize"] super(ClassificationModel, self).__init__( initialize=init, hyper_parameter_space=hyper_parameter_space, - device=device, **kwargs + hyper_parameter=hyper_parameter, device=device, **kwargs ) if num_classes != Ellipsis and type(num_classes) == int: self.__num_classes: int = num_classes if num_classes > 0 else 0 @@ -373,3 +374,21 @@ class ClassificationModel(_BaseModel): self.__num_graph_features = num_graph_features else: self.__num_graph_features = 0 + + +class SequentialGraphNeuralNetwork(torch.nn.Module): + def __init__(self): + super(SequentialGraphNeuralNetwork, self).__init__() + + def decode(self, x: torch.Tensor) -> torch.Tensor: + raise NotImplementedError + + def encode(self, data) -> torch.Tensor: + raise NotImplementedError + + @property + def encoder_sequential_modules(self) -> torch.nn.ModuleList: + raise NotImplementedError + + def forward(self, data) -> torch.Tensor: + return self.decode(self.encode(data)) diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 7d2d4e1..72ca48a 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -184,6 +184,11 @@ class AutoGCN(ClassificationModel): "type": "CATEGORICAL", "feasiblePoints": [1], }, + { + "parameterName": "normalize", + "type": "CATEGORICAL", + "feasiblePoints": [1], + }, { "parameterName": "num_layers", "type": "DISCRETE", @@ -213,10 +218,20 @@ class AutoGCN(ClassificationModel): "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], }, ] + default_hp = { + "add_self_loops": 1, + "normalize": 1, + "num_layers": 2, + "hidden": [16], + "dropout": 0.2, + "act": "leaky_relu", + } super(AutoGCN, self).__init__( num_features, num_classes, device=device, - hyper_parameter_space=default_hp_space, init=init, **kwargs + hyper_parameter_space=default_hp_space, + hyper_parameter=default_hp, init=init, + **kwargs ) def _initialize(self): diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 1405b20..f7073b6 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -3,11 +3,61 @@ import torch import torch.nn.functional from torch_geometric.nn.conv import SAGEConv +import autogl.data from . import register_model -from .base import ClassificationModel, activate_func +from .base import ( + ClassificationModel, activate_func, + SequentialGraphNeuralNetwork +) + + +class GraphSAGE(SequentialGraphNeuralNetwork): + class _SAGELayer(torch.nn.Module): + def __init__( + self, input_channels: int, output_channels: int, aggr: str, + activation_name: _typing.Optional[str] = ..., + dropout_probability: _typing.Optional[float] = ... 
+ ): + super().__init__() + self._convolution: SAGEConv = SAGEConv( + input_channels, output_channels, aggr=aggr + ) + if ( + activation_name is not Ellipsis and + activation_name is not None and + type(activation_name) == str + ): + self._activation_name: _typing.Optional[str] = activation_name + else: + self._activation_name: _typing.Optional[str] = None + if ( + dropout_probability is not Ellipsis and + dropout_probability is not None and + type(dropout_probability) == float + ): + if dropout_probability < 0: + dropout_probability = 0 + if dropout_probability > 1: + dropout_probability = 1 + self._dropout: _typing.Optional[torch.nn.Dropout] = ( + torch.nn.Dropout(dropout_probability) + ) + else: + self._dropout: _typing.Optional[torch.nn.Dropout] = None + def forward(self, data) -> torch.Tensor: + x: torch.Tensor = getattr(data, "x") + edge_index: torch.Tensor = getattr(data, "edge_index") + if type(x) != torch.Tensor or type(edge_index) != torch.Tensor: + raise TypeError + + x: torch.Tensor = self._convolution.forward(x, edge_index) + if self._activation_name is not None: + x: torch.Tensor = activate_func(x, self._activation_name) + if self._dropout is not None: + x: torch.Tensor = self._dropout.forward(x) + return x -class GraphSAGE(torch.nn.Module): def __init__( self, num_features: int, @@ -23,113 +73,54 @@ class GraphSAGE(torch.nn.Module): if aggr not in ("add", "max", "mean"): aggr = "mean" - self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() - - num_layers: int = len(hidden_features) + 1 - if num_layers == 1: - self.__convolution_layers.append( - SAGEConv(num_features, num_classes, aggr=aggr) + if len(hidden_features) == 0: + self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList( + (self._SAGELayer(num_features, num_classes, aggr),) ) else: - self.__convolution_layers.append( - SAGEConv(num_features, hidden_features[0], aggr=aggr) - ) + self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList() + self.__sequential_module_list.append(self._SAGELayer( + num_features, hidden_features[0], aggr, activation_name, dropout + )) for i in range(len(hidden_features)): if i + 1 < len(hidden_features): - self.__convolution_layers.append( - SAGEConv(hidden_features[i], hidden_features[i + 1], aggr=aggr) - ) + self.__sequential_module_list.append(self._SAGELayer( + hidden_features[i], hidden_features[i + 1], aggr, + activation_name, dropout + )) else: - self.__convolution_layers.append( - SAGEConv(hidden_features[i], num_classes, aggr=aggr) - ) - self.__dropout: float = dropout - self.__activation_name: str = activation_name - - def __basic_forward( - self, - x: torch.Tensor, - edge_index: torch.Tensor, - edge_weight: _typing.Optional[torch.Tensor] = None, - ) -> torch.Tensor: - for layer_index in range(len(self.__convolution_layers)): - x: torch.Tensor = self.__convolution_layers[layer_index]( - x, edge_index, edge_weight - ) - if layer_index + 1 < len(self.__convolution_layers): - x = activate_func(x, self.__activation_name) - x = torch.nn.functional.dropout( - x, p=self.__dropout, training=self.training - ) - return torch.nn.functional.log_softmax(x, dim=1) + self.__sequential_module_list.append(self._SAGELayer( + hidden_features[i], num_classes, aggr + )) - def __layer_wise_forward( - self, x: torch.Tensor, - edge_indexes: _typing.Sequence[torch.Tensor], - edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] - ) -> torch.Tensor: - assert len(edge_indexes) == len(edge_weights) == len(self.__convolution_layers) - for edge_index 
in edge_indexes: - if type(edge_index) != torch.Tensor: - raise TypeError - if edge_index.size(0) != 2: - raise ValueError - for edge_weight in edge_weights: - if not (edge_weight is None or type(edge_weight) == torch.Tensor): - raise TypeError - - for layer_index in range(len(self.__convolution_layers)): - x: torch.Tensor = self.__convolution_layers[layer_index]( - x, edge_indexes[layer_index] - ) - if layer_index + 1 < len(self.__convolution_layers): - x = activate_func(x, self.__activation_name) - x = torch.nn.functional.dropout(x, p=self.__dropout, training=self.training) - return torch.nn.functional.log_softmax(x, dim=1) + @property + def encoder_sequential_modules(self) -> torch.nn.ModuleList: + return self.__sequential_module_list - def forward(self, data) -> torch.Tensor: + def encode(self, data) -> torch.Tensor: if ( - hasattr(data, "edge_indexes") and - isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and - len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) + hasattr(data, "edge_indexes") and + isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and + len(getattr(data, "edge_indexes")) == len(self.__sequential_module_list) ): - edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") - if ( - hasattr(data, "edge_weights") and - isinstance(getattr(data, "edge_weights"), _typing.Sequence) and - len(getattr(data, "edge_weights")) == len(self.__convolution_layers) - ): - edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = ( - getattr(data, "edge_weights") - ) - else: - edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = [ - None for _ in range(len(self.__convolution_layers)) - ] - return self.__layer_wise_forward( - getattr(data, "x"), edge_indexes, edge_weights - ) - else: - if not (hasattr(data, "x") and hasattr(data, "edge_index")): - raise AttributeError - if not ( - type(getattr(data, "x")) == torch.Tensor - and type(getattr(data, "edge_index")) == torch.Tensor - ): - raise TypeError + for __edge_index in getattr(data, "edge_indexes"): + if type(__edge_index) != torch.Tensor: + raise TypeError + """ Layer-wise encode """ x: torch.Tensor = getattr(data, "x") - edge_index: torch.LongTensor = getattr(data, "edge_index") - if ( - hasattr(data, "edge_weight") - and type(getattr(data, "edge_weight")) == torch.Tensor - and getattr(data, "edge_weight").size() == (edge_index.size(1),) - ): - edge_weight: _typing.Optional[torch.Tensor] = getattr( - data, "edge_weight" + for i, __edge_index in enumerate(getattr(data, "edge_indexes")): + _intermediate_data: autogl.data.Data = autogl.data.Data( + x=x, edge_index=__edge_index ) - else: - edge_weight: _typing.Optional[torch.Tensor] = None - return self.__basic_forward(x, edge_index, edge_weight) + x: torch.Tensor = self.encoder_sequential_modules[i](_intermediate_data) + return x + else: + for i in range(len(self.encoder_sequential_modules)): + data.x = self.encoder_sequential_modules[i](data) + return data.x + + def decode(self, x: torch.Tensor) -> torch.Tensor: + return torch.nn.functional.log_softmax(x, dim=1) @register_model("sage") @@ -201,9 +192,17 @@ class AutoSAGE(ClassificationModel): "feasiblePoints": ["mean", "add", "max"], }, ] + default_hp = { + "num_layers": 3, + "hidden": [64, 32], + "dropout": 0.5, + "act": "relu", + "agg": "mean", + } super(AutoSAGE, self).__init__( num_features, num_classes, device=device, - hyper_parameter_space=default_hp_space, init=init, **kwargs + hyper_parameter_space=default_hp_space, + hyper_parameter=default_hp, 
init=init, **kwargs ) def _initialize(self): diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 181c5b3..b2a9f84 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -17,6 +17,7 @@ from ..sampling.sampler.layer_dependent_importance_sampler import ( LayerDependentImportanceSampler ) from ...model import BaseModel +from ...model.base import SequentialGraphNeuralNetwork LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") @@ -1054,44 +1055,112 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): :param mask_or_target_nodes_indexes: ... :return: the result of prediction on the given dataset """ - if mask_or_target_nodes_indexes.dtype == torch.bool: - target_nodes_indexes: _typing.Any = ( - torch.where(mask_or_target_nodes_indexes)[0] + self.model.model.eval() + integral_data = integral_data.to(torch.device("cpu")) + if isinstance(self.model.model, SequentialGraphNeuralNetwork): + sequential_gnn_model: SequentialGraphNeuralNetwork = self.model.model + __num_layers: int = len(self.__sampling_sizes) + + x: torch.Tensor = getattr(integral_data, "x") + for _current_layer_index in range(__num_layers - 1): + __next_x: _typing.Optional[torch.Tensor] = None + current_neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, torch.arange(x.size(0)).unique(), + [-1], batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, shuffle=False + ) + for _target_dependant_sampled_data in current_neighbor_sampler: + _target_dependant_sampled_data: TargetDependantSampledData = ( + _target_dependant_sampled_data + ) + _sampled_graph: autogl.data.Data = autogl.data.Data( + x=x[_target_dependant_sampled_data.all_sampled_nodes_indexes], + edge_index=( + _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_index_for_sampled_graph + ) + ) + _sampled_graph: autogl.data.Data = _sampled_graph.to(self.device) + + with torch.no_grad(): + __sampled_graph_inferences: torch.Tensor = ( + sequential_gnn_model.encoder_sequential_modules[_current_layer_index](_sampled_graph) + ) + __sampled_graph_inferences: torch.Tensor = __sampled_graph_inferences.cpu() + if __next_x is None: + __next_x: torch.Tensor = torch.zeros(x.size(0), __sampled_graph_inferences.size(1)) + __next_x[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph] = ( + __sampled_graph_inferences[ + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ] + ) + x: torch.Tensor = __next_x + # The following procedures are for the top layer + if mask_or_target_nodes_indexes.dtype == torch.bool: + target_nodes_indexes: _typing.Any = ( + torch.where(mask_or_target_nodes_indexes)[0] + ) + else: + target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() + + current_neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, target_nodes_indexes, + [-1], batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, shuffle=False + ) + prediction_batch_cumulative_builder = ( + EvaluatorUtility.PredictionBatchCumulativeBuilder() ) + for _target_dependant_sampled_data in current_neighbor_sampler: + _sampled_graph: autogl.data.Data = autogl.data.Data( + 
x[_target_dependant_sampled_data.all_sampled_nodes_indexes], + _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_index_for_sampled_graph + ) + _sampled_graph: autogl.data.Data = _sampled_graph.to(self.device) + with torch.no_grad(): + prediction_batch_cumulative_builder.add_batch( + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), + sequential_gnn_model.decode( + sequential_gnn_model.encoder_sequential_modules[-1](_sampled_graph) + )[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph].cpu().numpy() + ) + return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) else: - target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() - - neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, target_nodes_indexes, [-1 for _ in self.__sampling_sizes], - batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, shuffle=False - ) + if mask_or_target_nodes_indexes.dtype == torch.bool: + target_nodes_indexes: _typing.Any = ( + torch.where(mask_or_target_nodes_indexes)[0] + ) + else: + target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() - prediction_batch_cumulative_builder = ( - EvaluatorUtility.PredictionBatchCumulativeBuilder() - ) - self.model.model.eval() - for sampled_data in neighbor_sampler: - sampled_data: TargetDependantSampledData = sampled_data - sampled_graph: autogl.data.Data = autogl.data.Data( - integral_data.x[sampled_data.all_sampled_nodes_indexes], - integral_data.y[sampled_data.all_sampled_nodes_indexes] + neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, target_nodes_indexes, [-1 for _ in self.__sampling_sizes], + batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, shuffle=False ) - sampled_graph.to(self.device) - sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ - current_layer.edge_index_for_sampled_graph.to(self.device) - for current_layer in sampled_data.sampled_edges_for_layers - ] - with torch.no_grad(): - prediction_batch_cumulative_builder.add_batch( - sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), - self.model.model(sampled_graph)[ - sampled_data.target_nodes_indexes.indexes_in_sampled_graph - ].cpu().numpy() + prediction_batch_cumulative_builder = ( + EvaluatorUtility.PredictionBatchCumulativeBuilder() + ) + self.model.model.eval() + for _target_dependant_sampled_data in neighbor_sampler: + _sampled_graph: autogl.data.Data = autogl.data.Data( + x=integral_data.x[ + _target_dependant_sampled_data.all_sampled_nodes_indexes + ] ) - - return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) + _sampled_graph = _sampled_graph.to(self.device) + _sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ + current_layer.edge_index_for_sampled_graph.to(self.device) + for current_layer in _target_dependant_sampled_data.sampled_edges_for_layers + ] + with torch.no_grad(): + prediction_batch_cumulative_builder.add_batch( + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), + self.model.model(_sampled_graph)[ + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ].cpu().numpy() + ) + return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) def predict_proba( self, dataset, mask: _typing.Optional[str] = None, diff --git 
a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index 314e264..97e8e66 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -39,17 +39,33 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): def _transform( self, batch_size: int, n_id: torch.LongTensor, - adj_list: _typing.Sequence[ + adj_or_adj_list: _typing.Union[ + _typing.Sequence[ + _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]] + ], _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]] ] ) -> TargetDependantSampledData: - return TargetDependantSampledData( - [ - (current_layer[0], current_layer[1], self.__edge_weight[current_layer[1]]) - for current_layer in adj_list - ], - (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]), n_id - ) + if ( + isinstance(adj_or_adj_list[0], _typing.Tuple) and + isinstance(adj_or_adj_list, _typing.Sequence) and + not isinstance(adj_or_adj_list, _typing.Tuple) + ): + return TargetDependantSampledData( + [ + (current_layer[0], current_layer[1], self.__edge_weight[current_layer[1]]) + for current_layer in adj_or_adj_list + ], + (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]), n_id + ) + elif isinstance(adj_or_adj_list, _typing.Tuple) and type(adj_or_adj_list[0]) == torch.Tensor: + adj_or_adj_list: _typing.Tuple[ + torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int] + ] = adj_or_adj_list + return TargetDependantSampledData( + [(adj_or_adj_list[0], adj_or_adj_list[1], self.__edge_weight[adj_or_adj_list[1]])], + (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]), n_id + ) def __iter__(self): return iter(self.__pyg_neighbor_sampler) From e2321300dfe3d1793799bf6bcc83c2f1578caba6 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sat, 29 May 2021 14:38:56 +0800 Subject: [PATCH 071/144] update lp solver --- autogl/datasets/utils.py | 7 + autogl/module/model/__init__.py | 3 +- autogl/module/model/gcn.py | 30 +- autogl/module/model/graph_sage.py | 2 +- autogl/module/model/graphsage.py | 2 +- autogl/module/train/__init__.py | 2 + autogl/module/train/base.py | 16 + autogl/module/train/link_prediction.py | 71 +- autogl/solver/classifier/link_predictor.py | 722 +++++++++++++++++++++ configs/lp_gcn_benchmark_small.yml | 61 ++ examples/link_prediction.py | 1 + examples/link_prediction_solver.py | 90 +++ 12 files changed, 944 insertions(+), 63 deletions(-) create mode 100644 autogl/solver/classifier/link_predictor.py create mode 100644 configs/lp_gcn_benchmark_small.yml create mode 100644 examples/link_prediction_solver.py diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index b0708db..7b5679c 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -2,9 +2,16 @@ from pdb import set_trace import torch import numpy as np from torch_geometric.data import DataLoader +from torch_geometric.utils import train_test_split_edges from sklearn.model_selection import StratifiedKFold, KFold +def split_edges(dataset, train_ratio, val_ratio): + datas = [data for data in dataset] + for i in range(len(datas)): + datas[i] = train_test_split_edges(datas[i], val_ratio, 1 - train_ratio - val_ratio) + dataset.data, dataset.slices = dataset.collate(datas) + def get_label_number(dataset): r"""Get the number of labels in this dataset as dict.""" label_num = {} diff --git a/autogl/module/model/__init__.py 
b/autogl/module/model/__init__.py index ef2a92d..42bdbc4 100644 --- a/autogl/module/model/__init__.py +++ b/autogl/module/model/__init__.py @@ -1,7 +1,8 @@ from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model from .base import BaseModel from .topkpool import AutoTopkpool -from .graph_sage import AutoSAGE +#from .graph_sage import AutoSAGE +from .graphsage import AutoSAGE from .gcn import AutoGCN from .gat import AutoGAT from .gin import AutoGIN diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 276a9e2..71ce274 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -3,7 +3,7 @@ import torch.nn.functional import torch_geometric import typing as _typing from . import register_model -from .base import activate_func, ClassificationModel +from .base import BaseModel, activate_func, ClassificationModel from ...utils import get_logger LOGGER = get_logger("GCNModel") @@ -94,10 +94,11 @@ class GCN(torch.nn.Module): def encode(self, data): x = data.x - for i in range(self.num_layer - 1): - x = self.convs[i](x, data.train_pos_edge_index) - if i != self.num_layer - 2: - x = activate_func(x, self.args["act"]) + num_layers = len(self.__convolution_layers) + for i in range(num_layers - 1): + x = self.__convolution_layers[i](x, data.train_pos_edge_index) + if i != num_layers - 2: + x = activate_func(x, self.__activation_name) # x = F.dropout(x, p=self.args["dropout"], training=self.training) return x @@ -112,8 +113,10 @@ class GCN(torch.nn.Module): +#@register_model("gcn") +#class AutoGCN(ClassificationModel): @register_model("gcn") -class AutoGCN(ClassificationModel): +class AutoGCN(BaseModel): r""" AutoGCN. The model used in this automodel is GCN, i.e., the graph convolutional network from the @@ -152,9 +155,10 @@ class AutoGCN(ClassificationModel): init: bool = False, **kwargs ) -> None: - super(AutoGCN, self).__init__( - num_features, num_classes, device=device, init=init, **kwargs - ) + super().__init__() + self.num_features = num_features + self.num_classes = num_classes + self.device = device self.params = { "features_num": self.num_features, @@ -210,11 +214,11 @@ class AutoGCN(ClassificationModel): if init is True: self.initialize() - def _initialize(self): + def initialize(self): self.model = GCN( self.num_features, self.num_classes, - self.hyper_parameter.get("hidden"), - self.hyper_parameter.get("dropout"), - self.hyper_parameter.get("act"), + self.hyperparams.get("hidden"), + self.hyperparams.get("dropout"), + self.hyperparams.get("act"), ).to(self.device) diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graph_sage.py index 2fe0450..a064585 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graph_sage.py @@ -82,7 +82,7 @@ class GraphSAGE(torch.nn.Module): return self.__full_forward(data) -@register_model("sage") +# @register_model("sage") class AutoSAGE(BaseModel): def __init__( self, diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py index b9245a6..5b09817 100644 --- a/autogl/module/model/graphsage.py +++ b/autogl/module/model/graphsage.py @@ -190,7 +190,7 @@ class GraphSAGE(torch.nn.Module): return (prob_adj > 0).nonzero(as_tuple=False).t() -# @register_model("sage") +@register_model("sage") class AutoSAGE(BaseModel): r""" AutoSAGE. 
The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 35f52b4..8e70ff2 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -4,6 +4,7 @@ from .base import ( Evaluation, BaseNodeClassificationTrainer, BaseGraphClassificationTrainer, + BaseLinkPredictionTrainer ) def register_trainer(name): @@ -30,6 +31,7 @@ __all__ = [ "Evaluation", "BaseGraphClassificationTrainer", "BaseNodeClassificationTrainer", + "BaseLinkPredictionTrainer", "GraphClassificationFullTrainer", "NodeClassificationFullTrainer", "LinkPredictionTrainer", diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index af26fa4..76c6a61 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -402,3 +402,19 @@ class BaseGraphClassificationTrainer(_BaseClassificationTrainer): super(BaseGraphClassificationTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) + +class BaseLinkPredictionTrainer(_BaseClassificationTrainer): + def __init__( + self, + model: _typing.Union[BaseModel, str], + num_features: int, + device: _typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", + ): + super(BaseLinkPredictionTrainer, self).__init__( + model, num_features, 2, device, init, feval, loss + ) \ No newline at end of file diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py index 3adcb95..5c17047 100644 --- a/autogl/module/train/link_prediction.py +++ b/autogl/module/train/link_prediction.py @@ -1,10 +1,10 @@ -from . import register_trainer, BaseTrainer, Evaluation +from . import register_trainer, Evaluation import torch from torch.optim.lr_scheduler import StepLR import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel from .evaluation import Auc, EVALUATE_DICT -from .base import EarlyStopping +from .base import EarlyStopping, BaseLinkPredictionTrainer from typing import Union from copy import deepcopy from torch_geometric.utils import negative_sampling @@ -23,8 +23,8 @@ def get_feval(feval): raise ValueError("feval argument of type", type(feval), "is not supported!") -@register_trainer("LinkPrediction") -class LinkPredictionTrainer(BaseTrainer): +@register_trainer("LinkPredictionFull") +class LinkPredictionTrainer(BaseLinkPredictionTrainer): """ The link prediction trainer. 
@@ -58,34 +58,21 @@ class LinkPredictionTrainer(BaseTrainer): def __init__( self, - model: Union[BaseModel, str], - num_features, - num_classes, + model: Union[BaseModel, str] = None, + num_features=None, optimizer=None, - lr=None, - max_epoch=None, - early_stopping_round=None, + lr=1e-4, + max_epoch=100, + early_stopping_round=101, weight_decay=1e-4, - device=None, + device='auto', init=True, feval=[Auc], loss="binary_cross_entropy_with_logits", *args, **kwargs ): - super().__init__(model, device, init, feval, loss) - - self.loss_type = loss - - if device is None: - device = "cpu" - - # init model - if isinstance(model, str): - assert model in MODEL_DICT, "Cannot parse model name " + model - self.model = MODEL_DICT[model](num_features, num_classes, device, init=init) - elif isinstance(model, BaseModel): - self.model = model + super().__init__(model, num_features, device, init, feval, loss) if type(optimizer) == str and optimizer.lower() == "adam": self.optimizer = torch.optim.Adam @@ -94,19 +81,12 @@ class LinkPredictionTrainer(BaseTrainer): else: self.optimizer = torch.optim.Adam - self.num_features = num_features - self.num_classes = num_classes - self.lr = lr if lr is not None else 1e-4 - self.max_epoch = max_epoch if max_epoch is not None else 100 - self.early_stopping_round = ( - early_stopping_round if early_stopping_round is not None else 100 - ) + self.lr = lr + self.max_epoch = max_epoch + self.early_stopping_round = early_stopping_round self.device = device self.args = args self.kwargs = kwargs - - self.feval = get_feval(feval) - self.weight_decay = weight_decay self.early_stopping = EarlyStopping( @@ -118,8 +98,6 @@ class LinkPredictionTrainer(BaseTrainer): self.valid_score = None self.initialized = False - self.num_features = num_features - self.num_classes = num_classes self.device = device self.space = [ @@ -152,7 +130,7 @@ class LinkPredictionTrainer(BaseTrainer): "scalingType": "LOG", }, ] - self.space += self.model.space + LinkPredictionTrainer.space = self.space self.hyperparams = { @@ -161,7 +139,6 @@ class LinkPredictionTrainer(BaseTrainer): "lr": self.lr, "weight_decay": self.weight_decay, } - self.hyperparams = {**self.hyperparams, **self.model.get_hyper_parameter()} if init is True: self.initialize() @@ -171,6 +148,8 @@ class LinkPredictionTrainer(BaseTrainer): if self.initialized is True: return self.initialized = True + self.model.set_num_classes(self.num_classes) + self.model.set_num_features(self.num_features) self.model.initialize() def get_model(self): @@ -203,7 +182,7 @@ class LinkPredictionTrainer(BaseTrainer): data = data.to(self.device) # mask = data.train_mask if train_mask is None else train_mask optimizer = self.optimizer( - self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay ) scheduler = StepLR(optimizer, step_size=100, gamma=0.1) for epoch in range(1, self.max_epoch): @@ -219,10 +198,10 @@ class LinkPredictionTrainer(BaseTrainer): link_logits = self.model.model.decode(z, data.train_pos_edge_index, neg_edge_index) link_labels = self.get_link_labels(data.train_pos_edge_index, neg_edge_index) # loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) - if hasattr(F, self.loss_type): - loss = getattr(F, self.loss_type)(link_logits, link_labels) + if hasattr(F, self.loss): + loss = getattr(F, self.loss)(link_logits, link_labels) else: - raise TypeError("PyTorch does not support loss type {}".format(self.loss_type)) + raise TypeError("PyTorch does not 
support loss type {}".format(self.loss))
 loss.backward()
 optimizer.step()
@@ -440,10 +419,7 @@ class LinkPredictionTrainer(BaseTrainer):
 res = []
 for f in feval:
- try:
- res.append(f.evaluate(link_probs, link_labels))
- except:
- res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy()))
+ res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy()))
 if return_signle:
 return res[0]
 return res
@@ -480,6 +456,8 @@ class LinkPredictionTrainer(BaseTrainer):
 hp = origin_hp
 if model is None:
 model = self.model
+ model.set_num_classes(self.num_classes)
+ model.set_num_features(self.num_features)
 model = model.from_hyper_parameter(
 dict(
 [
@@ -493,7 +471,6 @@ class LinkPredictionTrainer(BaseTrainer):
 ret = self.__class__(
 model=model,
 num_features=self.num_features,
- num_classes=self.num_classes,
 optimizer=self.optimizer,
 lr=hp["lr"],
 max_epoch=hp["max_epoch"],
diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py
new file mode 100644
index 0000000..d2ef6b4
--- /dev/null
+++ b/autogl/solver/classifier/link_predictor.py
@@ -0,0 +1,722 @@
+"""
+Auto Classifier for Link Prediction
+"""
+import time
+import json
+
+from copy import deepcopy
+
+import torch
+import numpy as np
+import yaml
+
+from .base import BaseClassifier
+from ..base import _parse_hp_space, _initialize_single_model
+from ...module.feature import FEATURE_DICT
+from ...module.model import MODEL_DICT, BaseModel
+from ...module.train import TRAINER_DICT, BaseLinkPredictionTrainer
+from ...module.train import get_feval
+from ..utils import Leaderboard, set_seed
+from ...datasets import utils
+from ...utils import get_logger
+
+LOGGER = get_logger("LinkPredictor")
+
+
+class AutoLinkPredictor(BaseClassifier):
+ """
+ Auto Link Predictor.
+
+ Used to automatically solve link prediction problems.
+
+ Parameters
+ ----------
+ feature_module: autogl.module.feature.BaseFeatureEngineer or str or None
+ The (name of) auto feature engineer used to process the given dataset. Default ``deepgl``.
+ Disable feature engineer by setting it to ``None``.
+
+ graph_models: list of autogl.module.model.BaseModel or list of str
+ The (name of) models to be optimized as backbone. Default ``['gat', 'gcn']``.
+
+ hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None
+ The (name of) hpo module used to search for the best hyper parameters. Default ``anneal``.
+ Disable hpo by setting it to ``None``.
+
+ ensemble_module: autogl.module.ensemble.BaseEnsembler or str or None
+ The (name of) ensemble module used to ensemble the models found. Default ``voting``.
+ Disable ensemble by setting it to ``None``.
+
+ max_evals: int (Optional)
+ If given, will set the number of eval times the hpo module will use.
+ Only effective when hpo_module is ``str``. Default ``None``.
+
+ trainer_hp_space: list of dict (Optional)
+ trainer hp space or list of trainer hp spaces configuration.
+ If a single trainer hp is given, will specify the hp space of trainer for every model.
+ If a list of trainer hp is given, will specify every model with the corresponding
+ trainer hp space.
+ Default ``None``.
+
+ model_hp_spaces: list of list of dict (Optional)
+ model hp space configuration.
+ If given, will specify every hp space of every passed model. Default ``None``.
+
+ size: int (Optional)
+ The maximum number of models the ensemble module will use. Default ``None``.
+
+ device: torch.device or str
+ The device where model will be running on. If set to ``auto``, will use gpu when available.
+ You can also specify the device by directly giving ``gpu`` or ``cuda:0``, etc. + Default ``auto``. + """ + + def __init__( + self, + feature_module=None, + graph_models=("gat", "gcn"), + hpo_module="anneal", + ensemble_module="voting", + max_evals=50, + default_trainer=None, + trainer_hp_space=None, + model_hp_spaces=None, + size=4, + device="auto", + ): + + super().__init__( + feature_module=feature_module, + graph_models=graph_models, + hpo_module=hpo_module, + ensemble_module=ensemble_module, + max_evals=max_evals, + default_trainer=default_trainer or "LinkPredictionFull", + trainer_hp_space=trainer_hp_space, + model_hp_spaces=model_hp_spaces, + size=size, + device=device, + ) + + # data to be kept when fit + self.dataset = None + + def _init_graph_module( + self, graph_models, num_features, feval, device, loss + ) -> "AutoLinkPredictor": + # load graph network module + self.graph_model_list = [] + if isinstance(graph_models, list): + for model in graph_models: + if isinstance(model, str): + if model in MODEL_DICT: + self.graph_model_list.append( + MODEL_DICT[model]( + num_classes=1, + num_features=num_features, + device=device, + init=False, + ) + ) + else: + raise KeyError("cannot find model %s" % (model)) + elif isinstance(model, type) and issubclass(model, BaseModel): + self.graph_model_list.append( + model( + num_classes=1, + num_features=num_features, + device=device, + init=False, + ) + ) + elif isinstance(model, BaseModel): + # setup the hp of num_classes and num_features + model.set_num_classes(1) + model.set_num_features(num_features) + self.graph_model_list.append(model.to(device)) + elif isinstance(model, BaseLinkPredictionTrainer): + # receive a trainer list, put trainer to list + assert ( + model.get_model() is not None + ), "Passed trainer should contain a model" + model.model.set_num_classes(1) + model.model.set_num_features(num_features) + model.update_parameters( + num_classes=1, + num_features=num_features, + loss=loss, + feval=feval, + device=device, + ) + self.graph_model_list.append(model) + else: + raise KeyError("cannot find graph network %s." % (model)) + else: + raise ValueError( + "need graph network to be (list of) str or a BaseModel class/instance, get", + graph_models, + "instead.", + ) + + # wrap all model_cls with specified trainer + for i, model in enumerate(self.graph_model_list): + # set model hp space + if self._model_hp_spaces is not None: + if self._model_hp_spaces[i] is not None: + if isinstance(model, BaseLinkPredictionTrainer): + model.model.hyper_parameter_space = self._model_hp_spaces[i] + else: + model.hyper_parameter_space = self._model_hp_spaces[i] + # initialize trainer if needed + if isinstance(model, BaseModel): + name = ( + self._default_trainer + if isinstance(self._default_trainer, str) + else self._default_trainer[i] + ) + model = TRAINER_DICT[name]( + model=model, + num_features=num_features, + loss=loss, + feval=feval, + device=device, + init=False, + ) + # set trainer hp space + if self._trainer_hp_space is not None: + if isinstance(self._trainer_hp_space[0], list): + current_hp_for_trainer = self._trainer_hp_space[i] + else: + current_hp_for_trainer = self._trainer_hp_space + model.hyper_parameter_space = current_hp_for_trainer + self.graph_model_list[i] = model + + return self + + # pylint: disable=arguments-differ + def fit( + self, + dataset, + time_limit=-1, + inplace=False, + train_split=None, + val_split=None, + evaluation_method="infer", + seed=None, + ) -> "AutoLinkPredictor": + """ + Fit current solver on given dataset. 
+
+        Parameters
+        ----------
+        dataset: torch_geometric.data.dataset.Dataset
+            The dataset needed to fit on. This dataset must have only one graph.
+
+        time_limit: int
+            The time limit of the whole fit process (in seconds). If set below 0,
+            will ignore time limit. Default ``-1``.
+
+        inplace: bool
+            Whether we process the given dataset in inplace manner. Default ``False``.
+            Set it to True if you want to save memory by modifying the given dataset directly.
+
+        train_split: float or int (Optional)
+            The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to
+            use default train/val/test split in dataset, please set this to ``None``.
+            Default ``None``.
+
+        val_split: float or int (Optional)
+            The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want
+            to use default train/val/test split in dataset, please set this to ``None``.
+            Default ``None``.
+
+        evaluation_method: (list of) str or autogl.module.train.evaluation
+            A (list of) evaluation method for current solver. If ``infer``, will automatically
+            determine. Default ``infer``.
+
+        seed: int (Optional)
+            The random seed. If set to ``None``, will run everything at random.
+            Default ``None``.
+
+        Returns
+        -------
+        self: autogl.solver.AutoLinkPredictor
+            A reference of current solver.
+        """
+        set_seed(seed)
+
+        if time_limit < 0:
+            time_limit = 3600 * 24
+        time_begin = time.time()
+
+        # initialize leaderboard
+        if evaluation_method == "infer":
+            if hasattr(dataset, "metric"):
+                evaluation_method = [dataset.metric]
+            else:
+                num_of_label = dataset.num_classes
+                if num_of_label == 2:
+                    evaluation_method = ["auc"]
+                else:
+                    evaluation_method = ["acc"]
+        assert isinstance(evaluation_method, list)
+        evaluator_list = get_feval(evaluation_method)
+
+        self.leaderboard = Leaderboard(
+            [e.get_eval_name() for e in evaluator_list],
+            {e.get_eval_name(): e.is_higher_better() for e in evaluator_list},
+        )
+
+        # set up the dataset
+        if train_split is not None and val_split is not None:
+            utils.split_edges(dataset, train_split, val_split)
+        else:
+            assert all([hasattr(dataset.data, f'{name}') for name in [
+                'train_pos_edge_index',
+                'train_neg_adj_mask',
+                'val_pos_edge_index',
+                'val_neg_edge_index',
+                'test_pos_edge_index',
+                'test_neg_edge_index'
+            ]]), (
+                "The dataset has no default train/val split! Please manually pass "
+                "train and val ratio."
+            )
+            LOGGER.info("Use the default train/val/test ratio in given dataset")
+
+        # feature engineering
+        if self.feature_module is not None:
+            dataset = self.feature_module.fit_transform(dataset, inplace=inplace)
+
+        self.dataset = dataset
+        assert self.dataset[0].x is not None, (
+            "Does not support fit on non node-feature dataset!"
+            " Please add node features to dataset or specify feature engineers that generate"
+            " node features."
+        )
+
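+        # For link prediction, every backbone below is constructed with
+        # num_classes fixed to 1, and the loss falls back to
+        # binary_cross_entropy_with_logits unless the dataset carries its own
+        # ``loss`` attribute.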
+        # initialize graph networks
+        self._init_graph_module(
+            self.gml,
+            num_features=self.dataset[0].x.shape[1],
+            feval=evaluator_list,
+            device=self.runtime_device,
+            loss="binary_cross_entropy_with_logits" if not hasattr(dataset, "loss") else dataset.loss,
+        )
+
+        # train the models and tune hpo
+        result_valid = []
+        names = []
+        for idx, model in enumerate(self.graph_model_list):
+            time_for_each_model = (time_limit - time.time() + time_begin) / (
+                len(self.graph_model_list) - idx
+            )
+            if self.hpo_module is None:
+                model.initialize()
+                model.train(self.dataset, True)
+                optimized = model
+            else:
+                optimized, _ = self.hpo_module.optimize(
+                    trainer=model, dataset=self.dataset, time_limit=time_for_each_model
+                )
+            # to save memory, all the trainer derived will be mapped to cpu
+            optimized.to(torch.device("cpu"))
+            name = optimized.get_name_with_hp() + "_idx%d" % (idx)
+            names.append(name)
+            performance_on_valid, _ = optimized.get_valid_score(return_major=False)
+            result_valid.append(optimized.get_valid_predict_proba().cpu().numpy())
+            self.leaderboard.insert_model_performance(
+                name,
+                dict(
+                    zip(
+                        [e.get_eval_name() for e in evaluator_list],
+                        performance_on_valid,
+                    )
+                ),
+            )
+            self.trained_models[name] = optimized
+
+        # fit the ensemble model
+        if self.ensemble_module is not None:
+            pos_edge_index, neg_edge_index = self.dataset[0].val_pos_edge_index, self.dataset[0].val_neg_edge_index
+            E = pos_edge_index.size(1) + neg_edge_index.size(1)
+            link_labels = torch.zeros(E, dtype=torch.float)
+            link_labels[:pos_edge_index.size(1)] = 1.
+
+            performance = self.ensemble_module.fit(
+                result_valid,
+                link_labels.detach().cpu().numpy(),
+                names,
+                evaluator_list,
+                n_classes=dataset.num_classes,
+            )
+            self.leaderboard.insert_model_performance(
+                "ensemble",
+                dict(zip([e.get_eval_name() for e in evaluator_list], performance)),
+            )
+
+        return self
+
+    def fit_predict(
+        self,
+        dataset,
+        time_limit=-1,
+        inplace=False,
+        train_split=None,
+        val_split=None,
+        evaluation_method="infer",
+        use_ensemble=True,
+        use_best=True,
+        name=None,
+    ) -> np.ndarray:
+        """
+        Fit current solver on given dataset and return the predicted value.
+
+        Parameters
+        ----------
+        dataset: torch_geometric.data.dataset.Dataset
+            The dataset needed to fit on. This dataset must have only one graph.
+
+        time_limit: int
+            The time limit of the whole fit process (in seconds).
+            If set below 0, will ignore time limit. Default ``-1``.
+
+        inplace: bool
+            Whether we process the given dataset in inplace manner. Default ``False``.
+            Set it to True if you want to save memory by modifying the given dataset directly.
+
+        train_split: float or int (Optional)
+            The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to
+            use default train/val/test split in dataset, please set this to ``None``.
+            Default ``None``.
+
+        val_split: float or int (Optional)
+            The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want
+            to use default train/val/test split in dataset, please set this to ``None``.
+            Default ``None``.
+
+        evaluation_method: (list of) str or autogl.module.train.evaluation
+            A (list of) evaluation method for current solver. If ``infer``, will automatically
+            determine. Default ``infer``.
+
+        use_ensemble: bool
+            Whether to use ensemble to do the predict. Default ``True``.
+
+        use_best: bool
+            Whether to use the best single model to do the predict. Will only be effective when
+            ``use_ensemble`` is ``False``.
+            Default ``True``.
+
+        name: str or None
+            The name of model used to predict. Will only be effective when ``use_ensemble`` and
+            ``use_best`` both are ``False``.
+            Default ``None``.
+
+        Returns
+        -------
+        result: np.ndarray
+            An array of shape ``(N,)``, where ``N`` is the number of test edges. The prediction
+            on given dataset.
+        """
+        self.fit(
+            dataset=dataset,
+            time_limit=time_limit,
+            inplace=inplace,
+            train_split=train_split,
+            val_split=val_split,
+            evaluation_method=evaluation_method,
+        )
+        return self.predict(
+            dataset=dataset,
+            inplaced=inplace,
+            inplace=inplace,
+            use_ensemble=use_ensemble,
+            use_best=use_best,
+            name=name,
+        )
+
+    def predict_proba(
+        self,
+        dataset=None,
+        inplaced=False,
+        inplace=False,
+        use_ensemble=True,
+        use_best=True,
+        name=None,
+        mask="test",
+    ) -> np.ndarray:
+        """
+        Predict the link probability.
+
+        Parameters
+        ----------
+        dataset: torch_geometric.data.dataset.Dataset or None
+            The dataset needed to predict. If ``None``, will use the processed dataset passed
+            to ``fit()`` instead. Default ``None``.
+
+        inplaced: bool
+            Whether the given dataset is processed. Only be effective when ``dataset``
+            is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``, and
+            you pass the dataset again to this method, you should set this argument to ``True``.
+            Otherwise ``False``. Default ``False``.
+
+        inplace: bool
+            Whether we process the given dataset in inplace manner. Default ``False``. Set it to
+            True if you want to save memory by modifying the given dataset directly.
+
+        use_ensemble: bool
+            Whether to use ensemble to do the predict. Default ``True``.
+
+        use_best: bool
+            Whether to use the best single model to do the predict. Will only be effective when
+            ``use_ensemble`` is ``False``. Default ``True``.
+
+        name: str or None
+            The name of model used to predict. Will only be effective when ``use_ensemble`` and
+            ``use_best`` both are ``False``. Default ``None``.
+
+        mask: str
+            The data split to give prediction on. Default ``test``.
+
+        Returns
+        -------
+        result: np.ndarray
+            An array of shape ``(N,C,)``, where ``N`` is the number of test edges and ``C`` is
+            the number of classes. The prediction on given dataset.
+        """
+        if dataset is None:
+            dataset = self.dataset
+            assert dataset is not None, (
+                "Please execute fit() first before" " predicting on remembered dataset"
+            )
+        elif not inplaced and self.feature_module is not None:
+            dataset = self.feature_module.transform(dataset, inplace=inplace)
+
+        if use_ensemble:
+            LOGGER.info("Ensemble argument on, will try using ensemble model.")
+
+        if not use_ensemble and use_best:
+            LOGGER.info(
+                "Ensemble argument off and best argument on, will try using best model."
+            )
+
+        if (use_ensemble and self.ensemble_module is not None) or (
+            not use_best and name == "ensemble"
+        ):
+            # we need to get all the prediction of every model trained
+            predict_result = []
+            names = []
+            for model_name in self.trained_models:
+                predict_result.append(
+                    self._predict_proba_by_name(dataset, model_name, mask)
+                )
+                names.append(model_name)
+            return self.ensemble_module.ensemble(predict_result, names)
+
+        if use_ensemble and self.ensemble_module is None:
+            LOGGER.warning(
+                "Cannot use ensemble because no ensemble module is given. "
+                "Will use best model instead."
+            )
+
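+        # Fallback order below: the ensemble result when available, otherwise
+        # the best single model on the leaderboard, otherwise an explicitly
+        # named model.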
+        if use_best or (use_ensemble and self.ensemble_module is None):
+            # just return the best model we have found
+            name = self.leaderboard.get_best_model()
+            return self._predict_proba_by_name(dataset, name, mask)
+
+        if name is not None:
+            # return model performance by name
+            return self._predict_proba_by_name(dataset, name, mask)
+
+        LOGGER.error(
+            "No model name is given while ensemble and best arguments are off."
+        )
+        raise ValueError(
+            "You need to specify a model name if you do not want to use ensemble and best model."
+        )
+
+    def _predict_proba_by_name(self, dataset, name, mask="test"):
+        self.trained_models[name].to(self.runtime_device)
+        predicted = (
+            self.trained_models[name].predict_proba(dataset, mask=mask).cpu().numpy()
+        )
+        self.trained_models[name].to(torch.device("cpu"))
+        return predicted
+
+    def predict(
+        self,
+        dataset=None,
+        inplaced=False,
+        inplace=False,
+        use_ensemble=True,
+        use_best=True,
+        name=None,
+        mask="test",
+    ) -> np.ndarray:
+        """
+        Predict the link class.
+
+        Parameters
+        ----------
+        dataset: torch_geometric.data.dataset.Dataset or None
+            The dataset needed to predict. If ``None``, will use the processed dataset passed
+            to ``fit()`` instead. Default ``None``.
+
+        inplaced: bool
+            Whether the given dataset is processed. Only be effective when ``dataset``
+            is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``,
+            and you pass the dataset again to this method, you should set this argument
+            to ``True``. Otherwise ``False``. Default ``False``.
+
+        inplace: bool
+            Whether we process the given dataset in inplace manner. Default ``False``.
+            Set it to True if you want to save memory by modifying the given dataset directly.
+
+        use_ensemble: bool
+            Whether to use ensemble to do the predict. Default ``True``.
+
+        use_best: bool
+            Whether to use the best single model to do the predict. Will only be effective
+            when ``use_ensemble`` is ``False``. Default ``True``.
+
+        name: str or None
+            The name of model used to predict. Will only be effective when ``use_ensemble``
+            and ``use_best`` both are ``False``. Default ``None``.
+
+        mask: str
+            The data split to give prediction on. Default ``test``.
+
+        Returns
+        -------
+        result: np.ndarray
+            An array of shape ``(N,)``, where ``N`` is the number of test edges.
+            The prediction on given dataset.
+        """
+        proba = self.predict_proba(
+            dataset, inplaced, inplace, use_ensemble, use_best, name, mask
+        )
+        return np.argmax(proba, axis=1)
+
+    @classmethod
+    def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor":
+        """
+        Load solver from config file.
+
+        You can use this function to directly load a solver from predefined config dict
+        or config file path. Currently, only support file type of ``json`` or ``yaml``,
+        if you pass a path.
+
+        Parameters
+        ----------
+        path_or_dict: str or dict
+            The path to the config file or the config dictionary object
+
+        filetype: str
+            The filetype of the given file if a path is specified. Currently only support
+            ``json`` or ``yaml``. You can set to ``auto`` to automatically detect the file
+            type (from file name). Default ``auto``.
+
+        Returns
+        -------
+        solver: autogl.solver.AutoLinkPredictor
+            The solver that is created from given file or dictionary.
+ """ + assert filetype in ["auto", "yaml", "json"], ( + "currently only support yaml file or json file type, but get type " + + filetype + ) + if isinstance(path_or_dict, str): + if filetype == "auto": + if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): + filetype = "yaml" + elif path_or_dict.endswith(".json"): + filetype = "json" + else: + LOGGER.error( + "cannot parse the type of the given file name, " + "please manually set the file type" + ) + raise ValueError( + "cannot parse the type of the given file name, " + "please manually set the file type" + ) + if filetype == "yaml": + path_or_dict = yaml.load( + open(path_or_dict, "r").read(), Loader=yaml.FullLoader + ) + else: + path_or_dict = json.load(open(path_or_dict, "r")) + + path_or_dict = deepcopy(path_or_dict) + solver = cls(None, [], None, None) + fe_list = path_or_dict.pop("feature", None) + if fe_list is not None: + fe_list_ele = [] + for feature_engineer in fe_list: + name = feature_engineer.pop("name") + if name is not None: + fe_list_ele.append(FEATURE_DICT[name](**feature_engineer)) + if fe_list_ele != []: + solver.set_feature_module(fe_list_ele) + + models = path_or_dict.pop("models", [{"name": "gcn"}, {"name": "gat"}]) + model_hp_space = [ + _parse_hp_space(model.pop("hp_space", None)) for model in models + ] + model_list = [ + _initialize_single_model(model.pop("name"), model) for model in models + ] + + trainer = path_or_dict.pop("trainer", None) + default_trainer = "LinkPredictionFull" + trainer_space = None + if isinstance(trainer, dict): + # global default + default_trainer = trainer.pop("name", "LinkPredictionFull") + trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) + default_kwargs = {"num_features": None} + default_kwargs.update(trainer) + default_kwargs["init"] = False + for i in range(len(model_list)): + model = model_list[i] + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap + elif isinstance(trainer, list): + # sequential trainer definition + assert len(trainer) == len( + model_list + ), "The number of trainer and model does not match" + trainer_space = [] + for i in range(len(model_list)): + train, model = trainer[i], model_list[i] + default_trainer = train.pop("name", "LinkPredictionFull") + trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) + default_kwargs = {"num_features": None} + default_kwargs.update(train) + default_kwargs["init"] = False + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap + + solver.set_graph_models( + model_list, default_trainer, trainer_space, model_hp_space + ) + + hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) + if hpo_dict is not None: + name = hpo_dict.pop("name") + solver.set_hpo_module(name, **hpo_dict) + + ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"}) + if ensemble_dict is not None: + name = ensemble_dict.pop("name") + solver.set_ensemble_module(name, **ensemble_dict) + + return solver diff --git a/configs/lp_gcn_benchmark_small.yml b/configs/lp_gcn_benchmark_small.yml new file mode 100644 index 0000000..8fb9d8a --- /dev/null +++ b/configs/lp_gcn_benchmark_small.yml @@ -0,0 +1,61 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + 
minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/examples/link_prediction.py b/examples/link_prediction.py index a9c9825..331ec31 100644 --- a/examples/link_prediction.py +++ b/examples/link_prediction.py @@ -6,6 +6,7 @@ from autogl.datasets import build_dataset_from_name from autogl.module.train import LinkPredictionTrainer import numpy as np from torch_geometric.utils import train_test_split_edges +from sklearn.metrics import roc_auc_score dataset = build_dataset_from_name('cora') diff --git a/examples/link_prediction_solver.py b/examples/link_prediction_solver.py new file mode 100644 index 0000000..a2051d7 --- /dev/null +++ b/examples/link_prediction_solver.py @@ -0,0 +1,90 @@ +import sys + +sys.path.append("../") +from autogl.datasets import build_dataset_from_name +from autogl.solver.classifier.link_predictor import AutoLinkPredictor +from autogl.module.train.evaluation import Auc +import yaml +import random +import torch +import numpy as np + +if __name__ == "__main__": + + from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + + parser = ArgumentParser( + "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", + default="cora", + type=str, + help="dataset to use", + choices=[ + "cora", + "pubmed", + "citeseer", + "coauthor_cs", + "coauthor_physics", + "amazon_computers", + "amazon_photo", + ], + ) + parser.add_argument( + "--configs", + type=str, + default="../configs/lp_gcn_benchmark_small.yml", + help="config to use", + ) + # following arguments will override parameters in the config file + parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods") + parser.add_argument( + "--max_eval", type=int, default=50, help="max hpo evaluation times" + ) + parser.add_argument("--seed", type=int, default=0, help="random seed") + parser.add_argument("--device", default=0, type=int, help="GPU device") + + args = parser.parse_args() + if torch.cuda.is_available(): + torch.cuda.set_device(args.device) + seed = args.seed + # set random seed + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + dataset = build_dataset_from_name(args.dataset) + + configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader) + configs["hpo"]["name"] = args.hpo + configs["hpo"]["max_evals"] = args.max_eval + autoClassifier = AutoLinkPredictor.from_config(configs) + + # train + autoClassifier.fit( + dataset, + time_limit=3600, + evaluation_method=[Auc], + seed=seed, + train_split=0.85, + val_split=0.05, + ) + autoClassifier.get_leaderboard().show() + + # test + predict_result = autoClassifier.predict_proba() + + pos_edge_index, neg_edge_index = 
dataset[0].test_pos_edge_index, dataset[0].test_neg_edge_index
+    E = pos_edge_index.size(1) + neg_edge_index.size(1)
+    link_labels = torch.zeros(E)
+    link_labels[:pos_edge_index.size(1)] = 1.
+
+    print(
+        "test auc: %.4f"
+        % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy()))
+    )

From 5c88a3e57f91b90374317581e1f1a0409fae69a0 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sat, 29 May 2021 15:17:15 +0800
Subject: [PATCH 072/144] add comparison code
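
Both scripts rely on the same inner-product decoding and labelling
convention as LinkPredictionTrainer. In miniature (the shapes and sizes
below are illustrative, not taken from the benchmarks):

    import torch
    from sklearn.metrics import roc_auc_score

    z = torch.randn(100, 64)              # node embeddings from any encoder
    pos = torch.randint(0, 100, (2, 30))  # positive edges
    neg = torch.randint(0, 100, (2, 30))  # sampled negative edges

    edge_index = torch.cat([pos, neg], dim=-1)
    # the logit of an edge is the inner product of its endpoint embeddings
    link_logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)

    # the leading block of the concatenated edge list is the positive one
    link_labels = torch.zeros(edge_index.size(1))
    link_labels[:pos.size(1)] = 1.

    auc = roc_auc_score(link_labels.numpy(), link_logits.sigmoid().numpy())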
---
 benchmark/lp_pyg.py       | 126 ++++++++++++++++++++++++++++++++++++++
 benchmark/lp_reproduce.py |  82 +++++++++++++++++++++++++
 2 files changed, 208 insertions(+)
 create mode 100644 benchmark/lp_pyg.py
 create mode 100644 benchmark/lp_reproduce.py

diff --git a/benchmark/lp_pyg.py b/benchmark/lp_pyg.py
new file mode 100644
index 0000000..32d8db4
--- /dev/null
+++ b/benchmark/lp_pyg.py
@@ -0,0 +1,126 @@
+import sys
+sys.path.append('../')
+import os.path as osp
+
+import torch
+import torch.nn.functional as F
+from sklearn.metrics import roc_auc_score
+
+from torch_geometric.utils import negative_sampling
+from torch_geometric.nn import GCNConv, GATConv, SAGEConv
+from torch_geometric.utils import train_test_split_edges
+
+import argparse
+import pickle
+import numpy as np
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', choices=['gcn', 'sage', 'gat'], type=str, default='gcn', help='model to train')
+parser.add_argument('--dataset', choices=['cora', 'citeseer', 'pubmed'], type=str, default='cora', help='dataset to evaluate')
+parser.add_argument('--times', type=int, default=10, help='time to rerun')
+
+args = parser.parse_args()
+
+DIM = 64
+dataset = pickle.load(open(f'/DATA/DATANAS1/guancy/github/AutoGL/env/cache/{args.dataset}-edge.data', 'rb'))
+data = dataset[0]
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+data = data.to(device)
+
+def _decode(z, pos_edge_index, neg_edge_index):
+    edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
+    return (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
+
+class GNN(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def encode(self, x, edge_index):
+        return self.conv2(self.conv1(x, edge_index).relu(), edge_index)
+
+class GCN(GNN):
+    def __init__(self, in_channels):
+        super().__init__()
+        self.conv1 = GCNConv(in_channels, 128)
+        self.conv2 = GCNConv(128, DIM)
+
+class GAT(GNN):
+    def __init__(self, in_channels):
+        super().__init__()
+        self.conv1 = GATConv(in_channels, 16, 8)
+        self.conv2 = GATConv(128, DIM // 8, 8)
+
+class SAGE(GNN):
+    def __init__(self, in_channels):
+        super().__init__()
+        self.conv1 = SAGEConv(in_channels, 128)
+        self.conv2 = SAGEConv(128, DIM)
+
+MODEL = {
+    'gcn': GCN,
+    'gat': GAT,
+    'sage': SAGE
+}
+
+scores = []
+
+for t in range(args.times):
+
+    model = MODEL[args.model](dataset.num_features).to(device)
+    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
+
+    def get_link_labels(pos_edge_index, neg_edge_index):
+        num_links = pos_edge_index.size(1) + neg_edge_index.size(1)
+        link_labels = torch.zeros(num_links, dtype=torch.float, device=device)
+        link_labels[:pos_edge_index.size(1)] = 1.
+        return link_labels
+
+    def train(data):
+        model.train()
+
+        neg_edge_index = negative_sampling(
+            edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes,
+            num_neg_samples=data.train_pos_edge_index.size(1))
+
+        optimizer.zero_grad()
+        z = model.encode(data.x, data.train_pos_edge_index)
+        link_logits = _decode(z, data.train_pos_edge_index, neg_edge_index)
+        link_labels = get_link_labels(data.train_pos_edge_index, neg_edge_index)
+        loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
+        loss.backward()
+        optimizer.step()
+
+        return loss
+
+
+    @torch.no_grad()
+    def test(data):
+        model.eval()
+
+        z = model.encode(data.x, data.train_pos_edge_index)
+
+        results = []
+        for prefix in ['val', 'test']:
+            pos_edge_index = data[f'{prefix}_pos_edge_index']
+            neg_edge_index = data[f'{prefix}_neg_edge_index']
+            link_logits = _decode(z, pos_edge_index, neg_edge_index)
+            link_probs = link_logits.sigmoid()
+            link_labels = get_link_labels(pos_edge_index, neg_edge_index)
+            results.append(roc_auc_score(link_labels.cpu(), link_probs.cpu()))
+        return results
+
+
+    best_val_auc = test_auc = 0
+    for epoch in range(1, 101):
+        loss = train(data)
+        val_auc, tmp_test_auc = test(data)
+        if val_auc > best_val_auc:
+            best_val_auc = val_auc
+            test_auc = tmp_test_auc
+        # print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, '
+        #       f'Test: {test_auc:.4f}')
+
+    scores.append(test_auc)
+    print('time', t, test_auc)
+print('mean', np.mean(scores), 'std', np.std(scores))
+open('lp_pyg.log', 'a').write('\t'.join([args.dataset, args.model, str(np.mean(scores)), str(np.std(scores)), '\n']))
diff --git a/benchmark/lp_reproduce.py b/benchmark/lp_reproduce.py
new file mode 100644
index 0000000..1409f17
--- /dev/null
+++ b/benchmark/lp_reproduce.py
@@ -0,0 +1,82 @@
+"""
+Used to reproduce the statistics from pyg
+"""
+
+import sys
+sys.path.append('../')
+import pickle
+import torch
+import argparse
+import numpy as np
+from sklearn.metrics import roc_auc_score
+
+from autogl.module.train import LinkPredictionTrainer
+# Fix data split
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', choices=['gcn', 'sage', 'gat'], type=str, default='gcn', help='model to train')
+parser.add_argument('--dataset', choices=['cora', 'citeseer', 'pubmed'], type=str, default='cora', help='dataset to evaluate')
+parser.add_argument('--times', type=int, default=10, help='time to rerun')
+
+args = parser.parse_args()
+
+DIM = 64
+dataset = pickle.load(open(f'/DATA/DATANAS1/guancy/github/AutoGL/env/cache/{args.dataset}-edge.data', 'rb'))
+data = dataset[0]
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+data = data.to(device)
+
+HP = {
+    'gcn' : {
+        'num_layers': 3,
+        'hidden': [128, DIM],
+        'dropout': 0.0,
+        'act': 'relu'
+    },
+    'gat' : {
+        'num_layers': 3,
+        'hidden': [16, DIM // 8],
+        'dropout': 0.0,
+        'act': 'relu',
+        'heads': 8
+    },
+    'sage': {
+        'num_layers': 3,
+        'hidden': [128, DIM],
+        'dropout': 0.0,
+        'act': 'relu',
+        'aggr': 'mean'
+    }
+}
+
+scores = []
+
+for t in range(args.times):
+
+    trainer = LinkPredictionTrainer(
+        args.model,
+        num_features=dataset.num_features,
+        lr=0.01,
+        max_epoch=100,
+        early_stopping_round=101,
+        weight_decay=0,
+        device='cuda',
+        init=False,
+        feval='auc',
+        loss="binary_cross_entropy_with_logits",
+    )
+
+    trainer = trainer.duplicate_from_hyper_parameter(HP[args.model], restricted=False)
+    trainer.train([data], keep_valid_result=True)
+    y = trainer.predict([data], 'test')
+    y_ = y.cpu().numpy()
+
+    pos_edge_index = data[f'test_pos_edge_index']
+    neg_edge_index = data[f'test_neg_edge_index']
+    link_labels = trainer.get_link_labels(pos_edge_index, neg_edge_index)
+    label = link_labels.cpu().numpy()
+    test_auc = roc_auc_score(label, y_)
+    scores.append(test_auc)
+    print('time', t, test_auc)
+print('mean', np.mean(scores), 'std', np.std(scores))
+open('lp_reproduce.log', 'a').write('\t'.join([args.dataset, args.model, str(np.mean(scores)), str(np.std(scores)), '\n']))

From 194cd502c06f5e0b9415c34ffba4d95640e16a7e Mon Sep 17 00:00:00 2001
From: null
Date: Sun, 30 May 2021 01:30:00 +0800
Subject: [PATCH 073/144] Implement layer-wise prediction for LADIES, improve the LeaderBoard

Speed up the prediction procedure by caching the immutable edges produced
by deterministic neighbor sampling for a specific batch of target nodes in
the layer-wise prediction procedure.
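
In miniature, the caching works like the sketch below (the names are
illustrative, not the ones used in the patch): one deterministic
full-neighborhood sampler is memoised per distinct batch of target nodes,
so repeated prediction calls over the same batch reuse the sampled edges
instead of resampling them.

    import torch

    class SamplerCache:
        def __init__(self):
            self._entries = []  # (target_nodes, sampler) pairs

        def get(self, targets: torch.Tensor):
            targets = targets.cpu()
            for stored, sampler in self._entries:
                if stored.size() == targets.size() and bool((stored == targets).all()):
                    return sampler
            return None

        def put(self, targets: torch.Tensor, sampler) -> None:
            self._entries.append((targets.cpu().clone(), sampler))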
---
 autogl/module/model/gcn.py                    | 269 ++++++-----
 autogl/module/train/base.py                   |   8 +-
 .../module/train/graph_classification_full.py |  36 +-
 .../module/train/node_classification_full.py  |  36 +-
 .../node_classification_sampled_trainer.py    | 418 ++++++++++++------
 .../sampling/sampler/neighbor_sampler.py      |  42 +-
 autogl/solver/__init__.py                     |   4 +-
 autogl/solver/base.py                         |   4 +-
 autogl/solver/classifier/graph_classifier.py  |  19 +-
 autogl/solver/classifier/node_classifier.py   |  14 +-
 autogl/solver/utils.py                        | 173 +++++---
 11 files changed, 648 insertions(+), 375 deletions(-)

diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py
index 72ca48a..c2e0910 100644
--- a/autogl/module/model/gcn.py
+++ b/autogl/module/model/gcn.py
@@ -1,141 +1,188 @@
 import torch
 import torch.nn.functional
-import torch_geometric
+from torch_geometric.nn.conv import GCNConv
 import typing as _typing
 
+import autogl.data
 from . import register_model
-from .base import activate_func, ClassificationModel
+from .base import activate_func, ClassificationModel, SequentialGraphNeuralNetwork
 from ...utils import get_logger
 
 LOGGER = get_logger("GCNModel")
 
 
-class GCN(torch.nn.Module):
+class GCN(SequentialGraphNeuralNetwork):
+    class _GCNLayer(torch.nn.Module):
+        def __init__(
+            self, input_channels: int, output_channels: int,
+            add_self_loops: bool = True, normalize: bool = True,
+            activation_name: _typing.Optional[str] = ...,
+            dropout_probability: _typing.Optional[float] = ...
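+            # ``...`` (Ellipsis) above marks "not provided"; both optional
+            # arguments are validated and normalized in the body below.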
+ ): + super().__init__() + self._convolution: GCNConv = GCNConv( + input_channels, output_channels, + add_self_loops=bool(add_self_loops), + normalize=bool(normalize) + ) + if ( + activation_name is not Ellipsis and + activation_name is not None and + type(activation_name) == str + ): + self._activation_name: _typing.Optional[str] = activation_name + else: + self._activation_name: _typing.Optional[str] = None + if ( + dropout_probability is not Ellipsis and + dropout_probability is not None and + type(dropout_probability) == float + ): + if dropout_probability < 0: + dropout_probability = 0 + if dropout_probability > 1: + dropout_probability = 1 + self._dropout: _typing.Optional[torch.nn.Dropout] = ( + torch.nn.Dropout(dropout_probability) + ) + else: + self._dropout: _typing.Optional[torch.nn.Dropout] = None + + def forward(self, data) -> torch.Tensor: + x: torch.Tensor = getattr(data, "x") + edge_index: torch.LongTensor = getattr(data, "edge_index") + edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight") + """ Validate the arguments """ + if not type(x) == type(edge_index) == torch.Tensor: + raise TypeError + if edge_weight is not None and ( + type(edge_weight) != torch.Tensor or + edge_index.size() != (2, edge_weight.size(0)) + ): + edge_weight: _typing.Optional[torch.Tensor] = None + + x: torch.Tensor = self._convolution.forward(x, edge_index, edge_weight) + if self._activation_name is not None: + x: torch.Tensor = activate_func(x, self._activation_name) + if self._dropout is not None: + x: torch.Tensor = self._dropout.forward(x) + return x + def __init__( - self, - num_features: int, - num_classes: int, - hidden_features: _typing.Sequence[int], - dropout: float, - activation_name: str, - add_self_loops: bool = True, - normalize: bool = True + self, + num_features: int, + num_classes: int, + hidden_features: _typing.Sequence[int], + dropout: float, + activation_name: str, + add_self_loops: bool = True, + normalize: bool = True ): super().__init__() - self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() - num_layers: int = len(hidden_features) + 1 - if num_layers == 1: - self.__convolution_layers.append( - torch_geometric.nn.GCNConv( - num_features, num_classes, - add_self_loops=add_self_loops, - normalize=normalize - ) + if len(hidden_features) == 0: + self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList( + (self._GCNLayer(num_features, num_classes, add_self_loops, normalize),) ) else: - self.__convolution_layers.append( - torch_geometric.nn.GCNConv( - num_features, hidden_features[0], - add_self_loops=add_self_loops, - normalize=normalize - ) - ) - for i in range(len(hidden_features)): - self.__convolution_layers.append( - torch_geometric.nn.GCNConv( - hidden_features[i], hidden_features[i + 1] - ) - if i + 1 < len(hidden_features) - else torch_geometric.nn.GCNConv(hidden_features[i], num_classes) - ) - self.__dropout: float = dropout - self.__activation_name: str = activation_name - - def __layer_wise_forward( - self, x: torch.Tensor, - edge_indexes: _typing.Sequence[torch.Tensor], - edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] - ) -> torch.Tensor: - assert len(edge_indexes) == len(edge_weights) == len(self.__convolution_layers) - for edge_index in edge_indexes: - if type(edge_index) != torch.Tensor: - raise TypeError - if edge_index.size(0) != 2: - raise ValueError - for edge_weight in edge_weights: - if not (edge_weight is None or type(edge_weight) == torch.Tensor): - raise TypeError + 
self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList() + self.__sequential_module_list.append(self._GCNLayer( + num_features, hidden_features[0], add_self_loops, + normalize, activation_name, dropout + )) + for hidden_feature_index in range(len(hidden_features)): + if hidden_feature_index + 1 < len(hidden_features): + self.__sequential_module_list.append(self._GCNLayer( + hidden_features[hidden_feature_index], + hidden_features[hidden_feature_index + 1], + add_self_loops, normalize, activation_name, dropout + )) + else: + self.__sequential_module_list.append(self._GCNLayer( + hidden_features[hidden_feature_index], num_classes, + add_self_loops, normalize + )) - for layer_index in range(len(self.__convolution_layers)): - x: torch.Tensor = self.__convolution_layers[layer_index]( - x, edge_indexes[layer_index], edge_weights[layer_index] - ) - if layer_index + 1 < len(self.__convolution_layers): - x = activate_func(x, self.__activation_name) - x = torch.nn.functional.dropout( - x, p=self.__dropout, training=self.training - ) + def decode(self, x: torch.Tensor) -> torch.Tensor: return torch.nn.functional.log_softmax(x, dim=1) - def __basic_forward( - self, - x: torch.Tensor, - edge_index: torch.Tensor, - edge_weight: _typing.Optional[torch.Tensor] = None, - ) -> torch.Tensor: - for layer_index in range(len(self.__convolution_layers)): - x: torch.Tensor = self.__convolution_layers[layer_index]( - x, edge_index, edge_weight - ) - if layer_index + 1 < len(self.__convolution_layers): - x = activate_func(x, self.__activation_name) - x = torch.nn.functional.dropout( - x, p=self.__dropout, training=self.training - ) - return torch.nn.functional.log_softmax(x, dim=1) + @property + def encoder_sequential_modules(self) -> torch.nn.ModuleList: + return self.__sequential_module_list - def forward(self, data) -> torch.Tensor: - if ( + def __extract_edge_indexes_and_weights(self, data) -> _typing.Union[ + _typing.Sequence[_typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]], + _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] + ]: + def __compose_edge_index_and_weight( + _edge_index: torch.LongTensor, + _edge_weight: _typing.Optional[torch.Tensor] = None + ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: + if type(_edge_index) != torch.Tensor or _edge_index.dtype != torch.int64: + raise TypeError + if _edge_weight is not None and ( + type(_edge_weight) != torch.Tensor or + _edge_index.size() != (2, _edge_weight.size(0)) + ): + _edge_weight: _typing.Optional[torch.Tensor] = None + return _edge_index, _edge_weight + + if not ( hasattr(data, "edge_indexes") and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and - len(getattr(data, "edge_indexes")) == len(self.__convolution_layers) + len(getattr(data, "edge_indexes")) == len(self.__sequential_module_list) ): - edge_indexes: _typing.Sequence[torch.Tensor] = getattr(data, "edge_indexes") - if ( + return __compose_edge_index_and_weight( + getattr(data, "edge_index"), getattr(data, "edge_weight", None) + ) + for __edge_index in getattr(data, "edge_indexes"): + if type(__edge_index) != torch.Tensor or __edge_index.dtype != torch.int64: + return __compose_edge_index_and_weight( + getattr(data, "edge_index"), getattr(data, "edge_weight", None) + ) + + if ( hasattr(data, "edge_weights") and isinstance(getattr(data, "edge_weights"), _typing.Sequence) and len(getattr(data, "edge_weights")) == len(self.__convolution_layers) - ): - edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = 
( - getattr(data, "edge_weights") - ) - else: - edge_weights: _typing.Sequence[_typing.Optional[torch.Tensor]] = [ - None for _ in range(len(self.__convolution_layers)) - ] - return self.__layer_wise_forward( - getattr(data, "x"), edge_indexes, edge_weights - ) + ): + return [ + __compose_edge_index_and_weight(_edge_index, _edge_weight) + for _edge_index, _edge_weight + in zip(getattr(data, "edge_indexes"), getattr(data, "edge_weights")) + ] else: - if not (hasattr(data, "x") and hasattr(data, "edge_index")): - raise AttributeError - if not ( - type(getattr(data, "x")) == torch.Tensor - and type(getattr(data, "edge_index")) == torch.Tensor - ): - raise TypeError + return [ + __compose_edge_index_and_weight(__edge_index) + for __edge_index in getattr(data, "edge_indexes") + ] + + def encode(self, data) -> torch.Tensor: + edge_indexes_and_weights: _typing.Union[ + _typing.Sequence[_typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]], + _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] + ] = self.__extract_edge_indexes_and_weights(data) + + if ( + (not isinstance(edge_indexes_and_weights, tuple)) + and isinstance(edge_indexes_and_weights[0], tuple) + ): + """ edge_indexes_and_weights is sequence of (edge_index, edge_weight) """ + assert len(edge_indexes_and_weights) == len(self.__sequential_module_list) x: torch.Tensor = getattr(data, "x") - edge_index: torch.LongTensor = getattr(data, "edge_index") - if ( - hasattr(data, "edge_weight") - and type(getattr(data, "edge_weight")) == torch.Tensor - and getattr(data, "edge_weight").size() == (edge_index.size(1),) + for _edge_index_and_weight, gcn in zip( + edge_indexes_and_weights, self.__sequential_module_list ): - edge_weight: _typing.Optional[torch.Tensor] = getattr( - data, "edge_weight" - ) - else: - edge_weight: _typing.Optional[torch.Tensor] = None - return self.__basic_forward(x, edge_index, edge_weight) + _temp_data = autogl.data.Data(x=x, edge_index=_edge_index_and_weight[0]) + _temp_data.edge_weight = _edge_index_and_weight[1] + x = gcn(_temp_data) + return x + else: + """ edge_indexes_and_weights is (edge_index, edge_weight) """ + x = getattr(data, "x") + for gcn in self.__sequential_module_list: + _temp_data = autogl.data.Data(x=x, edge_index=edge_indexes_and_weights[0]) + _temp_data.edge_weight = edge_indexes_and_weights[1] + x = gcn(_temp_data) + return x @register_model("gcn") diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index af26fa4..25cfa39 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -287,9 +287,11 @@ class BaseTrainer: """Get the validation score.""" raise NotImplementedError() - def get_name_with_hp(self): - """Get the name of hyperparameter.""" - raise NotImplementedError() + def __repr__(self) -> str: + raise NotImplementedError + + def __str__(self) -> str: + return repr(self) def evaluate(self, dataset, mask=None, feval=None): """ diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index 1f4bb4a..f9f548c 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -397,29 +397,19 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): else: return self.valid_score, self.feval.is_higher_better() - def get_name_with_hp(self): - # """Get the name of hyperparameter.""" - name = "-".join( - [ - str(self.optimizer), - str(self.lr), - str(self.max_epoch), - str(self.early_stopping_round), - str(self.model), - 
str(self.device), - ] - ) - name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) - ) - return name + def __repr__(self) -> dict: + return { + "trainer_name": self.__class__.__name__, + "optimizer": self.optimizer, + "learning_rate": self.lr, + "max_epoch": self.max_epoch, + "early_stopping_round": self.early_stopping_round, + "model": repr(self.model) + } + + def __str__(self) -> str: + import yaml + return yaml.dump(repr(self)) def evaluate(self, dataset, mask="val", feval=None): """ diff --git a/autogl/module/train/node_classification_full.py b/autogl/module/train/node_classification_full.py index e5f4937..0fca8c7 100644 --- a/autogl/module/train/node_classification_full.py +++ b/autogl/module/train/node_classification_full.py @@ -373,29 +373,19 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): else: return self.valid_score, self.feval.is_higher_better() - def get_name_with_hp(self): - # """Get the name of hyperparameter.""" - name = "-".join( - [ - str(self.optimizer), - str(self.lr), - str(self.max_epoch), - str(self.early_stopping_round), - str(self.model), - str(self.device), - ] - ) - name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) - ) - return name + def __repr__(self) -> dict: + return { + "trainer_name": self.__class__.__name__, + "optimizer": self.optimizer, + "learning_rate": self.lr, + "max_epoch": self.max_epoch, + "early_stopping_round": self.early_stopping_round, + "model": repr(self.model) + } + + def __str__(self) -> str: + import yaml + return yaml.dump(repr(self)) def evaluate(self, dataset, mask=None, feval=None): """ diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index b2a9f84..895ca32 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -4,7 +4,6 @@ import logging import typing as _typing import torch.nn.functional import torch.utils.data -import tqdm import autogl.data from .. import register_trainer @@ -215,8 +214,8 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): """ Validate performance """ if ( - hasattr(data, "val_mask") - and type(getattr(data, "val_mask")) == torch.Tensor + hasattr(data, "val_mask") + and type(getattr(data, "val_mask")) == torch.Tensor ): validation_results: _typing.Sequence[float] = self.evaluate( (data,), "val", [self.feval[0]] @@ -246,7 +245,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return predicted_x def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, in_log_format=False + self, dataset, mask: _typing.Optional[str] = None, in_log_format=False ): """ The function of predicting the probability on the given dataset. 
@@ -274,12 +273,12 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] def evaluate( - self, - dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = None, + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: data = dataset[0] data = data.to(self.device) @@ -335,7 +334,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return self._valid_result_prob def get_valid_score( - self, return_major: bool = True + self, return_major: bool = True ) -> _typing.Tuple[ _typing.Union[float, _typing.Sequence[float]], _typing.Union[bool, _typing.Sequence[bool]], @@ -353,39 +352,30 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): @hyper_parameter_space.setter def hyper_parameter_space( - self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] ) -> None: if not isinstance(hp_space, _typing.Sequence): raise TypeError self._hyper_parameter_space = hp_space - def get_name_with_hp(self) -> str: - name = "-".join( - [ - str(self._optimizer_class), - str(self._learning_rate), - str(self._max_epoch), - str(self._early_stopping.patience), - str(self.model), - str(self.device), - ] - ) - name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) - ) - return name + def __repr__(self) -> dict: + return { + "trainer_name": self.__class__.__name__, + "optimizer": self.optimizer, + "learning_rate": self.lr, + "max_epoch": self.max_epoch, + "early_stopping_round": self.early_stopping_round, + "model": repr(self.model) + } + + def __str__(self) -> str: + import yaml + return yaml.dump(repr(self)) def duplicate_from_hyper_parameter( - self, - hp: _typing.Dict[str, _typing.Any], - model: _typing.Optional[BaseModel] = None, + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None, ) -> "NodeClassificationGraphSAINTTrainer": if model is None or not isinstance(model, BaseModel): model: BaseModel = self.model @@ -412,6 +402,44 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ) +class _DeterministicNeighborSamplerStore: + def __init__(self): + self.__neighbor_sampler_mapping: _typing.List[ + _typing.Tuple[torch.LongTensor, NeighborSampler] + ] = [] + + @classmethod + def __is_target_node_indexes_equal(cls, a: torch.LongTensor, b: torch.LongTensor) -> bool: + if not a.dtype == b.dtype == torch.int64: + return False + if a.size() != b.size(): + return False + return torch.where(a != b)[0].size(0) == 0 + + def __setitem__(self, target_nodes: torch.Tensor, neighbor_sampler: NeighborSampler): + target_nodes: _typing.Any = target_nodes.cpu() + if type(target_nodes) != torch.Tensor or target_nodes.dtype != torch.int64: + raise TypeError + if type(neighbor_sampler) != NeighborSampler: + raise TypeError + for i in range(len(self.__neighbor_sampler_mapping)): + if self.__is_target_node_indexes_equal( + target_nodes, self.__neighbor_sampler_mapping[i][0] + ): + self.__neighbor_sampler_mapping[i] = (target_nodes, neighbor_sampler) + return + self.__neighbor_sampler_mapping.append((target_nodes, neighbor_sampler)) + + def __getitem__(self, 
target_nodes: torch.Tensor) -> _typing.Optional[NeighborSampler]: + target_nodes: _typing.Any = target_nodes.cpu() + if type(target_nodes) != torch.Tensor or target_nodes.dtype != torch.int64: + raise TypeError + for __current_target_nodes, __neighbor_sampler in self.__neighbor_sampler_mapping: + if self.__is_target_node_indexes_equal(target_nodes, __current_target_nodes): + return __neighbor_sampler + return None + + @register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer") class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer): def __init__( @@ -471,6 +499,9 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () + """ Set hyper parameters """ + self.__sampled_node_sizes: _typing.Sequence[int] = kwargs.get("sampled_node_sizes") + self.__training_batch_size: int = kwargs.get("training_batch_size", 1024) if not self.__training_batch_size > 0: self.__training_batch_size: int = 1024 @@ -494,8 +525,9 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi model, num_features, num_classes, device, init, feval, loss ) - """ Set hyper parameters """ - self.__sampled_node_sizes: _typing.Sequence[int] = kwargs.get("sampled_node_sizes") + self.__neighbor_sampler_store: _DeterministicNeighborSamplerStore = ( + _DeterministicNeighborSamplerStore() + ) self.__is_initialized: bool = False if init: @@ -632,54 +664,150 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi :param mask_or_target_nodes_indexes: ... :return: the result of prediction on the given dataset """ - if mask_or_target_nodes_indexes.dtype == torch.bool: - target_nodes_indexes: _typing.Any = ( - torch.where(mask_or_target_nodes_indexes)[0] - ) - else: - target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() + self.model.model.eval() + integral_data = integral_data.to(torch.device("cpu")) + mask_or_target_nodes_indexes = mask_or_target_nodes_indexes.to(torch.device("cpu")) + if isinstance(self.model.model, SequentialGraphNeuralNetwork): + sequential_gnn_model: SequentialGraphNeuralNetwork = self.model.model + __num_layers: int = len(self.__sampled_node_sizes) - neighbor_sampler: NeighborSampler = NeighborSampler( - torch_geometric.utils.add_remaining_self_loops(integral_data.edge_index)[0], - target_nodes_indexes, [-1 for _ in self.__sampled_node_sizes], - batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, - shuffle=False - ) + x: torch.Tensor = getattr(integral_data, "x") + for _current_layer_index in range(__num_layers - 1): + __next_x: _typing.Optional[torch.Tensor] = None - prediction_batch_cumulative_builder = ( - EvaluatorUtility.PredictionBatchCumulativeBuilder() - ) - self.model.model.eval() - for sampled_data in neighbor_sampler: - sampled_data: TargetDependantSampledData = sampled_data - sampled_graph: autogl.data.Data = autogl.data.Data( - integral_data.x[sampled_data.all_sampled_nodes_indexes], - integral_data.y[sampled_data.all_sampled_nodes_indexes] - ) - sampled_graph.to(self.device) - sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ - current_layer.edge_index_for_sampled_graph.to(self.device) - for current_layer in sampled_data.sampled_edges_for_layers - ] - sampled_graph.edge_weights: _typing.Sequence[torch.FloatTensor] = [ - current_layer.edge_weight.to(self.device) - for current_layer in 
sampled_data.sampled_edges_for_layers - ] + _optional_neighbor_sampler: _typing.Optional[NeighborSampler] = ( + self.__neighbor_sampler_store[torch.arange(x.size(0))] + ) + if ( + _optional_neighbor_sampler is not None and + type(_optional_neighbor_sampler) == NeighborSampler + ): + current_neighbor_sampler: NeighborSampler = _optional_neighbor_sampler + else: + current_neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, torch.arange(x.size(0)).unique(), + [-1], batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, shuffle=False + ) + self.__neighbor_sampler_store[torch.arange(x.size(0))] = current_neighbor_sampler + + for _target_dependant_sampled_data in current_neighbor_sampler: + _target_dependant_sampled_data: TargetDependantSampledData = ( + _target_dependant_sampled_data + ) + _sampled_graph: autogl.data.Data = autogl.data.Data( + x=x[_target_dependant_sampled_data.all_sampled_nodes_indexes], + edge_index=( + _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_index_for_sampled_graph + ) + ) + _sampled_graph.edge_weight: torch.Tensor = ( + _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_weight + ) + _sampled_graph: autogl.data.Data = _sampled_graph.to(self.device) - with torch.no_grad(): - prediction_batch_cumulative_builder.add_batch( - sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), - self.model.model(sampled_graph)[ - sampled_data.target_nodes_indexes.indexes_in_sampled_graph - ].cpu().numpy() + with torch.no_grad(): + __sampled_graph_inferences: torch.Tensor = ( + sequential_gnn_model.encoder_sequential_modules[_current_layer_index](_sampled_graph) + ) + _sampled_target_nodes_inferences: torch.Tensor = __sampled_graph_inferences[ + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ].cpu() + if __next_x is None: + __next_x: torch.Tensor = torch.zeros(x.size(0), __sampled_graph_inferences.size(1)) + __next_x[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph] = ( + _sampled_target_nodes_inferences + ) + x: torch.Tensor = __next_x + " The following procedures are for the top layer " + if mask_or_target_nodes_indexes.dtype == torch.bool: + target_nodes_indexes: _typing.Any = ( + torch.where(mask_or_target_nodes_indexes)[0] ) + else: + target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() - return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) + _optional_neighbor_sampler: _typing.Optional[NeighborSampler] = ( + self.__neighbor_sampler_store[target_nodes_indexes] + ) + if ( + _optional_neighbor_sampler is not None and + type(_optional_neighbor_sampler) == NeighborSampler + ): + current_neighbor_sampler: NeighborSampler = _optional_neighbor_sampler + else: + current_neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, target_nodes_indexes, + [-1], batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, shuffle=False + ) + self.__neighbor_sampler_store[target_nodes_indexes] = current_neighbor_sampler + + prediction_batch_cumulative_builder = ( + EvaluatorUtility.PredictionBatchCumulativeBuilder() + ) + for _target_dependant_sampled_data in current_neighbor_sampler: + _sampled_graph: autogl.data.Data = autogl.data.Data( + x[_target_dependant_sampled_data.all_sampled_nodes_indexes], + _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_index_for_sampled_graph + ) + _sampled_graph.edge_weight: 
torch.Tensor = ( + _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_weight + ) + _sampled_graph: autogl.data.Data = _sampled_graph.to(self.device) + with torch.no_grad(): + prediction_batch_cumulative_builder.add_batch( + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), + sequential_gnn_model.decode( + sequential_gnn_model.encoder_sequential_modules[-1](_sampled_graph) + )[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph].cpu().numpy() + ) + return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) + else: + if mask_or_target_nodes_indexes.dtype == torch.bool: + target_nodes_indexes: _typing.Any = ( + torch.where(mask_or_target_nodes_indexes)[0] + ) + else: + target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() + neighbor_sampler: NeighborSampler = NeighborSampler( + torch_geometric.utils.add_remaining_self_loops(integral_data.edge_index)[0], + target_nodes_indexes, [-1 for _ in self.__sampled_node_sizes], + batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, + shuffle=False + ) + prediction_batch_cumulative_builder = ( + EvaluatorUtility.PredictionBatchCumulativeBuilder() + ) + self.model.model.eval() + for sampled_data in neighbor_sampler: + sampled_data: TargetDependantSampledData = sampled_data + sampled_graph: autogl.data.Data = autogl.data.Data( + integral_data.x[sampled_data.all_sampled_nodes_indexes], + integral_data.y[sampled_data.all_sampled_nodes_indexes] + ) + sampled_graph.to(self.device) + sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ + current_layer.edge_index_for_sampled_graph.to(self.device) + for current_layer in sampled_data.sampled_edges_for_layers + ] + sampled_graph.edge_weights: _typing.Sequence[torch.FloatTensor] = [ + current_layer.edge_weight.to(self.device) + for current_layer in sampled_data.sampled_edges_for_layers + ] + with torch.no_grad(): + prediction_batch_cumulative_builder.add_batch( + sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), + self.model.model(sampled_graph)[ + sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ].cpu().numpy() + ) + return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) def predict_proba( - self, dataset, mask: _typing.Optional[str]=None, - in_log_format: bool=False + self, dataset, mask: _typing.Optional[str] = None, + in_log_format: bool = False ): """ The function of predicting the probability on the given dataset. 
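Note on the caching introduced above: a full-neighborhood sampler (sampling size [-1], shuffle=False) is deterministic, so for a given target-node set it always yields the same batches and can be built once and reused by every later predict call. A minimal sketch of that idea in plain PyTorch — SamplerCache and its methods are illustrative names, not AutoGL's actual _DeterministicNeighborSamplerStore API:

import typing
import torch

class SamplerCache:
    """Illustrative cache keyed by the target-node set (not AutoGL's real class)."""

    def __init__(self) -> None:
        # Stored as (target_nodes, sampler) pairs, scanned linearly on lookup.
        self._entries: typing.List[typing.Tuple[torch.Tensor, typing.Any]] = []

    @staticmethod
    def _same_targets(a: torch.Tensor, b: torch.Tensor) -> bool:
        # Two requests hit the same slot iff they cover the same node set.
        a, b = a.unique(sorted=True), b.unique(sorted=True)
        return a.numel() == b.numel() and bool(torch.equal(a, b))

    def get(self, target_nodes: torch.Tensor) -> typing.Any:
        for nodes, sampler in self._entries:
            if self._same_targets(target_nodes, nodes):
                return sampler
        return None

    def put(self, target_nodes: torch.Tensor, sampler: typing.Any) -> None:
        self._entries.append((target_nodes.cpu(), sampler))

The linear scan mirrors the mapping walk in the store used by the trainer; hashing the sorted node indexes (for example, the raw bytes of the sorted tensor) would give O(1) lookups at the cost of extra bookkeeping.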
@@ -688,7 +816,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi :param in_log_format: :return: """ - data = dataset[0].to(self.device) + data = dataset[0].to(torch.device("cpu")) if mask is not None and type(mask) == str: if mask.lower() == "train": _mask: torch.BoolTensor = data.train_mask @@ -750,6 +878,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi data = dataset[0] self.__train_only(data) if keep_valid_result: + data = data.to(torch.device("cpu")) prediction: torch.Tensor = self.__predict_only(data, data.val_mask) self._valid_result: torch.Tensor = prediction.max(1)[1] self._valid_result_prob: torch.Tensor = prediction @@ -784,28 +913,19 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi raise TypeError self._hyper_parameter_space = hp_space - def get_name_with_hp(self) -> str: - name = "-".join( - [ - str(self._optimizer_class), - str(self._learning_rate), - str(self._max_epoch), - str(self._early_stopping.patience), - str(self.model), - str(self.device), - ] - ) - name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) + def __repr__(self) -> str: + import yaml + return yaml.dump( + { + "trainer_name": self.__class__.__name__, + "optimizer": self._optimizer_class, + "learning_rate": self._learning_rate, + "max_epoch": self._max_epoch, + "early_stopping_round": self._early_stopping.patience, + "sampling_sizes": self.__sampled_node_sizes, + "model": repr(self.model) + } ) - return name def duplicate_from_hyper_parameter( self, @@ -896,6 +1016,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () + """ Set hyper-parameter """ + self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") + self.__training_batch_size: int = kwargs.get("training_batch_size", 1024) if not self.__training_batch_size > 0: self.__training_batch_size: int = 1024 @@ -919,8 +1042,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): model, num_features, num_classes, device, init, feval, loss ) - """ Set hyper-parameter """ - self.__sampling_sizes: _typing.Sequence[int] = kwargs.get("sampling_sizes") + self.__neighbor_sampler_store: _DeterministicNeighborSamplerStore = ( + _DeterministicNeighborSamplerStore() + ) self.__is_initialized: bool = False if init: @@ -994,7 +1118,6 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): for sampled_data in neighbor_sampler: optimizer.zero_grad() sampled_data: TargetDependantSampledData = sampled_data - # 由于现在的Model设计是接受Data的,所以只能组装一个采样的Data作为参数 sampled_graph: autogl.data.Data = autogl.data.Data( x=integral_data.x[sampled_data.all_sampled_nodes_indexes], y=integral_data.y[sampled_data.all_sampled_nodes_indexes] @@ -1057,6 +1180,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): """ self.model.model.eval() integral_data = integral_data.to(torch.device("cpu")) + mask_or_target_nodes_indexes = mask_or_target_nodes_indexes.to(torch.device("cpu")) if isinstance(self.model.model, SequentialGraphNeuralNetwork): sequential_gnn_model: SequentialGraphNeuralNetwork = self.model.model __num_layers: int = len(self.__sampling_sizes) @@ -1064,11 +1188,24 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): x: torch.Tensor = 
getattr(integral_data, "x") for _current_layer_index in range(__num_layers - 1): __next_x: _typing.Optional[torch.Tensor] = None - current_neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, torch.arange(x.size(0)).unique(), - [-1], batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, shuffle=False + + _optional_neighbor_sampler: _typing.Optional[NeighborSampler] = ( + self.__neighbor_sampler_store[torch.arange(x.size(0)).unique()] ) + if ( + _optional_neighbor_sampler is not None and + type(_optional_neighbor_sampler) == NeighborSampler + ): + current_neighbor_sampler: NeighborSampler = _optional_neighbor_sampler + else: + current_neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, torch.arange(x.size(0)).unique(), + [-1], batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, shuffle=False + ) + __temp: _typing.Any = torch.arange(x.size(0)) + self.__neighbor_sampler_store[__temp] = current_neighbor_sampler + for _target_dependant_sampled_data in current_neighbor_sampler: _target_dependant_sampled_data: TargetDependantSampledData = ( _target_dependant_sampled_data @@ -1085,13 +1222,13 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): __sampled_graph_inferences: torch.Tensor = ( sequential_gnn_model.encoder_sequential_modules[_current_layer_index](_sampled_graph) ) - __sampled_graph_inferences: torch.Tensor = __sampled_graph_inferences.cpu() + _sampled_target_nodes_inferences: torch.Tensor = __sampled_graph_inferences[ + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ].cpu() if __next_x is None: __next_x: torch.Tensor = torch.zeros(x.size(0), __sampled_graph_inferences.size(1)) __next_x[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph] = ( - __sampled_graph_inferences[ - _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph - ] + _sampled_target_nodes_inferences ) x: torch.Tensor = __next_x # The following procedures are for the top layer @@ -1102,11 +1239,22 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): else: target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() - current_neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, target_nodes_indexes, - [-1], batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, shuffle=False + _optional_neighbor_sampler: _typing.Optional[NeighborSampler] = ( + self.__neighbor_sampler_store[target_nodes_indexes] ) + if ( + _optional_neighbor_sampler is not None and + type(_optional_neighbor_sampler) == NeighborSampler + ): + current_neighbor_sampler: NeighborSampler = _optional_neighbor_sampler + else: + current_neighbor_sampler: NeighborSampler = NeighborSampler( + integral_data.edge_index, target_nodes_indexes, + [-1], batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, shuffle=False + ) + self.__neighbor_sampler_store[target_nodes_indexes] = current_neighbor_sampler + prediction_batch_cumulative_builder = ( EvaluatorUtility.PredictionBatchCumulativeBuilder() ) @@ -1173,7 +1321,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): :param in_log_format: :return: """ - data = dataset[0].to(self.device) + data = dataset[0].to(torch.device("cpu")) if mask is not None and type(mask) == str: if mask.lower() == "train": 
_mask: torch.BoolTensor = data.train_mask @@ -1238,6 +1386,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): data = dataset[0] self.__train_only(data) if keep_valid_result: + data = data.to(torch.device("cpu")) prediction: torch.Tensor = self.__predict_only(data, data.val_mask) self._valid_result: torch.Tensor = prediction.max(1)[1] self._valid_result_prob: torch.Tensor = prediction @@ -1272,28 +1421,19 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): raise TypeError self._hyper_parameter_space = hp_space - def get_name_with_hp(self) -> str: - name = "-".join( - [ - str(self._optimizer_class), - str(self._learning_rate), - str(self._max_epoch), - str(self._early_stopping.patience), - str(self.model), - str(self.device), - ] - ) - name = ( - name - + "|" - + "-".join( - [ - str(x[0]) + "-" + str(x[1]) - for x in self.model.get_hyper_parameter().items() - ] - ) + def __repr__(self) -> str: + import yaml + return yaml.dump( + { + "trainer_name": self.__class__.__name__, + "optimizer": self._optimizer_class, + "learning_rate": self._learning_rate, + "max_epoch": self._max_epoch, + "early_stopping_round": self._early_stopping.patience, + "sampling_sizes": self.__sampling_sizes, + "model": repr(self.model) + } ) - return name def duplicate_from_hyper_parameter( self, diff --git a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index 97e8e66..540a36a 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -5,6 +5,16 @@ from .target_dependant_sampler import TargetDependantSampler, TargetDependantSam class NeighborSampler(TargetDependantSampler, _typing.Iterable): + class _SequenceDataset(torch.utils.data.Dataset): + def __init__(self, sequence): + self.__sequence = sequence + + def __len__(self): + return len(self.__sequence) + + def __getitem__(self, idx): + return self.__sequence[idx] + @classmethod def __compute_edge_weight(cls, edge_index: torch.LongTensor) -> torch.Tensor: __num_nodes = max(int(edge_index[0].max()), int(edge_index[1].max())) + 1 @@ -28,6 +38,17 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): batch_size: int = 1, num_workers: int = 0, shuffle: bool = True, **kwargs ): + def is_deterministic(__cached: bool = bool(kwargs.get("cached", True))) -> bool: + if not __cached: + return False + _deterministic: bool = True + for _sampling_size in sampling_sizes: + if type(_sampling_size) != int: + raise TypeError("The sampling_sizes argument must be a sequence of integer") + if _sampling_size >= 0: + _deterministic = False + break + return _deterministic self.__edge_weight: torch.Tensor = self.__compute_edge_weight(edge_index) self.__pyg_neighbor_sampler: torch_geometric.data.NeighborSampler = ( torch_geometric.data.NeighborSampler( @@ -37,6 +58,16 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): ) ) + if is_deterministic(): + pyg_neighbor_sampler: _typing.Iterable = self.__pyg_neighbor_sampler + self.__cached_sampled_data_list: _typing.Optional[ + _typing.List[TargetDependantSampledData] + ] = [sampled_data for sampled_data in pyg_neighbor_sampler] + else: + self.__cached_sampled_data_list: _typing.Optional[ + _typing.List[TargetDependantSampledData] + ] = None + def _transform( self, batch_size: int, n_id: torch.LongTensor, adj_or_adj_list: _typing.Union[ @@ -68,7 +99,16 @@ class NeighborSampler(TargetDependantSampler, 
_typing.Iterable): ) def __iter__(self): - return iter(self.__pyg_neighbor_sampler) + if ( + self.__cached_sampled_data_list is not None and + isinstance(self.__cached_sampled_data_list, _typing.Sequence) + ): + return iter(torch.utils.data.DataLoader( + self._SequenceDataset(self.__cached_sampled_data_list), + collate_fn=lambda x: x[0] + )) + else: + return iter(self.__pyg_neighbor_sampler) @classmethod def create_basic_sampler( diff --git a/autogl/solver/__init__.py b/autogl/solver/__init__.py index 54172f8..7fce9f4 100644 --- a/autogl/solver/__init__.py +++ b/autogl/solver/__init__.py @@ -3,6 +3,4 @@ Auto solver for various graph tasks """ from .classifier import AutoGraphClassifier, AutoNodeClassifier -from .utils import Leaderboard - -__all__ = ["AutoNodeClassifier", "AutoGraphClassifier", "Leaderboard"] +from .utils import LeaderBoard diff --git a/autogl/solver/base.py b/autogl/solver/base.py index 94e7c0a..693b89d 100644 --- a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -12,7 +12,7 @@ from ..module.feature import FEATURE_DICT from ..module.hpo import HPO_DICT from ..module.model import MODEL_DICT from ..module import BaseFeatureAtom, BaseHPOptimizer, BaseTrainer -from .utils import Leaderboard +from .utils import LeaderBoard from ..utils import get_logger LOGGER = get_logger("BaseSolver") @@ -307,7 +307,7 @@ class BaseSolver: """ raise NotImplementedError() - def get_leaderboard(self) -> Leaderboard: + def get_leaderboard(self) -> LeaderBoard: r""" Get the current leaderboard of this solver. diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 7427e13..a4c0674 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -15,7 +15,7 @@ from ...module.feature import FEATURE_DICT from ...module.model import BaseModel, MODEL_DICT from ...module.train import TRAINER_DICT, get_feval, BaseGraphClassificationTrainer from ..base import _initialize_single_model, _parse_hp_space -from ..utils import Leaderboard, set_seed +from ..utils import LeaderBoard, set_seed from ...datasets import utils from ...utils import get_logger @@ -284,7 +284,7 @@ class AutoGraphClassifier(BaseClassifier): assert isinstance(evaluation_method, list) evaluator_list = get_feval(evaluation_method) - self.leaderboard = Leaderboard( + self.leaderboard = LeaderBoard( [e.get_eval_name() for e in evaluator_list], {e.get_eval_name(): e.is_higher_better() for e in evaluator_list}, ) @@ -365,14 +365,15 @@ class AutoGraphClassifier(BaseClassifier): ) # to save memory, all the trainer derived will be mapped to cpu optimized.to(torch.device("cpu")) - name = optimized.get_name_with_hp() + name = str(optimized) names.append(name) performance_on_valid, _ = optimized.get_valid_score(return_major=False) result_valid.append( optimized.get_valid_predict_proba().detach().cpu().numpy() ) - self.leaderboard.insert_model_performance( + self.leaderboard.add_performance( name, + repr(optimized), dict( zip( [e.get_eval_name() for e in evaluator_list], @@ -409,7 +410,7 @@ class AutoGraphClassifier(BaseClassifier): ) # to save memory, all the trainer derived will be mapped to cpu optimized.to(torch.device("cpu")) - name = optimized.get_name_with_hp() + "_cv%d_idx%d" % (i, idx) + name = str(optimized) + "_cv%d_idx%d" % (i, idx) names.append(name) # evaluate on val_split of input dataset performance_on_valid = optimized.evaluate(dataset, mask="val") @@ -419,8 +420,8 @@ class AutoGraphClassifier(BaseClassifier): .cpu() .numpy() ) - 
self.leaderboard.insert_model_performance( - name, + self.leaderboard.add_performance( + name, repr(optimized), dict( zip( [e.get_eval_name() for e in evaluator_list], @@ -439,8 +440,8 @@ class AutoGraphClassifier(BaseClassifier): evaluator_list, n_classes=dataset.num_classes, ) - self.leaderboard.insert_model_performance( - "ensemble", + self.leaderboard.add_performance( + "ensemble", "ensemble", dict(zip([e.get_eval_name() for e in evaluator_list], performance)), ) diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index cd0ed86..18ffe6e 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -16,7 +16,7 @@ from ...module.feature import FEATURE_DICT from ...module.model import MODEL_DICT, BaseModel from ...module.train import TRAINER_DICT, BaseNodeClassificationTrainer from ...module.train import get_feval -from ..utils import Leaderboard, set_seed +from ..utils import LeaderBoard, set_seed from ...datasets import utils from ...utils import get_logger @@ -268,7 +268,7 @@ class AutoNodeClassifier(BaseClassifier): assert isinstance(evaluation_method, list) evaluator_list = get_feval(evaluation_method) - self.leaderboard = Leaderboard( + self.leaderboard = LeaderBoard( [e.get_eval_name() for e in evaluator_list], {e.get_eval_name(): e.is_higher_better() for e in evaluator_list}, ) @@ -340,12 +340,12 @@ class AutoNodeClassifier(BaseClassifier): ) # to save memory, all the trainer derived will be mapped to cpu optimized.to(torch.device("cpu")) - name = optimized.get_name_with_hp() + "_idx%d" % (idx) + name = str(optimized) names.append(name) performance_on_valid, _ = optimized.get_valid_score(return_major=False) result_valid.append(optimized.get_valid_predict_proba().cpu().numpy()) - self.leaderboard.insert_model_performance( - name, + self.leaderboard.add_performance( + name, repr(optimized), dict( zip( [e.get_eval_name() for e in evaluator_list], @@ -364,8 +364,8 @@ class AutoNodeClassifier(BaseClassifier): evaluator_list, n_classes=dataset.num_classes, ) - self.leaderboard.insert_model_performance( - "ensemble", + self.leaderboard.add_performance( + "ensemble", "ensemble", dict(zip([e.get_eval_name() for e in evaluator_list], performance)), ) diff --git a/autogl/solver/utils.py b/autogl/solver/utils.py index 89cad69..0dae7e0 100644 --- a/autogl/solver/utils.py +++ b/autogl/solver/utils.py @@ -1,23 +1,23 @@ """ -Util tools used by solver +Utilities used by the solver -* leaderboard: The leaderboard that maintains the performance of models. +* LeaderBoard: The LeaderBoard that maintains the performance of models. """ import random - -import torch +import typing as _typing +import torch.backends.cudnn import numpy as np import pandas as pd from ..utils import get_logger -LOGGER = get_logger("leaderboard") +LOGGER = get_logger("LeaderBoard") -class Leaderboard: +class LeaderBoard: """ - The leaderboard that can be used to store / sort the model performance automatically. + The LeaderBoard that can be used to store / sort the model performance automatically. Parameters ---------- @@ -29,16 +29,38 @@ class Leaderboard: A list of indicator that whether the field score is higher better. 
""" - def __init__(self, fields, is_higher_better): - assert isinstance(fields, list) - self.keys = ["name"] + fields - self.perform_dict = pd.DataFrame(columns=self.keys) - self.is_higher_better = is_higher_better - self.major_field = fields[0] + def __init__( + self, fields: _typing.Sequence[str], + is_higher_better: _typing.Union[ + _typing.Sequence[bool], + _typing.Dict[str, bool] + ] + ): + if not isinstance(fields, _typing.Sequence): + raise TypeError + for _field in fields: + if type(_field) != str: + raise TypeError + if isinstance(is_higher_better, dict): + self.__is_higher_better: _typing.Sequence[bool] = [ + bool(is_higher_better[field]) for field in fields + ] + elif isinstance(is_higher_better, _typing.Sequence): + self.__is_higher_better: _typing.Sequence[bool] = [ + bool(item) for item in is_higher_better + ] + else: + raise TypeError + self.__fields: _typing.Sequence[str] = fields + self.__major_field: str = fields[0] + + self.__performance_data_frame: pd.DataFrame = pd.DataFrame( + columns=["name", "representation"] + list(fields) + ) def set_major_field(self, field) -> None: """ - Set the major field of current leaderboard. + Set the major field of current LeaderBoard. Parameters ---------- @@ -49,64 +71,74 @@ class Leaderboard: ------- None """ - if field in self.keys and not field == "name": - self.major_field = field + if field in self.__fields: + self.__major_field = field else: LOGGER.warning( - "do not find major field %s in current leaderboard, will ignore.", field + "do not find major field %s in the current LeaderBoard, will ignore.", field ) - def insert_model_performance(self, name, performance) -> None: + def add_performance( + self, name: str, + representation: _typing.Union[str, _typing.Dict[str, _typing.Any]], + performance: _typing.Dict[str, float] + ) -> 'LeaderBoard': """ - Add/Override a record of model performance. If name given is already in the leaderboard, - will overrride the slot. + Add a record of model performance. Parameters ---------- name: `str` The model name/identifier that identifies the model. + representation: `str` or `dict` + The representation of the corresponding methodology. + performance: `dict` The performance dict. The key inside the dict should be the fields when initialized. The value of the dict should be the corresponding scores. Returns ------- - None + self: + this `LeaderBoard` instance for chained call """ - if name not in self.perform_dict["name"]: - # we just add a new row - performance["name"] = name - new = pd.DataFrame(performance, index=[0]) - self.perform_dict = self.perform_dict.append(new, ignore_index=True) + import yaml + if isinstance(representation, dict): + __representation: str = yaml.dump(representation) + elif isinstance(representation, str): + __representation: str = representation else: - LOGGER.warning( - "model already in the leaderboard, will override current result." - ) - self.remove_model_performance(name) - self.insert_model_performance(name, performance) + raise TypeError - def remove_model_performance(self, name) -> None: + __dict = {"name": name, "representation": __representation} + __dict.update(performance) + self.__performance_data_frame = self.__performance_data_frame.append( + pd.DataFrame(__dict, index=[0]), ignore_index=True + ) + return self + + def insert_model_performance( + self, name: str, performance: _typing.Dict[str, _typing.Any] + ) -> None: """ - Remove the record of given models. + Add a record of model performance. 
+ todo: This method will be deprecated Parameters ---------- name: `str` - The model name/identifier that needed to be removed. + The model name/identifier that identifies the model. + + performance: `dict` + The performance dict. The key inside the dict should be the fields when initialized. + The value of the dict should be the corresponding scores. Returns ------- None """ - if name not in self.perform_dict["name"]: - LOGGER.warning( - "no model detected in current leaderboard, will ignore removing action." - ) - return - index = self.perform_dict["name"][self.perform_dict["name"] == name].index - self.perform_dict.drop(self.perform_dict.index[index], inplace=True) - return + self.add_performance(name, name, performance) def get_best_model(self, index=0) -> str: """ @@ -122,34 +154,67 @@ class Leaderboard: name: `str` The name/identifier of the required model. """ - sorted_df = self.perform_dict.sort_values( - by=self.major_field, ascending=not self.is_higher_better[self.major_field] + sorted_performance_df = self.__performance_data_frame.sort_values( + self.__major_field, + ascending=not ( + dict(zip(self.__fields, self.__is_higher_better))[self.__major_field] + if self.__major_field in self.__fields else True + ) ) - name_list = sorted_df["name"].tolist() + name_list = sorted_performance_df["name"].tolist() if "ensemble" in name_list: name_list.remove("ensemble") return name_list[index] - def show(self, top_k=-1) -> None: + def show(self, top_k=0) -> None: """ - Show current leaderboard (from good model to bad). + Show current LeaderBoard (from best model to worst). Parameters ---------- top_k: `int` - Controls the number model shown. If below `0`, will show all the models. Default `-1`. + Controls the number model shown. + If less than or equal to `0`, will show all the models. Default to `0`. 
        Returns
        -------
        None
        """
-        if top_k == -1:
-            top_k = len(self.perform_dict["name"])
+        top_k: int = top_k if top_k > 0 else len(self.__performance_data_frame)
+
+        '''
+        reindex self.__performance_data_frame
+        to ensure the name and representation columns are on the left side of the data frame
+        '''
+        _columns = self.__performance_data_frame.columns.tolist()
+        maxcolwidths: _typing.List[_typing.Optional[int]] = []
+        if "representation" in _columns:
+            _columns.remove("representation")
+            _columns.insert(0, "representation")
+            maxcolwidths.append(40)
+        if "name" in _columns:
+            _columns.remove("name")
+            _columns.insert(0, "name")
+            maxcolwidths.append(40)
+        self.__performance_data_frame = self.__performance_data_frame[_columns]
+
+        sorted_performance_df: pd.DataFrame = self.__performance_data_frame.sort_values(
+            self.__major_field,
+            ascending=not (
+                dict(zip(self.__fields, self.__is_higher_better))[self.__major_field]
+                if self.__major_field in self.__fields else True
+            )
+        )
+        sorted_performance_df = sorted_performance_df.head(top_k)
+
+        from tabulate import tabulate
+        _columns = sorted_performance_df.columns.tolist()
+        maxcolwidths.extend([None for _ in range(len(_columns) - len(maxcolwidths))])
         print(
-            self.perform_dict.sort_values(
-                by=self.major_field,
-                ascending=not self.is_higher_better[self.major_field],
-            ).head(top_k)
+            tabulate(
+                list(zip(*[sorted_performance_df[column] for column in _columns])),
+                headers=_columns, tablefmt="grid"
+            )
         )

From 4878a0abe50e14aa04c220d601208935cf04069e Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Mon, 31 May 2021 13:19:39 +0800
Subject: [PATCH 074/144] adjust metrics input

---
 autogl/module/train/evaluation.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py
index af3a79a..d36f708 100644
--- a/autogl/module/train/evaluation.py
+++ b/autogl/module/train/evaluation.py
@@ -120,6 +120,7 @@ class Auc(Evaluation):
         if len(predict.shape) == 1:
             pos_predict = predict
         else:
+            assert predict.shape[1] == 2, "Cannot use auc on given data with %d classes!" % (predict.shape[1])
             pos_predict = predict[:, 1]
         return roc_auc_score(label, pos_predict)
 
@@ -142,7 +143,11 @@ class Acc(Evaluation):
         """
         Should return: the evaluation result (float)
         """
-        return accuracy_score(label, np.argmax(predict, axis=1))
+        if len(predict.shape) == 2:
+            predict = np.argmax(predict, axis=1)
+        else:
+            predict = [1 if p > 0.5 else 0 for p in predict]
+        return accuracy_score(label, predict)
 
 
 @register_evaluate("mrr")
@@ -163,5 +168,9 @@ class Mrr(Evaluation):
         """
         Should return: the evaluation result (float)
         """
-        pos_predict = predict[:, 1]
+        if len(predict.shape) == 2:
+            assert predict.shape[1] == 2, "Cannot use mrr on given data with %d classes!" % (predict.shape[1])
+            pos_predict = predict[:, 1]
+        else:
+            pos_predict = predict
         return label_ranking_average_precision_score(label, pos_predict)

From 4078e51e36e15872e55f633370f4056c045ae2d1 Mon Sep 17 00:00:00 2001
From: null
Date: Tue, 1 Jun 2021 23:02:00 +0800
Subject: [PATCH 075/144] Improve the implementation for LADIES

Fix bugs in LADIES caused by carelessness.
Improve the performance of LayerWiseDependentImportanceSampler by utilizing
scipy.sparse.
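As background for the diff that follows: the layer-dependent (LADIES-style) probability of picking a candidate source node v for the current layer is proportional to the squared column sum of the normalized Laplacian restricted to the layer's target rows, p(v) ~ sum over u in T of L[u, v]^2. Restricting L to the target rows and squaring is a single scipy.sparse expression, which is where the speed-up comes from. A self-contained sketch under those assumptions — the helper name and the toy graph are illustrative, not part of the patch:

import numpy as np
import scipy.sparse as sp

def ladies_layer_probabilities(l_matrix: sp.csr_matrix, target_rows: np.ndarray) -> np.ndarray:
    # Restrict the normalized Laplacian to the current layer's target rows,
    # then take squared column sums: p(v) ~ sum_{u in targets} L[u, v] ** 2.
    partial = l_matrix[target_rows, :]
    p = np.asarray(partial.multiply(partial).sum(axis=0))[0]
    return p / p.sum()

# Toy usage on a 4-node path graph with D^-1/2 A D^-1/2 edge weights.
edge_index = np.array([[0, 1, 1, 2, 2, 3],
                       [1, 0, 2, 1, 3, 2]])
degree = np.bincount(edge_index[0], minlength=4).astype(float)
weight = (degree[edge_index[0]] * degree[edge_index[1]]) ** -0.5
laplacian = sp.csr_matrix((weight, (edge_index[1], edge_index[0])), shape=(4, 4))

p = ladies_layer_probabilities(laplacian, np.array([0, 1]))
budget = min(2, int((p > 0).sum()))  # never request more nodes than carry mass
sampled_sources = np.random.choice(p.size, size=budget, replace=False, p=p)

Sampling without replacement from p and then keeping only the edges between the sampled sources and the target rows is exactly the shape of the sampler's per-layer step in the diff below.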
--- autogl/module/model/base.py | 4 + autogl/module/model/gcn.py | 2 +- autogl/module/train/base.py | 3 - .../node_classification_sampled_trainer.py | 4 + .../layer_dependent_importance_sampler.py | 185 ++++++++++++++---- .../sampler/target_dependant_sampler.py | 37 ++-- 6 files changed, 183 insertions(+), 52 deletions(-) diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index 67b80c2..b7469f6 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -321,6 +321,10 @@ class ClassificationModel(_BaseModel): else: self.__num_graph_features: int = 0 + def __repr__(self) -> str: + import yaml + return yaml.dump(self.hyper_parameter) + @property def num_classes(self) -> int: return self.__num_classes diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index c2e0910..1f970ac 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -142,7 +142,7 @@ class GCN(SequentialGraphNeuralNetwork): if ( hasattr(data, "edge_weights") and isinstance(getattr(data, "edge_weights"), _typing.Sequence) and - len(getattr(data, "edge_weights")) == len(self.__convolution_layers) + len(getattr(data, "edge_weights")) == len(self.__sequential_module_list) ): return [ __compose_edge_index_and_weight(_edge_index, _edge_weight) diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index 25cfa39..eb71652 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -290,9 +290,6 @@ class BaseTrainer: def __repr__(self) -> str: raise NotImplementedError - def __str__(self) -> str: - return repr(self) - def evaluate(self, dataset, mask=None, feval=None): """ diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 895ca32..1f5323f 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -613,6 +613,10 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi current_layer.edge_index_for_sampled_graph.to(self.device) for current_layer in sampled_data.sampled_edges_for_layers ] + sampled_graph.edge_weights: _typing.Sequence[torch.Tensor] = [ + current_layer.edge_weight.to(self.device) + for current_layer in sampled_data.sampled_edges_for_layers + ] prediction: torch.Tensor = self.model.model(sampled_graph) if not hasattr(torch.nn.functional, self.loss): raise TypeError( diff --git a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py index 1c50cbe..45c8f69 100644 --- a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py +++ b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py @@ -1,4 +1,5 @@ import numpy as np +import scipy.sparse as sp import torch import torch.utils.data import typing as _typing @@ -6,7 +7,7 @@ import torch_geometric from . 
import target_dependant_sampler -class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTargetDependantSampler): +class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTargetDependantSampler): class _Utility: @classmethod def compute_edge_weights(cls, __all_edge_index_with_self_loops: torch.Tensor) -> torch.Tensor: @@ -15,9 +16,6 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar __in_degree: torch.Tensor = \ torch_geometric.utils.degree(__all_edge_index_with_self_loops[1]) - # temp_tensor: torch.Tensor = torch.zeros_like(__all_edge_index_with_self_loops) - # temp_tensor[0] = __out_degree[__all_edge_index_with_self_loops[0]] - # temp_tensor[1] = __in_degree[__all_edge_index_with_self_loops[1]] temp_tensor: torch.Tensor = torch.stack( [ __out_degree[__all_edge_index_with_self_loops[0]], @@ -30,7 +28,7 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar @classmethod def get_candidate_source_nodes_probabilities( - cls, all_candidate_edge_indexes: torch.Tensor, + cls, all_candidate_edge_indexes: torch.LongTensor, all_edge_index_with_self_loops: torch.Tensor, all_edge_weights: torch.Tensor ) -> _typing.Tuple[torch.LongTensor, torch.Tensor]: @@ -40,16 +38,17 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar :param all_edge_weights: :return: (all_source_nodes_indexes, all_source_nodes_probabilities) """ - _all_candidate_edges: torch.Tensor = \ - all_edge_index_with_self_loops[:, all_candidate_edge_indexes] - _all_candidate_edges_weights: torch.Tensor = \ - all_edge_weights[all_candidate_edge_indexes] - - all_candidate_source_nodes_indexes: torch.LongTensor = _all_candidate_edges[0].unique() + all_candidate_edge_indexes: torch.LongTensor = all_candidate_edge_indexes.unique() + _all_candidate_edges_weights: torch.Tensor = all_edge_weights[all_candidate_edge_indexes] + all_candidate_source_nodes_indexes: torch.LongTensor = ( + all_edge_index_with_self_loops[0, all_candidate_edge_indexes].unique() + ) all_candidate_source_nodes_probabilities: torch.Tensor = torch.tensor( [ torch.sum( - _all_candidate_edges_weights[_all_candidate_edges[0] == _current_source_node_index] + _all_candidate_edges_weights[ + all_edge_index_with_self_loops[0, all_candidate_edge_indexes] == _current_source_node_index + ] ).item() / torch.sum(_all_candidate_edges_weights).item() for _current_source_node_index in all_candidate_source_nodes_indexes.tolist() ] @@ -108,12 +107,14 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar self.__all_edge_weights: torch.Tensor = self._Utility.compute_edge_weights(self._edge_index) def _sample_edges_for_layer( - self, target_nodes_indexes: torch.LongTensor, + self, __current_layer_target_nodes_indexes: torch.LongTensor, + __top_layer_target_nodes_indexes: torch.LongTensor, layer_argument: _typing.Any, *args, **kwargs ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: """ Sample edges for one layer - :param target_nodes_indexes: indexes of target nodes + :param __current_layer_target_nodes_indexes: target nodes for current layer + :param __top_layer_target_nodes_indexes: target nodes for top layer :param layer_argument: argument for current layer :param args: remaining positional arguments :param kwargs: remaining keyword arguments @@ -129,7 +130,7 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar all_candidate_edge_indexes: torch.LongTensor = torch.cat( [ 
torch.where(self._edge_index[1] == current_target_node_index)[0] - for current_target_node_index in target_nodes_indexes.unique().tolist() + for current_target_node_index in __current_layer_target_nodes_indexes.unique().tolist() ] ).unique() __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ @@ -145,31 +146,32 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar torch.from_numpy( np.unique(np.random.choice( np.arange(__all_candidate_source_nodes_indexes.numel()), sampled_node_size_budget, - p=all_candidate_source_nodes_probabilities.numpy() + p=all_candidate_source_nodes_probabilities.numpy(), replace=False )) ).unique() ].unique() else: selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes + selected_source_node_indexes: torch.LongTensor = torch.cat( + [selected_source_node_indexes, __top_layer_target_nodes_indexes] + ).unique() __selected_edges_indexes: torch.LongTensor = ( self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes( - self._edge_index, - selected_source_node_indexes, target_nodes_indexes - ) - ).unique() + self._edge_index, selected_source_node_indexes, __current_layer_target_nodes_indexes + ).unique() + ) non_normalized_selected_edges_weight: torch.Tensor = ( - self.__all_edge_weights[__selected_edges_indexes] / ( - torch.tensor( - [ - all_candidate_source_nodes_probabilities[ - __all_candidate_source_nodes_indexes == current_source_node_index - ].item() - for current_source_node_index - in self._edge_index[0, __selected_edges_indexes].tolist() - ] - ) + self.__all_edge_weights[__selected_edges_indexes] / + torch.tensor( + [ + all_candidate_source_nodes_probabilities[ + __all_candidate_source_nodes_indexes == current_source_node_index + ].item() + for current_source_node_index + in self._edge_index[0, __selected_edges_indexes].tolist() + ] ) ) @@ -183,9 +185,8 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar __edge_index[1] == current_target_node_index ) __edge_weight[__current_mask_for_edges] = ( - __edge_weight[__current_mask_for_edges] / ( - torch.sum(__edge_weight[__current_mask_for_edges]) - ) + __edge_weight[__current_mask_for_edges] / + torch.sum(__edge_weight[__current_mask_for_edges]) ) return __edge_weight @@ -194,3 +195,121 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar non_normalized_selected_edges_weight ) return __selected_edges_indexes, normalized_selected_edges_weight + + +class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTargetDependantSampler): + @classmethod + def __compute_edge_weight(cls, edge_index: torch.Tensor) -> torch.Tensor: + __num_nodes: int = max(int(edge_index[0].max()), int(edge_index[1].max())) + 1 + _temp_tensor: torch.Tensor = torch.stack([ + torch_geometric.utils.degree(edge_index[0], __num_nodes)[edge_index[0]], + torch_geometric.utils.degree(edge_index[1], __num_nodes)[edge_index[1]] + ]) + _temp_tensor: torch.Tensor = torch.pow(_temp_tensor, -0.5) + _temp_tensor[torch.isinf(_temp_tensor)] = 0 + return _temp_tensor[0] * _temp_tensor[1] + + def __init__( + self, edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: _typing.Optional[int] = 1, num_workers: int = 0, + shuffle: bool = True, **kwargs + ): + super(LayerDependentImportanceSampler, self).__init__( + torch_geometric.utils.add_remaining_self_loops(edge_index)[0], + target_nodes_indexes, 
layer_wise_arguments, batch_size, num_workers, shuffle, **kwargs + ) + self.__edge_weight: torch.Tensor = self.__compute_edge_weight(self._edge_index) + self.__integral_normalized_l_matrix: sp.csr_matrix = sp.csr_matrix(( + self.__edge_weight.numpy(), + (self._edge_index[1].numpy(), self._edge_index[0].numpy()) + )) + self.__integral_edges_indexes_sparse_matrix: sp.csr_matrix = sp.csr_matrix(( + np.arange(self._edge_index.size(1)), + (self._edge_index[1].numpy(), self._edge_index[0].numpy()) + )) + + def __sample_edges( + self, __current_layer_target_nodes_indexes: np.ndarray, + __top_layer_target_nodes_indexes: np.ndarray, sampled_source_nodes_budget: int + ) -> _typing.Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + + :param __current_layer_target_nodes_indexes: indexes of target nodes for current layer + :param __top_layer_target_nodes_indexes: indexes of target nodes for top layer + :param sampled_source_nodes_budget: sampled source nodes budget + :return: ( + sampled_edges_indexes, + sampled_source_nodes_indexes, + corresponding probabilities for sampled_source_nodes_indexes + ) + """ + partial_l_matrix: sp.csr_matrix = ( + self.__integral_normalized_l_matrix[__current_layer_target_nodes_indexes, :] + ) + p: np.ndarray = np.array(np.sum(partial_l_matrix.multiply(partial_l_matrix), axis=0))[0] + p: np.ndarray = p / np.sum(p) + _number_of_nodes_to_sample = np.min([np.sum(p > 0), sampled_source_nodes_budget]) + _selected_source_nodes: np.ndarray = np.unique(np.concatenate([ + np.random.choice( + p.size, _number_of_nodes_to_sample, replace=False, p=p + ), + __top_layer_target_nodes_indexes + ])) + + _sampled_edges_indexes_sparse_matrix: sp.csr_matrix = ( + self.__integral_edges_indexes_sparse_matrix[__current_layer_target_nodes_indexes, :] + ) + _sampled_edges_indexes_sparse_matrix: sp.csc_matrix = ( + _sampled_edges_indexes_sparse_matrix.tocsc()[:, _selected_source_nodes] + ) + _sampled_edges_indexes: np.ndarray = np.unique(_sampled_edges_indexes_sparse_matrix.data) + + return _sampled_edges_indexes, _selected_source_nodes, p[_selected_source_nodes] + + def _sample_edges_for_layer( + self, __current_layer_target_nodes_indexes: torch.LongTensor, + __top_layer_target_nodes_indexes: torch.LongTensor, + layer_argument: _typing.Any, *args, **kwargs + ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: + """ + Sample edges for specific layer + :param __current_layer_target_nodes_indexes: target nodes for current layer + :param __top_layer_target_nodes_indexes: target nodes for top layer + :param layer_argument: sampled_source_nodes_budget + :param args: remaining positional arguments + :param kwargs: remaining keyword arguments + :return: (edge_id_in_integral_graph, edge_weight) + """ + __wrapped_result: _typing.Tuple[np.ndarray, np.ndarray, np.ndarray] = self.__sample_edges( + __current_layer_target_nodes_indexes.numpy(), + __top_layer_target_nodes_indexes.numpy(), + layer_argument + ) + _sampled_edges_indexes: torch.Tensor = torch.from_numpy(__wrapped_result[0]) + _selected_source_nodes: torch.Tensor = torch.from_numpy(__wrapped_result[1]) + _selected_source_nodes_probabilities: torch.Tensor = torch.from_numpy(__wrapped_result[2]) + + """ Multiply corresponding discount weights """ + __selected_source_node_probability_mapping: _typing.Dict[int, float] = dict( + zip(_selected_source_nodes.tolist(), _selected_source_nodes_probabilities.tolist()) + ) + _selected_edges_weight: torch.Tensor = self.__edge_weight[_sampled_edges_indexes] + _selected_edges_weight: torch.Tensor = 
_selected_edges_weight / torch.tensor([ + __selected_source_node_probability_mapping.get(_current_source_node_index) + for _current_source_node_index in self._edge_index[0, _sampled_edges_indexes].tolist() + ]) + + """ Normalize edge weight for selected edges by target nodes """ + for _current_target_node_index in self._edge_index[1, _sampled_edges_indexes].unique().tolist(): + _current_mask_for_selected_edges: torch.BoolTensor = ( + self._edge_index[1, _sampled_edges_indexes] == _current_target_node_index + ) + _selected_edges_weight[_current_mask_for_selected_edges] = ( + _selected_edges_weight[_current_mask_for_selected_edges] / + torch.sum(_selected_edges_weight[_current_mask_for_selected_edges]) + ) + + _sampled_edges_indexes: _typing.Union[torch.LongTensor, torch.Tensor] = _sampled_edges_indexes + return _sampled_edges_indexes, _selected_edges_weight diff --git a/autogl/module/train/sampling/sampler/target_dependant_sampler.py b/autogl/module/train/sampling/sampler/target_dependant_sampler.py index ab8620f..db51654 100644 --- a/autogl/module/train/sampling/sampler/target_dependant_sampler.py +++ b/autogl/module/train/sampling/sampler/target_dependant_sampler.py @@ -134,12 +134,11 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): shuffle: bool = True, **kwargs ): self._edge_index: torch.LongTensor = edge_index - self.__target_nodes_indexes: torch.LongTensor = target_nodes_indexes self.__layer_wise_arguments: _typing.Sequence = layer_wise_arguments if "collate_fn" in kwargs: del kwargs["collate_fn"] super(BasicLayerWiseTargetDependantSampler, self).__init__( - self.__target_nodes_indexes.unique().tolist(), + target_nodes_indexes.unique().tolist(), batch_size, shuffle, num_workers=num_workers, collate_fn=self._collate_fn, **kwargs ) @@ -169,12 +168,14 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): ) def _sample_edges_for_layer( - self, target_nodes_indexes: torch.LongTensor, + self, __current_layer_target_nodes_indexes: torch.LongTensor, + __top_layer_target_nodes_indexes: torch.LongTensor, layer_argument: _typing.Any, *args, **kwargs ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: """ Sample edges for one layer - :param target_nodes_indexes: indexes of target nodes + :param __current_layer_target_nodes_indexes: target nodes for current layer + :param __top_layer_target_nodes_indexes: target nodes for top layer :param layer_argument: argument for current layer :param args: remaining positional arguments :param kwargs: remaining keyword arguments @@ -185,27 +186,28 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): def _collate_fn( self, top_layer_target_nodes_indexes_list: _typing.List[int] ) -> TargetDependantSampledData: - return self.__sample_layers(top_layer_target_nodes_indexes_list) + return self.__sample_layers(torch.tensor(top_layer_target_nodes_indexes_list).unique()) def __sample_layers( - self, top_layer_target_nodes_indexes_list: _typing.Sequence[int] + self, __top_layer_target_nodes_indexes: torch.LongTensor ) -> TargetDependantSampledData: sampled_edges_for_layers: _typing.List[ _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] ] = list() - top_layer_target_nodes_indexes: torch.LongTensor = ( - torch.tensor(top_layer_target_nodes_indexes_list).unique() - ) # sorted - target_nodes_indexes: torch.LongTensor = top_layer_target_nodes_indexes + __current_layer_target_nodes_indexes: torch.LongTensor = __top_layer_target_nodes_indexes " Reverse self.__layer_wise_arguments from 
bottom-up to top-down " for layer_argument in self.__layer_wise_arguments[::-1]: current_layer_result: _typing.Tuple[ torch.LongTensor, _typing.Optional[torch.Tensor] - ] = self._sample_edges_for_layer(target_nodes_indexes, layer_argument) + ] = self._sample_edges_for_layer( + __current_layer_target_nodes_indexes, + __top_layer_target_nodes_indexes, + layer_argument + ) __source_nodes_indexes_for_current_layer: torch.Tensor = ( self._edge_index[0, current_layer_result[0]] ) - target_nodes_indexes: torch.LongTensor = ( + __current_layer_target_nodes_indexes: torch.LongTensor = ( __source_nodes_indexes_for_current_layer.unique() ) sampled_edges_for_layers.append(current_layer_result) @@ -252,12 +254,17 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): torch.tensor( [ __sampled_nodes_in_sub_graph_mapping.get(current_target_node_index_in_integral_data) - for current_target_node_index_in_integral_data - in top_layer_target_nodes_indexes.tolist() + for current_target_node_index_in_integral_data in __top_layer_target_nodes_indexes.tolist() if current_target_node_index_in_integral_data in __sampled_nodes_in_sub_graph_mapping ] ).long(), # Remap - top_layer_target_nodes_indexes + torch.tensor( + [ + current_target_node_index_in_integral_data + for current_target_node_index_in_integral_data in __top_layer_target_nodes_indexes.tolist() + if current_target_node_index_in_integral_data in __sampled_nodes_in_sub_graph_mapping + ] + ).long() ), sampled_nodes_in_sub_graph ) From b59c538206b437103a34ae110c6f598931ce7a99 Mon Sep 17 00:00:00 2001 From: generall Date: Wed, 9 Jun 2021 11:58:37 +0800 Subject: [PATCH 076/144] improve acc --- autogl/module/nas/space/graph_nas.py | 5 ++--- examples/test_nas.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 5d44591..5d16a06 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -169,7 +169,7 @@ class GraphNasNodeClassificationSpace(BaseSpace): self, hidden_dim: _typ.Optional[int] = 64, layer_number: _typ.Optional[int] = 2, - dropout: _typ.Optional[float] = 0.2, + dropout: _typ.Optional[float] = 0.9, input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, ops: _typ.Tuple = None, @@ -216,6 +216,7 @@ class GraphNasNodeClassificationSpace(BaseSpace): def forward(self, data): x, edges = data.x, data.edge_index # x [2708,1433] ,[2, 10556] + x = F.dropout(x, p=self.dropout, training = self.training) pprev_, prev_ = self.preproc0(x), self.preproc1(x) prev_nodes_out = [pprev_,prev_] for layer in range(2,self.layer_number+2): @@ -225,7 +226,6 @@ class GraphNasNodeClassificationSpace(BaseSpace): if not self.search_act_con: x = torch.cat(prev_nodes_out[2:],dim=1) x = F.leaky_relu(x) - x = F.dropout(x, p=self.dropout, training = self.training) x = self.classifier1(x) else: act=getattr(self, f"act") @@ -242,7 +242,6 @@ class GraphNasNodeClassificationSpace(BaseSpace): tmp = torch.mul(tmp, states[i]) x=tmp x = act(x) - x = F.dropout(x, p=self.dropout, training = self.training) if con=='concat': x=self.classifier1(x) else: diff --git a/examples/test_nas.py b/examples/test_nas.py index 44d4b76..c013ce4 100644 --- a/examples/test_nas.py +++ b/examples/test_nas.py @@ -6,6 +6,7 @@ from autogl.datasets import build_dataset_from_name from autogl.solver import AutoNodeClassifier from autogl.module.train import NodeClassificationFullTrainer from autogl.module.nas import Darts, 
OneShotEstimator, SinglePathNodeClassificationSpace +from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace from autogl.module.train import Acc from autogl.module.nas.algorithm.enas import Enas @@ -19,17 +20,18 @@ if __name__ == '__main__': default_trainer=NodeClassificationFullTrainer( optimizer=torch.optim.Adam, lr=0.01, - max_epoch=200, - early_stopping_round=200, - weight_decay=5e-4, + max_epoch=300, + early_stopping_round=300, + weight_decay=1e-4, device="auto", init=False, feval=['acc'], loss="nll_loss", lr_scheduler_type=None,), - nas_algorithms=[Enas()], - #nas_algorithms=[Darts(num_epochs=200)], - nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=16, ops=[GCNConv, GCNConv])], + #nas_algorithms=[Enas()], + nas_algorithms=[Darts(num_epochs=200)], + #nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=32, ops=[GCNConv, GCNConv])], + nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=64,search_act_con=False,layer_number=4)], nas_estimators=[OneShotEstimator()] ) solver.fit(dataset) From 1edc5692ed083e050eab28b67665672ca72fa4fb Mon Sep 17 00:00:00 2001 From: null Date: Thu, 10 Jun 2021 02:44:00 +0800 Subject: [PATCH 077/144] Modify the GCN model, Improve sampling speed, GC before training Modify the GCN model to support diverse dropout probabilities in different GCN layers. Improve the sampling speed for BasicTargetDependentSampler base class. Apply GC before training for every instance of trainer. --- autogl/module/model/gcn.py | 43 ++++++++++++++++--- .../node_classification_sampled_trainer.py | 6 +++ .../sampler/target_dependant_sampler.py | 2 +- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 1f970ac..f3abd15 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -72,33 +72,66 @@ class GCN(SequentialGraphNeuralNetwork): num_features: int, num_classes: int, hidden_features: _typing.Sequence[int], - dropout: float, + dropout: _typing.Union[float, _typing.Sequence[_typing.Optional[float]]], activation_name: str, add_self_loops: bool = True, normalize: bool = True ): + if isinstance(dropout, _typing.Sequence): + if len(dropout) != len(hidden_features) + 1: + raise TypeError( + "When the dropout argument is a sequence, " + "The sequence length must equal to the number of layers to construct." + ) + for _dropout in dropout: + if _dropout is not None and type(_dropout) != float: + raise TypeError( + "When the dropout argument is a sequence, " + "every item in the sequence must be float or None" + ) + dropout_list: _typing.Sequence[_typing.Optional[float]] = dropout + elif type(dropout) == float: + if dropout < 0: + dropout = 0 + if dropout > 1: + dropout = 1 + dropout_list: _typing.Sequence[_typing.Optional[float]] = [ + dropout for _ in range(len(hidden_features) + 1) + ] + else: + raise TypeError( + "The provided dropout argument must be a float " + "or a sequence in which each item is either float or None." 
+ ) super().__init__() if len(hidden_features) == 0: self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList( - (self._GCNLayer(num_features, num_classes, add_self_loops, normalize),) + ( + self._GCNLayer( + num_features, num_classes, add_self_loops, normalize, + dropout_probability=dropout_list[0] + ), + ) ) else: self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList() self.__sequential_module_list.append(self._GCNLayer( num_features, hidden_features[0], add_self_loops, - normalize, activation_name, dropout + normalize, activation_name, dropout_list[0] )) for hidden_feature_index in range(len(hidden_features)): if hidden_feature_index + 1 < len(hidden_features): self.__sequential_module_list.append(self._GCNLayer( hidden_features[hidden_feature_index], hidden_features[hidden_feature_index + 1], - add_self_loops, normalize, activation_name, dropout + add_self_loops, normalize, activation_name, + dropout_list[hidden_feature_index + 1] )) else: self.__sequential_module_list.append(self._GCNLayer( hidden_features[hidden_feature_index], num_classes, - add_self_loops, normalize + add_self_loops, normalize, + dropout_list[-1] )) def decode(self, x: torch.Tensor) -> torch.Tensor: diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 1f5323f..1e611c7 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -319,6 +319,8 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): :param dataset: :param keep_valid_result: Whether to save the validation result after training """ + import gc + gc.collect() data = dataset[0] self.__train_only(data) if keep_valid_result: @@ -879,6 +881,8 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi :param dataset: :param keep_valid_result: Whether to save the validation result after training """ + import gc + gc.collect() data = dataset[0] self.__train_only(data) if keep_valid_result: @@ -1387,6 +1391,8 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): :param dataset: :param keep_valid_result: Whether to save the validation result after training """ + import gc + gc.collect() data = dataset[0] self.__train_only(data) if keep_valid_result: diff --git a/autogl/module/train/sampling/sampler/target_dependant_sampler.py b/autogl/module/train/sampling/sampler/target_dependant_sampler.py index db51654..a506323 100644 --- a/autogl/module/train/sampling/sampler/target_dependant_sampler.py +++ b/autogl/module/train/sampling/sampler/target_dependant_sampler.py @@ -138,7 +138,7 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): if "collate_fn" in kwargs: del kwargs["collate_fn"] super(BasicLayerWiseTargetDependantSampler, self).__init__( - target_nodes_indexes.unique().tolist(), + target_nodes_indexes.unique().numpy(), batch_size, shuffle, num_workers=num_workers, collate_fn=self._collate_fn, **kwargs ) From a08843dc3b10a618dca706e10288ba00f7c6bed3 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Fri, 11 Jun 2021 22:03:34 +0800 Subject: [PATCH 078/144] fix performance bugs of gcn on link prediction --- autogl/module/model/gcn.py | 13 +++++-------- autogl/module/train/link_prediction.py | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git 
a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 71ce274..7f279a7 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -1,6 +1,7 @@ import torch import torch.nn.functional import torch_geometric +from torch_geometric.nn import GCNConv import typing as _typing from . import register_model from .base import BaseModel, activate_func, ClassificationModel @@ -23,23 +24,19 @@ class GCN(torch.nn.Module): num_layers: int = len(hidden_features) + 1 if num_layers == 1: self.__convolution_layers.append( - torch_geometric.nn.GCNConv( - num_features, num_classes, add_self_loops=False - ) + GCNConv(num_features, num_classes) ) else: self.__convolution_layers.append( - torch_geometric.nn.GCNConv( - num_features, hidden_features[0], add_self_loops=False - ) + GCNConv(num_features, hidden_features[0]) ) for i in range(len(hidden_features)): self.__convolution_layers.append( - torch_geometric.nn.GCNConv( + GCNConv( hidden_features[i], hidden_features[i + 1] ) if i + 1 < len(hidden_features) - else torch_geometric.nn.GCNConv(hidden_features[i], num_classes) + else GCNConv(hidden_features[i], num_classes) ) self.__dropout: float = dropout self.__activation_name: str = activation_name diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py index 5c17047..a444ae3 100644 --- a/autogl/module/train/link_prediction.py +++ b/autogl/module/train/link_prediction.py @@ -217,8 +217,8 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): self.early_stopping(val_loss, self.model.model) if self.early_stopping.early_stop: LOGGER.debug("Early stopping at %d", epoch) - self.early_stopping.load_checkpoint(self.model.model) break + self.early_stopping.load_checkpoint(self.model.model) def predict_only(self, data, test_mask=None): """ From c28d1c3a3bd9b6ec2b15a25fd789f5ae49e8e6f9 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 17 Jun 2021 16:20:04 +0000 Subject: [PATCH 079/144] fix ensemble bug --- autogl/solver/classifier/link_predictor.py | 13 +- benchmark/lp_pyg.py | 126 ------------------ benchmark/lp_reproduce.py | 82 ------------ configs/lp_benchmark.yml | 92 +++++++++++++ configs/lp_gat_benchmark.yml | 61 +++++++++ ...nchmark_small.yml => lp_gcn_benchmark.yml} | 0 configs/lp_sage_benchmark.yml | 69 ++++++++++ configs/nodeclf_gat_benchmark_large.yml | 2 +- 8 files changed, 233 insertions(+), 212 deletions(-) delete mode 100644 benchmark/lp_pyg.py delete mode 100644 benchmark/lp_reproduce.py create mode 100644 configs/lp_benchmark.yml create mode 100644 configs/lp_gat_benchmark.yml rename configs/{lp_gcn_benchmark_small.yml => lp_gcn_benchmark.yml} (100%) create mode 100644 configs/lp_sage_benchmark.yml diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py index d2ef6b4..040da36 100644 --- a/autogl/solver/classifier/link_predictor.py +++ b/autogl/solver/classifier/link_predictor.py @@ -192,6 +192,13 @@ class AutoLinkPredictor(BaseClassifier): return self + def _to_prob(self, sig_prob: np.ndarray): + nelements = len(sig_prob) + prob = np.zeros([nelements, 2]) + prob[:,0] = 1 - sig_prob + prob[:,1] = sig_prob + return prob + # pylint: disable=arguments-differ def fit( self, @@ -323,7 +330,7 @@ class AutoLinkPredictor(BaseClassifier): name = optimized.get_name_with_hp() + "_idx%d" % (idx) names.append(name) performance_on_valid, _ = optimized.get_valid_score(return_major=False) - result_valid.append(optimized.get_valid_predict_proba().cpu().numpy()) + 
result_valid.append(self._to_prob(optimized.get_valid_predict_proba().cpu().numpy())) self.leaderboard.insert_model_performance( name, dict( @@ -512,10 +519,10 @@ class AutoLinkPredictor(BaseClassifier): names = [] for model_name in self.trained_models: predict_result.append( - self._predict_proba_by_name(dataset, model_name, mask) + self._to_prob(self._predict_proba_by_name(dataset, model_name, mask)) ) names.append(model_name) - return self.ensemble_module.ensemble(predict_result, names) + return self.ensemble_module.ensemble(predict_result, names)[:,1] if use_ensemble and self.ensemble_module is None: LOGGER.warning( diff --git a/benchmark/lp_pyg.py b/benchmark/lp_pyg.py deleted file mode 100644 index 32d8db4..0000000 --- a/benchmark/lp_pyg.py +++ /dev/null @@ -1,126 +0,0 @@ -import sys -sys.path.append('../') -import os.path as osp - -import torch -import torch.nn.functional as F -from sklearn.metrics import roc_auc_score - -from torch_geometric.utils import negative_sampling -from torch_geometric.nn import GCNConv, GATConv, SAGEConv -from torch_geometric.utils import train_test_split_edges - -import argparse -import pickle -import numpy as np - -parser = argparse.ArgumentParser() -parser.add_argument('--model', choices=['gcn', 'sage', 'gat'], type=str, default='gcn', help='model to train') -parser.add_argument('--dataset', choices=['cora', 'citseer', 'pubmed'], type=str, default='cora', help='dataset to evaluate') -parser.add_argument('--times', type=int, default=10, help='time to rerun') - -args = parser.parse_args() - -DIM = 64 -dataset = pickle.load(open(f'/DATA/DATANAS1/guancy/github/AutoGL/env/cache/{args.dataset}-edge.data', 'rb')) -data = dataset[0] -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -data = data.to(device) - -def _decode(z, pos_edge_index, neg_edge_index): - edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) - return (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) - -class GNN(torch.nn.Module): - def __init__(self): - super().__init__() - - def encode(self, x, edge_index): - return self.conv2(self.conv1(x, edge_index).relu(), edge_index) - -class GCN(GNN): - def __init__(self, in_channels): - super().__init__() - self.conv1 = GCNConv(in_channels, 128) - self.conv2 = GCNConv(128, DIM) - -class GAT(GNN): - def __init__(self, in_channels): - super().__init__() - self.conv1 = GATConv(in_channels, 16, 8) - self.conv2 = GATConv(128, DIM // 8, 8) - -class SAGE(GNN): - def __init__(self, in_channels): - super().__init__() - self.conv1 = SAGEConv(in_channels, 128) - self.conv2 = SAGEConv(128, DIM) - -MODEL = { - 'gcn': GCN, - 'gat': GAT, - 'sage': SAGE -} - -scores = [] - -for t in range(args.times): - - model = MODEL[args.model](dataset.num_features).to(device) - optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01) - - def get_link_labels(pos_edge_index, neg_edge_index): - num_links = pos_edge_index.size(1) + neg_edge_index.size(1) - link_labels = torch.zeros(num_links, dtype=torch.float, device=device) - link_labels[:pos_edge_index.size(1)] = 1. 
- return link_labels - - def train(data): - model.train() - - neg_edge_index = negative_sampling( - edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes, - num_neg_samples=data.train_pos_edge_index.size(1)) - - optimizer.zero_grad() - z = model.encode(data.x, data.train_pos_edge_index) - link_logits = _decode(z, data.train_pos_edge_index, neg_edge_index) - link_labels = get_link_labels(data.train_pos_edge_index, neg_edge_index) - loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) - loss.backward() - optimizer.step() - - return loss - - - @torch.no_grad() - def test(data): - model.eval() - - z = model.encode(data.x, data.train_pos_edge_index) - - results = [] - for prefix in ['val', 'test']: - pos_edge_index = data[f'{prefix}_pos_edge_index'] - neg_edge_index = data[f'{prefix}_neg_edge_index'] - link_logits = _decode(z, pos_edge_index, neg_edge_index) - link_probs = link_logits.sigmoid() - link_labels = get_link_labels(pos_edge_index, neg_edge_index) - results.append(roc_auc_score(link_labels.cpu(), link_probs.cpu())) - return results - - - best_val_auc = test_auc = 0 - for epoch in range(1, 101): - loss = train(data) - val_auc, tmp_test_auc = test(data) - if val_auc > best_val_auc: - best_val = val_auc - test_auc = tmp_test_auc - # print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, ' - # f'Test: {test_auc:.4f}') - - scores.append(test_auc) - print('time', t, test_auc) -print('mean', np.mean(scores), 'std', np.std(scores)) -open('lp_pyg.log', 'a').write('\t'.join([args.dataset, args.model, str(np.mean(scores)), str(np.std(scores)), '\n'])) diff --git a/benchmark/lp_reproduce.py b/benchmark/lp_reproduce.py deleted file mode 100644 index 1409f17..0000000 --- a/benchmark/lp_reproduce.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Used to reproduce the statistics from pyg -""" - -import sys -sys.path.append('../') -import pickle -import torch -import argparse -import numpy as np -from sklearn.metrics import roc_auc_score - -from autogl.module.train import LinkPredictionTrainer -# Fix data split - -parser = argparse.ArgumentParser() -parser.add_argument('--model', choices=['gcn', 'sage', 'gat'], type=str, default='gcn', help='model to train') -parser.add_argument('--dataset', choices=['cora', 'citseer', 'pubmed'], type=str, default='cora', help='dataset to evaluate') -parser.add_argument('--times', type=int, default=10, help='time to rerun') - -args = parser.parse_args() - -DIM = 64 -dataset = pickle.load(open(f'/DATA/DATANAS1/guancy/github/AutoGL/env/cache/{args.dataset}-edge.data', 'rb')) -data = dataset[0] -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -data = data.to(device) - -HP = { - 'gcn' : { - 'num_layers': 3, - 'hidden': [128, DIM], - 'dropout': 0.0, - 'act': 'relu' - }, - 'gat' : { - 'num_layers': 3, - 'hidden': [16, DIM // 8], - 'dropout': 0.0, - 'act': 'relu', - 'heads': 8 - }, - 'sage': { - 'num_layers': 3, - 'hidden': [128, DIM], - 'dropout': 0.0, - 'act': 'relu', - 'aggr': 'mean' - } -} - -scores = [] - -for t in range(args.times): - - trainer = LinkPredictionTrainer( - args.model, - num_features=dataset.num_features, - lr=0.01, - max_epoch=100, - early_stopping_round=101, - weight_decay=0, - device='cuda', - init=False, - feval='auc', - loss="binary_cross_entropy_with_logits", - ) - - trainer = trainer.duplicate_from_hyper_parameter(HP[args.model], restricted=False) - trainer.train([data], keep_valid_result=True) - y = trainer.predict([data], 'test') - y_ = y.cpu().numpy() - - pos_edge_index = 
data[f'test_pos_edge_index'] - neg_edge_index = data[f'test_neg_edge_index'] - link_labels = trainer.get_link_labels(pos_edge_index, neg_edge_index) - label = link_labels.cpu().numpy() - test_auc = roc_auc_score(label, y_) - scores.append(test_auc) - print('time', t, test_auc) -print('mean', np.mean(scores), 'std', np.std(scores)) -open('lp_reproduce.log', 'a').write('\t'.join([args.dataset, args.model, str(np.mean(scores)), str(np.std(scores)), '\n'])) diff --git a/configs/lp_benchmark.yml b/configs/lp_benchmark.yml new file mode 100644 index 0000000..f1ca373 --- /dev/null +++ b/configs/lp_benchmark.yml @@ -0,0 +1,92 @@ +ensemble: + name: voting +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn +- name: gat + hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/lp_gat_benchmark.yml b/configs/lp_gat_benchmark.yml new file mode 100644 index 0000000..abf8e28 --- /dev/null +++ b/configs/lp_gat_benchmark.yml @@ -0,0 +1,61 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- name: gat + hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/lp_gcn_benchmark_small.yml b/configs/lp_gcn_benchmark.yml similarity index 100% rename from configs/lp_gcn_benchmark_small.yml rename 
to configs/lp_gcn_benchmark.yml diff --git a/configs/lp_sage_benchmark.yml b/configs/lp_sage_benchmark.yml new file mode 100644 index 0000000..de0b6ab --- /dev/null +++ b/configs/lp_sage_benchmark.yml @@ -0,0 +1,69 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- name: sage + hp_space: + - parameterName: num_layers + type: DISCRETE + feasiblePoints: 2,3 + + - parameterName: hidden + type: NUMERICAL_LIST + scalingType: LOG + numericalType: INTEGER + cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + + - parameterName: dropout + type: DOUBLE + scalingType: LINEAR + maxValue: 0.2 + minValue: 0.0 + + - parameterName: act + type: CATEGORICAL + feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + + - parameterName: agg + type: CATEGORICAL + feasiblePoints: ["mean", "add", "max"] + +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/nodeclf_gat_benchmark_large.yml b/configs/nodeclf_gat_benchmark_large.yml index 1b5933f..fe47281 100644 --- a/configs/nodeclf_gat_benchmark_large.yml +++ b/configs/nodeclf_gat_benchmark_large.yml @@ -39,7 +39,7 @@ models: - tanh parameterName: act type: CATEGORICAL - name: gcn + name: gat trainer: hp_space: - maxValue: 400 From 34a452e25be732845a192d94fce7a185bf7df950 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 17 Jun 2021 16:21:33 +0000 Subject: [PATCH 080/144] black format --- autogl/datasets/utils.py | 5 +- autogl/module/model/__init__.py | 3 +- autogl/module/model/gcn.py | 17 ++----- autogl/module/train/__init__.py | 6 ++- autogl/module/train/base.py | 3 +- autogl/module/train/evaluation.py | 8 +++- autogl/module/train/link_prediction.py | 53 ++++++++++++---------- autogl/solver/classifier/link_predictor.py | 46 ++++++++++++------- 8 files changed, 83 insertions(+), 58 deletions(-) diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index 7b5679c..4885ea0 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -9,9 +9,12 @@ from sklearn.model_selection import StratifiedKFold, KFold def split_edges(dataset, train_ratio, val_ratio): datas = [data for data in dataset] for i in range(len(datas)): - datas[i] = train_test_split_edges(datas[i], val_ratio, 1 - train_ratio - val_ratio) + datas[i] = train_test_split_edges( + datas[i], val_ratio, 1 - train_ratio - val_ratio + ) dataset.data, dataset.slices = dataset.collate(datas) + def get_label_number(dataset): r"""Get the number of labels in this dataset as dict.""" label_num = {} diff --git a/autogl/module/model/__init__.py b/autogl/module/model/__init__.py index 42bdbc4..4bff9bb 100644 --- a/autogl/module/model/__init__.py +++ b/autogl/module/model/__init__.py @@ -1,7 +1,8 @@ from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model from .base import BaseModel from .topkpool import AutoTopkpool -#from .graph_sage import AutoSAGE + +# from .graph_sage import AutoSAGE from .graphsage import AutoSAGE from .gcn import AutoGCN from .gat import AutoGAT diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 7f279a7..e28bd80 
100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -23,18 +23,12 @@ class GCN(torch.nn.Module): self.__convolution_layers: torch.nn.ModuleList = torch.nn.ModuleList() num_layers: int = len(hidden_features) + 1 if num_layers == 1: - self.__convolution_layers.append( - GCNConv(num_features, num_classes) - ) + self.__convolution_layers.append(GCNConv(num_features, num_classes)) else: - self.__convolution_layers.append( - GCNConv(num_features, hidden_features[0]) - ) + self.__convolution_layers.append(GCNConv(num_features, hidden_features[0])) for i in range(len(hidden_features)): self.__convolution_layers.append( - GCNConv( - hidden_features[i], hidden_features[i + 1] - ) + GCNConv(hidden_features[i], hidden_features[i + 1]) if i + 1 < len(hidden_features) else GCNConv(hidden_features[i], num_classes) ) @@ -109,9 +103,8 @@ class GCN(torch.nn.Module): return (prob_adj > 0).nonzero(as_tuple=False).t() - -#@register_model("gcn") -#class AutoGCN(ClassificationModel): +# @register_model("gcn") +# class AutoGCN(ClassificationModel): @register_model("gcn") class AutoGCN(BaseModel): r""" diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index 8e70ff2..a7fe443 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -4,9 +4,10 @@ from .base import ( Evaluation, BaseNodeClassificationTrainer, BaseGraphClassificationTrainer, - BaseLinkPredictionTrainer + BaseLinkPredictionTrainer, ) + def register_trainer(name): def register_trainer_cls(cls): if name in TRAINER_DICT: @@ -20,6 +21,7 @@ def register_trainer(name): return register_trainer_cls + from .graph_classification_full import GraphClassificationFullTrainer from .node_classification_full import NodeClassificationFullTrainer from .link_prediction import LinkPredictionTrainer @@ -39,5 +41,5 @@ __all__ = [ "Auc", "Logloss", "Mrr", - "get_feval" + "get_feval", ] diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index 76c6a61..ba36e74 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -403,6 +403,7 @@ class BaseGraphClassificationTrainer(_BaseClassificationTrainer): model, num_features, num_classes, device, init, feval, loss ) + class BaseLinkPredictionTrainer(_BaseClassificationTrainer): def __init__( self, @@ -417,4 +418,4 @@ class BaseLinkPredictionTrainer(_BaseClassificationTrainer): ): super(BaseLinkPredictionTrainer, self).__init__( model, num_features, 2, device, init, feval, loss - ) \ No newline at end of file + ) diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py index d36f708..db16fd3 100644 --- a/autogl/module/train/evaluation.py +++ b/autogl/module/train/evaluation.py @@ -120,7 +120,9 @@ class Auc(Evaluation): if len(predict.shape) == 1: pos_predict = predict else: - assert predict.shape[1] == 2, "Cannot use auc on given data with %d classes!" % (predict.shape[1]) + assert ( + predict.shape[1] == 2 + ), "Cannot use auc on given data with %d classes!" % (predict.shape[1]) pos_predict = predict[:, 1] return roc_auc_score(label, pos_predict) @@ -169,7 +171,9 @@ class Mrr(Evaluation): Should return: the evaluation result (float) """ if len(predict.shape) == 2: - assert predict.shape[1] == 2, "Cannot use mrr on given data with %d classes!" % (predict.shape[1]) + assert ( + predict.shape[1] == 2 + ), "Cannot use mrr on given data with %d classes!" 
% (predict.shape[1]) pos_predict = predict[:, 1] else: pos_predict = predict diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py index a444ae3..56cf3fe 100644 --- a/autogl/module/train/link_prediction.py +++ b/autogl/module/train/link_prediction.py @@ -13,6 +13,7 @@ from ...utils import get_logger LOGGER = get_logger("link prediction trainer") + def get_feval(feval): if isinstance(feval, str): return EVALUATE_DICT[feval] @@ -65,12 +66,12 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): max_epoch=100, early_stopping_round=101, weight_decay=1e-4, - device='auto', + device="auto", init=True, feval=[Auc], loss="binary_cross_entropy_with_logits", *args, - **kwargs + **kwargs, ): super().__init__(model, num_features, device, init, feval, loss) @@ -189,19 +190,27 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): self.model.model.train() neg_edge_index = negative_sampling( - edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes, - num_neg_samples=data.train_pos_edge_index.size(1)) + edge_index=data.train_pos_edge_index, + num_nodes=data.num_nodes, + num_neg_samples=data.train_pos_edge_index.size(1), + ) optimizer.zero_grad() # res = self.model.model.forward(data) z = self.model.model.encode(data) - link_logits = self.model.model.decode(z, data.train_pos_edge_index, neg_edge_index) - link_labels = self.get_link_labels(data.train_pos_edge_index, neg_edge_index) + link_logits = self.model.model.decode( + z, data.train_pos_edge_index, neg_edge_index + ) + link_labels = self.get_link_labels( + data.train_pos_edge_index, neg_edge_index + ) # loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) if hasattr(F, self.loss): loss = getattr(F, self.loss)(link_logits, link_labels) else: - raise TypeError("PyTorch does not support loss type {}".format(self.loss)) + raise TypeError( + "PyTorch does not support loss type {}".format(self.loss) + ) loss.backward() optimizer.step() @@ -211,7 +220,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): feval = self.feval[0] else: feval = self.feval - val_loss = self.evaluate([data], mask='val', feval=feval) + val_loss = self.evaluate([data], mask="val", feval=feval) if feval.is_higher_better() is True: val_loss = -val_loss self.early_stopping(val_loss, self.model.model) @@ -261,10 +270,8 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): self.train_only(data) if keep_valid_result: self.valid_result = self.predict_only(data) - self.valid_result_prob = self.predict_proba(dataset, 'val') - self.valid_score = self.evaluate( - dataset, mask='val', feval=self.feval - ) + self.valid_result_prob = self.predict_proba(dataset, "val") + self.valid_score = self.evaluate(dataset, mask="val", feval=self.feval) def predict(self, dataset, mask=None): """ @@ -304,11 +311,11 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): data = dataset[0] data = data.to(self.device) if mask in ["train", "val", "test"]: - pos_edge_index = data[f'{mask}_pos_edge_index'] - neg_edge_index = data[f'{mask}_neg_edge_index'] + pos_edge_index = data[f"{mask}_pos_edge_index"] + neg_edge_index = data[f"{mask}_neg_edge_index"] else: - pos_edge_index = data[f'test_pos_edge_index'] - neg_edge_index = data[f'test_neg_edge_index'] + pos_edge_index = data[f"test_pos_edge_index"] + neg_edge_index = data[f"test_neg_edge_index"] self.model.model.eval() with torch.no_grad(): @@ -400,11 +407,11 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): feval = get_feval(feval) if mask in ["train", "val", "test"]: - 
pos_edge_index = data[f'{mask}_pos_edge_index'] - neg_edge_index = data[f'{mask}_neg_edge_index'] + pos_edge_index = data[f"{mask}_pos_edge_index"] + neg_edge_index = data[f"{mask}_neg_edge_index"] else: - pos_edge_index = data[f'test_pos_edge_index'] - neg_edge_index = data[f'test_neg_edge_index'] + pos_edge_index = data[f"test_pos_edge_index"] + neg_edge_index = data[f"test_neg_edge_index"] self.model.model.eval() with torch.no_grad(): @@ -480,7 +487,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): feval=self.feval, init=True, *self.args, - **self.kwargs + **self.kwargs, ) return ret @@ -507,5 +514,5 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer): def get_link_labels(self, pos_edge_index, neg_edge_index): E = pos_edge_index.size(1) + neg_edge_index.size(1) link_labels = torch.zeros(E, dtype=torch.float, device=self.device) - link_labels[:pos_edge_index.size(1)] = 1. - return link_labels \ No newline at end of file + link_labels[: pos_edge_index.size(1)] = 1.0 + return link_labels diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py index 040da36..71fe591 100644 --- a/autogl/solver/classifier/link_predictor.py +++ b/autogl/solver/classifier/link_predictor.py @@ -195,8 +195,8 @@ class AutoLinkPredictor(BaseClassifier): def _to_prob(self, sig_prob: np.ndarray): nelements = len(sig_prob) prob = np.zeros([nelements, 2]) - prob[:,0] = 1 - sig_prob - prob[:,1] = sig_prob + prob[:, 0] = 1 - sig_prob + prob[:, 1] = sig_prob return prob # pylint: disable=arguments-differ @@ -277,14 +277,19 @@ class AutoLinkPredictor(BaseClassifier): if train_split is not None and val_split is not None: utils.split_edges(dataset, train_split, val_split) else: - assert all([hasattr(dataset.data, f'{name}') for name in [ - 'train_pos_edge_index', - 'train_neg_adj_mask', - 'val_pos_edge_index', - 'val_neg_edge_index', - 'test_pos_edge_index', - 'test_neg_edge_index' - ]]), ( + assert all( + [ + hasattr(dataset.data, f"{name}") + for name in [ + "train_pos_edge_index", + "train_neg_adj_mask", + "val_pos_edge_index", + "val_neg_edge_index", + "test_pos_edge_index", + "test_neg_edge_index", + ] + ] + ), ( "The dataset has no default train/val split! Please manually pass " "train and val ratio." 
) @@ -307,7 +312,9 @@ class AutoLinkPredictor(BaseClassifier): num_features=self.dataset[0].x.shape[1], feval=evaluator_list, device=self.runtime_device, - loss="binary_cross_entropy_with_logits" if not hasattr(dataset, "loss") else dataset.loss, + loss="binary_cross_entropy_with_logits" + if not hasattr(dataset, "loss") + else dataset.loss, ) # train the models and tune hpo @@ -330,7 +337,9 @@ class AutoLinkPredictor(BaseClassifier): name = optimized.get_name_with_hp() + "_idx%d" % (idx) names.append(name) performance_on_valid, _ = optimized.get_valid_score(return_major=False) - result_valid.append(self._to_prob(optimized.get_valid_predict_proba().cpu().numpy())) + result_valid.append( + self._to_prob(optimized.get_valid_predict_proba().cpu().numpy()) + ) self.leaderboard.insert_model_performance( name, dict( @@ -344,10 +353,13 @@ class AutoLinkPredictor(BaseClassifier): # fit the ensemble model if self.ensemble_module is not None: - pos_edge_index, neg_edge_index = self.dataset[0].val_pos_edge_index, self.dataset[0].val_neg_edge_index + pos_edge_index, neg_edge_index = ( + self.dataset[0].val_pos_edge_index, + self.dataset[0].val_neg_edge_index, + ) E = pos_edge_index.size(1) + neg_edge_index.size(1) link_labels = torch.zeros(E, dtype=torch.float) - link_labels[:pos_edge_index.size(1)] = 1. + link_labels[: pos_edge_index.size(1)] = 1.0 performance = self.ensemble_module.fit( result_valid, @@ -519,10 +531,12 @@ class AutoLinkPredictor(BaseClassifier): names = [] for model_name in self.trained_models: predict_result.append( - self._to_prob(self._predict_proba_by_name(dataset, model_name, mask)) + self._to_prob( + self._predict_proba_by_name(dataset, model_name, mask) + ) ) names.append(model_name) - return self.ensemble_module.ensemble(predict_result, names)[:,1] + return self.ensemble_module.ensemble(predict_result, names)[:, 1] if use_ensemble and self.ensemble_module is None: LOGGER.warning( From 8cce6ab539bdd97ffb2218f6a7dafd44f5ca73d5 Mon Sep 17 00:00:00 2001 From: generall Date: Fri, 18 Jun 2021 10:34:59 +0800 Subject: [PATCH 081/144] add graphnas macro space --- autogl/module/nas/algorithm/rl.py | 1 + autogl/module/nas/space/graph_nas.py | 3 + autogl/module/nas/space/graph_nas_macro.py | 620 +++++++++++++++++++++ examples/test_graph_nas_rl.py | 3 +- 4 files changed, 626 insertions(+), 1 deletion(-) create mode 100644 autogl/module/nas/space/graph_nas_macro.py diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py index 18f92ad..9d38480 100644 --- a/autogl/module/nas/algorithm/rl.py +++ b/autogl/module/nas/algorithm/rl.py @@ -373,6 +373,7 @@ class RL(BaseNAS): result = self.controller.resample() self.arch=self.model.export(result,device=self.device) self.selection=result + def export(self): self.controller.eval() with torch.no_grad(): diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 5d16a06..b56c003 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -56,6 +56,7 @@ class StrModule(nn.Module): def __repr__(self): return '{}({})'.format(self.__class__.__name__,self.str) + def act_map(act): if act == "linear": return lambda x: x @@ -75,8 +76,10 @@ def act_map(act): return torch.nn.functional.leaky_relu else: raise Exception("wrong activate function") + def act_map_nn(act): return LambdaModule(act_map(act)) + def map_nn(l): return [StrModule(x) for x in l] diff --git a/autogl/module/nas/space/graph_nas_macro.py b/autogl/module/nas/space/graph_nas_macro.py new file mode 
100644
index 0000000..a59e8bb
--- /dev/null
+++ b/autogl/module/nas/space/graph_nas_macro.py
@@ -0,0 +1,620 @@
+import torch
+import typing as _typ
+import torch.nn as nn
+import torch.nn.functional as F
+
+from copy import deepcopy
+
+from .base import BaseSpace
+from ...model import BaseModel
+from .graph_nas import act_map
+
+from torch.nn import Parameter
+from torch_geometric.nn.inits import glorot, zeros
+from torch_geometric.utils import remove_self_loops, add_self_loops, add_remaining_self_loops, softmax
+from torch_scatter import scatter_add
+import torch_scatter
+
+from nni.nas.pytorch.fixed import apply_fixed_architecture
+
+import inspect
+import sys
+
+special_args = [
+    'edge_index', 'edge_index_i', 'edge_index_j', 'size', 'size_i', 'size_j'
+]
+__size_error_msg__ = ('All tensors which should get mapped to the same source '
+                      'or target nodes must be of same size in dimension 0.')
+
+is_python2 = sys.version_info[0] < 3
+getargspec = inspect.getargspec if is_python2 else inspect.getfullargspec
+
+def scatter_(name, src, index, dim_size=None):
+    r"""Aggregates all values from the :attr:`src` tensor at the indices
+    specified in the :attr:`index` tensor along the first dimension.
+    If multiple indices reference the same location, their contributions
+    are aggregated according to :attr:`name` (either :obj:`"add"`,
+    :obj:`"mean"` or :obj:`"max"`).
+
+    Args:
+        name (string): The aggregation to use (:obj:`"add"`, :obj:`"mean"`,
+            :obj:`"max"`).
+        src (Tensor): The source tensor.
+        index (LongTensor): The indices of elements to scatter.
+        dim_size (int, optional): Automatically create output tensor with size
+            :attr:`dim_size` in the first dimension. If set to :attr:`None`, a
+            minimal sized output tensor is returned. (default: :obj:`None`)
+
+    :rtype: :class:`Tensor`
+    """
+
+    assert name in ['add', 'mean', 'max']
+
+    op = getattr(torch_scatter, 'scatter_{}'.format(name))
+    fill_value = -1e9 if name == 'max' else 0
+
+    out = op(src, index, 0, None, dim_size)
+    if isinstance(out, tuple):
+        out = out[0]
+
+    if name == 'max':
+        out[out == fill_value] = 0
+
+    return out
+
+class MessagePassing(torch.nn.Module):
+
+    def __init__(self, aggr='add', flow='source_to_target'):
+        super(MessagePassing, self).__init__()
+
+        self.aggr = aggr
+        assert self.aggr in ['add', 'mean', 'max']
+
+        self.flow = flow
+        assert self.flow in ['source_to_target', 'target_to_source']
+
+        self.__message_args__ = getargspec(self.message)[0][1:]
+        self.__special_args__ = [(i, arg)
+                                 for i, arg in enumerate(self.__message_args__)
+                                 if arg in special_args]
+        self.__message_args__ = [
+            arg for arg in self.__message_args__ if arg not in special_args
+        ]
+        self.__update_args__ = getargspec(self.update)[0][2:]
+
+    def propagate(self, edge_index, size=None, **kwargs):
+        r"""The initial call to start propagating messages.
+
+        Args:
+            edge_index (Tensor): The indices of a general (sparse) assignment
+                matrix with shape :obj:`[N, M]` (can be directed or
+                undirected).
+            size (list or tuple, optional): The size :obj:`[N, M]` of the
+                assignment matrix. If set to :obj:`None`, the size is
+                inferred automatically. (default: :obj:`None`)
+            **kwargs: Any additional data which is needed to construct messages
+                and to update node embeddings.
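+
+        Example (illustrative only; it mirrors the call made later in this
+        file by :meth:`GeoLayer.forward`)::
+
+            # project node features to [num_nodes, heads, out_channels],
+            # then propagate messages along the given edges
+            x = torch.mm(x, self.weight).view(-1, self.heads, self.out_channels)
+            out = self.propagate(edge_index, x=x, num_nodes=x.size(0))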
+ """ + + size = [None, None] if size is None else list(size) + assert len(size) == 2 + + i, j = (0, 1) if self.flow == 'target_to_source' else (1, 0) + ij = {"_i": i, "_j": j} + + message_args = [] + for arg in self.__message_args__: + if arg[-2:] in ij.keys(): + tmp = kwargs.get(arg[:-2], None) + if tmp is None: # pragma: no cover + message_args.append(tmp) + else: + idx = ij[arg[-2:]] + if isinstance(tmp, tuple) or isinstance(tmp, list): + assert len(tmp) == 2 + if tmp[1 - idx] is not None: + if size[1 - idx] is None: + size[1 - idx] = tmp[1 - idx].size(0) + if size[1 - idx] != tmp[1 - idx].size(0): + raise ValueError(__size_error_msg__) + tmp = tmp[idx] + + if size[idx] is None: + size[idx] = tmp.size(0) + if size[idx] != tmp.size(0): + raise ValueError(__size_error_msg__) + + tmp = torch.index_select(tmp, 0, edge_index[idx]) + message_args.append(tmp) + else: + message_args.append(kwargs.get(arg, None)) + + size[0] = size[1] if size[0] is None else size[0] + size[1] = size[0] if size[1] is None else size[1] + + kwargs['edge_index'] = edge_index + kwargs['size'] = size + + for (idx, arg) in self.__special_args__: + if arg[-2:] in ij.keys(): + message_args.insert(idx, kwargs[arg[:-2]][ij[arg[-2:]]]) + else: + message_args.insert(idx, kwargs[arg]) + + update_args = [kwargs[arg] for arg in self.__update_args__] + + out = self.message(*message_args) + if self.aggr in ["add", "mean", "max"]: + out = scatter_(self.aggr, out, edge_index[i], dim_size=size[i]) + else: + pass + out = self.update(out, *update_args) + + return out + + def message(self, x_j): # pragma: no cover + r"""Constructs messages in analogy to :math:`\phi_{\mathbf{\Theta}}` + for each edge in :math:`(i,j) \in \mathcal{E}`. + Can take any argument which was initially passed to :meth:`propagate`. + In addition, features can be lifted to the source node :math:`i` and + target node :math:`j` by appending :obj:`_i` or :obj:`_j` to the + variable name, *.e.g.* :obj:`x_i` and :obj:`x_j`.""" + + return x_j + + def update(self, aggr_out): # pragma: no cover + r"""Updates node embeddings in analogy to + :math:`\gamma_{\mathbf{\Theta}}` for each node + :math:`i \in \mathcal{V}`. 
+        Takes in the output of aggregation as first argument and any argument
+        which was initially passed to :meth:`propagate`."""
+
+        return aggr_out
+
+class GeoLayer(MessagePassing):
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 heads=1,
+                 concat=True,
+                 negative_slope=0.2,
+                 dropout=0,
+                 bias=True,
+                 att_type="gat",
+                 agg_type="sum",
+                 pool_dim=0):
+        if agg_type in ["sum", "mlp"]:
+            super(GeoLayer, self).__init__('add')
+        elif agg_type in ["mean", "max"]:
+            super(GeoLayer, self).__init__(agg_type)
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.heads = heads
+        self.concat = concat
+        self.negative_slope = negative_slope
+        self.dropout = dropout
+        self.att_type = att_type
+        self.agg_type = agg_type
+
+        # GCN weight
+        self.gcn_weight = None
+
+        self.weight = Parameter(
+            torch.Tensor(in_channels, heads * out_channels))
+        self.att = Parameter(torch.Tensor(1, heads, 2 * out_channels))
+
+        if bias and concat:
+            self.bias = Parameter(torch.Tensor(heads * out_channels))
+        elif bias and not concat:
+            self.bias = Parameter(torch.Tensor(out_channels))
+        else:
+            self.register_parameter('bias', None)
+
+        if self.att_type in ["generalized_linear"]:
+            self.general_att_layer = torch.nn.Linear(out_channels, 1, bias=False)
+
+        if self.agg_type in ["mean", "max", "mlp"]:
+            if pool_dim <= 0:
+                pool_dim = 128
+        self.pool_dim = pool_dim
+        if pool_dim != 0:
+            self.pool_layer = torch.nn.ModuleList()
+            self.pool_layer.append(torch.nn.Linear(self.out_channels, self.pool_dim))
+            self.pool_layer.append(torch.nn.Linear(self.pool_dim, self.out_channels))
+        else:
+            pass
+        self.reset_parameters()
+
+    @staticmethod
+    def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
+        if edge_weight is None:
+            edge_weight = torch.ones((edge_index.size(1), ),
+                                     dtype=dtype,
+                                     device=edge_index.device)
+
+        fill_value = 1 if not improved else 2
+        edge_index, edge_weight = add_remaining_self_loops(
+            edge_index, edge_weight, fill_value, num_nodes)
+
+        row, col = edge_index
+        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
+        deg_inv_sqrt = deg.pow(-0.5)
+        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
+
+        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
+
+    def reset_parameters(self):
+        glorot(self.weight)
+        glorot(self.att)
+        zeros(self.bias)
+
+        if self.att_type in ["generalized_linear"]:
+            glorot(self.general_att_layer.weight)
+
+        if self.pool_dim != 0:
+            for layer in self.pool_layer:
+                glorot(layer.weight)
+                zeros(layer.bias)
+
+    def forward(self, x, edge_index):
+        """"""
+        edge_index, _ = remove_self_loops(edge_index)
+        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
+        # prepare
+        x = torch.mm(x, self.weight).view(-1, self.heads, self.out_channels)
+        return self.propagate(edge_index, x=x, num_nodes=x.size(0))
+
+    def message(self, x_i, x_j, edge_index, num_nodes):
+
+        if self.att_type == "const":
+            if self.training and self.dropout > 0:
+                x_j = F.dropout(x_j, p=self.dropout, training=True)
+            neighbor = x_j
+        elif self.att_type == "gcn":
+            if self.gcn_weight is None or self.gcn_weight.size(0) != x_j.size(0):  # gcn_weight must be recomputed when the graph changes
+                _, norm = self.norm(edge_index, num_nodes, None)
+                self.gcn_weight = norm
+            neighbor = self.gcn_weight.view(-1, 1, 1) * x_j
+        else:
+            # Compute attention coefficients.
+            alpha = self.apply_attention(edge_index, num_nodes, x_i, x_j)
+            alpha = softmax(alpha, edge_index[0], num_nodes=num_nodes)
+            # Sample attention coefficients stochastically.
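+            # (as in GAT's attention dropout: dropping normalized attention
+            #  weights makes each training step aggregate over a random
+            #  subset of neighbors, which regularizes the layer)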
+            if self.training and self.dropout > 0:
+                alpha = F.dropout(alpha, p=self.dropout, training=True)
+
+            neighbor = x_j * alpha.view(-1, self.heads, 1)
+        if self.pool_dim > 0:
+            for layer in self.pool_layer:
+                neighbor = layer(neighbor)
+        return neighbor
+
+    def apply_attention(self, edge_index, num_nodes, x_i, x_j):
+        if self.att_type == "gat":
+            alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
+            alpha = F.leaky_relu(alpha, self.negative_slope)
+
+        elif self.att_type == "gat_sym":
+            wl = self.att[:, :, :self.out_channels]  # weight left
+            wr = self.att[:, :, self.out_channels:]  # weight right
+            alpha = (x_i * wl).sum(dim=-1) + (x_j * wr).sum(dim=-1)
+            alpha_2 = (x_j * wl).sum(dim=-1) + (x_i * wr).sum(dim=-1)
+            alpha = F.leaky_relu(alpha, self.negative_slope) + F.leaky_relu(alpha_2, self.negative_slope)
+
+        elif self.att_type == "linear":
+            wl = self.att[:, :, :self.out_channels]  # weight left
+            wr = self.att[:, :, self.out_channels:]  # weight right
+            al = x_j * wl
+            ar = x_j * wr
+            alpha = al.sum(dim=-1) + ar.sum(dim=-1)
+            alpha = torch.tanh(alpha)
+        elif self.att_type == "cos":
+            wl = self.att[:, :, :self.out_channels]  # weight left
+            wr = self.att[:, :, self.out_channels:]  # weight right
+            alpha = x_i * wl * x_j * wr
+            alpha = alpha.sum(dim=-1)
+
+        elif self.att_type == "generalized_linear":
+            wl = self.att[:, :, :self.out_channels]  # weight left
+            wr = self.att[:, :, self.out_channels:]  # weight right
+            al = x_i * wl
+            ar = x_j * wr
+            alpha = al + ar
+            alpha = torch.tanh(alpha)
+            alpha = self.general_att_layer(alpha)
+        else:
+            raise Exception("Wrong attention type:", self.att_type)
+        return alpha
+
+    def update(self, aggr_out):
+        if self.concat is True:
+            aggr_out = aggr_out.view(-1, self.heads * self.out_channels)
+        else:
+            aggr_out = aggr_out.mean(dim=1)
+
+        if self.bias is not None:
+            aggr_out = aggr_out + self.bias
+        return aggr_out
+
+    def __repr__(self):
+        return '{}({}, {}, heads={})'.format(self.__class__.__name__,
+                                             self.in_channels,
+                                             self.out_channels, self.heads)
+
+    def get_param_dict(self):
+        params = {}
+        key = f"{self.att_type}_{self.agg_type}_{self.in_channels}_{self.out_channels}_{self.heads}"
+        weight_key = key + "_weight"
+        att_key = key + "_att"
+        agg_key = key + "_agg"
+        bias_key = key + "_bias"
+
+        params[weight_key] = self.weight
+        params[att_key] = self.att
+        params[bias_key] = self.bias
+        if hasattr(self, "pool_layer"):
+            params[agg_key] = self.pool_layer.state_dict()
+
+        return params
+
+    def load_param(self, params):
+        key = f"{self.att_type}_{self.agg_type}_{self.in_channels}_{self.out_channels}_{self.heads}"
+        weight_key = key + "_weight"
+        att_key = key + "_att"
+        agg_key = key + "_agg"
+        bias_key = key + "_bias"
+
+        if weight_key in params:
+            self.weight = params[weight_key]
+
+        if att_key in params:
+            self.att = params[att_key]
+
+        if bias_key in params:
+            self.bias = params[bias_key]
+
+        if agg_key in params and hasattr(self, "pool_layer"):
+            self.pool_layer.load_state_dict(params[agg_key])
+
+class StrModule(nn.Module):
+    def __init__(self, lambd):
+        super().__init__()
+        self.str = lambd
+
+    def forward(self, *args, **kwargs):
+        return self.str
+
+    def __repr__(self):
+        return '{}({})'.format(self.__class__.__name__, self.str)
+
+def map_nn(l):
+    return [StrModule(x) for x in l]
+
+class GraphNasMacroNodeClfSpace(BaseSpace):
+    def __init__(
+        self,
+        hidden_dim: _typ.Optional[int] = 64,
+        layer_number: _typ.Optional[int] = 2,
+        dropout: _typ.Optional[float] = 0.9,
+        input_dim: _typ.Optional[int] = None,
+        output_dim: _typ.Optional[int] = None,
+        ops: _typ.Tuple = None,
+        init: bool = False,
+        search_act_con=False
+    ):
+        super().__init__()
+        self.layer_number = layer_number
+        self.hidden_dim = hidden_dim
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.ops = ops
+        self.dropout = dropout
+        self.search_act_con = search_act_con
+
+    def _instantiate(
+        self,
+        hidden_dim: _typ.Optional[int] = None,
+        layer_number: _typ.Optional[int] = None,
+        input_dim: _typ.Optional[int] = None,
+        output_dim: _typ.Optional[int] = None,
+        ops: _typ.Tuple = None,
+        dropout=None
+    ):
+        self.hidden_dim = hidden_dim or self.hidden_dim
+        self.layer_number = layer_number or self.layer_number
+        self.input_dim = input_dim or self.input_dim
+        self.output_dim = output_dim or self.output_dim
+        self.ops = ops or self.ops
+        self.dropout = dropout or self.dropout
+
+        num_feat = self.input_dim
+        num_label = self.output_dim
+
+        layer_nums = self.layer_number
+        state_num = 5
+
+        # build hidden layer
+        for i in range(layer_nums):
+            # extract layer information
+            setattr(self, f"attention_{i}", self.setLayerChoice(i * state_num + 0, map_nn(["gat", "gcn", "cos", "const", "gat_sym", 'linear', 'generalized_linear']), key=f"attention_{i}"))
+            setattr(self, f"aggregator_{i}", self.setLayerChoice(i * state_num + 1, map_nn(["sum", "mean", "max", "mlp", ]), key=f"aggregator_{i}"))
+            setattr(self, f"act_{i}", self.setLayerChoice(i * state_num + 2, map_nn(["sigmoid", "tanh", "relu", "linear",
+                                                                                     "softplus", "leaky_relu", "relu6", "elu"]), key=f"act_{i}"))
+            setattr(self, f"head_{i}", self.setLayerChoice(i * state_num + 3, map_nn([1, 2, 4, 6, 8, 16]), key=f"head_{i}"))
+            if i < layer_nums - 1:
+                setattr(self, f"out_channels_{i}", self.setLayerChoice(i * state_num + 4, map_nn([4, 8, 16, 32, 64, 128, 256]), key=f"out_channels_{i}"))
+
+    def export(self, selection, device) -> BaseModel:
+        sel_list = []
+        for i in range(self.layer_number):
+            sel_list.append(["gat", "gcn", "cos", "const", "gat_sym", 'linear', 'generalized_linear'][selection[f"attention_{i}"]])
+            sel_list.append(["sum", "mean", "max", "mlp", ][selection[f"aggregator_{i}"]])
+            sel_list.append(["sigmoid", "tanh", "relu", "linear", "softplus", "leaky_relu", "relu6", "elu"][selection[f"act_{i}"]])
+            sel_list.append([1, 2, 4, 6, 8, 16][selection[f"head_{i}"]])
+            if i < self.layer_number - 1:
+                sel_list.append([4, 8, 16, 32, 64, 128, 256][selection[f"out_channels_{i}"]])
+        sel_list.append(self.output_dim)
+        model = ModelBox(device, sel_list, self.input_dim, self.output_dim, self.dropout, multi_label=False, batch_normal=False, layers=self.layer_number)
+        return model
+
+class ModelBox(BaseModel):
+    def __init__(self, device, *args, **kwargs):
+        super().__init__(init=True)
+        self.init = True
+        self.space = []
+        self.hyperparams = {}
+        space_model = GraphNet(*args, **kwargs)
+        self._model = space_model.to(device)
+        self.num_features = self._model.num_feat
+        self.num_classes = self._model.num_label
+        self.params = {"num_class": self.num_classes, "features_num": self.num_features}
+        self.device = device
+
+    def to(self, device):
+        if isinstance(device, (str, torch.device)):
+            self.device = device
+        return super().to(device)
+
+    def forward(self, *args, **kwargs):
+        return self._model(*args, **kwargs)
+
+    def from_hyper_parameter(self, hp):
+        """
+        Receive no hp; just copy self and reset the learnable parameters.
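+
+        Note: this assumes ``self.selection`` has already been set by the
+        search algorithm; the fixed architecture is then re-applied to the
+        fresh copy via ``apply_fixed_architecture``.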
+ """ + + ret_self = deepcopy(self) + ret_self._model.instantiate() + apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False) + ret_self.to(self.device) + return ret_self + + @property + def model(self): + return self._model + +class GraphNet(BaseModel): + + def __init__(self, actions, num_feat, num_label, drop_out=0.6, multi_label=False, batch_normal=True, state_num=5, + residual=False, layers = 2): + self.residual = residual + self.batch_normal = batch_normal + self.layer_nums = layers + self.multi_label = multi_label + self.num_feat = num_feat + self.num_label = num_label + self.dropout = drop_out + + super().__init__() + self.build_model(actions, batch_normal, drop_out, num_feat, num_label, state_num) + + def build_model(self, actions, batch_normal, drop_out, num_feat, num_label, state_num): + if self.residual: + self.fcs = torch.nn.ModuleList() + if self.batch_normal: + self.bns = torch.nn.ModuleList() + self.layers = torch.nn.ModuleList() + self.acts = [] + self.gates = torch.nn.ModuleList() + self.build_hidden_layers(actions, batch_normal, drop_out, self.layer_nums, num_feat, num_label, state_num) + + def build_hidden_layers(self, actions, batch_normal, drop_out, layer_nums, num_feat, num_label, state_num=6): + + # build hidden layer + for i in range(layer_nums): + + if i == 0: + in_channels = num_feat + else: + in_channels = out_channels * head_num + + # extract layer information + attention_type = actions[i * state_num + 0] + aggregator_type = actions[i * state_num + 1] + act = actions[i * state_num + 2] + head_num = actions[i * state_num + 3] + out_channels = actions[i * state_num + 4] + concat = True + if i == layer_nums - 1: + concat = False + if self.batch_normal: + self.bns.append(torch.nn.BatchNorm1d(in_channels, momentum=0.5)) + self.layers.append( + GeoLayer(in_channels, out_channels, head_num, concat, dropout=self.dropout, + att_type=attention_type, agg_type=aggregator_type, )) + self.acts.append(act_map(act)) + if self.residual: + if concat: + self.fcs.append(torch.nn.Linear(in_channels, out_channels * head_num)) + else: + self.fcs.append(torch.nn.Linear(in_channels, out_channels)) + + def forward(self, data): + output, edge_index_all = data.x, data.edge_index # x [2708,1433] ,[2, 10556] + if self.residual: + for i, (act, layer, fc) in enumerate(zip(self.acts, self.layers, self.fcs)): + output = F.dropout(output, p=self.dropout, training=self.training) + if self.batch_normal: + output = self.bns[i](output) + + output = act(layer(output, edge_index_all) + fc(output)) + else: + for i, (act, layer) in enumerate(zip(self.acts, self.layers)): + output = F.dropout(output, p=self.dropout, training=self.training) + if self.batch_normal: + output = self.bns[i](output) + output = act(layer(output, edge_index_all)) + if not self.multi_label: + output = F.log_softmax(output, dim=1) + return output + + def __repr__(self): + result_lines = "" + for each in self.layers: + result_lines += str(each) + return result_lines + + @staticmethod + def merge_param(old_param, new_param, update_all): + for key in new_param: + if update_all or key not in old_param: + old_param[key] = new_param[key] + return old_param + + def get_param_dict(self, old_param=None, update_all=True): + if old_param is None: + result = {} + else: + result = old_param + for i in range(self.layer_nums): + key = "layer_%d" % i + new_param = self.layers[i].get_param_dict() + if key in result: + new_param = self.merge_param(result[key], new_param, update_all) + result[key] = new_param + else: + 
result[key] = new_param + if self.residual: + for i, fc in enumerate(self.fcs): + key = f"layer_{i}_fc_{fc.weight.size(0)}_{fc.weight.size(1)}" + result[key] = self.fcs[i] + if self.batch_normal: + for i, bn in enumerate(self.bns): + key = f"layer_{i}_fc_{bn.weight.size(0)}" + result[key] = self.bns[i] + return result + + def load_param(self, param): + if param is None: + return + + for i in range(self.layer_nums): + self.layers[i].load_param(param["layer_%d" % i]) + + if self.residual: + for i, fc in enumerate(self.fcs): + key = f"layer_{i}_fc_{fc.weight.size(0)}_{fc.weight.size(1)}" + if key in param: + self.fcs[i] = param[key] + if self.batch_normal: + for i, bn in enumerate(self.bns): + key = f"layer_{i}_fc_{bn.weight.size(0)}" + if key in param: + self.bns[i] = param[key] diff --git a/examples/test_graph_nas_rl.py b/examples/test_graph_nas_rl.py index 5ec5480..439888a 100644 --- a/examples/test_graph_nas_rl.py +++ b/examples/test_graph_nas_rl.py @@ -7,6 +7,7 @@ from autogl.solver import AutoNodeClassifier from autogl.module.train import NodeClassificationFullTrainer from autogl.module.nas import Darts, OneShotEstimator from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace +from autogl.module.nas.space.graph_nas_macro import GraphNasMacroNodeClfSpace from autogl.module.train import Acc from autogl.module.nas.algorithm.enas import Enas from autogl.module.nas.algorithm.rl import RL @@ -33,7 +34,7 @@ if __name__ == '__main__': lr_scheduler_type=None,), nas_algorithms=[RL(num_epochs=400)], #nas_algorithms=[Darts(num_epochs=200)], - nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=16,search_act_con=True,layer_number=2)], + nas_spaces=[GraphNasMacroNodeClfSpace(hidden_dim=16,search_act_con=True,layer_number=2)], nas_estimators=[TrainEstimator()] ) solver.fit(dataset) From 57d01800e2d022285ae5e07ac85fda62d94dc913 Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Fri, 18 Jun 2021 06:21:10 +0000 Subject: [PATCH 082/144] add random search;achieve 0.82 in 400 epochs --- autogl/module/nas/algorithm/random_search.py | 81 ++++++++++++++++++++ examples/test_graph_nas_rl.py | 4 +- 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 autogl/module/nas/algorithm/random_search.py diff --git a/autogl/module/nas/algorithm/random_search.py b/autogl/module/nas/algorithm/random_search.py new file mode 100644 index 0000000..797c6dd --- /dev/null +++ b/autogl/module/nas/algorithm/random_search.py @@ -0,0 +1,81 @@ +import copy +import logging + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .base import BaseNAS +from ..space import BaseSpace +from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module +from nni.nas.pytorch.fixed import apply_fixed_architecture +from tqdm import tqdm +_logger = logging.getLogger(__name__) +from .rl import PathSamplingLayerChoice,PathSamplingInputChoice +import numpy as np +class RSBox: + '''get selection space for model `space` ''' + def __init__(self,space): + self.model = space + self.nas_modules = [] + k2o = get_module_order(self.model) + replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules) + replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules) + self.nas_modules = sort_replaced_module(k2o, self.nas_modules) + nm=self.nas_modules + selection_range={} + for k,v in nm: + selection_range[k]=len(v) + self.selection_dict=selection_range + + + space_size=np.prod(list(selection_range.values())) + 
print(f'Using random search Box. Total space size: {space_size}') + print('Searching Space:',selection_range) + def export(self): + return self.selection_dict #{k:v}, means action ranges 0 to v-1 for layer named k + def sample(self): + # uniformly sample + selection={} + sdict=self.export() + for k,v in sdict.items(): + selection[k]=np.random.choice(range(v)) + return selection + +class RandomSearch(BaseNAS): + ''' + uniformly search + ''' + def __init__(self, device='cuda',num_epochs=400,disable_progress=False,*args,**kwargs): + super().__init__(device) + self.num_epochs=num_epochs + self.disable_progress=disable_progress + def search(self, space: BaseSpace, dset, estimator): + self.estimator=estimator + self.dataset=dset + self.space=space + self.box=RSBox(self.space) + arch_perfs=[] + cache={} + with tqdm(range(self.num_epochs),disable=self.disable_progress) as bar: + for i in bar: + selection=self.export() + # print(selection) + vec=tuple(list(selection.values())) + if vec not in cache: + self.arch=space.export(selection,self.device) + metric,loss=self._infer(mask='val') + arch_perfs.append([metric,selection]) + cache[vec]=metric + bar.set_postfix(acc=metric,max_acc=max(cache.values())) + selection=arch_perfs[np.argmax([x[0] for x in arch_perfs])][1] + arch=space.export(selection,self.device) + return arch + + def export(self): + arch=self.box.sample() + return arch + + def _infer(self,mask='train'): + metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask) + return metric, loss diff --git a/examples/test_graph_nas_rl.py b/examples/test_graph_nas_rl.py index 439888a..b5d4363 100644 --- a/examples/test_graph_nas_rl.py +++ b/examples/test_graph_nas_rl.py @@ -12,6 +12,7 @@ from autogl.module.train import Acc from autogl.module.nas.algorithm.enas import Enas from autogl.module.nas.algorithm.rl import RL from autogl.module.nas.estimator.one_shot import TrainEstimator +from autogl.module.nas.algorithm.random_search import RandomSearch import logging if __name__ == '__main__': logging.getLogger().setLevel(logging.WARNING) @@ -32,7 +33,8 @@ if __name__ == '__main__': feval=['acc'], loss="nll_loss", lr_scheduler_type=None,), - nas_algorithms=[RL(num_epochs=400)], + # nas_algorithms=[RL(num_epochs=400)], + nas_algorithms=[RandomSearch(num_epochs=400)], #nas_algorithms=[Darts(num_epochs=200)], nas_spaces=[GraphNasMacroNodeClfSpace(hidden_dim=16,search_act_con=True,layer_number=2)], nas_estimators=[TrainEstimator()] From aeef16fcca02486cefa0776856e0b29d968ce866 Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Fri, 18 Jun 2021 10:56:42 +0000 Subject: [PATCH 083/144] add graphnasrl --- autogl/module/nas/algorithm/rl.py | 163 +++++++++++++++++++-- autogl/module/nas/estimator/one_shot.py | 6 +- autogl/module/nas/space/graph_nas_macro.py | 2 +- examples/test_graph_nas_rl.py | 6 +- 4 files changed, 158 insertions(+), 19 deletions(-) diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py index 9d38480..3f011c0 100644 --- a/autogl/module/nas/algorithm/rl.py +++ b/autogl/module/nas/algorithm/rl.py @@ -11,6 +11,8 @@ from ..space import BaseSpace from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module from nni.nas.pytorch.fixed import apply_fixed_architecture from tqdm import tqdm +from datetime import datetime + _logger = logging.getLogger(__name__) def _get_mask(sampled, total): multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)] @@ -229,7 +231,7 @@ class 
ReinforceController(nn.Module): class RL(BaseNAS): """ - ENAS trainer. + RL in GraphNas. Parameters ---------- @@ -293,7 +295,7 @@ class RL(BaseNAS): self.n_warmup=n_warmup self.model_lr = model_lr self.model_wd = model_wd - self.log=open('log.txt','w') + self.log=open('../tmp/log.txt','w') def search(self, space: BaseSpace, dset, estimator): self.model = space self.dataset = dset#.to(self.device) @@ -318,16 +320,6 @@ class RL(BaseNAS): with tqdm(range(self.num_epochs)) as bar: for i in bar: l2=self._train_controller(i) - - # try: - # l2=self._train_controller(i) - # except Exception as e: - # print(e) - # nm=self.nas_modules - # for i in range(len(nm)): - # print(nm[i][1].sampled) - # # import pdb - # # pdb.set_trace() bar.set_postfix(reward_controller=l2) selection=self.export() @@ -382,3 +374,150 @@ class RL(BaseNAS): def _infer(self,mask='train'): metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask) return metric, loss + + +class GraphNasRL(BaseNAS): + """ + RL in GraphNas. + + Parameters + ---------- + model : nn.Module + PyTorch model to be trained. + loss : callable + Receives logits and ground truth label, return a loss tensor. + metrics : callable + Receives logits and ground truth label, return a dict of metrics. + reward_function : callable + Receives logits and ground truth label, return a tensor, which will be feeded to RL controller as reward. + optimizer : Optimizer + The optimizer used for optimizing the model. + num_epochs : int + Number of epochs planned for training. + dataset : Dataset + Dataset for training. Will be split for training weights and architecture weights. + batch_size : int + Batch size. + workers : int + Workers for data loading. + device : torch.device + ``torch.device("cpu")`` or ``torch.device("cuda")``. + log_frequency : int + Step count per logging. + grad_clip : float + Gradient clipping. Set to 0 to disable. Default: 5. + entropy_weight : float + Weight of sample entropy loss. + skip_weight : float + Weight of skip penalty loss. + baseline_decay : float + Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``. + ctrl_lr : float + Learning rate for RL controller. + ctrl_steps_aggregate : int + Number of steps that will be aggregated into one mini-batch for RL controller. + ctrl_steps : int + Number of mini-batches for each epoch of RL controller learning. + ctrl_kwargs : dict + Optional kwargs that will be passed to :class:`ReinforceController`. 
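+    n_warmup : int
+        Number of warm-up steps for weight training (currently stored but not
+        referenced by this trainer).
+    model_lr : float
+        Learning rate for sampled model weights (currently stored but not
+        referenced by this trainer).
+    model_wd : float
+        Weight decay for sampled model weights (currently stored but not
+        referenced by this trainer).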
+ """ + + def __init__(self, device='cuda', workers=4,log_frequency=None, + grad_clip=5., entropy_weight=0.0001, skip_weight=0, baseline_decay=0.95, + ctrl_lr=0.00035, ctrl_steps_aggregate=100, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,*args,**kwargs): + super().__init__(device) + self.device=device + self.num_epochs = kwargs.get("num_epochs", 10) + self.workers = workers + self.log_frequency = log_frequency + self.entropy_weight = entropy_weight + self.skip_weight = skip_weight + self.baseline_decay = baseline_decay + self.ctrl_steps_aggregate = ctrl_steps_aggregate + self.grad_clip = grad_clip + self.workers = workers + self.ctrl_kwargs=ctrl_kwargs + self.ctrl_lr=ctrl_lr + self.n_warmup=n_warmup + self.model_lr = model_lr + self.model_wd = model_wd + timestamp=datetime.now().strftime('%m%d-%H-%M-%S') + self.log=open(f'../tmp/log-{timestamp}.txt','w') + def search(self, space: BaseSpace, dset, estimator): + self.model = space + self.dataset = dset#.to(self.device) + self.estimator = estimator + # replace choice + self.nas_modules = [] + + k2o = get_module_order(self.model) + replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules) + replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules) + self.nas_modules = sort_replaced_module(k2o, self.nas_modules) + + # to device + self.model = self.model.to(self.device) + # fields + self.nas_fields = [ReinforceField(name, len(module), + isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1) + for name, module in self.nas_modules] + self.controller = ReinforceController(self.nas_fields,lstm_size=100,temperature=5.0,tanh_constant=2.5, **(self.ctrl_kwargs or {})) + self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr) + # train + with tqdm(range(self.num_epochs)) as bar: + for i in bar: + l2=self._train_controller(i) + bar.set_postfix(reward_controller=l2) + + selection=self.export() + arch=space.export(selection,self.device) + print(selection,arch) + return arch + + def _train_controller(self, epoch): + self.model.eval() + self.controller.train() + self.ctrl_optim.zero_grad() + rewards=[] + baseline=None + with tqdm(range(self.ctrl_steps_aggregate)) as bar: + for ctrl_step in bar: + self._resample() + metric,loss=self._infer(mask='val') + + bar.set_postfix(acc=metric,loss=loss.item()) + self.log.write(f'{self.arch}\n{self.selection}\n{metric},{loss}\n') + self.log.flush() + reward =metric + rewards.append(reward) + + if self.entropy_weight: + reward += self.entropy_weight * self.controller.sample_entropy.item() + + if not baseline: + baseline= reward + else: + baseline = baseline * self.baseline_decay + reward * (1 - self.baseline_decay) + + loss = self.controller.sample_log_prob * (reward - baseline) + self.ctrl_optim.zero_grad() + loss.backward() + + self.ctrl_optim.step() + + bar.set_postfix(acc=metric,max_acc=max(rewards)) + return sum(rewards)/len(rewards) + + def _resample(self): + result = self.controller.resample() + self.arch=self.model.export(result,device=self.device) + self.selection=result + + def export(self): + self.controller.eval() + with torch.no_grad(): + return self.controller.resample() + + def _infer(self,mask='train'): + metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask) + return metric, loss \ No newline at end of file diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py index 4964947..9fc33be 100644 --- a/autogl/module/nas/estimator/one_shot.py +++ 
b/autogl/module/nas/estimator/one_shot.py @@ -31,9 +31,9 @@ class TrainEstimator(BaseEstimator): self.trainer=NodeClassificationFullTrainer( model=model, optimizer=torch.optim.Adam, - lr=0.01, - max_epoch=200, - early_stopping_round=200, + lr=0.005, + max_epoch=300, + early_stopping_round=30, weight_decay=5e-4, device="auto", init=False, diff --git a/autogl/module/nas/space/graph_nas_macro.py b/autogl/module/nas/space/graph_nas_macro.py index a59e8bb..963c13a 100644 --- a/autogl/module/nas/space/graph_nas_macro.py +++ b/autogl/module/nas/space/graph_nas_macro.py @@ -392,7 +392,7 @@ class GraphNasMacroNodeClfSpace(BaseSpace): self, hidden_dim: _typ.Optional[int] = 64, layer_number: _typ.Optional[int] = 2, - dropout: _typ.Optional[float] = 0.9, + dropout: _typ.Optional[float] = 0.6, input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, ops: _typ.Tuple = None, diff --git a/examples/test_graph_nas_rl.py b/examples/test_graph_nas_rl.py index b5d4363..55e47db 100644 --- a/examples/test_graph_nas_rl.py +++ b/examples/test_graph_nas_rl.py @@ -10,7 +10,7 @@ from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace from autogl.module.nas.space.graph_nas_macro import GraphNasMacroNodeClfSpace from autogl.module.train import Acc from autogl.module.nas.algorithm.enas import Enas -from autogl.module.nas.algorithm.rl import RL +from autogl.module.nas.algorithm.rl import RL,GraphNasRL from autogl.module.nas.estimator.one_shot import TrainEstimator from autogl.module.nas.algorithm.random_search import RandomSearch import logging @@ -25,7 +25,7 @@ if __name__ == '__main__': default_trainer=NodeClassificationFullTrainer( optimizer=torch.optim.Adam, lr=0.01, - max_epoch=200, + max_epoch=300, early_stopping_round=200, weight_decay=5e-4, device="auto", @@ -34,7 +34,7 @@ if __name__ == '__main__': loss="nll_loss", lr_scheduler_type=None,), # nas_algorithms=[RL(num_epochs=400)], - nas_algorithms=[RandomSearch(num_epochs=400)], + nas_algorithms=[GraphNasRL(num_epochs=100)], #nas_algorithms=[Darts(num_epochs=200)], nas_spaces=[GraphNasMacroNodeClfSpace(hidden_dim=16,search_act_con=True,layer_number=2)], nas_estimators=[TrainEstimator()] From 5c70d9e6ff66cafab97dd73f3e4d058b4f646168 Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Fri, 18 Jun 2021 12:10:24 +0000 Subject: [PATCH 084/144] refine hyperparam, achieve 0.838 in val at best --- autogl/module/nas/algorithm/rl.py | 19 ++++++++++++++----- examples/test_graph_nas_rl.py | 6 +++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py index 3f011c0..9fa5a90 100644 --- a/autogl/module/nas/algorithm/rl.py +++ b/autogl/module/nas/algorithm/rl.py @@ -375,7 +375,6 @@ class RL(BaseNAS): metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask) return metric, loss - class GraphNasRL(BaseNAS): """ RL in GraphNas. 
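The controller update that these patches tune follows the baseline rule documented above, ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``. A minimal sketch of that REINFORCE-style update, with illustrative helper names rather than the actual AutoGL code:

    # Hedged sketch: update_baseline/controller_loss are stand-ins, not AutoGL APIs.
    def update_baseline(baseline, reward, baseline_decay=0.95):
        # The first reward initializes the baseline; afterwards it is an
        # exponential moving average that damps the variance of the gradient.
        if baseline is None:
            return reward
        return baseline * baseline_decay + reward * (1 - baseline_decay)

    def controller_loss(sample_log_prob, reward, baseline):
        # REINFORCE: raise log-probabilities of choices that beat the baseline.
        return sample_log_prob * (reward - baseline)

This mirrors the loop in the hunks below, where the validation metric (optionally plus an entropy bonus weighted by ``entropy_weight``) is used as the reward.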
@@ -424,7 +423,7 @@ class GraphNasRL(BaseNAS): def __init__(self, device='cuda', workers=4,log_frequency=None, grad_clip=5., entropy_weight=0.0001, skip_weight=0, baseline_decay=0.95, - ctrl_lr=0.00035, ctrl_steps_aggregate=100, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,*args,**kwargs): + ctrl_lr=0.00035, ctrl_steps_aggregate=100, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,topk=2,*args,**kwargs): super().__init__(device) self.device=device self.num_epochs = kwargs.get("num_epochs", 10) @@ -443,6 +442,8 @@ class GraphNasRL(BaseNAS): self.model_wd = model_wd timestamp=datetime.now().strftime('%m%d-%H-%M-%S') self.log=open(f'../tmp/log-{timestamp}.txt','w') + self.hist=[] + self.topk=topk def search(self, space: BaseSpace, dset, estimator): self.model = space self.dataset = dset#.to(self.device) @@ -469,7 +470,9 @@ class GraphNasRL(BaseNAS): l2=self._train_controller(i) bar.set_postfix(reward_controller=l2) - selection=self.export() + # selection=self.export() + # diff: graphnas use top 5 models, can evaluate 20 times epoch and choose the best. we just choose the top1. + selection=self.hist[0][1] arch=space.export(selection,self.device) print(selection,arch) return arch @@ -480,15 +483,21 @@ class GraphNasRL(BaseNAS): self.ctrl_optim.zero_grad() rewards=[] baseline=None + # diff: graph nas train 100 and derive 100 for every epoch(10 epochs), we just train 100(20 epochs). totol num of samples are same (2000) with tqdm(range(self.ctrl_steps_aggregate)) as bar: for ctrl_step in bar: self._resample() metric,loss=self._infer(mask='val') - bar.set_postfix(acc=metric,loss=loss.item()) + # bar.set_postfix(acc=metric,loss=loss.item()) self.log.write(f'{self.arch}\n{self.selection}\n{metric},{loss}\n') self.log.flush() - reward =metric + # diff: not do reward shaping as in graphnas code + reward =metric + self.hist.append([-metric,self.selection]) + if len(self.hist)>=self.topk: + self.hist.sort(key=lambda x:x[0]) + self.hist.pop() rewards.append(reward) if self.entropy_weight: diff --git a/examples/test_graph_nas_rl.py b/examples/test_graph_nas_rl.py index 55e47db..dd3f464 100644 --- a/examples/test_graph_nas_rl.py +++ b/examples/test_graph_nas_rl.py @@ -24,9 +24,9 @@ if __name__ == '__main__': ensemble_module=None, default_trainer=NodeClassificationFullTrainer( optimizer=torch.optim.Adam, - lr=0.01, + lr=0.005, max_epoch=300, - early_stopping_round=200, + early_stopping_round=30, weight_decay=5e-4, device="auto", init=False, @@ -34,7 +34,7 @@ if __name__ == '__main__': loss="nll_loss", lr_scheduler_type=None,), # nas_algorithms=[RL(num_epochs=400)], - nas_algorithms=[GraphNasRL(num_epochs=100)], + nas_algorithms=[GraphNasRL(num_epochs=20)], #nas_algorithms=[Darts(num_epochs=200)], nas_spaces=[GraphNasMacroNodeClfSpace(hidden_dim=16,search_act_con=True,layer_number=2)], nas_estimators=[TrainEstimator()] From 9815e39d82a76741873a10112f692ac87152e174 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sat, 19 Jun 2021 06:19:23 +0000 Subject: [PATCH 085/144] fix tuple bugs, add lp to solver __init__ --- autogl/solver/__init__.py | 9 +++++++-- autogl/solver/classifier/__init__.py | 8 +++++++- autogl/solver/classifier/graph_classifier.py | 2 +- autogl/solver/classifier/link_predictor.py | 8 ++++++-- autogl/solver/classifier/node_classifier.py | 2 +- 5 files changed, 22 insertions(+), 7 deletions(-) diff --git a/autogl/solver/__init__.py b/autogl/solver/__init__.py index 54172f8..0fae590 100644 --- a/autogl/solver/__init__.py +++ b/autogl/solver/__init__.py @@ -2,7 +2,12 @@ Auto 
solver for various graph tasks """ -from .classifier import AutoGraphClassifier, AutoNodeClassifier +from .classifier import AutoGraphClassifier, AutoNodeClassifier, AutoLinkPredictor from .utils import Leaderboard -__all__ = ["AutoNodeClassifier", "AutoGraphClassifier", "Leaderboard"] +__all__ = [ + "AutoNodeClassifier", + "AutoGraphClassifier", + "AutoLinkPredictor", + "Leaderboard", +] diff --git a/autogl/solver/classifier/__init__.py b/autogl/solver/classifier/__init__.py index fc74cd6..e30c582 100644 --- a/autogl/solver/classifier/__init__.py +++ b/autogl/solver/classifier/__init__.py @@ -5,5 +5,11 @@ Auto classifier for classification problems. from .base import BaseClassifier from .graph_classifier import AutoGraphClassifier from .node_classifier import AutoNodeClassifier +from .link_predictor import AutoLinkPredictor -__all__ = ["BaseClassifier", "AutoGraphClassifier", "AutoNodeClassifier"] +__all__ = [ + "BaseClassifier", + "AutoGraphClassifier", + "AutoNodeClassifier", + "AutoLinkPredictor", +] diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 7427e13..0ceb15d 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -111,7 +111,7 @@ class AutoGraphClassifier(BaseClassifier): ) -> "AutoGraphClassifier": # load graph network module self.graph_model_list = [] - if isinstance(graph_models, list): + if isinstance(graph_models, (list, tuple)): for model in graph_models: if isinstance(model, str): if model in MODEL_DICT: diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py index 71fe591..17b110e 100644 --- a/autogl/solver/classifier/link_predictor.py +++ b/autogl/solver/classifier/link_predictor.py @@ -105,7 +105,7 @@ class AutoLinkPredictor(BaseClassifier): ) -> "AutoLinkPredictor": # load graph network module self.graph_model_list = [] - if isinstance(graph_models, list): + if isinstance(graph_models, (list, tuple)): for model in graph_models: if isinstance(model, str): if model in MODEL_DICT: @@ -577,6 +577,7 @@ class AutoLinkPredictor(BaseClassifier): use_best=True, name=None, mask="test", + threshold=0.5, ) -> np.ndarray: """ Predict the node class number. @@ -611,6 +612,9 @@ class AutoLinkPredictor(BaseClassifier): mask: str The data split to give prediction on. Default ``test``. + threshold: float + The threshold to judge whether the edges are positive or not. 
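To illustrate the new ``threshold`` parameter: link prediction scores every edge with a single probability, so the change at the end of this hunk binarizes the scores instead of taking an argmax over classes. A toy NumPy illustration (values are made up):

    import numpy as np

    proba = np.array([0.91, 0.48, 0.62, 0.05])   # predicted edge probabilities
    labels = (proba > 0.5).astype("int")         # -> array([1, 0, 1, 0])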
+ Returns ------- result: np.ndarray @@ -620,7 +624,7 @@ class AutoLinkPredictor(BaseClassifier): proba = self.predict_proba( dataset, inplaced, inplace, use_ensemble, use_best, name, mask ) - return np.argmax(proba, axis=1) + return (proba > threshold).astype("int") @classmethod def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor": diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index cd0ed86..79a882f 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -105,7 +105,7 @@ class AutoNodeClassifier(BaseClassifier): ) -> "AutoNodeClassifier": # load graph network module self.graph_model_list = [] - if isinstance(graph_models, list): + if isinstance(graph_models, (list, tuple)): for model in graph_models: if isinstance(model, str): if model in MODEL_DICT: From 0b75bd920c28f8a8e40172cf1356db9393d9ae5f Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Sat, 19 Jun 2021 14:33:38 +0000 Subject: [PATCH 086/144] achieve 0.839 for cora test dataset --- examples/test_graph_nas_rl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/test_graph_nas_rl.py b/examples/test_graph_nas_rl.py index dd3f464..fbe1052 100644 --- a/examples/test_graph_nas_rl.py +++ b/examples/test_graph_nas_rl.py @@ -26,7 +26,7 @@ if __name__ == '__main__': optimizer=torch.optim.Adam, lr=0.005, max_epoch=300, - early_stopping_round=30, + early_stopping_round=20, weight_decay=5e-4, device="auto", init=False, From 776ba82b26464580cbf0d79a184ea6d8fa635e72 Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Sun, 20 Jun 2021 06:40:19 +0000 Subject: [PATCH 087/144] add topk and reevaluate to choose best --- autogl/module/nas/algorithm/rl.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py index 9fa5a90..9f152a9 100644 --- a/autogl/module/nas/algorithm/rl.py +++ b/autogl/module/nas/algorithm/rl.py @@ -12,7 +12,7 @@ from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choic from nni.nas.pytorch.fixed import apply_fixed_architecture from tqdm import tqdm from datetime import datetime - +import numpy as np _logger = logging.getLogger(__name__) def _get_mask(sampled, total): multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)] @@ -423,7 +423,7 @@ class GraphNasRL(BaseNAS): def __init__(self, device='cuda', workers=4,log_frequency=None, grad_clip=5., entropy_weight=0.0001, skip_weight=0, baseline_decay=0.95, - ctrl_lr=0.00035, ctrl_steps_aggregate=100, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,topk=2,*args,**kwargs): + ctrl_lr=0.00035, ctrl_steps_aggregate=100, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,topk=5,*args,**kwargs): super().__init__(device) self.device=device self.num_epochs = kwargs.get("num_epochs", 10) @@ -471,12 +471,29 @@ class GraphNasRL(BaseNAS): bar.set_postfix(reward_controller=l2) # selection=self.export() - # diff: graphnas use top 5 models, can evaluate 20 times epoch and choose the best. we just choose the top1. 
-        selection=self.hist[0][1]
+
+        selections=[x[1] for x in self.hist]
+        candidate_accs=[-x[0] for x in self.hist]
+        print('candidate accuracies',candidate_accs)
+        selection=self._choose_best(selections)
         arch=space.export(selection,self.device)
         print(selection,arch)
         return arch
-
+    def _choose_best(self,selections):
+        # graphnas uses the top 5 models, evaluates each 20 times and chooses the best.
+        results=[]
+        for selection in selections:
+            accs=[]
+            for i in tqdm(range(20)):
+                self.arch=self.model.export(selection,device=self.device)
+                metric,loss=self._infer(mask='val')
+                accs.append(metric)
+            result=np.mean(accs)
+            print('selection {} \n acc {:.4f} +- {:.4f}'.format(selection,np.mean(accs),np.std(accs)/np.sqrt(20)))
+            results.append(result)
+        best_selection=selections[np.argmax(results)]
+        return best_selection
+
     def _train_controller(self, epoch):
         self.model.eval()
         self.controller.train()
@@ -495,7 +512,7 @@ class GraphNasRL(BaseNAS):
             reward =metric
             self.hist.append([-metric,self.selection])
-            if len(self.hist)>=self.topk:
+            if len(self.hist)>self.topk:
                 self.hist.sort(key=lambda x:x[0])
                 self.hist.pop()
             rewards.append(reward)
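The ``_choose_best`` method added in this patch re-scores each of the k best architectures found during search and keeps the most stable winner. A self-contained sketch of the same idea, where ``evaluate`` stands in for ``self._infer(mask='val')``:

    import numpy as np

    def choose_best(selections, evaluate, n_runs=20):
        # Re-evaluate every kept architecture n_runs times and compare means;
        # the standard error (std / sqrt(n_runs)) shows how noisy each mean is.
        means = []
        for selection in selections:
            accs = [evaluate(selection) for _ in range(n_runs)]
            print('acc {:.4f} +- {:.4f}'.format(np.mean(accs), np.std(accs) / np.sqrt(n_runs)))
            means.append(np.mean(accs))
        return selections[int(np.argmax(means))]

Averaging repeated runs matters here because a single validation evaluation of a sampled architecture is noisy, so picking the raw top-1 reward can select a lucky outlier.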
From bcc6d555a963e9e9b044a9335d85f38b96851416 Mon Sep 17 00:00:00 2001
From: generall
Date: Mon, 21 Jun 2021 11:38:59 +0800
Subject: [PATCH 088/144] add init
---
 autogl/module/nas/algorithm/__init__.py      | 42 +++++++++++++++++-
 autogl/module/nas/estimator/__init__.py      | 45 +++++++++++++++++---
 autogl/module/nas/estimator/one_shot.py      | 23 ----------
 autogl/module/nas/estimator/train_scratch.py | 29 +++++++++++++
 autogl/module/nas/space/__init__.py          | 42 +++++++++++++++++-
 5 files changed, 151 insertions(+), 30 deletions(-)
 create mode 100644 autogl/module/nas/estimator/train_scratch.py
diff --git a/autogl/module/nas/algorithm/__init__.py b/autogl/module/nas/algorithm/__init__.py
index 537f223..eacc45b 100644
--- a/autogl/module/nas/algorithm/__init__.py
+++ b/autogl/module/nas/algorithm/__init__.py
@@ -2,8 +2,48 @@
 NAS algorithms
 """
 
+import importlib
+import os
 from .base import BaseNAS
+
+NAS_ALGO_DICT = {}
+
+def register_nas_algo(name):
+    def register_nas_algo_cls(cls):
+        if name in NAS_ALGO_DICT:
+            raise ValueError("Cannot register duplicate NAS algorithm ({})".format(name))
+        if not issubclass(cls, BaseNAS):
+            raise ValueError(
+                "Model ({}: {}) must extend NAS algorithm".format(name, cls.__name__)
+            )
+        NAS_ALGO_DICT[name] = cls
+        return cls
+
+    return register_nas_algo_cls
+
 from .darts import Darts
 from .enas import Enas
+from .random_search import RandomSearch
+from .rl import RL, GraphNasRL
+
+def build_nas_algo_from_name(name: str) -> BaseNAS:
+    """
+    Parameters
+    ----------
+    name: ``str``
+        the name of the NAS algorithm.
+
+    Returns
+    -------
+    BaseNAS:
+        the NAS algorithm built using default parameters
+
+    Raises
+    ------
+    AssertionError
+        If an invalid name is passed in
+    """
+    assert name in NAS_ALGO_DICT, "NAS algorithm does not have name " + name
+    return NAS_ALGO_DICT[name]()
 
-__all__ = ["BaseNAS", "Darts", "Enas"]
+__all__ = ["BaseNAS", "Darts", "Enas", "RandomSearch", "RL", "GraphNasRL"]
diff --git a/autogl/module/nas/estimator/__init__.py b/autogl/module/nas/estimator/__init__.py
index 9184f64..e768aa2 100644
--- a/autogl/module/nas/estimator/__init__.py
+++ b/autogl/module/nas/estimator/__init__.py
@@ -1,8 +1,43 @@
-"""
-NAS Estimator
-"""
-
+import importlib
+import os
 from .base import BaseEstimator
+
+NAS_ESTIMATOR_DICT = {}
+
+def register_nas_estimator(name):
+    def register_nas_estimator_cls(cls):
+        if name in NAS_ESTIMATOR_DICT:
+            raise ValueError("Cannot register duplicate NAS estimator ({})".format(name))
+        if not issubclass(cls, BaseEstimator):
+            raise ValueError(
+                "Model ({}: {}) must extend NAS estimator".format(name, cls.__name__)
+            )
+        NAS_ESTIMATOR_DICT[name] = cls
+        return cls
+
+    return register_nas_estimator_cls
+
 from .one_shot import OneShotEstimator
+from .train_scratch import TrainEstimator
+
+def build_nas_estimator_from_name(name: str) -> BaseEstimator:
+    """
+    Parameters
+    ----------
+    name: ``str``
+        the name of the NAS estimator.
+
+    Returns
+    -------
+    BaseEstimator:
+        the NAS estimator built using default parameters
+
+    Raises
+    ------
+    AssertionError
+        If an invalid name is passed in
+    """
+    assert name in NAS_ESTIMATOR_DICT, "NAS estimator does not have name " + name
+    return NAS_ESTIMATOR_DICT[name]()
 
-__all__ = ["BaseEstimator", "OneShotEstimator"]
+__all__ = ["BaseEstimator", "OneShotEstimator", "TrainEstimator"]
diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py
index 9fc33be..e43695c 100644
--- a/autogl/module/nas/estimator/one_shot.py
+++ b/autogl/module/nas/estimator/one_shot.py
@@ -20,26 +20,3 @@ class OneShotEstimator(BaseEstimator):
         loss = F.nll_loss(pred, y)
         acc=sum(pred.max(1)[1]==y).item()/y.size(0)
         return acc, loss
-
-from autogl.module.train import NodeClassificationFullTrainer
-class TrainEstimator(BaseEstimator):
-    def __init__(self):
-        self.estimator=OneShotEstimator()
-    def infer(self,model: BaseSpace, dataset, mask="train"):
-        # self.trainer.model=model
-        # self.trainer.device=model.device
-        self.trainer=NodeClassificationFullTrainer(
-            model=model,
-            optimizer=torch.optim.Adam,
-            lr=0.005,
-            max_epoch=300,
-            early_stopping_round=30,
-            weight_decay=5e-4,
-            device="auto",
-            init=False,
-            feval=['acc'],
-            loss="nll_loss",
-            lr_scheduler_type=None)
-        self.trainer.train(dataset)
-        with torch.no_grad():
-            return self.estimator.infer(model,dataset,mask)
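All three ``__init__`` files touched by this patch follow the same register/build pattern: a decorator records each class in a module-level dict, and a builder instantiates it by name. Stripped to its core (illustrative names, not the AutoGL modules):

    REGISTRY = {}

    def register(name):
        def _register(cls):
            if name in REGISTRY:
                raise ValueError("Cannot register duplicate entry ({})".format(name))
            REGISTRY[name] = cls
            return cls
        return _register

    @register("example")
    class Example:
        pass

    def build_from_name(name):
        # Look the class up under its registered name and instantiate it
        # with default parameters.
        assert name in REGISTRY, "No entry registered under name " + name
        return REGISTRY[name]()

The decorator returns the class unchanged, so registration is a side effect of importing the module that defines it.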
diff --git a/autogl/module/nas/estimator/train_scratch.py b/autogl/module/nas/estimator/train_scratch.py
new file mode 100644
index 0000000..437af5f
--- /dev/null
+++ b/autogl/module/nas/estimator/train_scratch.py
@@ -0,0 +1,29 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..space import BaseSpace
+from .base import BaseEstimator
+import torch
+
+from autogl.module.train import NodeClassificationFullTrainer
+class TrainEstimator(BaseEstimator):
+    def __init__(self):
+        self.estimator=OneShotEstimator()
+    def infer(self,model: BaseSpace, dataset, mask="train"):
+        # self.trainer.model=model
+        # self.trainer.device=model.device
+        self.trainer=NodeClassificationFullTrainer(
+            model=model,
+            optimizer=torch.optim.Adam,
+            lr=0.005,
+            max_epoch=300,
+            early_stopping_round=30,
+            weight_decay=5e-4,
+            device="auto",
+            init=False,
+            feval=['acc'],
+            loss="nll_loss",
+            lr_scheduler_type=None)
+        self.trainer.train(dataset)
+        with torch.no_grad():
+            return self.estimator.infer(model,dataset,mask)
diff --git a/autogl/module/nas/space/__init__.py b/autogl/module/nas/space/__init__.py
index b8f7aa8..9618048 100644
--- a/autogl/module/nas/space/__init__.py
+++ b/autogl/module/nas/space/__init__.py
@@ -1,4 +1,44 @@
+import importlib
+import os
 from .base import BaseSpace
+
+NAS_SPACE_DICT = {}
+
+def register_nas_space(name):
+    def register_nas_space_cls(cls):
+        if name in NAS_SPACE_DICT:
+            raise ValueError("Cannot register duplicate NAS space ({})".format(name))
+        if not issubclass(cls, BaseSpace):
+            raise ValueError(
+                "Model ({}: {}) must extend NAS space".format(name, cls.__name__)
+            )
+        NAS_SPACE_DICT[name] = cls
+        return cls
+
+    return register_nas_space_cls
+
+from .graph_nas_macro import GraphNasMacroNodeClfSpace
+from .graph_nas import GraphNasNodeClassificationSpace
 from .single_path import SinglePathNodeClassificationSpace
 
-__all__ = ["BaseSpace", "SinglePathNodeClassificationSpace"]
+def build_nas_space_from_name(name: str) -> BaseSpace:
+    """
+    Parameters
+    ----------
+    name: ``str``
+        the name of the NAS space.
+
+    Returns
+    -------
+    BaseSpace:
+        the NAS space built using default parameters
+
+    Raises
+    ------
+    AssertionError
+        If an invalid name is passed in
+    """
+    assert name in NAS_SPACE_DICT, "NAS space does not have name " + name
+    return NAS_SPACE_DICT[name]()
+
+__all__ = ["BaseSpace", "GraphNasMacroNodeClfSpace", "GraphNasNodeClassificationSpace", "SinglePathNodeClassificationSpace"]
From 2838eaa106e879c6cc6bdfc684de9ba685435849 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Mon, 21 Jun 2021 04:09:46 +0000
Subject: [PATCH 089/144] fix logs
---
 autogl/module/nas/algorithm/rl.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py
index 9f152a9..a81da43 100644
--- a/autogl/module/nas/algorithm/rl.py
+++ b/autogl/module/nas/algorithm/rl.py
@@ -13,6 +13,7 @@ from nni.nas.pytorch.fixed import apply_fixed_architecture
 from tqdm import tqdm
 from datetime import datetime
 import numpy as np
+
 _logger = logging.getLogger(__name__)
 def _get_mask(sampled, total):
     multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)]
@@ -295,7 +296,6 @@ class RL(BaseNAS):
         self.n_warmup=n_warmup
         self.model_lr = model_lr
         self.model_wd = model_wd
-        self.log=open('../tmp/log.txt','w')
     def search(self, space: BaseSpace, dset, estimator):
         self.model = space
         self.dataset = dset#.to(self.device)
@@ -337,8 +337,7 @@ class RL(BaseNAS):
             self._resample()
             metric,loss=self._infer(mask='val')
             bar.set_postfix(acc=metric,loss=loss.item())
-            self.log.write(f'{self.arch}\n{self.selection}\n{metric},{loss}\n')
-            self.log.flush()
+            _logger.debug(f'{self.arch}\n{self.selection}\n{metric},{loss}')
            reward =metric
            rewards.append(reward)
            if self.entropy_weight:
@@ -440,8 +439,6 @@ class GraphNasRL(BaseNAS):
         self.n_warmup=n_warmup
         self.model_lr = model_lr
         self.model_wd = model_wd
-        timestamp=datetime.now().strftime('%m%d-%H-%M-%S')
-        self.log=open(f'../tmp/log-{timestamp}.txt','w')
         self.hist=[]
         self.topk=topk
     def search(self, space: BaseSpace, dset, estimator):
@@ -504,8 +501,7 @@ class GraphNasRL(BaseNAS):
             metric,loss=self._infer(mask='val')

             # bar.set_postfix(acc=metric,loss=loss.item())
-
self.log.write(f'{self.arch}\n{self.selection}\n{metric},{loss}\n') - self.log.flush() + _logger.debug(f'{self.arch}\n{self.selection}\n{metric},{loss}') # diff: not do reward shaping as in graphnas code reward =metric self.hist.append([-metric,self.selection]) From 2b251671bf19fbb7670c123249721212e2d517d3 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Mon, 21 Jun 2021 04:16:19 +0000 Subject: [PATCH 090/144] move lp solver --- examples/link_prediction.py | 134 +++++++++++++++++++---------- examples/link_prediction_solver.py | 90 ------------------- 2 files changed, 87 insertions(+), 137 deletions(-) delete mode 100644 examples/link_prediction_solver.py diff --git a/examples/link_prediction.py b/examples/link_prediction.py index 331ec31..4de4393 100644 --- a/examples/link_prediction.py +++ b/examples/link_prediction.py @@ -1,53 +1,93 @@ -import os.path as osp import sys -sys.path.insert(0, '../') -import torch + +sys.path.append("../") from autogl.datasets import build_dataset_from_name -from autogl.module.train import LinkPredictionTrainer +from autogl.solver.classifier.link_predictor import AutoLinkPredictor +from autogl.module.train.evaluation import Auc +import yaml +import random +import torch import numpy as np -from torch_geometric.utils import train_test_split_edges -from sklearn.metrics import roc_auc_score - -dataset = build_dataset_from_name('cora') - -print('len', len(dataset)) -print('num_class', dataset.num_classes) -print('num_node_features', dataset.num_node_features) - -a = [] -for _ in range(10): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - data = dataset[0] - - data = data.to(device) - data.train_mask = data.val_mask = data.test_mask = data.y = None - data = train_test_split_edges(data) - - clf = LinkPredictionTrainer( - 'gcn', - num_features=dataset.num_node_features, - num_classes=dataset.num_classes, - max_epoch=100, - early_stopping_round=101, - feval=['auc'], - lr=0.01, - weight_decay=0, - lr_scheduler_type=None, + +if __name__ == "__main__": + + from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + + parser = ArgumentParser( + "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", + default="cora", + type=str, + help="dataset to use", + choices=[ + "cora", + "pubmed", + "citeseer", + "coauthor_cs", + "coauthor_physics", + "amazon_computers", + "amazon_photo", + ], + ) + parser.add_argument( + "--configs", + type=str, + default="../configs/lp_gcn_benchmark.yml", + help="config to use", + ) + # following arguments will override parameters in the config file + parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods") + parser.add_argument( + "--max_eval", type=int, default=50, help="max hpo evaluation times" ) - clf.train([data], keep_valid_result=True) - print(clf.valid_score, end=',') - y = clf.predict([data], 'test') - y_ = y.cpu().numpy() - # acc_ = y.eq(data.y[data.test_mask]).sum().item() / data.test_mask.sum().item() - # print(acc_, end=',') - - pos_edge_index = data[f'test_pos_edge_index'] - neg_edge_index = data[f'test_neg_edge_index'] - link_labels = clf.get_link_labels(pos_edge_index, neg_edge_index) - label = link_labels.cpu().numpy() - ret = roc_auc_score(label, y_) - print(ret) - a.append(ret) -print(np.mean(a), np.std(a)) + parser.add_argument("--seed", type=int, default=0, help="random seed") + parser.add_argument("--device", default=0, type=int, help="GPU device") + args = parser.parse_args() + if torch.cuda.is_available(): + 
torch.cuda.set_device(args.device) + seed = args.seed + # set random seed + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + dataset = build_dataset_from_name(args.dataset) + + configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader) + configs["hpo"]["name"] = args.hpo + configs["hpo"]["max_evals"] = args.max_eval + autoClassifier = AutoLinkPredictor.from_config(configs) + + # train + autoClassifier.fit( + dataset, + time_limit=3600, + evaluation_method=[Auc], + seed=seed, + train_split=0.85, + val_split=0.05, + ) + autoClassifier.get_leaderboard().show() + + # test + predict_result = autoClassifier.predict_proba() + + pos_edge_index, neg_edge_index = ( + dataset[0].test_pos_edge_index, + dataset[0].test_neg_edge_index, + ) + E = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(E) + link_labels[: pos_edge_index.size(1)] = 1.0 + + print( + "test auc: %.4f" + % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy())) + ) diff --git a/examples/link_prediction_solver.py b/examples/link_prediction_solver.py deleted file mode 100644 index a2051d7..0000000 --- a/examples/link_prediction_solver.py +++ /dev/null @@ -1,90 +0,0 @@ -import sys - -sys.path.append("../") -from autogl.datasets import build_dataset_from_name -from autogl.solver.classifier.link_predictor import AutoLinkPredictor -from autogl.module.train.evaluation import Auc -import yaml -import random -import torch -import numpy as np - -if __name__ == "__main__": - - from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter - - parser = ArgumentParser( - "auto link prediction", formatter_class=ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--dataset", - default="cora", - type=str, - help="dataset to use", - choices=[ - "cora", - "pubmed", - "citeseer", - "coauthor_cs", - "coauthor_physics", - "amazon_computers", - "amazon_photo", - ], - ) - parser.add_argument( - "--configs", - type=str, - default="../configs/lp_gcn_benchmark_small.yml", - help="config to use", - ) - # following arguments will override parameters in the config file - parser.add_argument("--hpo", type=str, default="tpe", help="hpo methods") - parser.add_argument( - "--max_eval", type=int, default=50, help="max hpo evaluation times" - ) - parser.add_argument("--seed", type=int, default=0, help="random seed") - parser.add_argument("--device", default=0, type=int, help="GPU device") - - args = parser.parse_args() - if torch.cuda.is_available(): - torch.cuda.set_device(args.device) - seed = args.seed - # set random seed - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - dataset = build_dataset_from_name(args.dataset) - - configs = yaml.load(open(args.configs, "r").read(), Loader=yaml.FullLoader) - configs["hpo"]["name"] = args.hpo - configs["hpo"]["max_evals"] = args.max_eval - autoClassifier = AutoLinkPredictor.from_config(configs) - - # train - autoClassifier.fit( - dataset, - time_limit=3600, - evaluation_method=[Auc], - seed=seed, - train_split=0.85, - val_split=0.05, - ) - autoClassifier.get_leaderboard().show() - - # test - predict_result = autoClassifier.predict_proba() - - pos_edge_index, neg_edge_index = dataset[0].test_pos_edge_index, 
dataset[0].test_neg_edge_index - E = pos_edge_index.size(1) + neg_edge_index.size(1) - link_labels = torch.zeros(E) - link_labels[:pos_edge_index.size(1)] = 1. - - print( - "test auc: %.4f" - % (Auc.evaluate(predict_result, link_labels.detach().cpu().numpy())) - ) From 793578ea3664cdb000aa24d127d9292561a4c0d0 Mon Sep 17 00:00:00 2001 From: generall Date: Mon, 21 Jun 2021 13:52:11 +0800 Subject: [PATCH 091/144] improve ordered mutables --- autogl/module/nas/space/base.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py index 4c38584..7ae693a 100644 --- a/autogl/module/nas/space/base.py +++ b/autogl/module/nas/space/base.py @@ -8,14 +8,16 @@ class OrderedMutable(): self.order = order class OrderedLayerChoice(OrderedMutable, mutables.LayerChoice): - def __init__(self, order, *args, **kwargs): + def __init__(self, order, op_candidates, reduction="sum", return_mask=False, key=None): OrderedMutable.__init__(self, order) - mutables.LayerChoice.__init__(self, *args, **kwargs) + mutables.LayerChoice.__init__(self, op_candidates, reduction, return_mask, key) class OrderedInputChoice(OrderedMutable, mutables.InputChoice): - def __init__(self, order, *args, **kwargs): + def __init__(self, order, n_candidates=None, choose_from=None, n_chosen=None, + reduction="sum", return_mask=False, key=None): OrderedMutable.__init__(self, order) - mutables.InputChoice.__init__(self, *args, **kwargs) + mutables.InputChoice.__init__(self, n_candidates, choose_from, n_chosen, + reduction, return_mask, key) class BaseSpace(nn.Module): """ @@ -73,24 +75,26 @@ class BaseSpace(nn.Module): if not self._initialized: self._initialized = True - def setLayerChoice(self, *args, **kwargs): + def setLayerChoice(self, order, op_candidates, reduction="sum", return_mask=False, orikey=None): """ Give a unique key if not given """ - if len(args) < 5 and not "key" in kwargs: + if orikey == None: key = f"default_key_{self._default_key}" self._default_key += 1 - kwargs["key"] = key - layer = OrderedLayerChoice(*args, **kwargs) + orikry = key + layer = OrderedLayerChoice(order, op_candidates, reduction, return_mask, orikey) return layer - def setInputChoice(self, *args, **kwargs): + def setInputChoice(self, order, n_candidates=None, choose_from=None, n_chosen=None, + reduction="sum", return_mask=False, orikey=None): """ Give a unique key if not given """ - if len(args) < 7 and not "key" in kwargs: + if orikey == None: key = f"default_key_{self._default_key}" self._default_key += 1 - kwargs["key"] = key - layer = OrderedInputChoice(*args, **kwargs) + orikey = key + layer = OrderedInputChoice(order, n_candidates, choose_from, n_chosen, + reduction, return_mask, orikey) return layer From c15dcb73932b83cc9ddfb9289bba745baf8a7e85 Mon Sep 17 00:00:00 2001 From: generall Date: Mon, 21 Jun 2021 16:41:01 +0800 Subject: [PATCH 092/144] add fixed input choice --- autogl/module/nas/estimator/train_scratch.py | 2 + autogl/module/nas/space/base.py | 103 ++++++++++++++++++- autogl/module/nas/space/single_path.py | 2 +- 3 files changed, 104 insertions(+), 3 deletions(-) diff --git a/autogl/module/nas/estimator/train_scratch.py b/autogl/module/nas/estimator/train_scratch.py index 437af5f..82b6d09 100644 --- a/autogl/module/nas/estimator/train_scratch.py +++ b/autogl/module/nas/estimator/train_scratch.py @@ -3,12 +3,14 @@ import torch.nn.functional as F from ..space import BaseSpace from .base import BaseEstimator +from .one_shot import 
OneShotEstimator import torch from autogl.module.train import NodeClassificationFullTrainer class TrainEstimator(BaseEstimator): def __init__(self): self.estimator=OneShotEstimator() + def infer(self,model: BaseSpace, dataset, mask="train"): # self.trainer.model=model # self.trainer.device=model.device diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py index 7ae693a..eaa595a 100644 --- a/autogl/module/nas/space/base.py +++ b/autogl/module/nas/space/base.py @@ -2,6 +2,8 @@ from abc import abstractmethod from autogl.module.model import BaseModel import torch.nn as nn from nni.nas.pytorch import mutables +from nni.nas.pytorch.fixed import FixedArchitecture +import json class OrderedMutable(): def __init__(self, order): @@ -75,10 +77,11 @@ class BaseSpace(nn.Module): if not self._initialized: self._initialized = True - def setLayerChoice(self, order, op_candidates, reduction="sum", return_mask=False, orikey=None): + def setLayerChoice(self, order, op_candidates, reduction="sum", return_mask=False, key=None): """ Give a unique key if not given """ + orikey = key if orikey == None: key = f"default_key_{self._default_key}" self._default_key += 1 @@ -87,10 +90,11 @@ class BaseSpace(nn.Module): return layer def setInputChoice(self, order, n_candidates=None, choose_from=None, n_chosen=None, - reduction="sum", return_mask=False, orikey=None): + reduction="sum", return_mask=False, key=None): """ Give a unique key if not given """ + orikey = key if orikey == None: key = f"default_key_{self._default_key}" self._default_key += 1 @@ -98,3 +102,98 @@ class BaseSpace(nn.Module): layer = OrderedInputChoice(order, n_candidates, choose_from, n_chosen, reduction, return_mask, orikey) return layer + +class FixedInputChoice(nn.Module): + def __init__(self, mask): + self.mask_len = len(mask) + for i in range(self.mask_len): + if mask[i]: + self.selected = i + break + super().__init__() + + def forward(self, optional_inputs): + if len(optional_inputs) == self.mask_len: + return optional_inputs[self.selected] + +class CleanFixedArchitecture(FixedArchitecture): + """ + Fixed architecture mutator that always selects a certain graph, allowing deepcopy + + Parameters + ---------- + model : nn.Module + A mutable network. + fixed_arc : dict + Preloaded architecture object. + strict : bool + Force everything that appears in ``fixed_arc`` to be used at least once. + verbose : bool + Print log messages if set to True + """ + + def __init__(self, model, fixed_arc, strict=True, verbose=True): + super().__init__(model, fixed_arc, strict, verbose) + + def replace_all_choice(self, module=None, prefix=""): + """ + Replace all choices with selected candidates. It's done with best effort. + In case of weighted choices or multiple choices. if some of the choices on weighted with zero, delete them. + If single choice, replace the module with a normal module. + + Parameters + ---------- + module : nn.Module + Module to be processed. + prefix : str + Module name under global namespace. + """ + if module is None: + module = self.model + for name, mutable in module.named_children(): + global_name = (prefix + "." 
if prefix else "") + name + if isinstance(mutable, OrderedLayerChoice): + chosen = self._fixed_arc[mutable.key] + if sum(chosen) == 1 and max(chosen) == 1 and not mutable.return_mask: + # sum is one, max is one, there has to be an only one + # this is compatible with both integer arrays, boolean arrays and float arrays + setattr(module, name, mutable[chosen.index(1)]) + else: + # remove unused parameters + for ch, n in zip(chosen, mutable.names): + if ch == 0 and not isinstance(ch, float): + setattr(mutable, n, None) + elif isinstance(mutable, OrderedInputChoice): + chosen = self._fixed_arc[mutable.key] + setattr(module, name, FixedInputChoice(chosen)) + else: + self.replace_all_choice(mutable, global_name) + +def apply_fixed_architecture(model, fixed_arc, verbose=True): + """ + Load architecture from `fixed_arc` and apply to model. + + Parameters + ---------- + model : torch.nn.Module + Model with mutables. + fixed_arc : str or dict + Path to the JSON that stores the architecture, or dict that stores the exported architecture. + verbose : bool + Print log messages if set to True + + Returns + ------- + FixedArchitecture + Mutator that is responsible for fixes the graph. + """ + + if isinstance(fixed_arc, str): + with open(fixed_arc) as f: + fixed_arc = json.load(f) + architecture = CleanFixedArchitecture(model, fixed_arc, verbose) + architecture.reset() + + # for the convenience of parameters counting + architecture.replace_all_choice() + return architecture diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py index fcbffa2..8db9fb7 100644 --- a/autogl/module/nas/space/single_path.py +++ b/autogl/module/nas/space/single_path.py @@ -4,7 +4,7 @@ import torch import torch.nn.functional as F from nni.nas.pytorch import mutables -from nni.nas.pytorch.fixed import apply_fixed_architecture +from .base import apply_fixed_architecture from .base import BaseSpace from ...model import BaseModel from ....utils import get_logger From 4085f308da30dc29c40a8043ca3304b7720a164f Mon Sep 17 00:00:00 2001 From: null Date: Mon, 21 Jun 2021 16:26:00 +0800 Subject: [PATCH 093/144] Totally re-implement GraphSAINT --- autogl/datasets/__init__.py | 1 - autogl/datasets/matlab_matrix.py | 14 +- autogl/datasets/pyg.py | 13 + autogl/module/model/__init__.py | 1 + autogl/module/model/graph_saint.py | 344 ++++++++++++++ .../node_classification_sampled_trainer.py | 436 +++++++++++------- .../sampling/sampler/graphsaint_sampler.py | 139 ++---- 7 files changed, 651 insertions(+), 297 deletions(-) create mode 100644 autogl/module/model/graph_saint.py diff --git a/autogl/datasets/__init__.py b/autogl/datasets/__init__.py index c1fccae..6b6919a 100644 --- a/autogl/datasets/__init__.py +++ b/autogl/datasets/__init__.py @@ -92,7 +92,6 @@ from .han_data import HANDataset, ACM_HANDataset, DBLP_HANDataset, IMDB_HANDatas from .matlab_matrix import ( MatlabMatrix, BlogcatalogDataset, - FlickrDataset, WikipediaDataset, PPIDataset, ) diff --git a/autogl/datasets/matlab_matrix.py b/autogl/datasets/matlab_matrix.py index 50ba412..a6ba4f1 100644 --- a/autogl/datasets/matlab_matrix.py +++ b/autogl/datasets/matlab_matrix.py @@ -69,13 +69,13 @@ class BlogcatalogDataset(MatlabMatrix): super(BlogcatalogDataset, self).__init__(path, filename, url) -@register_dataset("flickr") -class FlickrDataset(MatlabMatrix): - def __init__(self, path): - dataset, filename = "flickr", "flickr" - url = "http://leitang.net/code/social-dimension/data/" - # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", 
"data", dataset) - super(FlickrDataset, self).__init__(path, filename, url) +# @register_dataset("flickr") +# class FlickrDataset(MatlabMatrix): +# def __init__(self, path): +# dataset, filename = "flickr", "flickr" +# url = "http://leitang.net/code/social-dimension/data/" +# # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) +# super(FlickrDataset, self).__init__(path, filename, url) @register_dataset("wikipedia") diff --git a/autogl/datasets/pyg.py b/autogl/datasets/pyg.py index d98b927..22eaf3f 100644 --- a/autogl/datasets/pyg.py +++ b/autogl/datasets/pyg.py @@ -10,6 +10,7 @@ from torch_geometric.datasets import ( QM9, Amazon, Coauthor, + Flickr ) from torch_geometric.utils import remove_self_loops from . import register_dataset @@ -127,6 +128,18 @@ class RedditDataset(Reddit): return super(RedditDataset, self).get(idx) +@register_dataset("flickr") +class FlickrDataset(Flickr): + def __init__(self, path): + Flickr(path) + super(FlickrDataset, self).__init__(path) + + def get(self, idx): + if hasattr(self, "__data_list__"): + delattr(self, "__data_list__") + return super(FlickrDataset, self).get(idx) + + @register_dataset("mutag") class MUTAGDataset(TUDataset): def __init__(self, path): diff --git a/autogl/module/model/__init__.py b/autogl/module/model/__init__.py index ef2a92d..d559120 100644 --- a/autogl/module/model/__init__.py +++ b/autogl/module/model/__init__.py @@ -2,6 +2,7 @@ from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model from .base import BaseModel from .topkpool import AutoTopkpool from .graph_sage import AutoSAGE +from .graph_saint import GraphSAINTAggregationModel from .gcn import AutoGCN from .gat import AutoGAT from .gin import AutoGIN diff --git a/autogl/module/model/graph_saint.py b/autogl/module/model/graph_saint.py new file mode 100644 index 0000000..566e171 --- /dev/null +++ b/autogl/module/model/graph_saint.py @@ -0,0 +1,344 @@ +import typing as _typing +import torch.nn.functional +from torch_geometric.nn.conv import MessagePassing +from torch_sparse import SparseTensor, matmul + +from . 
import register_model +from .base import ClassificationModel, SequentialGraphNeuralNetwork + + +class _GraphSAINTAggregationLayers: + class MultiOrderAggregationLayer(torch.nn.Module): + class Order0Aggregator(torch.nn.Module): + def __init__( + self, input_dimension: int, output_dimension: int, bias: bool = True, + activation: _typing.Optional[str] = "ReLU", batch_norm: bool = True + ): + super().__init__() + if not type(input_dimension) == type(output_dimension) == int: + raise TypeError + if not (input_dimension > 0 and output_dimension > 0): + raise ValueError + if not type(bias) == bool: + raise TypeError + self.__linear_transform = torch.nn.Linear(input_dimension, output_dimension, bias) + self.__linear_transform.reset_parameters() + if type(activation) == str: + if activation.lower() == "ReLU".lower(): + self.__activation = torch.nn.functional.relu + elif activation.lower() == "elu": + self.__activation = torch.nn.functional.elu + elif ( + hasattr(torch.nn.functional, activation) and + callable(getattr(torch.nn.functional, activation)) + ): + self.__activation = getattr(torch.nn.functional, activation) + else: + self.__activation = lambda x: x + else: + self.__activation = lambda x: x + if type(batch_norm) != bool: + raise TypeError + else: + self.__optional_batch_normalization: _typing.Optional[torch.nn.BatchNorm1d] = ( + torch.nn.BatchNorm1d(output_dimension, 1e-8) + if batch_norm else None + ) + + def forward( + self, x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]], + _edge_index: torch.Tensor, _edge_weight: _typing.Optional[torch.Tensor] = None, + _size: _typing.Optional[_typing.Tuple[int, int]] = None + ) -> torch.Tensor: + __output: torch.Tensor = self.__linear_transform(x) + if self.__activation is not None and callable(self.__activation): + __output: torch.Tensor = self.__activation(__output) + if ( + self.__optional_batch_normalization is not None and + isinstance(self.__optional_batch_normalization, torch.nn.BatchNorm1d) + ): + __output: torch.Tensor = self.__optional_batch_normalization(__output) + return __output + + class Order1Aggregator(MessagePassing): + def __init__( + self, input_dimension: int, output_dimension: int, bias: bool = True, + activation: _typing.Optional[str] = "ReLU", batch_norm: bool = True + ): + super().__init__(aggr="add") + if not type(input_dimension) == type(output_dimension) == int: + raise TypeError + if not (input_dimension > 0 and output_dimension > 0): + raise ValueError + if not type(bias) == bool: + raise TypeError + self.__linear_transform = torch.nn.Linear(input_dimension, output_dimension, bias) + self.__linear_transform.reset_parameters() + if type(activation) == str: + if activation.lower() == "ReLU".lower(): + self.__activation = torch.nn.functional.relu + elif activation.lower() == "elu": + self.__activation = torch.nn.functional.elu + elif ( + hasattr(torch.nn.functional, activation) and + callable(getattr(torch.nn.functional, activation)) + ): + self.__activation = getattr(torch.nn.functional, activation) + else: + self.__activation = lambda x: x + else: + self.__activation = lambda x: x + if type(batch_norm) != bool: + raise TypeError + else: + self.__optional_batch_normalization: _typing.Optional[torch.nn.BatchNorm1d] = ( + torch.nn.BatchNorm1d(output_dimension, 1e-8) + if batch_norm else None + ) + + def forward( + self, x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]], + _edge_index: torch.Tensor, _edge_weight: _typing.Optional[torch.Tensor] = None, + _size: 
_typing.Optional[_typing.Tuple[int, int]] = None + ) -> torch.Tensor: + + if type(x) == torch.Tensor: + x: _typing.Tuple[torch.Tensor, torch.Tensor] = (x, x) + + __output = self.propagate( + _edge_index, x=x, edge_weight=_edge_weight, size=_size + ) + __output: torch.Tensor = self.__linear_transform(__output) + if self.__activation is not None and callable(self.__activation): + __output: torch.Tensor = self.__activation(__output) + if ( + self.__optional_batch_normalization is not None and + isinstance(self.__optional_batch_normalization, torch.nn.BatchNorm1d) + ): + __output: torch.Tensor = self.__optional_batch_normalization(__output) + return __output + + def message(self, x_j: torch.Tensor, edge_weight: _typing.Optional[torch.Tensor]) -> torch.Tensor: + return x_j if edge_weight is None else edge_weight.view(-1, 1) * x_j + + def message_and_aggregate( + self, adj_t: SparseTensor, + x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]] + ) -> torch.Tensor: + return matmul(adj_t, x[0], reduce=self.aggr) + + @property + def integral_output_dimension(self) -> int: + return (self._order + 1) * self._each_order_output_dimension + + def __init__( + self, _input_dimension: int, _each_order_output_dimension: int, _order: int, + bias: bool = True, activation: _typing.Optional[str] = "ReLU", + batch_norm: bool = True, _dropout: _typing.Optional[float] = ... + ): + super().__init__() + if not ( + type(_input_dimension) == type(_order) == int and + type(_each_order_output_dimension) == int + ): + raise TypeError + if _input_dimension <= 0 or _each_order_output_dimension <= 0: + raise ValueError + if _order not in (0, 1): + raise ValueError("Unsupported order number") + self._input_dimension: int = _input_dimension + self._each_order_output_dimension: int = _each_order_output_dimension + self._order: int = _order + if type(bias) != bool: + raise TypeError + self.__order0_transform = self.Order0Aggregator( + self._input_dimension, self._each_order_output_dimension, bias, + activation, batch_norm + ) + if _order == 1: + self.__order1_transform = self.Order1Aggregator( + self._input_dimension, self._each_order_output_dimension, bias, + activation, batch_norm + ) + else: + self.__order1_transform = None + if _dropout is not None and type(_dropout) == float: + if _dropout < 0: + _dropout = 0 + if _dropout > 1: + _dropout = 1 + self.__optional_dropout: _typing.Optional[torch.nn.Dropout] = ( + torch.nn.Dropout(_dropout) + ) + else: + self.__optional_dropout: _typing.Optional[torch.nn.Dropout] = None + + def _forward( + self, x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]], + edge_index: torch.Tensor, edge_weight: _typing.Optional[torch.Tensor] = None, + size: _typing.Optional[_typing.Tuple[int, int]] = None + ) -> torch.Tensor: + if ( + self.__order1_transform is not None and + isinstance(self.__order1_transform, self.Order1Aggregator) + ): + __output: torch.Tensor = torch.cat( + [ + self.__order0_transform(x, edge_index, edge_weight, size), + self.__order1_transform(x, edge_index, edge_weight, size) + ], + dim=1 + ) + else: + __output: torch.Tensor = self.__order0_transform(x, edge_index, edge_weight, size) + if ( + self.__optional_dropout is not None and + isinstance(self.__optional_dropout, torch.nn.Dropout) + ): + __output: torch.Tensor = self.__optional_dropout(__output) + return __output + + def forward(self, data) -> torch.Tensor: + x: torch.Tensor = getattr(data, "x") + if type(x) != torch.Tensor: + raise TypeError + edge_index: torch.LongTensor = 
getattr(data, "edge_index") + if type(edge_index) != torch.Tensor: + raise TypeError + edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight", None) + if edge_weight is not None and type(edge_weight) != torch.Tensor: + raise TypeError + return self._forward(x, edge_index, edge_weight) + + class WrappedDropout(torch.nn.Module): + def __init__(self, dropout_module: torch.nn.Dropout): + super().__init__() + self.__dropout_module: torch.nn.Dropout = dropout_module + + def forward(self, tenser_or_data) -> torch.Tensor: + if type(tenser_or_data) == torch.Tensor: + return self.__dropout_module(tenser_or_data) + elif ( + hasattr(tenser_or_data, "x") and + type(getattr(tenser_or_data, "x")) == torch.Tensor + ): + return self.__dropout_module(getattr(tenser_or_data, "x")) + else: + raise TypeError + + +class GraphSAINTMultiOrderAggregationModel(SequentialGraphNeuralNetwork): + def __init__( + self, num_features: int, num_classes: int, + _output_dimension_for_each_order: int, + _layers_order_list: _typing.Sequence[int], + _pre_dropout: float, + _layers_dropout: _typing.Union[float, _typing.Sequence[float]], + activation: _typing.Optional[str] = "ReLU", + bias: bool = True, batch_norm: bool = True, + normalize: bool = True + ): + super(GraphSAINTMultiOrderAggregationModel, self).__init__() + if type(_output_dimension_for_each_order) != int: + raise TypeError + if not _output_dimension_for_each_order > 0: + raise ValueError + self._layers_order_list: _typing.Sequence[int] = _layers_order_list + + if isinstance(_layers_dropout, _typing.Sequence): + if len(_layers_dropout) != len(_layers_order_list): + raise ValueError + else: + self._layers_dropout: _typing.Sequence[float] = _layers_dropout + elif type(_layers_dropout) == float: + if _layers_dropout < 0: + _layers_dropout = 0 + if _layers_dropout > 1: + _layers_dropout = 1 + self._layers_dropout: _typing.Sequence[float] = [ + _layers_dropout for _ in _layers_order_list + ] + else: + raise TypeError + if type(_pre_dropout) != float: + raise TypeError + else: + if _pre_dropout < 0: + _pre_dropout = 0 + if _pre_dropout > 1: + _pre_dropout = 1 + self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList( + ( + _GraphSAINTAggregationLayers.WrappedDropout(torch.nn.Dropout(_pre_dropout)), + _GraphSAINTAggregationLayers.MultiOrderAggregationLayer( + num_features, _output_dimension_for_each_order, _layers_order_list[0], bias, + activation, batch_norm, _layers_dropout[0] + ), + ) + ) + for _layer_index in range(1, len(_layers_order_list)): + self.__sequential_encoding_layers.append( + _GraphSAINTAggregationLayers.MultiOrderAggregationLayer( + self.__sequential_encoding_layers[-1].integral_output_dimension, + _output_dimension_for_each_order, _layers_order_list[_layer_index], bias, + activation, batch_norm, _layers_dropout[_layer_index] + + ) + ) + self.__apply_normalize: bool = normalize + self.__linear_transform: torch.nn.Linear = torch.nn.Linear( + self.__sequential_encoding_layers[-1].integral_output_dimension, num_classes, bias + ) + + def decode(self, x: torch.Tensor) -> torch.Tensor: + if self.__apply_normalize: + x: torch.Tensor = torch.nn.functional.normalize(x, p=2, dim=1) + return torch.nn.functional.log_softmax(self.__linear_transform(x), dim=1) + + def encode(self, data) -> torch.Tensor: + if type(getattr(data, "x")) != torch.Tensor: + raise TypeError + if type(getattr(data, "edge_index")) != torch.Tensor: + raise TypeError + if ( + getattr(data, "edge_weight", None) is not None and + type(getattr(data, 
"edge_weight")) != torch.Tensor + ): + raise TypeError + for encoding_layer in self.__sequential_encoding_layers: + setattr(data, "x", encoding_layer(data)) + return getattr(data, "x") + + @property + def encoder_sequential_modules(self) -> torch.nn.ModuleList: + return self.__sequential_encoding_layers + + +@register_model("GraphSAINTAggregationModel") +class GraphSAINTAggregationModel(ClassificationModel): + def __init__( + self, + num_features: int = ..., + num_classes: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, + **kwargs + ): + super(GraphSAINTAggregationModel, self).__init__( + num_features, num_classes, device=device, init=init, **kwargs + ) + # todo: Initialize with default hyper parameter space and hyper parameter + + def _initialize(self): + """ Initialize model """ + self.model = GraphSAINTMultiOrderAggregationModel( + self.num_features, self.num_classes, + self.hyper_parameter.get("output_dimension_for_each_order"), + self.hyper_parameter.get("layers_order_list"), + self.hyper_parameter.get("pre_dropout"), + self.hyper_parameter.get("layers_dropout"), + self.hyper_parameter.get("activation", "ReLU"), + bool(self.hyper_parameter.get("bias", True)), + bool(self.hyper_parameter.get("batch_norm", True)), + bool(self.hyper_parameter.get("normalize", True)) + ).to(self.device) diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 1e611c7..9c0fa7c 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -21,27 +21,65 @@ from ...model.base import SequentialGraphNeuralNetwork LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") +class _DeterministicNeighborSamplerStore: + def __init__(self): + self.__neighbor_sampler_mapping: _typing.List[ + _typing.Tuple[torch.LongTensor, NeighborSampler] + ] = [] + + @classmethod + def __is_target_node_indexes_equal(cls, a: torch.LongTensor, b: torch.LongTensor) -> bool: + if not a.dtype == b.dtype == torch.int64: + return False + if a.size() != b.size(): + return False + return torch.where(a != b)[0].size(0) == 0 + + def __setitem__(self, target_nodes: torch.Tensor, neighbor_sampler: NeighborSampler): + target_nodes: _typing.Any = target_nodes.cpu() + if type(target_nodes) != torch.Tensor or target_nodes.dtype != torch.int64: + raise TypeError + if type(neighbor_sampler) != NeighborSampler: + raise TypeError + for i in range(len(self.__neighbor_sampler_mapping)): + if self.__is_target_node_indexes_equal( + target_nodes, self.__neighbor_sampler_mapping[i][0] + ): + self.__neighbor_sampler_mapping[i] = (target_nodes, neighbor_sampler) + return + self.__neighbor_sampler_mapping.append((target_nodes, neighbor_sampler)) + + def __getitem__(self, target_nodes: torch.Tensor) -> _typing.Optional[NeighborSampler]: + target_nodes: _typing.Any = target_nodes.cpu() + if type(target_nodes) != torch.Tensor or target_nodes.dtype != torch.int64: + raise TypeError + for __current_target_nodes, __neighbor_sampler in self.__neighbor_sampler_mapping: + if self.__is_target_node_indexes_equal(target_nodes, __current_target_nodes): + return __neighbor_sampler + return None + + @register_trainer("NodeClassificationGraphSAINTTrainer") class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): def __init__( - self, - 
model: _typing.Union[BaseModel], - num_features: int, - num_classes: int, - optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs, - ) -> None: + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (MicroF1,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, + ): if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer elif type(optimizer) == str: @@ -69,38 +107,76 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): self._lr_scheduler_type: _typing.Optional[str] = lr_scheduler_type self._max_epoch: int = max_epoch if max_epoch > 0 else 1e2 self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 - early_stopping_round: int = ( - early_stopping_round if early_stopping_round > 0 else 1e2 - ) self._early_stopping = EarlyStopping( - patience=early_stopping_round, verbose=False + patience=early_stopping_round if early_stopping_round > 0 else 1e2, + verbose=False ) - - # Assign an empty initial hyper parameter space - self._hyper_parameter_space: _typing.Sequence[ - _typing.Dict[str, _typing.Any] - ] = [] + """ Assign an empty initial hyper parameter space """ + self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () + """ Set GraphSAINT hyper-parameters """ + " Set sampler_type " + sampler_type: str = kwargs.get("sampler_type", "edge") + if type(sampler_type) != str: + raise TypeError + else: + sampler_type: str = sampler_type.strip().lower() + if sampler_type not in ("node", "edge", "rw"): + sampler_type: str = "edge" # default to edge sampler + self.__sampler_type: str = sampler_type + + " Set num_graphs_per_epoch " + num_graphs_per_epoch: int = kwargs.get("num_graphs_per_epoch", 50) + if type(num_graphs_per_epoch) != int: + raise TypeError + elif not num_graphs_per_epoch > 0: + num_graphs_per_epoch = 50 + self.__num_graphs_per_epoch: int = num_graphs_per_epoch + + " Set sampled_budget " + sampled_budget: int = kwargs.get("sampled_budget") + # todo: This is a version caused by current unreasonable initialization process + # todo: Refactor the framework for trainer to fix in future version + # if type(sampled_budget) != int: + # raise TypeError + # if not sampled_budget > 0: + # raise ValueError + self.__sampled_budget: int = sampled_budget + + " Set walk_length " + walk_length: int = kwargs.get("walk_length", 2) + if type(walk_length) != int: + raise TypeError + if not walk_length > 0: + raise ValueError + self.__walk_length: int = walk_length + + " Set 
sample_coverage_factor " + sample_coverage_factor: int = kwargs.get("sample_coverage_factor", 50) + if type(sample_coverage_factor) != int: + raise TypeError + elif not sample_coverage_factor > 0: + sample_coverage_factor = 50 + self.__sample_coverage_factor: int = sample_coverage_factor + + """ Set num_workers """ + + def _cpu_count() -> int: + __cpu_count: _typing.Optional[int] = os.cpu_count() + return __cpu_count if __cpu_count else 0 + + self.__training_sampler_num_workers: int = kwargs.get( + "training_sampler_num_workers", _cpu_count() + ) + if not 0 <= self.__training_sampler_num_workers <= _cpu_count(): + self.__training_sampler_num_workers: int = _cpu_count() super(NodeClassificationGraphSAINTTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) - - """ Set hyper parameters """ - self.__num_subgraphs: int = kwargs.get("num_subgraphs") - self.__sampling_budget: int = kwargs.get("sampling_budget") - if ( - kwargs.get("sampling_method") is not None - and type(kwargs.get("sampling_method")) == str - and kwargs.get("sampling_method") in ("node", "edge") - ): - self.__sampling_method_identifier: str = kwargs.get("sampling_method") - else: - self.__sampling_method_identifier: str = "node" - self.__is_initialized: bool = False if init: self.initialize() @@ -120,17 +196,16 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): def get_model(self): return self.model - def __train_only(self, data): + def __train_only(self, integral_data): """ The function of training on the given dataset and mask. - :param data: data of a specific graph - :return: self + :param integral_data: data of a specific graph + :return: None """ - data = data.to(self.device) optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.model.parameters(), lr=self._learning_rate, - weight_decay=self._weight_decay, + weight_decay=self._weight_decay ) if type(self._lr_scheduler_type) == str: if self._lr_scheduler_type.lower() == "step" + "lr": @@ -160,65 +235,76 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): torch.optim.lr_scheduler.LambdaLR(optimizer, lambda _: 1.0) ) - if self.__sampling_method_identifier.lower() == "edge": - sub_graph_sampler = GraphSAINTRandomEdgeSampler( - self.__sampling_budget, self.__num_subgraphs + setattr( + integral_data, "edge_weight", + self.__compute_normalized_edge_weight(getattr(integral_data, "edge_index")) + ) + " Generate Sampler " + if self.__sampler_type.lower() == "edge": + _sampler: torch_geometric.data.GraphSAINTEdgeSampler = ( + GraphSAINTSamplerFactory.create_edge_sampler( + integral_data, self.__num_graphs_per_epoch, self.__sampled_budget, + self.__sample_coverage_factor, + num_workers=self.__training_sampler_num_workers + ) + ) + elif self.__sampler_type.lower() == "node": + _sampler: torch_geometric.data.GraphSAINTNodeSampler = ( + GraphSAINTSamplerFactory.create_node_sampler( + integral_data, self.__num_graphs_per_epoch, self.__sampled_budget, + self.__sample_coverage_factor, + num_workers=self.__training_sampler_num_workers + ) + ) + elif self.__sampler_type.lower() == "rw": + _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = ( + GraphSAINTSamplerFactory.create_random_walk_sampler( + integral_data, self.__num_graphs_per_epoch, + self.__sampled_budget, self.__walk_length, + self.__sample_coverage_factor, + num_workers=self.__training_sampler_num_workers + ) ) else: - sub_graph_sampler = GraphSAINTRandomNodeSampler( - self.__sampling_budget, self.__num_subgraphs + 
_sampler: torch_geometric.data.GraphSAINTEdgeSampler = ( + GraphSAINTSamplerFactory.create_edge_sampler( + integral_data, self.__num_graphs_per_epoch, self.__sampled_budget, + num_workers=self.__training_sampler_num_workers + ) ) for current_epoch in range(self._max_epoch): self.model.model.train() + optimizer.zero_grad() """ epoch start """ - """ Sample sub-graphs """ - sub_graph_set = sub_graph_sampler.sample(data) - sub_graphs_loader: torch.utils.data.DataLoader = ( - torch.utils.data.DataLoader(sub_graph_set) - ) - integral_alpha: torch.Tensor = getattr(sub_graph_set, "alpha") - integral_lambda: torch.Tensor = getattr(sub_graph_set, "lambda") - """ iterate sub-graphs """ - for sub_graph_data in sub_graphs_loader: + for sampled_data in _sampler: + sampled_data = sampled_data.to(self.device) + setattr( + sampled_data, "edge_weight", + getattr(sampled_data, "edge_norm") * getattr(sampled_data, "edge_weight") + ) optimizer.zero_grad() - sampled_edge_indexes: torch.Tensor = sub_graph_data.sampled_edge_indexes - sampled_node_indexes: torch.Tensor = sub_graph_data.sampled_node_indexes - sampled_train_mask: torch.Tensor = sub_graph_data.train_mask - - sampled_alpha = integral_alpha[sampled_edge_indexes] - sub_graph_data.edge_weight = 1 / sampled_alpha - - prediction: torch.Tensor = self.model.model(sub_graph_data) - + prediction: torch.Tensor = self.model.model(sampled_data) if not hasattr(torch.nn.functional, self.loss): - raise TypeError(f"PyTorch does not support loss type {self.loss}") - loss_func = getattr(torch.nn.functional, self.loss) - unreduced_loss: torch.Tensor = loss_func( - prediction[sampled_train_mask], - data.y[sampled_train_mask], - reduction="none", - ) - - sampled_lambda: torch.Tensor = integral_lambda[sampled_node_indexes] - sampled_train_lambda: torch.Tensor = sampled_lambda[sampled_train_mask] - assert unreduced_loss.size() == sampled_train_lambda.size() - loss_weighted_sum: torch.Tensor = torch.sum( - unreduced_loss / sampled_train_lambda + raise TypeError( + f"PyTorch does not support loss type {self.loss}" + ) + loss_function = getattr(torch.nn.functional, self.loss) + loss_value: torch.Tensor = loss_function( + prediction, getattr(sampled_data, "y"), reduction='none' ) - loss_weighted_sum.backward() + loss_value = (loss_value * getattr(sampled_data, "node_norm"))[sampled_data.train_mask].sum() + loss_value.backward() optimizer.step() - if lr_scheduler is not None: - lr_scheduler.step() - - """ Validate performance """ + lr_scheduler.step() if ( - hasattr(data, "val_mask") - and type(getattr(data, "val_mask")) == torch.Tensor + hasattr(integral_data, "val_mask") and + getattr(integral_data, "val_mask") is not None and + type(getattr(integral_data, "val_mask")) == torch.Tensor ): validation_results: _typing.Sequence[float] = self.evaluate( - (data,), "val", [self.feval[0]] + (integral_data,), "val", [self.feval[0]] ) if self.feval[0].is_higher_better(): validation_loss: float = -validation_results[0] @@ -228,24 +314,40 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): if self._early_stopping.early_stop: LOGGER.debug("Early stopping at %d", current_epoch) break - if hasattr(data, "val_mask") and data.val_mask is not None: + if ( + hasattr(integral_data, "val_mask") and + getattr(integral_data, "val_mask") is not None and + type(getattr(integral_data, "val_mask")) == torch.Tensor + ): self._early_stopping.load_checkpoint(self.model.model) - return self - def __predict_only(self, data): + def __predict_only( + self, integral_data, + 
mask_or_target_nodes_indexes: _typing.Union[ + torch.BoolTensor, torch.LongTensor + ] + ) -> torch.Tensor: """ The function of predicting on the given data. - :param data: data of a specific graph + :param integral_data: data of a specific graph + :param mask_or_target_nodes_indexes: ... :return: the result of prediction on the given dataset """ - data = data.to(self.device) + import copy + integral_data = copy.copy(integral_data) self.model.model.eval() + setattr( + integral_data, "edge_weight", + self.__compute_normalized_edge_weight(getattr(integral_data, "edge_index")) + ) + integral_data = integral_data.to(self.device) with torch.no_grad(): - predicted_x: torch.Tensor = self.model.model(data) - return predicted_x + prediction = self.model.model(integral_data) + return prediction[mask_or_target_nodes_indexes] def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, in_log_format=False + self, dataset, mask: _typing.Optional[str] = None, + in_log_format: bool = False ): """ The function of predicting the probability on the given dataset. @@ -254,19 +356,19 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): :param in_log_format: :return: """ - data = dataset[0].to(self.device) + data = dataset[0].to(torch.device("cpu")) if mask is not None and type(mask) == str: if mask.lower() == "train": - _mask: torch.Tensor = data.train_mask + _mask: torch.BoolTensor = data.train_mask elif mask.lower() == "test": - _mask: torch.Tensor = data.test_mask + _mask: torch.BoolTensor = data.test_mask elif mask.lower() == "val": - _mask: torch.Tensor = data.val_mask + _mask: torch.BoolTensor = data.val_mask else: - _mask: torch.Tensor = data.test_mask + _mask: torch.BoolTensor = data.test_mask else: - _mask: torch.Tensor = data.test_mask - result = self.__predict_only(data)[_mask] + _mask: torch.BoolTensor = data.test_mask + result = self.__predict_only(data, _mask) return result if in_log_format else torch.exp(result) def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: @@ -300,18 +402,39 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): prediction_probability: torch.Tensor = self.predict_proba(dataset, mask) y_ground_truth: torch.Tensor = data.y[_mask] - eval_results = [] - for f in _feval: - try: - eval_results.append(f.evaluate(prediction_probability, y_ground_truth)) - except: - eval_results.append( - f.evaluate( - prediction_probability.cpu().numpy(), - y_ground_truth.cpu().numpy(), - ) - ) - return eval_results + return [ + f.evaluate( + prediction_probability.cpu().numpy(), + y_ground_truth.cpu().numpy(), + ) for f in _feval + ] + + @classmethod + def __compute_normalized_edge_weight( + cls, edge_index: torch.LongTensor, + original_edge_weight: _typing.Optional[torch.Tensor] = ... 
+ ) -> torch.Tensor: + if type(edge_index) != torch.Tensor: + raise TypeError + if original_edge_weight in (None, Ellipsis, ...): + original_edge_weight: torch.Tensor = torch.ones(edge_index.size(1)) + elif type(original_edge_weight) != torch.Tensor: + raise TypeError + elif original_edge_weight.numel() != edge_index.size(1): + raise ValueError + elif original_edge_weight.size() != (edge_index.size(1),): + original_edge_weight = original_edge_weight.resize(edge_index.size(1)) + + __out_degree: torch.Tensor = \ + torch_geometric.utils.degree(edge_index[0]) + __in_degree: torch.Tensor = \ + torch_geometric.utils.degree(edge_index[1]) + temp_tensor: torch.Tensor = torch.stack( + [__out_degree[edge_index[0]], __in_degree[edge_index[1]]] + ) + temp_tensor: torch.Tensor = torch.pow(temp_tensor, -0.5) + temp_tensor[torch.isinf(temp_tensor)] = 0.0 + return original_edge_weight * temp_tensor[0] * temp_tensor[1] def train(self, dataset, keep_valid_result: bool = True): """ @@ -321,12 +444,12 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): """ import gc gc.collect() - data = dataset[0] + data = dataset[0].to(torch.device("cpu")) self.__train_only(data) if keep_valid_result: - prediction: torch.Tensor = self.__predict_only(data) - self._valid_result: torch.Tensor = prediction[data.val_mask].max(1)[1] - self._valid_result_prob: torch.Tensor = prediction[data.val_mask] + prediction: torch.Tensor = self.__predict_only(data, data.val_mask) + self._valid_result: torch.Tensor = prediction.max(1)[1] + self._valid_result_prob: torch.Tensor = prediction self._valid_score: _typing.Sequence[float] = self.evaluate(dataset, "val") def get_valid_predict(self) -> torch.Tensor: @@ -337,16 +460,14 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): def get_valid_score( self, return_major: bool = True - ) -> _typing.Tuple[ - _typing.Union[float, _typing.Sequence[float]], - _typing.Union[bool, _typing.Sequence[bool]], + ) -> _typing.Union[ + _typing.Tuple[float, bool], + _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] ]: if return_major: return self._valid_score[0], self.feval[0].is_higher_better() else: - return ( - self._valid_score, [f.is_higher_better() for f in self.feval] - ) + return self._valid_score, [f.is_higher_better() for f in self.feval] @property def hyper_parameter_space(self) -> _typing.Sequence[_typing.Dict[str, _typing.Any]]: @@ -360,19 +481,20 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): raise TypeError self._hyper_parameter_space = hp_space - def __repr__(self) -> dict: - return { + def __repr__(self) -> str: + import yaml + __repr: dict = { "trainer_name": self.__class__.__name__, - "optimizer": self.optimizer, - "learning_rate": self.lr, - "max_epoch": self.max_epoch, - "early_stopping_round": self.early_stopping_round, - "model": repr(self.model) + "learning_rate": self._learning_rate, + "model": repr(self.model), + "max_epoch": self._max_epoch, + "early_stopping_round": self._early_stopping.patience, + "sampler_type": self.__sampler_type, + "sampled_budget": self.__sampled_budget } - - def __str__(self) -> str: - import yaml - return yaml.dump(repr(self)) + if self.__sampler_type == "rw": + __repr.update({"walk_length": self.__walk_length}) + return yaml.dump(__repr) def duplicate_from_hyper_parameter( self, @@ -400,48 +522,10 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): feval=self.feval, loss=self.loss, lr_scheduler_type=self._lr_scheduler_type, - **hp, + **hp ) 
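[Editor's aside: the __compute_normalized_edge_weight helper above implements the symmetric degree normalization w(u, v) = 1 / sqrt(d_out(u) * d_in(v)) over the edges of edge_index. A minimal standalone sketch of the same computation — the function name is illustrative, and it assumes a PyG-style edge_index of shape [2, num_edges]:

    import torch
    from torch_geometric.utils import degree

    def symmetric_edge_weight(edge_index: torch.LongTensor) -> torch.Tensor:
        # out-degree of each source node and in-degree of each target node
        d_out = degree(edge_index[0])
        d_in = degree(edge_index[1])
        # weight edge (u, v) by 1 / sqrt(d_out[u] * d_in[v]); zero-degree endpoints yield 0
        inv_sqrt = torch.stack([d_out[edge_index[0]], d_in[edge_index[1]]]).pow(-0.5)
        inv_sqrt[torch.isinf(inv_sqrt)] = 0.0
        return inv_sqrt[0] * inv_sqrt[1]

At training time the trainer additionally multiplies each sampled subgraph's edge_weight by the edge_norm estimated by the PyG GraphSAINT sampler and weights the loss by node_norm, which is the aggregator/loss normalization of the GraphSAINT paper.]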
-class _DeterministicNeighborSamplerStore:
-    def __init__(self):
-        self.__neighbor_sampler_mapping: _typing.List[
-            _typing.Tuple[torch.LongTensor, NeighborSampler]
-        ] = []
-
-    @classmethod
-    def __is_target_node_indexes_equal(cls, a: torch.LongTensor, b: torch.LongTensor) -> bool:
-        if not a.dtype == b.dtype == torch.int64:
-            return False
-        if a.size() != b.size():
-            return False
-        return torch.where(a != b)[0].size(0) == 0
-
-    def __setitem__(self, target_nodes: torch.Tensor, neighbor_sampler: NeighborSampler):
-        target_nodes: _typing.Any = target_nodes.cpu()
-        if type(target_nodes) != torch.Tensor or target_nodes.dtype != torch.int64:
-            raise TypeError
-        if type(neighbor_sampler) != NeighborSampler:
-            raise TypeError
-        for i in range(len(self.__neighbor_sampler_mapping)):
-            if self.__is_target_node_indexes_equal(
-                target_nodes, self.__neighbor_sampler_mapping[i][0]
-            ):
-                self.__neighbor_sampler_mapping[i] = (target_nodes, neighbor_sampler)
-                return
-        self.__neighbor_sampler_mapping.append((target_nodes, neighbor_sampler))
-
-    def __getitem__(self, target_nodes: torch.Tensor) -> _typing.Optional[NeighborSampler]:
-        target_nodes: _typing.Any = target_nodes.cpu()
-        if type(target_nodes) != torch.Tensor or target_nodes.dtype != torch.int64:
-            raise TypeError
-        for __current_target_nodes, __neighbor_sampler in self.__neighbor_sampler_mapping:
-            if self.__is_target_node_indexes_equal(target_nodes, __current_target_nodes):
-                return __neighbor_sampler
-        return None
-
-
 @register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer")
 class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer):
     def __init__(
diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py
index 27434e2..19936c0 100644
--- a/autogl/module/train/sampling/sampler/graphsaint_sampler.py
+++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py
@@ -1,121 +1,34 @@
-import copy
-import typing as _typing
-import torch.utils.data
 import torch_geometric

-class _SubGraphSet(torch.utils.data.Dataset):
-    def __init__(self, datalist: _typing.Sequence[_typing.Any], *args, **kwargs):
-        self.__graphs: _typing.Sequence[_typing.Any] = datalist
-        self.__remaining_args: _typing.Sequence[_typing.Any] = args
-        for key, value in kwargs.items():
-            setattr(self, key, value)
-
-    def __len__(self) -> int:
-        return len(self.__graphs)
-
-    def __getitem__(self, index: int) -> _typing.Any:
-        if not 0 <= index < len(self.__graphs):
-            raise IndexError
-        return self.__graphs[index]
-
-
-class _GraphSAINTSubGraphSampler:
-    def __init__(
-        self,
-        sampler_class: _typing.Type[torch_geometric.data.GraphSAINTSampler],
-        budget: int,
-        num_graphs: int = 1,
-        walk_length: int = 1,
-        num_workers: int = 0,
-    ):
-        """
-        :param sampler_class: class of torch_geometric.data.GraphSAINTSampler
-        :param budget: general budget
-        :param num_graphs: number of sub-graphs to sample, i.e. N in the paper
-        :param walk_length: walk length for RandomWalk Sampler
-        :param num_workers: how many sub-processes to use for data loading.
-            0 means that the data will be loaded in the main process.
-        """
-        self.__sampler_class: _typing.Type[
-            torch_geometric.data.GraphSAINTSampler
-        ] = sampler_class
-        self.__budget: int = budget
-        self.__num_graphs: int = num_graphs
-        self.__walk_length: int = walk_length
-        self.__num_workers: int = num_workers if num_workers > 0 else 0
-
-    def sample(self, _integral_data) -> _SubGraphSet:
-        """
-        :param _integral_data: conventional data for an integral graph
-        :return: instance of _SubGraphSet
-        """
-        data = copy.copy(_integral_data)
-        data.sampled_node_indexes = torch.arange(data.num_nodes, dtype=torch.int64)
-        data.sampled_edge_indexes = torch.arange(data.num_edges, dtype=torch.int64)
-        if (
-            type(self.__sampler_class)
-            == torch_geometric.data.GraphSAINTRandomWalkSampler
-        ):
-            _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = (
-                torch_geometric.data.GraphSAINTRandomWalkSampler(
-                    data,
-                    self.__budget,
-                    self.__walk_length,
-                    self.__num_graphs,
-                    num_workers=self.__num_workers,
-                )
-            )
-        else:
-            _sampler: torch_geometric.data.GraphSAINTSampler = self.__sampler_class(
-                data, self.__budget, self.__num_graphs, num_workers=self.__num_workers
-            )
-        """ Sample sub-graphs """
-        datalist: list = [d for d in _sampler]
-        """ Compute the normalization """
-        node_sampled_count = torch.zeros(data.num_nodes, dtype=torch.int64)
-        edge_sampled_count = torch.zeros(data.num_edges, dtype=torch.int64)
-        concatenated_sampled_nodes: torch.Tensor = torch.cat(
-            [sub_graph.sampled_node_indexes for sub_graph in datalist]
-        )
-        concatenated_sampled_edges: torch.Tensor = torch.cat(
-            [sub_graph.sampled_edge_indexes for sub_graph in datalist]
+class GraphSAINTSamplerFactory:
+    @classmethod
+    def create_node_sampler(
+        cls, data, num_graphs_per_epoch: int, node_budget: int,
+        sample_coverage_factor: int = 50, **kwargs
+    ) -> torch_geometric.data.GraphSAINTNodeSampler:
+        return torch_geometric.data.GraphSAINTNodeSampler(
+            data, node_budget,
+            num_graphs_per_epoch, sample_coverage_factor, log=False, **kwargs
         )
-        for current_sampled_node_index in concatenated_sampled_nodes.unique():
-            node_sampled_count[current_sampled_node_index] = torch.where(
-                concatenated_sampled_nodes == current_sampled_node_index
-            )[0].size(0)
-        for current_sampled_edge_index in concatenated_sampled_edges.unique():
-            edge_sampled_count[current_sampled_edge_index] = torch.where(
-                concatenated_sampled_edges == current_sampled_edge_index
-            )[0].size(0)
-        _alpha: torch.Tensor = (
-            edge_sampled_count / node_sampled_count[data.edge_index[1]]
-        )
-        _alpha[torch.isnan(_alpha) | torch.isinf(_alpha)] = 0
-        _lambda: torch.Tensor = node_sampled_count / self.__num_graphs
-        return _SubGraphSet(datalist, **{"alpha": _alpha, "lambda": _lambda})
-
-
-class GraphSAINTRandomNodeSampler(_GraphSAINTSubGraphSampler):
-    def __init__(self, node_budget: int, num_graphs: int = 1):
-        super(GraphSAINTRandomNodeSampler, self).__init__(
-            torch_geometric.data.GraphSAINTNodeSampler, node_budget, num_graphs
-        )
-
-class GraphSAINTRandomEdgeSampler(_GraphSAINTSubGraphSampler):
-    def __init__(self, edge_budget: int, num_graphs: int = 1):
-        super(GraphSAINTRandomEdgeSampler, self).__init__(
-            torch_geometric.data.GraphSAINTNodeSampler, edge_budget, num_graphs
+    @classmethod
+    def create_edge_sampler(
+        cls, data, num_graphs_per_epoch: int, edge_budget: int,
+        sample_coverage_factor: int = 50, **kwargs
+    ) -> torch_geometric.data.GraphSAINTEdgeSampler:
+        return torch_geometric.data.GraphSAINTEdgeSampler(
+            data, edge_budget,
+            num_graphs_per_epoch, sample_coverage_factor, log=False, **kwargs
         )

-class GraphSAINTRandomWalkSampler(_GraphSAINTSubGraphSampler):
-    def __init__(self, edge_budget: int, num_graphs: int = 1, walk_length: int = 4):
-        super(GraphSAINTRandomWalkSampler, self).__init__(
-            torch_geometric.data.GraphSAINTRandomWalkSampler,
-            edge_budget,
-            num_graphs,
-            walk_length,
+    @classmethod
+    def create_random_walk_sampler(
+        cls, data, num_graphs_per_epoch: int,
+        num_walks: int, walk_length: int,
+        sample_coverage_factor: int = 50, **kwargs
+    ) -> torch_geometric.data.GraphSAINTRandomWalkSampler:
+        return torch_geometric.data.GraphSAINTRandomWalkSampler(
+            data, num_walks, walk_length,
+            num_graphs_per_epoch, sample_coverage_factor, log=False, **kwargs
         )

From 07479564de1139633fb67f2c4b8e63ca8bf904f9 Mon Sep 17 00:00:00 2001
From: generall
Date: Tue, 22 Jun 2021 16:23:12 +0800
Subject: [PATCH 094/144] solve comments in RL

---
 autogl/module/nas/algorithm/darts.py         |  56 ++++----
 autogl/module/nas/algorithm/enas.py          |  68 ++++------
 autogl/module/nas/algorithm/random_search.py |  33 +++--
 autogl/module/nas/algorithm/rl.py            | 114 +++++++---------
 autogl/module/nas/estimator/one_shot.py      |   2 +
 autogl/module/nas/estimator/train_scratch.py |   3 +
 autogl/module/nas/space/__init__.py          |   4 +-
 autogl/module/nas/space/base.py              |   2 +-
 autogl/module/nas/space/graph_nas.py         | 129 ++----------------
 autogl/module/nas/space/graph_nas_macro.py   |   9 +-
 autogl/module/nas/space/operation.py         | 120 +++++++++++++++++
 autogl/module/nas/space/single_path.py       |   6 +-
 examples/test_graph_nas_rand.py              |  71 ++++++++++
 examples/test_graph_nas_rl.py                |  20 ++-
 14 files changed, 355 insertions(+), 282 deletions(-)
 create mode 100644 autogl/module/nas/space/operation.py
 create mode 100644 examples/test_graph_nas_rand.py

diff --git a/autogl/module/nas/algorithm/darts.py b/autogl/module/nas/algorithm/darts.py
index c7510c7..3894b3d 100644
--- a/autogl/module/nas/algorithm/darts.py
+++ b/autogl/module/nas/algorithm/darts.py
@@ -7,6 +7,7 @@ import torch.optim
 import torch.nn as nn
 import torch.nn.functional as F

+from . import register_nas_algo
 from .base import BaseNAS
 from ..estimator.base import BaseEstimator
 from ..space import BaseSpace
@@ -16,55 +17,42 @@ from nni.retiarii.oneshot.pytorch.darts import DartsLayerChoice, DartsInputChoice

 _logger = logging.getLogger(__name__)

-
+@register_nas_algo("darts")
 class Darts(BaseNAS):
     """
     DARTS trainer.

     Parameters
     ----------
-    model : nn.Module
-        PyTorch model to be trained.
-    loss : callable
-        Receives logits and ground truth label, return a loss tensor.
-    metrics : callable
-        Receives logits and ground truth label, return a dict of metrics.
-    optimizer : Optimizer
-        The optimizer used for optimizing the model.
     num_epochs : int
         Number of epochs planned for training.
-    dataset : Dataset
-        Dataset for training. Will be split for training weights and architecture weights.
-    grad_clip : float
-        Gradient clipping. Set to 0 to disable. Default: 5.
-    learning_rate : float
-        Learning rate to optimize the model.
-    batch_size : int
-        Batch size.
     workers : int
         Workers for data loading.
+    gradient_clip : float
+        Gradient clipping. Set to 0 to disable. Default: 5.
+    model_lr : float
+        Learning rate to optimize the model.
+    model_wd : float
+        Weight decay to optimize the model.
+    arch_lr : float
+        Learning rate to optimize the architecture.
+    arch_wd : float
+        Weight decay to optimize the architecture.
     device : str or torch.device
         The device of the whole process
-    log_frequency : int
-        Step count per logging.
-    arc_learning_rate : float
-        Learning rate of architecture parameters.
-    unrolled : float
-        ``True`` if using second order optimization, else first order optimization.
     """

-    def __init__(self, num_epochs=5, device="cuda"):
+    def __init__(self, num_epochs=5, workers = 4, gradient_clip = 5.0, model_lr = 1e-3, model_wd = 5e-4, arch_lr = 3e-4, arch_wd = 1e-3, device="cuda"):
         super().__init__(device=device)
         self.num_epochs = num_epochs
-        self.workers = 4
-        self.log_frequency = None
-        self.gradient_clip = 5.0
+        self.workers = workers
+        self.gradient_clip = gradient_clip
         self.model_optimizer = torch.optim.Adam
         self.arch_optimizer = torch.optim.Adam
-        self.model_lr = 0.001
-        self.model_wd = 5e-4
-        self.arch_lr = 3e-4
-        self.arch_wd = 1e-3
+        self.model_lr = model_lr
+        self.model_wd = model_wd
+        self.arch_lr = arch_lr
+        self.arch_wd = arch_wd

     def search(self, space: BaseSpace, dataset, estimator):
         model_optim = self.model_optimizer(
@@ -95,7 +83,7 @@ class Darts(BaseNAS):
         )

         selection = self.export(nas_modules)
-        return space.export(selection, self.device)
+        return space.parse_model(selection, self.device)

     def _train_one_epoch(
         self,
@@ -124,7 +112,7 @@ class Darts(BaseNAS):
             nn.utils.clip_grad_norm_(model.parameters(), self.gradient_clip)
             model_optim.step()

-    def _infer(self, model: BaseModel, dataset, estimator: BaseEstimator, mask="train"):
+    def _infer(self, model: BaseSpace, dataset, estimator: BaseEstimator, mask="train"):
         metric, loss = estimator.infer(model, dataset, mask=mask)
         return metric, loss

@@ -133,5 +121,5 @@ class Darts(BaseNAS):
         result = dict()
         for name, module in nas_modules:
             if name not in result:
                 result[name] = module.export()
         return result
diff --git a/autogl/module/nas/algorithm/enas.py b/autogl/module/nas/algorithm/enas.py
index f9962fb..16ed39a 100644
--- a/autogl/module/nas/algorithm/enas.py
+++ b/autogl/module/nas/algorithm/enas.py
@@ -1,45 +1,31 @@
 # codes in this file are reproduced from https://github.com/microsoft/nni with some changes.
 import copy
-import logging

 import torch
 import torch.nn as nn
 import torch.nn.functional as F

+from . import register_nas_algo
 from .base import BaseNAS
 from ..space import BaseSpace
 from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module
-from nni.nas.pytorch.fixed import apply_fixed_architecture
-from tqdm import tqdm
-_logger = logging.getLogger(__name__)
+from tqdm import tqdm, trange
 from .rl import PathSamplingLayerChoice,PathSamplingInputChoice,ReinforceField,ReinforceController
+from ....utils import get_logger

+LOGGER = get_logger("ENAS")
+
+@register_nas_algo("enas")
 class Enas(BaseNAS):
     """
     ENAS trainer.

     Parameters
     ----------
-    model : nn.Module
-        PyTorch model to be trained.
-    loss : callable
-        Receives logits and ground truth label, return a loss tensor.
-    metrics : callable
-        Receives logits and ground truth label, return a dict of metrics.
-    reward_function : callable
-        Receives logits and ground truth label, return a tensor, which will be feeded to RL controller as reward.
-    optimizer : Optimizer
-        The optimizer used for optimizing the model.
     num_epochs : int
         Number of epochs planned for training.
-    dataset : Dataset
-        Dataset for training. Will be split for training weights and architecture weights.
-    batch_size : int
-        Batch size.
-    workers : int
-        Workers for data loading.
-    device : torch.device
-        ``torch.device("cpu")`` or ``torch.device("cuda")``.
+    n_warmup : int
+        Number of epochs for training super network.
     log_frequency : int
         Step count per logging.
    grad_clip : float
@@ -54,19 +40,23 @@ class Enas(BaseNAS):
        Learning rate for RL controller.
    ctrl_steps_aggregate : int
        Number of steps that will be aggregated into one mini-batch for RL controller.
-    ctrl_steps : int
-        Number of mini-batches for each epoch of RL controller learning.
    ctrl_kwargs : dict
        Optional kwargs that will be passed to :class:`ReinforceController`.
+    model_lr : float
+        Learning rate for super network.
+    model_wd : float
+        Weight decay for super network.
+    disable_progress : boolean
+        Control whether to show the progress bar.
+    device : str or torch.device
+        The device of the whole process, e.g. "cuda", torch.device("cpu")
    """

-    def __init__(self, device='cuda', workers=4,log_frequency=None,
-                 grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999,
-                 ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,*args,**kwargs):
+    def __init__(self, num_epochs = 5, n_warmup = 100, log_frequency=None, grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999,
+                 ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,model_lr=5e-3,model_wd=5e-4, disable_progress = True, device="cuda"):
        super().__init__(device)
        self.device=device
-        self.num_epochs = kwargs.get("num_epochs", 5)
-        self.workers = workers
+        self.num_epochs = num_epochs
        self.log_frequency = log_frequency
        self.entropy_weight = entropy_weight
        self.skip_weight = skip_weight
@@ -74,12 +64,13 @@ class Enas(BaseNAS):
        self.baseline = 0.
        self.ctrl_steps_aggregate = ctrl_steps_aggregate
        self.grad_clip = grad_clip
-        self.workers = workers
        self.ctrl_kwargs=ctrl_kwargs
        self.ctrl_lr=ctrl_lr
        self.n_warmup=n_warmup
        self.model_lr = model_lr
        self.model_wd = model_wd
+        self.disable_progress = disable_progress
+
    def search(self, space: BaseSpace, dset, estimator):
        self.model = space
        self.dataset = dset#.to(self.device)
@@ -105,14 +96,15 @@ class Enas(BaseNAS):
        self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr)

        # warm up supernet
-        with tqdm(range(self.n_warmup)) as bar:
+        with tqdm(range(self.n_warmup), disable=self.disable_progress) as bar:
            for i in bar:
                acc,l1=self._train_model(i)
                with torch.no_grad():
                    val_acc,val_loss=self._infer('val')
                bar.set_postfix(loss=l1,acc=acc,val_acc=val_acc,val_loss=val_loss)
+
        # train
-        with tqdm(range(self.num_epochs)) as bar:
+        with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar:
            for i in bar:
                try:
                    l1=self._train_model(i)
@@ -122,15 +114,11 @@ class Enas(BaseNAS):
                    nm=self.nas_modules
                    for i in range(len(nm)):
                        print(nm[i][1].sampled)
-                    import pdb
-                    pdb.set_trace()
-
-                bar.set_postfix(loss_model=l1,reward_controller=l2)
-
+
        selection=self.export()
-        print(selection)
-        return space.export(selection,self.device)
+        #print(selection)
+        return space.parse_model(selection,self.device)

    def _train_model(self, epoch):
        self.model.train()
@@ -172,7 +160,7 @@ class Enas(BaseNAS):
            self.ctrl_optim.zero_grad()

            if self.log_frequency is not None and ctrl_step % self.log_frequency == 0:
-                _logger.info('RL Epoch [%d/%d] Step [%d/%d]  %s', epoch + 1, self.num_epochs,
+                LOGGER.info('RL Epoch [%d/%d] Step [%d/%d]  %s', epoch + 1, self.num_epochs,
                             ctrl_step + 1, self.ctrl_steps_aggregate)
        return sum(rewards)/len(rewards)

diff --git a/autogl/module/nas/algorithm/random_search.py b/autogl/module/nas/algorithm/random_search.py
index 797c6dd..527ca4d 100644
--- a/autogl/module/nas/algorithm/random_search.py
+++ b/autogl/module/nas/algorithm/random_search.py
@@ -1,18 +1,19 @@
 import copy
-import logging
-
 import torch
 import torch.nn as nn
 import torch.nn.functional as F

+from . import register_nas_algo
 from .base import BaseNAS
 from ..space import BaseSpace
 from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module
 from nni.nas.pytorch.fixed import apply_fixed_architecture
 from tqdm import tqdm
-_logger = logging.getLogger(__name__)
 from .rl import PathSamplingLayerChoice,PathSamplingInputChoice
 import numpy as np
+from ....utils import get_logger
+
+LOGGER = get_logger("random_search_NAS")

 class RSBox:
     '''get selection space for model `space` '''
     def __init__(self,space):
@@ -30,10 +31,12 @@ class RSBox:

         space_size=np.prod(list(selection_range.values()))

-        print(f'Using random search Box. Total space size: {space_size}')
-        print('Searching Space:',selection_range)
+        #print(f'Using random search Box. Total space size: {space_size}')
+        #print('Searching Space:',selection_range)
+
     def export(self):
         return self.selection_dict #{k:v}, means action ranges 0 to v-1 for layer named k
+
     def sample(self):
         # uniformly sample
         selection={}
@@ -42,14 +45,25 @@ class RSBox:
             selection[k]=np.random.choice(range(v))
         return selection

+@register_nas_algo("random")
 class RandomSearch(BaseNAS):
     '''
-    uniformly search
+    Uniformly random architecture search
+
+    Parameters
+    ----------
+    device : str or torch.device
+        The device of the whole process, e.g. "cuda", torch.device("cpu")
+    num_epochs : int
+        Number of epochs planned for training.
+    disable_progress : boolean
+        Control whether to show the progress bar.
     '''
-    def __init__(self, device='cuda',num_epochs=400,disable_progress=False,*args,**kwargs):
+    def __init__(self, device='cuda', num_epochs=400, disable_progress=False):
         super().__init__(device)
         self.num_epochs=num_epochs
         self.disable_progress=disable_progress
+
     def search(self, space: BaseSpace, dset, estimator):
         self.estimator=estimator
         self.dataset=dset
@@ -60,16 +74,15 @@ class RandomSearch(BaseNAS):
         with tqdm(range(self.num_epochs),disable=self.disable_progress) as bar:
             for i in bar:
                 selection=self.export()
-                # print(selection)
                 vec=tuple(list(selection.values()))
                 if vec not in cache:
-                    self.arch=space.export(selection,self.device)
+                    self.arch=space.parse_model(selection,self.device)
                     metric,loss=self._infer(mask='val')
                     arch_perfs.append([metric,selection])
                     cache[vec]=metric
                 bar.set_postfix(acc=metric,max_acc=max(cache.values()))

         selection=arch_perfs[np.argmax([x[0] for x in arch_perfs])][1]
-        arch=space.export(selection,self.device)
+        arch=space.parse_model(selection,self.device)
         return arch

     def export(self):
diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py
index 9f152a9..ad73eaf 100644
--- a/autogl/module/nas/algorithm/rl.py
+++ b/autogl/module/nas/algorithm/rl.py
@@ -1,11 +1,9 @@
 # codes in this file are reproduced from https://github.com/microsoft/nni with some changes.
-import copy
-import logging
-
 import torch
 import torch.nn as nn
 import torch.nn.functional as F

+from . import register_nas_algo
 from .base import BaseNAS
 from ..space import BaseSpace
 from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module
@@ -13,7 +11,9 @@ from nni.nas.pytorch.fixed import apply_fixed_architecture
 from tqdm import tqdm
 from datetime import datetime
 import numpy as np
-_logger = logging.getLogger(__name__)
+from ....utils import get_logger
+
+LOGGER = get_logger("RL_NAS")

 def _get_mask(sampled, total):
     multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)]
     return torch.tensor(multihot, dtype=torch.bool)  # pylint: disable=not-callable
@@ -108,7 +108,6 @@ class StackedLSTMCell(nn.Module):
             inputs = curr_h[-1].view(1, -1)
         return next_h, next_c

-
 class ReinforceField:
     """
     A field with ``name``, with ``total`` choices. ``choose_one`` is true if one and only one is meant to be
@@ -123,7 +122,6 @@ class ReinforceField:
     def __repr__(self):
         return f'ReinforceField(name={self.name}, total={self.total}, choose_one={self.choose_one})'

-
 class ReinforceController(nn.Module):
     """
     A controller that mutates the graph with RL.
@@ -228,31 +226,15 @@ class ReinforceController(nn.Module):
             sampled = sampled[0]
         return sampled

-
+@register_nas_algo("rl")
 class RL(BaseNAS):
     """
     RL in GraphNas.

     Parameters
     ----------
-    model : nn.Module
-        PyTorch model to be trained.
-    loss : callable
-        Receives logits and ground truth label, return a loss tensor.
-    metrics : callable
-        Receives logits and ground truth label, return a dict of metrics.
-    reward_function : callable
-        Receives logits and ground truth label, return a tensor, which will be feeded to RL controller as reward.
-    optimizer : Optimizer
-        The optimizer used for optimizing the model.
     num_epochs : int
         Number of epochs planned for training.
-    dataset : Dataset
-        Dataset for training. Will be split for training weights and architecture weights.
-    batch_size : int
-        Batch size.
-    workers : int
-        Workers for data loading.
     device : torch.device
         ``torch.device("cpu")`` or ``torch.device("cuda")``.
     log_frequency : int
         Step count per logging.
     grad_clip : float
         Gradient clipping. Set to 0 to disable. Default: 5.
     entropy_weight : float
         Weight of sample entropy loss.
     skip_weight : float
         Weight of skip penalty loss.
     baseline_decay : float
         Decay factor of baseline. EMA.
     ctrl_lr : float
         Learning rate for RL controller.
     ctrl_steps_aggregate : int
         Number of steps that will be aggregated into one mini-batch for RL controller.
     ctrl_steps : int
         Number of mini-batches for each epoch of RL controller learning.
     ctrl_kwargs : dict
         Optional kwargs that will be passed to :class:`ReinforceController`.
+    n_warmup : int
+        Number of epochs for training super network.
+    model_lr : float
+        Learning rate for super network.
+    model_wd : float
+        Weight decay for super network.
+    disable_progress : boolean
+        Control whether to show the progress bar.
     """

-    def __init__(self, device='cuda', workers=4,log_frequency=None,
+    def __init__(self, num_epochs = 5, device='cuda', log_frequency=None,
                  grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999,
-                 ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,*args,**kwargs):
+                 ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4, disable_progress=True):
         super().__init__(device)
         self.device=device
-        self.num_epochs = kwargs.get("num_epochs", 5)
-        self.workers = workers
+        self.num_epochs = num_epochs
         self.log_frequency = log_frequency
         self.entropy_weight = entropy_weight
         self.skip_weight = skip_weight
@@ -289,13 +278,14 @@ class RL(BaseNAS):
         self.baseline = 0.
         self.ctrl_steps_aggregate = ctrl_steps_aggregate
         self.grad_clip = grad_clip
-        self.workers = workers
         self.ctrl_kwargs=ctrl_kwargs
         self.ctrl_lr=ctrl_lr
         self.n_warmup=n_warmup
         self.model_lr = model_lr
         self.model_wd = model_wd
+        self.disable_progress = disable_progress
         self.log=open('../tmp/log.txt','w')
+
     def search(self, space: BaseSpace, dset, estimator):
         self.model = space
         self.dataset = dset#.to(self.device)
@@ -317,14 +307,14 @@ class RL(BaseNAS):
         self.controller = ReinforceController(self.nas_fields, **(self.ctrl_kwargs or {}))
         self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr)
         # train
-        with tqdm(range(self.num_epochs)) as bar:
+        with tqdm(range(self.num_epochs), disable = self.disable_progress) as bar:
             for i in bar:
                 l2=self._train_controller(i)
                 bar.set_postfix(reward_controller=l2)

         selection=self.export()
-        arch=space.export(selection,self.device)
-        print(selection,arch)
+        arch=space.parse_model(selection,self.device)
+        #print(selection,arch)
         return arch

     def _train_controller(self, epoch):
@@ -332,7 +322,7 @@ class RL(BaseNAS):
         self.controller.train()
         self.ctrl_optim.zero_grad()
         rewards=[]
-        with tqdm(range(self.ctrl_steps_aggregate)) as bar:
+        with tqdm(range(self.ctrl_steps_aggregate), disable=self.disable_progress) as bar:
             for ctrl_step in bar:
                 self._resample()
                 metric,loss=self._infer(mask='val')
@@ -375,32 +365,17 @@ class RL(BaseNAS):
         metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask)
         return metric, loss

+@register_nas_algo("graphnas")
 class GraphNasRL(BaseNAS):
     """
     RL in GraphNas.

     Parameters
     ----------
-    model : nn.Module
-        PyTorch model to be trained.
-    loss : callable
-        Receives logits and ground truth label, return a loss tensor.
-    metrics : callable
-        Receives logits and ground truth label, return a dict of metrics.
-    reward_function : callable
-        Receives logits and ground truth label, return a tensor, which will be feeded to RL controller as reward.
-    optimizer : Optimizer
-        The optimizer used for optimizing the model.
-    num_epochs : int
-        Number of epochs planned for training.
-    dataset : Dataset
-        Dataset for training. Will be split for training weights and architecture weights.
-    batch_size : int
-        Batch size.
-    workers : int
-        Workers for data loading.
     device : torch.device
         ``torch.device("cpu")`` or ``torch.device("cuda")``.
+    num_epochs : int
+        Number of epochs planned for training.
     log_frequency : int
         Step count per logging.
     grad_clip : float
         Gradient clipping. Set to 0 to disable. Default: 5.
     entropy_weight : float
         Weight of sample entropy loss.
     skip_weight : float
         Weight of skip penalty loss.
     baseline_decay : float
         Decay factor of baseline. EMA.
     ctrl_lr : float
         Learning rate for RL controller.
     ctrl_steps_aggregate : int
         Number of steps that will be aggregated into one mini-batch for RL controller.
     ctrl_steps : int
         Number of mini-batches for each epoch of RL controller learning.
     ctrl_kwargs : dict
         Optional kwargs that will be passed to :class:`ReinforceController`.
+    n_warmup : int
+        Number of epochs for training super network.
+    model_lr : float
+        Learning rate for super network.
+    model_wd : float
+        Weight decay for super network.
+    topk : int
+        Number of architectures kept in training process.
+    disable_progress : boolean
+        Control whether to show the progress bar.
     """
-    def __init__(self, device='cuda', workers=4,log_frequency=None,
+    def __init__(self, device='cuda', num_epochs=10, log_frequency=None,
                  grad_clip=5., entropy_weight=0.0001, skip_weight=0, baseline_decay=0.95,
-                 ctrl_lr=0.00035, ctrl_steps_aggregate=100, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4,topk=5,*args,**kwargs):
+                 ctrl_lr=0.00035, ctrl_steps_aggregate=100, ctrl_kwargs=None, n_warmup=100, model_lr=5e-3, model_wd=5e-4, topk=5, disable_progress = True):
         super().__init__(device)
         self.device=device
-        self.num_epochs = kwargs.get("num_epochs", 10)
-        self.workers = workers
+        self.num_epochs = num_epochs
         self.log_frequency = log_frequency
         self.entropy_weight = entropy_weight
         self.skip_weight = skip_weight
         self.baseline_decay = baseline_decay
         self.ctrl_steps_aggregate = ctrl_steps_aggregate
         self.grad_clip = grad_clip
-        self.workers = workers
         self.ctrl_kwargs=ctrl_kwargs
         self.ctrl_lr=ctrl_lr
         self.n_warmup=n_warmup
@@ -444,6 +427,8 @@ class GraphNasRL(BaseNAS):
         self.log=open(f'../tmp/log-{timestamp}.txt','w')
         self.hist=[]
         self.topk=topk
+        self.disable_progress=disable_progress
+
     def search(self, space: BaseSpace, dset, estimator):
         self.model = space
         self.dataset = dset#.to(self.device)
@@ -465,7 +450,7 @@ class GraphNasRL(BaseNAS):
         self.controller = ReinforceController(self.nas_fields,lstm_size=100,temperature=5.0,tanh_constant=2.5, **(self.ctrl_kwargs or {}))
         self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr)
         # train
-        with tqdm(range(self.num_epochs)) as bar:
+        with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar:
             for i in bar:
                 l2=self._train_controller(i)
                 bar.set_postfix(reward_controller=l2)

         selections=[x[1] for x in self.hist]
         candidiate_accs=[-x[0] for x in self.hist]
-        print('candidiate accuracies',candidiate_accs)
+        #print('candidate accuracies',candidiate_accs)
         selection=self._choose_best(selections)
-        arch=space.export(selection,self.device)
-        print(selection,arch)
+        arch=space.parse_model(selection,self.device)
+        #print(selection,arch)
         return arch
+
     def _choose_best(self,selections):
         # graphnas use top 5 models, can evaluate 20 times epoch and choose the best.
         results=[]
         for selection in selections:
             accs=[]
-            for i in tqdm(range(20)):
-                self.arch=self.model.export(selection,device=self.device)
+            for i in tqdm(range(20), disable=self.disable_progress):
+                self.arch=self.model.parse_model(selection,device=self.device)
                 metric,loss=self._infer(mask='val')
                 accs.append(metric)
             result=np.mean(accs)
-            print('selection {} \n acc {:.4f} +- {:.4f}'.format(selection,np.mean(accs),np.std(accs)/np.sqrt(20)))
+            LOGGER.info('selection {} \n acc {:.4f} +- {:.4f}'.format(selection,np.mean(accs),np.std(accs)/np.sqrt(20)))
             results.append(result)
         best_selection=selections[np.argmax(results)]
         return best_selection
@@ -501,7 +487,7 @@ class GraphNasRL(BaseNAS):
         rewards=[]
         baseline=None
         # diff: graph nas train 100 and derive 100 for every epoch(10 epochs), we just train 100(20 epochs). totol num of samples are same (2000)
-        with tqdm(range(self.ctrl_steps_aggregate)) as bar:
+        with tqdm(range(self.ctrl_steps_aggregate), disable=self.disable_progress) as bar:
             for ctrl_step in bar:
                 self._resample()
                 metric,loss=self._infer(mask='val')
@@ -536,7 +522,7 @@ class GraphNasRL(BaseNAS):

     def _resample(self):
         result = self.controller.resample()
-        self.arch=self.model.export(result,device=self.device)
+        self.arch=self.model.parse_model(result,device=self.device)
         self.selection=result

     def export(self):
@@ -545,5 +531,5 @@ class GraphNasRL(BaseNAS):
         return self.controller.resample()

     def _infer(self,mask='train'):
-        metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask)
+        metric, loss = self.estimator.infer(self.arch._model, self.dataset,mask=mask)
         return metric, loss
\ No newline at end of file
diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py
index e43695c..4df9d50 100644
--- a/autogl/module/nas/estimator/one_shot.py
+++ b/autogl/module/nas/estimator/one_shot.py
@@ -1,10 +1,12 @@
 import torch.nn as nn
 import torch.nn.functional as F

+from . import register_nas_estimator
 from ..space import BaseSpace
 from .base import BaseEstimator
 import torch

+@register_nas_estimator("oneshot")
 class OneShotEstimator(BaseEstimator):
     """
     One shot estimator.
diff --git a/autogl/module/nas/estimator/train_scratch.py b/autogl/module/nas/estimator/train_scratch.py
index 82b6d09..9083871 100644
--- a/autogl/module/nas/estimator/train_scratch.py
+++ b/autogl/module/nas/estimator/train_scratch.py
@@ -1,12 +1,15 @@
 import torch.nn as nn
 import torch.nn.functional as F

+from . import register_nas_estimator
 from ..space import BaseSpace
 from .base import BaseEstimator
 from .one_shot import OneShotEstimator
 import torch
 from autogl.module.train import NodeClassificationFullTrainer

+
+@register_nas_estimator("scratch")
 class TrainEstimator(BaseEstimator):
     def __init__(self):
         self.estimator=OneShotEstimator()
diff --git a/autogl/module/nas/space/__init__.py b/autogl/module/nas/space/__init__.py
index 9618048..60c1054 100644
--- a/autogl/module/nas/space/__init__.py
+++ b/autogl/module/nas/space/__init__.py
@@ -17,7 +17,7 @@ def register_nas_space(name):
     return register_nas_space_cls

-from .graph_nas_macro import GraphNasMacroNodeClfSpace
+from .graph_nas_macro import GraphNasMacroNodeClassificationSpace
 from .graph_nas import GraphNasNodeClassificationSpace
 from .single_path import SinglePathNodeClassificationSpace

@@ -41,4 +41,4 @@ def build_nas_space_from_name(name: str) -> BaseSpace:
     assert name in NAS_SPACE_DICT, "HPO module do not have name " + name
     return NAS_SPACE_DICT[name]()

-__all__ = ["BaseSpace", "GraphNasMacroNodeClfSpace", "GraphNasNodeClassificationSpace", "SinglePathNodeClassificationSpace"]
+__all__ = ["BaseSpace", "GraphNasMacroNodeClassificationSpace", "GraphNasNodeClassificationSpace", "SinglePathNodeClassificationSpace"]
diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py
index eaa595a..bcbfefc 100644
--- a/autogl/module/nas/space/base.py
+++ b/autogl/module/nas/space/base.py
@@ -53,7 +53,7 @@ class BaseSpace(nn.Module):
         raise NotImplementedError()

     @abstractmethod
-    def export(self, selection: dict, device) -> BaseModel:
+    def parse_model(self, selection: dict, device) -> BaseModel:
         """
         Export the searched model from space.
diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py
index b56c003..83c4502 100644
--- a/autogl/module/nas/space/graph_nas.py
+++ b/autogl/module/nas/space/graph_nas.py
@@ -1,40 +1,20 @@
-from copy import deepcopy
+# codes in this file are reproduced from https://github.com/GraphNAS/GraphNAS with some changes.
 import typing as _typ
 import torch
 import torch.nn.functional as F
 from nni.nas.pytorch import mutables
-from nni.nas.pytorch.fixed import apply_fixed_architecture
+
+from . import register_nas_space
 from .base import BaseSpace
 from ...model import BaseModel
 from ....utils import get_logger
 from ...model import AutoGCN
 from .single_path import FixedNodeClassificationModel
-from .base import OrderedLayerChoice,OrderedInputChoice
 from torch import nn
-
-from torch_geometric.nn.conv import *
-from pdb import set_trace
-gnn_list = [
-    "gat_8",  # GAT with 8 heads
-    "gat_6",  # GAT with 6 heads
-    "gat_4",  # GAT with 4 heads
-    "gat_2",  # GAT with 2 heads
-    "gat_1",  # GAT with 1 heads
-    "gcn",  # GCN
-    "cheb",  # chebnet
-    "sage",  # sage
-    "arma",
-    "sg",  # simplifying gcn
-    "linear",  # skip connection
-    "zero",  # skip connection
-]
-act_list = [
-    # "sigmoid", "tanh", "relu", "linear",
-    #  "softplus", "leaky_relu", "relu6", "elu"
-    "sigmoid", "tanh", "relu", "linear", "elu"
-]
+from .operation import gnn_list, act_list, act_map, gnn_map
+from torch.autograd import Function

 class LambdaModule(nn.Module):
     def __init__(self, lambd):
@@ -46,6 +26,7 @@ class LambdaModule(nn.Module):
     def __repr__(self):
         return '{}({})'.format(self.__class__.__name__,self.lambd)

+
 class StrModule(nn.Module):
     def __init__(self, lambd):
         super().__init__()
@@ -57,91 +38,12 @@ class StrModule(nn.Module):
     def __repr__(self):
         return '{}({})'.format(self.__class__.__name__,self.str)

-def act_map(act):
-    if act == "linear":
-        return lambda x: x
-    elif act == "elu":
-        return F.elu
-    elif act == "sigmoid":
-        return torch.sigmoid
-    elif act == "tanh":
-        return torch.tanh
-    elif act == "relu":
-        return torch.nn.functional.relu
-    elif act == "relu6":
-        return torch.nn.functional.relu6
-    elif act == "softplus":
-        return torch.nn.functional.softplus
-    elif act == "leaky_relu":
-        return torch.nn.functional.leaky_relu
-    else:
-        raise Exception("wrong activate function")
-
 def act_map_nn(act):
     return LambdaModule(act_map(act))

 def map_nn(l):
     return [StrModule(x) for x in l]

-def gnn_map(gnn_name, in_dim, out_dim, concat=False, bias=True) -> nn.Module:
-    '''
-
-    :param gnn_name:
-    :param in_dim:
-    :param out_dim:
-    :param concat: for gat, concat multi-head output or not
-    :return: GNN model
-    '''
-    if gnn_name == "gat_8":
-        return GATConv(in_dim, out_dim, 8, concat=concat, bias=bias)
-    elif gnn_name == "gat_6":
-        return GATConv(in_dim, out_dim, 6, concat=concat, bias=bias)
-    elif gnn_name == "gat_4":
-        return GATConv(in_dim, out_dim, 4, concat=concat, bias=bias)
-    elif gnn_name == "gat_2":
-        return GATConv(in_dim, out_dim, 2, concat=concat, bias=bias)
-    elif gnn_name in ["gat_1", "gat"]:
-        return GATConv(in_dim, out_dim, 1, concat=concat, bias=bias)
-    elif gnn_name == "gcn":
-        return GCNConv(in_dim, out_dim)
-    elif gnn_name == "cheb":
-        return ChebConv(in_dim, out_dim, K=2, bias=bias)
-    elif gnn_name == "sage":
-        return SAGEConv(in_dim, out_dim, bias=bias)
-    elif gnn_name == "arma":
-        return ARMAConv(in_dim, out_dim, bias=bias)
-    elif gnn_name == "sg":
-        return SGConv(in_dim, out_dim, bias=bias)
-    elif gnn_name == "linear":
-        return LinearConv(in_dim, out_dim, bias=bias)
-    elif gnn_name == "zero":
-        # return ZeroConv(in_dim, out_dim, bias=bias)
-        return Identity()
-class Identity(nn.Module):
-    def forward(self, x, edge_index, edge_weight=None):
-        return x
-class LinearConv(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 bias=True):
-        super(LinearConv, self).__init__()
-
-        self.in_channels = in_channels
-        self.out_channels = out_channels
-        self.linear = torch.nn.Linear(in_channels, out_channels, bias)
-
-    def forward(self, x, edge_index, edge_weight=None):
-        return self.linear(x)
-
-    def __repr__(self):
-        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
-                                   self.out_channels)
-
-
-from torch.autograd import Function
 class ZeroConvFunc(Function):
     @staticmethod
     def forward(ctx,x):
         return x
     @staticmethod
     def backward(ctx, grad_output):
         return 0
-class ZeroConv(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 bias=True):
-        super(ZeroConv, self).__init__()
-        self.in_channels = in_channels
-        self.out_channels = out_channels
-        self.out_dim = out_channels
-
-    def forward(self, x, edge_index, edge_weight=None):
-        return ZeroConvFunc.apply(torch.zeros([x.size(0), self.out_dim]).to(x.device))
-
-    def __repr__(self):
-        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
-                                   self.out_channels)

+@register_nas_space("graphnas")
 class GraphNasNodeClassificationSpace(BaseSpace):
     def __init__(
         self,
@@ -251,6 +138,6 @@ class GraphNasNodeClassificationSpace(BaseSpace):
             x=self.classifier2(x)
         return F.log_softmax(x, dim=1)

-    def export(self, selection, device) -> BaseModel:
+    def parse_model(self, selection, device) -> BaseModel:
         #return AutoGCN(self.input_dim, self.output_dim, device)
         return FixedNodeClassificationModel(self, selection, device)
\ No newline at end of file
diff --git a/autogl/module/nas/space/graph_nas_macro.py b/autogl/module/nas/space/graph_nas_macro.py
index 963c13a..748b230 100644
--- a/autogl/module/nas/space/graph_nas_macro.py
+++ b/autogl/module/nas/space/graph_nas_macro.py
@@ -3,9 +3,10 @@ import typing as _typ
 import torch.nn as nn
 import torch.nn.functional as F

+from . import register_nas_space
 from .base import BaseSpace
 from ...model import BaseModel
-from .graph_nas import act_map
+from .operation import act_map
 from torch.nn import Parameter
 from torch_geometric.nn.inits import glorot, zeros
@@ -387,7 +388,8 @@ class StrModule(nn.Module):
 def map_nn(l):
     return [StrModule(x) for x in l]

-class GraphNasMacroNodeClfSpace(BaseSpace):
+@register_nas_space("graphnasmacro")
+class GraphNasMacroNodeClassificationSpace(BaseSpace):
     def __init__(
         self,
         hidden_dim: _typ.Optional[int] = 64,
@@ -441,7 +443,7 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace):
             if i < layer_nums - 1:
                 setattr(self,f"out_channels_{i}",self.setLayerChoice(i * state_num + 0, map_nn([4, 8, 16, 32, 64, 128, 256]), key=f"out_channels_{i}"))

-    def export(self, selection, device) -> BaseModel:
+    def parse_model(self, selection, device) -> BaseModel:
         sel_list = []
         for i in range(self.layer_number):
             sel_list.append(["gat", "gcn", "cos", "const", "gat_sym", 'linear', 'generalized_linear'][selection[f"attention_{i}"]])
@@ -451,6 +453,7 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace):
             if i < self.layer_number - 1:
                 sel_list.append([4, 8, 16, 32, 64, 128, 256][selection[f"out_channels_{i}"]])
         sel_list.append(self.output_dim)
+        #sel_list = ['const', 'sum', 'relu6', 2, 128, 'gat', 'sum', 'linear', 2, 7]
         model = ModelBox(device, sel_list, self.input_dim, self.output_dim, self.dropout, multi_label=False, batch_normal=False, layers = self.layer_number)
         return model
diff --git a/autogl/module/nas/space/operation.py b/autogl/module/nas/space/operation.py
new file mode 100644
index 0000000..762eab0
--- /dev/null
+++ b/autogl/module/nas/space/operation.py
@@ -0,0 +1,120 @@
+# codes in this file are reproduced from https://github.com/GraphNAS/GraphNAS with some changes.
+from torch_geometric.nn.conv import *
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+gnn_list = [
+    "gat_8",  # GAT with 8 heads
+    "gat_6",  # GAT with 6 heads
+    "gat_4",  # GAT with 4 heads
+    "gat_2",  # GAT with 2 heads
+    "gat_1",  # GAT with 1 heads
+    "gcn",  # GCN
+    "cheb",  # chebnet
+    "sage",  # sage
+    "arma",
+    "sg",  # simplifying gcn
+    "linear",  # skip connection
+    "zero",  # skip connection
+]
+act_list = [
+    # "sigmoid", "tanh", "relu", "linear",
+    #  "softplus", "leaky_relu", "relu6", "elu"
+    "sigmoid", "tanh", "relu", "linear", "elu"
+]
+
+def act_map(act):
+    if act == "linear":
+        return lambda x: x
+    elif act == "elu":
+        return F.elu
+    elif act == "sigmoid":
+        return torch.sigmoid
+    elif act == "tanh":
+        return torch.tanh
+    elif act == "relu":
+        return torch.nn.functional.relu
+    elif act == "relu6":
+        return torch.nn.functional.relu6
+    elif act == "softplus":
+        return torch.nn.functional.softplus
+    elif act == "leaky_relu":
+        return torch.nn.functional.leaky_relu
+    else:
+        raise Exception("wrong activation function")
+
+def gnn_map(gnn_name, in_dim, out_dim, concat=False, bias=True) -> nn.Module:
+    '''
+
+    :param gnn_name:
+    :param in_dim:
+    :param out_dim:
+    :param concat: for gat, concat multi-head output or not
+    :return: GNN model
+    '''
+    if gnn_name == "gat_8":
+        return GATConv(in_dim, out_dim, 8, concat=concat, bias=bias)
+    elif gnn_name == "gat_6":
+        return GATConv(in_dim, out_dim, 6, concat=concat, bias=bias)
+    elif gnn_name == "gat_4":
+        return GATConv(in_dim, out_dim, 4, concat=concat, bias=bias)
+    elif gnn_name == "gat_2":
+        return GATConv(in_dim, out_dim, 2, concat=concat, bias=bias)
+    elif gnn_name in ["gat_1", "gat"]:
+        return GATConv(in_dim, out_dim, 1, concat=concat, bias=bias)
+    elif gnn_name == "gcn":
+        return GCNConv(in_dim, out_dim)
elif gnn_name == "cheb": + return ChebConv(in_dim, out_dim, K=2, bias=bias) + elif gnn_name == "sage": + return SAGEConv(in_dim, out_dim, bias=bias) + elif gnn_name == "gated": + return GatedGraphConv(in_dim, out_dim, bias=bias) + elif gnn_name == "arma": + return ARMAConv(in_dim, out_dim, bias=bias) + elif gnn_name == "sg": + return SGConv(in_dim, out_dim, bias=bias) + elif gnn_name == "linear": + return LinearConv(in_dim, out_dim, bias=bias) + elif gnn_name == "zero": + # return ZeroConv(in_dim, out_dim, bias=bias) + return Identity() + +class Identity(nn.Module): + def forward(self, x, edge_index, edge_weight=None): + return x + +class LinearConv(nn.Module): + def __init__(self, + in_channels, + out_channels, + bias=True): + super(LinearConv, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.linear = torch.nn.Linear(in_channels, out_channels, bias) + + def forward(self, x, edge_index, edge_weight=None): + return self.linear(x) + + def __repr__(self): + return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, + self.out_channels) + +class ZeroConv(nn.Module): + def __init__(self, + in_channels, + out_channels, + bias=True): + super(ZeroConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.out_dim = out_channels + + def forward(self, x, edge_index, edge_weight=None): + return ZeroConvFunc.apply(torch.zeros([x.size(0), self.out_dim]).to(x.device)) + + def __repr__(self): + return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, + self.out_channels) \ No newline at end of file diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py index 8db9fb7..2cd99cf 100644 --- a/autogl/module/nas/space/single_path.py +++ b/autogl/module/nas/space/single_path.py @@ -4,6 +4,8 @@ import torch import torch.nn.functional as F from nni.nas.pytorch import mutables + +from . 
import register_nas_space
from .base import apply_fixed_architecture
from .base import BaseSpace
from ...model import BaseModel
@@ -51,7 +53,7 @@ class FixedNodeClassificationModel(BaseModel):
     def model(self):
         return self._model

-
+@register_nas_space("singlepath")
 class SinglePathNodeClassificationSpace(BaseSpace):
     def __init__(
         self,
@@ -114,6 +116,6 @@ class SinglePathNodeClassificationSpace(BaseSpace):
             x = F.dropout(x, p=self.dropout, training = self.training)
         return F.log_softmax(x, dim=1)

-    def export(self, selection, device) -> BaseModel:
+    def parse_model(self, selection, device) -> BaseModel:
         #return AutoGCN(self.input_dim, self.output_dim, device)
         return FixedNodeClassificationModel(self, selection, device)
diff --git a/examples/test_graph_nas_rand.py b/examples/test_graph_nas_rand.py
new file mode 100644
index 0000000..bf906de
--- /dev/null
+++ b/examples/test_graph_nas_rand.py
@@ -0,0 +1,71 @@
+import sys
+sys.path.append('../')
+from torch_geometric.nn import GCNConv
+import torch
+from autogl.datasets import build_dataset_from_name
+from autogl.solver import AutoNodeClassifier
+from autogl.module.train import NodeClassificationFullTrainer
+from autogl.module.nas import Darts, OneShotEstimator
+from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace
+from autogl.module.nas.space.graph_nas_macro import GraphNasMacroNodeClassificationSpace
+from autogl.module.train import Acc
+from autogl.module.nas.algorithm.enas import Enas
+from autogl.module.nas.algorithm.rl import RL,GraphNasRL
+from autogl.module.nas.estimator.train_scratch import TrainEstimator
+from autogl.module.nas.algorithm.random_search import RandomSearch
+import numpy as np
+import logging
+def one_run():
+    logging.getLogger().setLevel(logging.WARNING)
+    cora = build_dataset_from_name('cora')
+
+    clf = AutoNodeClassifier(
+        feature_module='PYGNormalizeFeatures',
+        graph_models=[],
+        nas_algorithms=[Enas(num_epochs=10)],
+        nas_spaces=[GraphNasNodeClassificationSpace()],
+        nas_estimators=[OneShotEstimator()],
+        max_evals=2
+    )
+
+    clf.fit(cora)
+    clf.predict(cora)
+
+    return
+
+    dataset = build_dataset_from_name('cora')
+    solver = AutoNodeClassifier(
+        feature_module='PYGNormalizeFeatures',
+        graph_models=[],
+        hpo_module=None,
+        ensemble_module=None,
+        default_trainer=NodeClassificationFullTrainer(
+            optimizer=torch.optim.Adam,
+            lr=0.005,
+            max_epoch=300,
+            early_stopping_round=20,
+            weight_decay=5e-4,
+            device="auto",
+            init=False,
+            feval=['acc'],
+            loss="nll_loss",
+            lr_scheduler_type=None,),
+        # nas_algorithms=[RL(num_epochs=400)],
+        nas_algorithms=[GraphNasRL(num_epochs=1)],
+        #nas_algorithms=[Darts(num_epochs=200)],
+        nas_spaces=[GraphNasMacroNodeClassificationSpace(hidden_dim=16,search_act_con=True,layer_number=2)],
+        nas_estimators=[TrainEstimator()]
+    )
+
+    solver.fit(dataset)
+    solver.get_leaderboard().show()
+    out = solver.predict_proba()
+    acc = Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())
+    print('acc on cora', acc)
+    return acc
+
+if __name__ == '__main__':
+    acc_li = []
+    for i in range(2):
+        acc_li.append(one_run())
+    print("results:", np.mean(acc_li), np.std(acc_li))
\ No newline at end of file
diff --git a/examples/test_graph_nas_rl.py b/examples/test_graph_nas_rl.py
index fbe1052..1170674 100644
--- a/examples/test_graph_nas_rl.py
+++ b/examples/test_graph_nas_rl.py
@@ -7,14 +7,15 @@ from autogl.solver import AutoNodeClassifier
 from autogl.module.train import NodeClassificationFullTrainer
 from autogl.module.nas import Darts, OneShotEstimator
 from 
autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace -from autogl.module.nas.space.graph_nas_macro import GraphNasMacroNodeClfSpace +from autogl.module.nas.space.graph_nas_macro import GraphNasMacroNodeClassificationSpace from autogl.module.train import Acc from autogl.module.nas.algorithm.enas import Enas from autogl.module.nas.algorithm.rl import RL,GraphNasRL -from autogl.module.nas.estimator.one_shot import TrainEstimator +from autogl.module.nas.estimator.train_scratch import TrainEstimator from autogl.module.nas.algorithm.random_search import RandomSearch +import numpy as np import logging -if __name__ == '__main__': +def one_run(): logging.getLogger().setLevel(logging.WARNING) dataset = build_dataset_from_name('cora') solver = AutoNodeClassifier( @@ -34,12 +35,21 @@ if __name__ == '__main__': loss="nll_loss", lr_scheduler_type=None,), # nas_algorithms=[RL(num_epochs=400)], - nas_algorithms=[GraphNasRL(num_epochs=20)], + nas_algorithms=[GraphNasRL(num_epochs=1)], #nas_algorithms=[Darts(num_epochs=200)], nas_spaces=[GraphNasMacroNodeClfSpace(hidden_dim=16,search_act_con=True,layer_number=2)], nas_estimators=[TrainEstimator()] ) + solver.fit(dataset) solver.get_leaderboard().show() out = solver.predict_proba() - print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) + acc = Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy()) + print('acc on cora', acc) + return acc + +if __name__ == '__main__': + acc_li = [] + for i in range(100): + acc_li.append(one_run()) + print("results:", np.mean(acc_li), np.std(acc_li)) \ No newline at end of file From 1159328106bbe499ffbbbb55c34f50f7230b3b35 Mon Sep 17 00:00:00 2001 From: lihy96 Date: Tue, 22 Jun 2021 16:42:43 +0800 Subject: [PATCH 095/144] change model function name decode to lp_decode --- autogl/module/model/gat.py | 4 ++-- autogl/module/model/gcn.py | 4 ++-- autogl/module/model/graphsage.py | 4 ++-- autogl/module/train/link_prediction.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/autogl/module/model/gat.py b/autogl/module/model/gat.py index f0963fc..c0b9b4b 100644 --- a/autogl/module/model/gat.py +++ b/autogl/module/model/gat.py @@ -104,12 +104,12 @@ class GAT(torch.nn.Module): # x = F.dropout(x, p=self.args["dropout"], training=self.training) return x - def decode(self, z, pos_edge_index, neg_edge_index): + def lp_decode(self, z, pos_edge_index, neg_edge_index): edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) return logits - def decode_all(self, z): + def lp_decode_all(self, z): prob_adj = z @ z.t() return (prob_adj > 0).nonzero(as_tuple=False).t() diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index e28bd80..02507e7 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -93,12 +93,12 @@ class GCN(torch.nn.Module): # x = F.dropout(x, p=self.args["dropout"], training=self.training) return x - def decode(self, z, pos_edge_index, neg_edge_index): + def lp_decode(self, z, pos_edge_index, neg_edge_index): edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) return logits - def decode_all(self, z): + def lp_decode_all(self, z): prob_adj = z @ z.t() return (prob_adj > 0).nonzero(as_tuple=False).t() diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py index 5b09817..56463e1 100644 --- a/autogl/module/model/graphsage.py +++ 
b/autogl/module/model/graphsage.py
@@ -180,12 +180,12 @@ class GraphSAGE(torch.nn.Module):
         # x = F.dropout(x, p=self.args["dropout"], training=self.training)
         return x

-    def decode(self, z, pos_edge_index, neg_edge_index):
+    def lp_decode(self, z, pos_edge_index, neg_edge_index):
         edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
         logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
         return logits

-    def decode_all(self, z):
+    def lp_decode_all(self, z):
         prob_adj = z @ z.t()
         return (prob_adj > 0).nonzero(as_tuple=False).t()

diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py
index 56cf3fe..056060f 100644
--- a/autogl/module/train/link_prediction.py
+++ b/autogl/module/train/link_prediction.py
@@ -198,7 +198,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
             optimizer.zero_grad()
             # res = self.model.model.forward(data)
             z = self.model.model.encode(data)
-            link_logits = self.model.model.decode(
+            link_logits = self.model.model.lp_decode(
                 z, data.train_pos_edge_index, neg_edge_index
             )
             link_labels = self.get_link_labels(
@@ -320,7 +320,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
         self.model.model.eval()
         with torch.no_grad():
             z = self.predict_only(data)
-            link_logits = self.model.model.decode(z, pos_edge_index, neg_edge_index)
+            link_logits = self.model.model.lp_decode(z, pos_edge_index, neg_edge_index)
             link_probs = link_logits.sigmoid()
         return link_probs

From 86d29a2a6ad0bd2e51e3ff4560155f3d62d13ce0 Mon Sep 17 00:00:00 2001
From: lihy96
Date: Tue, 22 Jun 2021 17:10:05 +0800
Subject: [PATCH 096/144] add some details on documents and tutorials
---
 docs/docfile/tutorial/t_trainer.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/docfile/tutorial/t_trainer.rst b/docs/docfile/tutorial/t_trainer.rst
index 70dc7b4..da59681 100644
--- a/docs/docfile/tutorial/t_trainer.rst
+++ b/docs/docfile/tutorial/t_trainer.rst
@@ -7,6 +7,8 @@ AutoGL project uses ``trainer`` to handle the auto-training of tasks. Currently,
 * ``NodeClassificationTrainer`` for semi-supervised node classification
 * ``GraphClassificationTrainer`` for supervised graph classification
+* ``LinkPredictionTrainer`` for link prediction
+
 Initialization
 --------------
@@ -41,7 +43,7 @@ Train and Predict
 -----------------

 After initializing a trainer, you can train it on the given datasets.
-We have given the training and testing functions for the tasks of node classification and graph classification up to now. You can also create your tasks following the similar patterns with ours. For training, you need to define ``train_only()`` and use it in ``train()``. For testing, you need to define ``predict_proba()`` and use it in ``predict()``.
+We have given the training and testing functions for the tasks of node classification, graph classification, and link prediction up to now. You can also create your own tasks following similar patterns to ours. For training, you need to define ``train_only()`` and use it in ``train()``. For testing, you need to define ``predict_proba()`` and use it in ``predict()``.
 The evaluation function is defined in ``evaluate()``; you can use your own evaluation metrics and methods.
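The ``lp_decode``/``lp_decode_all`` bodies renamed in PATCH 095 are identical across GAT, GCN and GraphSAGE: an inner-product decoder over node embeddings. A self-contained sketch of that decoder next to the 0/1 labels a link-prediction trainer pairs it with — ``get_link_labels`` here mirrors the trainer helper of the same name, and all tensor sizes are toy values:

.. code-block:: python

    import torch

    def lp_decode(z, pos_edge_index, neg_edge_index):
        # Score each candidate edge by the inner product of its endpoint embeddings.
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        return (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)

    def get_link_labels(pos_edge_index, neg_edge_index):
        # 1 for observed edges, 0 for sampled negatives.
        return torch.cat([torch.ones(pos_edge_index.size(1)),
                          torch.zeros(neg_edge_index.size(1))])

    z = torch.randn(4, 16)                # toy node embeddings
    pos = torch.tensor([[0, 1], [1, 2]])  # two positive edges
    neg = torch.tensor([[0, 3], [3, 2]])  # two sampled negative edges
    loss = torch.nn.functional.binary_cross_entropy_with_logits(
        lp_decode(z, pos, neg), get_link_labels(pos, neg)
    )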
From e349156f787cdf498d6bd87c14c14f90e227bd9b Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Tue, 22 Jun 2021 07:13:01 +0000
Subject: [PATCH 097/144] add documents, remove unsupported nas tasks
---
 autogl/solver/classifier/graph_classifier.py | 18 ++++++++++++------
 autogl/solver/classifier/link_predictor.py   |  3 +++
 autogl/utils/device.py                       | 19 ++++++++++++++++++-
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py
index 6175c85..d4cd5f7 100644
--- a/autogl/solver/classifier/graph_classifier.py
+++ b/autogl/solver/classifier/graph_classifier.py
@@ -74,9 +74,9 @@ class AutoGraphClassifier(BaseClassifier):
         self,
         feature_module=None,
         graph_models=["gin", "topkpool"],
-        nas_algorithms=None,
-        nas_spaces=None,
-        nas_estimators=None,
+        # nas_algorithms=None,
+        # nas_spaces=None,
+        # nas_estimators=None,
         hpo_module="anneal",
         ensemble_module="voting",
         max_evals=50,
@@ -90,9 +90,9 @@ class AutoGraphClassifier(BaseClassifier):
         super().__init__(
             feature_module=feature_module,
             graph_models=graph_models,
-            nas_algorithms=nas_algorithms,
-            nas_spaces=nas_spaces,
-            nas_estimators=nas_estimators,
+            nas_algorithms=None, #nas_algorithms,
+            nas_spaces=None, #nas_spaces,
+            nas_estimators=None, #nas_estimators,
             hpo_module=hpo_module,
             ensemble_module=ensemble_module,
             max_evals=max_evals,
@@ -211,6 +211,8 @@ class AutoGraphClassifier(BaseClassifier):

         return self

+    """
+    # currently disabled
     def _init_nas_module(
         self, num_features, num_classes, num_graph_features, feval, device, loss
     ):
@@ -219,6 +221,7 @@ class AutoGraphClassifier(BaseClassifier):
         ):
             # TODO: initialize important parameters
             pass
+    """

     # pylint: disable=arguments-differ
     def fit(
@@ -359,6 +362,8 @@ class AutoGraphClassifier(BaseClassifier):
             else dataset.data.gf.size(1),
         )

+        # currently disabled
+        """
         self._init_nas_module(
             num_features=dataset.num_node_features,
             num_classes=dataset.num_classes,
@@ -378,6 +383,7 @@ class AutoGraphClassifier(BaseClassifier):
         ):
             trainer = algo.search(space, self.dataset, estimator)
             self.graph_model_list.append(trainer)
+        """

         # train the models and tune hpo
         result_valid = []
diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py
index 17b110e..fe839d1 100644
--- a/autogl/solver/classifier/link_predictor.py
+++ b/autogl/solver/classifier/link_predictor.py
@@ -87,6 +87,9 @@ class AutoLinkPredictor(BaseClassifier):
         super().__init__(
             feature_module=feature_module,
             graph_models=graph_models,
+            nas_algorithms=None,
+            nas_spaces=None,
+            nas_estimators=None,
             hpo_module=hpo_module,
             ensemble_module=ensemble_module,
             max_evals=max_evals,
diff --git a/autogl/utils/device.py b/autogl/utils/device.py
index a316839..b331ace 100644
--- a/autogl/utils/device.py
+++ b/autogl/utils/device.py
@@ -1,6 +1,23 @@
 import torch
+from typing import Union

-def get_device(device):
+def get_device(device: Union[str, torch.device]):
+    """
+    Get the device from the passed argument and return a corresponding torch.device.
+    Can parse auto, cpu, gpu, cpu:x, gpu:x, etc. If auto is given, will automatically find
+    available devices.
+
+
+    Parameters
+    ----------
+    device: ``str`` or ``torch.device``
+        The device to parse. If ``auto`` is given, will determine automatically.
+
+    Returns
+    -------
+    device: ``torch.device``
+        The parsed device.
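A quick sketch of the behaviour the docstring above promises; that non-``auto`` inputs come back as the matching ``torch.device`` is an assumption read off the docstring rather than the code:

.. code-block:: python

    import torch
    from autogl.utils.device import get_device

    get_device(torch.device("cpu"))  # an existing torch.device is accepted directly
    get_device("auto")               # cuda if torch.cuda.is_available(), otherwise cpu
    get_device("cpu")                # parsed into torch.device("cpu")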
+ """ assert isinstance(device, (str, torch.device)), "Only support device of str or torch.device, get {} instead".format(device) if device == 'auto': device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') From 9f75968808f8c4f332af93f62992f1dc527fadb6 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Tue, 22 Jun 2021 07:54:40 +0000 Subject: [PATCH 098/144] add nas config support --- autogl/solver/base.py | 54 ++++++--------------- autogl/solver/classifier/node_classifier.py | 23 +++++++++ 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/autogl/solver/base.py b/autogl/solver/base.py index 411f1c4..b68afa4 100644 --- a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -262,7 +262,7 @@ class BaseSolver: ) def set_nas_module( - self, nas_algorithms, nas_spaces=None, nas_estimators=None + self, nas_algorithms=None, nas_spaces=None, nas_estimators=None ) -> "BaseSolver": """ Set the neural architecture search module in current solver. @@ -291,44 +291,20 @@ class BaseSolver: self: autogl.solver.BaseSolver A reference of current solver. """ - self.nas_algorithms = nas_algorithms - if self.nas_algorithms is not None: - max_number = -1 - if isinstance(self.nas_algorithms, list): - max_number = len(self.nas_algorithms) - if isinstance(nas_spaces, list): - if max_number == -1: - max_number = len(nas_spaces) - else: - assert ( - len(nas_spaces) == max_number - ), "lengths of algorithms/spaces/estimators do not match!" - if isinstance(nas_estimators, list): - if max_number == -1: - max_number = len(nas_estimators) - else: - assert ( - len(nas_estimators) == max_number - ), "lengths of algorithms/spaces/estimators do not match!" - if max_number < 0: - self.nas_algorithms = [self.nas_algorithms] - self.nas_spaces = [nas_spaces] - self.nas_estimators = [nas_estimators] - else: - if not isinstance(self.nas_algorithms, list): - self.nas_algorithms = [ - deepcopy(self.nas_algorithms) for _ in range(max_number) - ] - if not isinstance(nas_spaces, list): - self.nas_spaces = [deepcopy(nas_spaces) for _ in range(max_number)] - else: - self.nas_spaces = nas_spaces - if not isinstance(nas_estimators, list): - self.nas_estimators = [ - deepcopy(nas_estimators) for _ in range(max_number) - ] - else: - self.nas_estimators = nas_estimators + if nas_algorithms is None and nas_estimators is None and nas_spaces is None: + return + assert None not in [nas_algorithms, nas_estimators, nas_spaces], "The algorithms, estimators and spaces should all be set" + + nas_algorithms = nas_algorithms if isinstance(nas_algorithms, (list, tuple)) else [nas_algorithms] + nas_spaces = nas_spaces if isinstance(nas_spaces, (list, tuple)) else [nas_spaces] + nas_estimators = nas_estimators if isinstance(nas_estimators, (list, tuple)) else [nas_estimators] + + max_number = max([len(x) for x in [nas_algorithms, nas_spaces, nas_estimators]]) + assert all([len(x) in [1, max_number] for x in [nas_algorithms, nas_spaces, nas_estimators]]), "lengths of algorithms/spaces/estimators do not match!" 
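The handling around this assert amounts to a small broadcast rule: bare arguments are wrapped into singleton lists, and any length-1 list is then replicated up to the longest length. The same rule in isolation — the function name is ours, and the strings merely stand in for algorithm/space/estimator objects (the real method also resolves such strings through the NAS registries, added in PATCH 099):

.. code-block:: python

    from copy import deepcopy

    def broadcast_nas_args(algorithms, spaces, estimators):
        # Wrap bare arguments into singleton lists, as set_nas_module does.
        groups = [x if isinstance(x, (list, tuple)) else [x]
                  for x in (algorithms, spaces, estimators)]
        n = max(len(g) for g in groups)
        # Every group must supply either one entry (to be replicated) or exactly n.
        assert all(len(g) in (1, n) for g in groups), \
            "lengths of algorithms/spaces/estimators do not match!"
        return [[deepcopy(g[0]) for _ in range(n)] if len(g) == 1 and n > 1 else list(g)
                for g in groups]

    algos, spaces, ests = broadcast_nas_args("rl", ["space_a", "space_b"], "oneshot")
    assert len(algos) == len(spaces) == len(ests) == 2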
+
+        self.nas_algorithms = [deepcopy(nas_algorithms[0]) for _ in range(max_number)] if len(nas_algorithms) == 1 and max_number > 1 else nas_algorithms
+        self.nas_spaces = [deepcopy(nas_spaces[0]) for _ in range(max_number)] if len(nas_spaces) == 1 and max_number > 1 else nas_spaces
+        self.nas_estimators = [deepcopy(nas_estimators[0]) for _ in range(max_number)] if len(nas_estimators) == 1 and max_number > 1 else nas_estimators

     def set_ensemble_module(self, ensemble_module, *args, **kwargs) -> "BaseSolver":
         r"""
         Set the ensemble module used in current solver.
diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py
index e13693b..722adf5 100644
--- a/autogl/solver/classifier/node_classifier.py
+++ b/autogl/solver/classifier/node_classifier.py
@@ -16,6 +16,9 @@
 from ...module.feature import FEATURE_DICT
 from ...module.model import MODEL_DICT, BaseModel
 from ...module.train import TRAINER_DICT, BaseNodeClassificationTrainer
 from ...module.train import get_feval
+from ...module.nas.space import NAS_SPACE_DICT
+from ...module.nas.algorithm import NAS_ALGO_DICT
+from ...module.nas.estimator import NAS_ESTIMATOR_DICT
 from ..utils import Leaderboard, set_seed
 from ...datasets import utils
 from ...utils import get_logger
@@ -799,5 +802,25 @@ class AutoNodeClassifier(BaseClassifier):
         if ensemble_dict is not None:
             name = ensemble_dict.pop("name")
             solver.set_ensemble_module(name, **ensemble_dict)
+
+        nas_dict = path_or_dict.pop("nas", None)
+        if nas_dict is not None:
+            keys: set = set(nas_dict.keys())
+            needed = {'space', 'algorithm', 'estimator'}
+            if keys != needed:
+                LOGGER.error('Key mismatch, we need %s, you give %s' % (needed, keys))
+                raise KeyError('Key mismatch, we need %s, you give %s' % (needed, keys))
+
+            spaces, algorithms, estimators = [], [], []
+
+            for container, indexer, k in zip([spaces, algorithms, estimators], [NAS_SPACE_DICT, NAS_ALGO_DICT, NAS_ESTIMATOR_DICT], ['space', 'algorithm', 'estimator']):
+                configs = nas_dict[k]
+                if isinstance(configs, list):
+                    for item in configs:
+                        container.append(indexer[item.pop('name')](**item, init=False))
+                else:
+                    container.append(indexer[configs.pop('name')](**configs, init=False))
+
+            solver.set_nas_module(algorithms, spaces, estimators)

         return solver

From 0ded8904b9761c6853406735178008767ad51692 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Wed, 23 Jun 2021 03:05:26 +0000
Subject: [PATCH 099/144] parse nas parameters from str
---
 autogl/solver/base.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/autogl/solver/base.py b/autogl/solver/base.py
index b68afa4..056de2d 100644
--- a/autogl/solver/base.py
+++ b/autogl/solver/base.py
@@ -12,6 +12,9 @@ import torch
 from ..module.feature import FEATURE_DICT
 from ..module.hpo import HPO_DICT
 from ..module.model import MODEL_DICT
+from ..module.nas.algorithm import NAS_ALGO_DICT
+from ..module.nas.estimator import NAS_ESTIMATOR_DICT
+from ..module.nas.space import NAS_SPACE_DICT
 from ..module import BaseFeatureAtom, BaseHPOptimizer, BaseTrainer
 from .utils import Leaderboard
 from ..utils import get_logger
@@ -260,6 +263,7 @@ class BaseSolver:
                 type(hpo_module),
                 "instead.",
             )
+        return self

     def set_nas_module(
         self, nas_algorithms=None, nas_spaces=None, nas_estimators=None
@@ -298,6 +302,11 @@ class BaseSolver:
         nas_algorithms = nas_algorithms if isinstance(nas_algorithms, (list, tuple)) else [nas_algorithms]
         nas_spaces = nas_spaces if isinstance(nas_spaces, (list, tuple)) else [nas_spaces]
         nas_estimators = nas_estimators if isinstance(nas_estimators, (list, tuple)) else [nas_estimators]
+
+        # parse all str elements
+        nas_algorithms = [algo if not isinstance(algo, str) else NAS_ALGO_DICT[algo]() for algo in nas_algorithms]
+        nas_spaces = [space if not isinstance(space, str) else NAS_SPACE_DICT[space]() for space in nas_spaces]
+        nas_estimators = [estimator if not isinstance(estimator, str) else NAS_ESTIMATOR_DICT[estimator]() for estimator in nas_estimators]

         max_number = max([len(x) for x in [nas_algorithms, nas_spaces, nas_estimators]])
         assert all([len(x) in [1, max_number] for x in [nas_algorithms, nas_spaces, nas_estimators]]), "lengths of algorithms/spaces/estimators do not match!"
@@ -306,6 +315,8 @@
         self.nas_estimators = [deepcopy(nas_estimators[0]) for _ in range(max_number)] if len(nas_estimators) == 1 and max_number > 1 else nas_estimators

+        return self
+
     def set_ensemble_module(self, ensemble_module, *args, **kwargs) -> "BaseSolver":
         r"""
         Set the ensemble module used in current solver.

From a37e67f2e21ead3706e092497b8467c5c4a5555c Mon Sep 17 00:00:00 2001
From: generall
Date: Wed, 23 Jun 2021 11:35:57 +0800
Subject: [PATCH 100/144] Merge branch 'nas' of github.com:THUMNLab/AutoGL into nas
---
 autogl/module/nas/algorithm/random_search.py | 59 +++++++------------
 autogl/module/nas/algorithm/rl.py            |  7 ++-
 autogl/module/nas/space/base.py              |  4 +-
 autogl/module/nas/space/graph_nas.py         |  1 -
 autogl/module/nas/space/graph_nas_macro.py   |  1 -
 autogl/module/nas/space/single_path.py       |  1 -
 6 files changed, 27 insertions(+), 46 deletions(-)

diff --git a/autogl/module/nas/algorithm/random_search.py b/autogl/module/nas/algorithm/random_search.py
index 527ca4d..c307122 100644
--- a/autogl/module/nas/algorithm/random_search.py
+++ b/autogl/module/nas/algorithm/random_search.py
@@ -1,4 +1,3 @@
-import copy
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -7,43 +6,12 @@
 from . import register_nas_algo
 from .base import BaseNAS
 from ..space import BaseSpace
 from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module
-from nni.nas.pytorch.fixed import apply_fixed_architecture
 from tqdm import tqdm
 from .rl import PathSamplingLayerChoice,PathSamplingInputChoice
 import numpy as np
 from ....utils import get_logger
 LOGGER = get_logger("random_search_NAS")

-class RSBox:
-    '''get selection space for model `space` '''
-    def __init__(self,space):
-        self.model = space
-        self.nas_modules = []
-        k2o = get_module_order(self.model)
-        replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules)
-        replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules)
-        self.nas_modules = sort_replaced_module(k2o, self.nas_modules)
-        nm=self.nas_modules
-        selection_range={}
-        for k,v in nm:
-            selection_range[k]=len(v)
-        self.selection_dict=selection_range
-
-
-        space_size=np.prod(list(selection_range.values()))
-        #print(f'Using random search Box. 
Total space size: {space_size}') - #print('Searching Space:',selection_range) - - def export(self): - return self.selection_dict #{k:v}, means action ranges 0 to v-1 for layer named k - - def sample(self): - # uniformly sample - selection={} - sdict=self.export() - for k,v in sdict.items(): - selection[k]=np.random.choice(range(v)) - return selection @register_nas_algo("random") class RandomSearch(BaseNAS): @@ -68,12 +36,24 @@ class RandomSearch(BaseNAS): self.estimator=estimator self.dataset=dset self.space=space - self.box=RSBox(self.space) + + self.nas_modules = [] + k2o = get_module_order(self.space) + replace_layer_choice(self.space, PathSamplingLayerChoice, self.nas_modules) + replace_input_choice(self.space, PathSamplingInputChoice, self.nas_modules) + self.nas_modules = sort_replaced_module(k2o, self.nas_modules) + selection_range={} + for k,v in self.nas_modules: + selection_range[k]=len(v) + self.selection_dict=selection_range + + #space_size=np.prod(list(selection_range.values())) + arch_perfs=[] cache={} with tqdm(range(self.num_epochs),disable=self.disable_progress) as bar: for i in bar: - selection=self.export() + selection=self.sample() vec=tuple(list(selection.values())) if vec not in cache: self.arch=space.parse_model(selection,self.device) @@ -85,10 +65,13 @@ class RandomSearch(BaseNAS): arch=space.parse_model(selection,self.device) return arch - def export(self): - arch=self.box.sample() - return arch + def sample(self): + # uniformly sample + selection={} + for k,v in self.selection_dict.items(): + selection[k]=np.random.choice(range(v)) + return selection def _infer(self,mask='train'): - metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask) + metric, loss = self.estimator.infer(self.arch._model, self.dataset, mask=mask) return metric, loss diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py index cbd831b..ae8b5ff 100644 --- a/autogl/module/nas/algorithm/rl.py +++ b/autogl/module/nas/algorithm/rl.py @@ -261,7 +261,7 @@ class RL(BaseNAS): Learning rate for super network. model_wd : float Weight decay for super network. - disable_progeress: boolean + disable_progress: boolean Control whether show the progress bar. """ @@ -283,6 +283,7 @@ class RL(BaseNAS): self.n_warmup=n_warmup self.model_lr = model_lr self.model_wd = model_wd + self.disable_progress=disable_progress def search(self, space: BaseSpace, dset, estimator): self.model = space @@ -350,7 +351,7 @@ class RL(BaseNAS): def _resample(self): result = self.controller.resample() - self.arch=self.model.export(result,device=self.device) + self.arch=self.model.parse_model(result,device=self.device) self.selection=result def export(self): @@ -359,7 +360,7 @@ class RL(BaseNAS): return self.controller.resample() def _infer(self,mask='train'): - metric, loss = self.estimator.infer(self.arch, self.dataset,mask=mask) + metric, loss = self.estimator.infer(self.arch._model, self.dataset,mask=mask) return metric, loss @register_nas_algo("graphnas") diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py index d65e32c..d632796 100644 --- a/autogl/module/nas/space/base.py +++ b/autogl/module/nas/space/base.py @@ -83,7 +83,7 @@ class BaseSpace(nn.Module): Whether to initialize the whole space. 
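With ``RSBox`` inlined into ``search``, the loop reduces to: sample a selection uniformly, evaluate it only when its tuple key is unseen, and keep the best. The same loop in isolation, with ``evaluate`` standing in for ``self._infer`` and a higher-is-better metric assumed:

.. code-block:: python

    import numpy as np

    def random_search(selection_dict, evaluate, num_epochs):
        # selection_dict maps each choice key to its number of candidate operations;
        # evaluate returns a higher-is-better metric for one sampled architecture.
        cache, best = {}, None
        for _ in range(num_epochs):
            selection = {k: np.random.choice(range(v)) for k, v in selection_dict.items()}
            key = tuple(selection.values())
            if key not in cache:
                cache[key] = evaluate(selection)
            if best is None or cache[key] > cache[best]:
                best = key
        return dict(zip(selection_dict.keys(), best))

    best = random_search({"op_0": 7, "op_1": 7, "act_0": 5},
                         lambda selection: np.random.rand(), 10)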
Default: `False`
     """

-    def __init__(self, init=False):
+    def __init__(self):
         super().__init__()
         self._initialized = False
@@ -134,7 +134,7 @@ class BaseSpace(nn.Module):
         if orikey == None:
             key = f"default_key_{self._default_key}"
             self._default_key += 1
-            orikry = key
+            orikey = key
         layer = OrderedLayerChoice(order, op_candidates, reduction, return_mask, orikey)
         return layer
diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py
index 875a46c..92c48b7 100644
--- a/autogl/module/nas/space/graph_nas.py
+++ b/autogl/module/nas/space/graph_nas.py
@@ -62,7 +62,6 @@ class GraphNasNodeClassificationSpace(BaseSpace):
         input_dim: _typ.Optional[int] = None,
         output_dim: _typ.Optional[int] = None,
         ops: _typ.Tuple = None,
-        init: bool = False,
         search_act_con=False
     ):
         super().__init__()
diff --git a/autogl/module/nas/space/graph_nas_macro.py b/autogl/module/nas/space/graph_nas_macro.py
index b0cd920..cc91534 100644
--- a/autogl/module/nas/space/graph_nas_macro.py
+++ b/autogl/module/nas/space/graph_nas_macro.py
@@ -398,7 +398,6 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace):
         input_dim: _typ.Optional[int] = None,
         output_dim: _typ.Optional[int] = None,
         ops: _typ.Tuple = None,
-        init: bool = False,
         search_act_con=False
     ):
         super().__init__()
diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py
index 5005108..3bbd983 100644
--- a/autogl/module/nas/space/single_path.py
+++ b/autogl/module/nas/space/single_path.py
@@ -21,7 +21,6 @@ class SinglePathNodeClassificationSpace(BaseSpace):
         input_dim: _typ.Optional[int] = None,
         output_dim: _typ.Optional[int] = None,
         ops: _typ.Tuple = None,
-        init: bool = False,
     ):
         super().__init__()
         self.layer_number = layer_number

From f28e744fd2c0228c2e38113160d0d83484525aa9 Mon Sep 17 00:00:00 2001
From: generall
Date: Wed, 23 Jun 2021 11:36:48 +0800
Subject: [PATCH 101/144] fix some bugs
---
 docs/docfile/tutorial/t_nas.rst | 157 ++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)
 create mode 100644 docs/docfile/tutorial/t_nas.rst

diff --git a/docs/docfile/tutorial/t_nas.rst b/docs/docfile/tutorial/t_nas.rst
new file mode 100644
index 0000000..c31b8f9
--- /dev/null
+++ b/docs/docfile/tutorial/t_nas.rst
@@ -0,0 +1,157 @@
+.. _nas:
+
+Neural Architecture Search
+============================
+
+We support different neural architecture search algorithms over various search spaces.
+To be more flexible, we modularize the NAS process into three parts: algorithm, space and estimator.
+Models from different parts can be composed under certain constraints.
+If you want to design your own NAS process, you can change any of those parts according to your demand.
+
+Search Space
+------------
+
+The space definition is based on the mutable fashion used in NNI.
+There are mainly two ways to define your search space; one can be performed in the one-shot fashion while the other cannot.
+If you need the one-shot fashion, you should use a space that supports it.
+Four types of search space are supported; use a ``dict`` in python to define your search space.
+For the numerical list search space, you can either assign a fixed length for the list; if so, you need not provide ``cutPara`` and ``cutFunc``.
+Or you can let HPO cut the list to a certain length which depends on other parameters. You should provide those parameters' names in ``cutPara`` and the function to calculate the cut length in ``cutFunc``.
+
+.. 
code-block:: python
+
+    # numerical search space:
+    {
+        "parameterName": "xxx",
+        "type": "DOUBLE" / "INTEGER",
+        "minValue": xx,
+        "maxValue": xx,
+        "scalingType": "LINEAR" / "LOG"
+    }
+
+    # numerical list search space:
+    {
+        "parameterName": "xxx",
+        "type": "NUMERICAL_LIST",
+        "numericalType": "DOUBLE" / "INTEGER",
+        "length": 3,
+        "cutPara": ("para_a", "para_b"),
+        "cutFunc": lambda x: x[0] - 1,
+        "minValue": [xx,xx,xx],
+        "maxValue": [xx,xx,xx],
+        "scalingType": "LINEAR" / "LOG"
+    }
+
+    # categorical search space:
+    {
+        "parameterName": xxx,
+        "type": "CATEGORICAL",
+        "feasiblePoints": [a,b,c]
+    }
+
+    # fixed parameter as search space:
+    {
+        "parameterName": xxx,
+        "type": "FIXED",
+        "value": xxx
+    }
+
+How the given HPO algorithms support each type of search space is listed as follows:
+
++------------+------------+--------------+-----------+------------+
+| Algorithm  | numerical  |numerical list|categorical| fixed      |
++============+============+==============+===========+============+
+| Grid       |            |              | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+| Random     | ✓          | ✓            | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+| Anneal     | ✓          | ✓            | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+| Bayes      | ✓          | ✓            | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+| TPE        | ✓          | ✓            | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+| CMAES      | ✓          | ✓            | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+| MOCMAES    | ✓          | ✓            | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+|Quasi random| ✓          | ✓            | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+| AutoNE     | ✓          | ✓            | ✓         | ✓          |
++------------+------------+--------------+-----------+------------+
+
+Here, TPE is from [1], CMAES is from [2], MOCMAES is from [3], quasi random is from [4], AutoNE is from [5].
+
+[1] Bergstra, James S., et al. "Algorithms for hyper-parameter optimization." Advances in neural information processing systems. 2011.
+[2] Arnold, Dirk V., and Nikolaus Hansen. "Active covariance matrix adaptation for the (1+ 1)-CMA-ES." Proceedings of the 12th annual conference on Genetic and evolutionary computation. 2010.
+[3] Voß, Thomas, Nikolaus Hansen, and Christian Igel. "Improved step size adaptation for the MO-CMA-ES." Proceedings of the 12th annual conference on Genetic and evolutionary computation. 2010.
+[4] Bratley, Paul, Bennett L. Fox, and Harald Niederreiter. "Programs to generate Niederreiter's low-discrepancy sequences." ACM Transactions on Mathematical Software (TOMS) 20.4 (1994): 494-495.
+[5] Tu, Ke, et al. "Autone: Hyperparameter optimization for massive network embedding." Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 2019.
+
+Add Your HPOptimizer
+--------------------
+
+If you want to add your own HPOptimizer, the only thing you should do is finish the ``optimize`` function in your HPOptimizer:
+
+.. 
code-block:: python
+
+    # For example, create a random HPO by yourself
+    import random
+    from autogl.module.hpo.base import BaseHPOptimizer
+    class RandomOptimizer(BaseHPOptimizer):
+        # Get essential parameters at initialization
+        def __init__(self, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            self.max_evals = kwargs.get("max_evals", 2)
+
+        # The most important thing you should do is complete the optimize function
+        def optimize(self, trainer, dataset, time_limit=None, memory_limit=None):
+            # 1. Get the search space from trainer.
+            space = trainer.hyper_parameter_space + trainer.model.hyper_parameter_space
+            # optional: use self._encode_para (in BaseHPOptimizer) to preprocess the space
+            # If you use _encode_para, the NUMERICAL_LIST will be spread to DOUBLE or INTEGER, LOG scaling type will be changed to LINEAR, feasible points in CATEGORICAL will be changed to discrete numbers.
+            # You should also use _decode_para to transform the types of parameters back.
+            current_space = self._encode_para(space)
+
+            # 2. Define your function to get the performance.
+            def fn(dset, para):
+                current_trainer = trainer.duplicate_from_hyper_parameter(para)
+                current_trainer.train(dset)
+                loss, self.is_higher_better = current_trainer.get_valid_score(dset)
+                # For convenience, we negate the score that is higher-better, so we only ever minimize.
+                if self.is_higher_better:
+                    loss = -loss
+                return current_trainer, loss
+
+            # 3. Define how to get HP suggestions; it should return a parameter dict. You can use history trials to give new suggestions
+            def get_random(history_trials):
+                hps = {}
+                for para in current_space:
+                    # Because we used the _encode_para function before, we only need to deal with DOUBLE, INTEGER and DISCRETE
+                    if para["type"] == "DOUBLE" or para["type"] == "INTEGER":
+                        hp = random.random() * (para["maxValue"] - para["minValue"]) + para["minValue"]
+                        if para["type"] == "INTEGER":
+                            hp = round(hp)
+                        hps[para["parameterName"]] = hp
+                    elif para["type"] == "DISCRETE":
+                        feasible_points = para["feasiblePoints"].split(",")
+                        hps[para["parameterName"]] = random.choice(feasible_points)
+                return hps
+
+            # 4. Run your algorithm. For each turn, get a set of parameters according to history information and evaluate it.
+            best_trainer, best_para, best_perf = None, None, None
+            self.trials = []
+            for i in range(self.max_evals):
+                # in this example, we don't need history trials, so we pass None as history_trials
+                new_hp = get_random(None)
+                # optional: if you use _encode_para, use _decode_para as well. para_for_trainer undoes all transformations in _encode_para, and turns double parameters into integers if needed. para_for_hpo only turns double parameters into integers.
+                para_for_trainer, para_for_hpo = self._decode_para(new_hp)
+                current_trainer, perf = fn(dataset, para_for_trainer)
+                self.trials.append((para_for_hpo, perf))
+                if not best_perf or perf < best_perf:
+                    best_perf = perf
+                    best_trainer = current_trainer
+                    best_para = para_for_trainer
+
+            # 5. Return the best trainer and parameter. 
+ return best_trainer, best_para \ No newline at end of file From b6c65e014d41d0ea21b86019edd51a19889a551b Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 23 Jun 2021 03:36:25 +0000 Subject: [PATCH 102/144] add documentation of node solver --- autogl/solver/classifier/node_classifier.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 722adf5..92474ae 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -42,6 +42,15 @@ class AutoNodeClassifier(BaseClassifier): graph_models: list of autogl.module.model.BaseModel or list of str The (name of) models to be optimized as backbone. Default ``['gat', 'gcn']``. + + nas_algorithms: (list of) autogl.module.nas.algorithm.BaseNAS or str (Optional) + The (name of) nas algorithms used. Default ``None``. + + nas_spaces: (list of) autogl.module.nas.space.BaseSpace or str (Optional) + The (name of) nas spaces used. Default ``None``. + + nas_estimators: (list of) autogl.module.nas.estimator.BaseEstimator or str (Optional) + The (name of) nas estimators used. Default ``None``. hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None The (name of) hpo module used to search for best hyper parameters. Default ``anneal``. @@ -808,7 +817,7 @@ class AutoNodeClassifier(BaseClassifier): keys: set = set(nas_dict.keys()) needed = {'space', 'algorithm', 'estimator'} if keys != needed: - LOGGER.error('Key mismatch, we need %s, you give %s' % (needed, keys)) + LOGGER.error('Key mismatch, we need %s, you give %s', needed, keys) raise KeyError('Key mismatch, we need %s, you give %s' % (needed, keys)) spaces, algorithms, estimators = [], [], [] From 136bddfdc1dcfa2f07648dd722570e0c06b90e0b Mon Sep 17 00:00:00 2001 From: null Date: Wed, 23 Jun 2021 14:00:00 +0800 Subject: [PATCH 103/144] merge dev and revert some mutations for LeaderBoard --- autogl/datasets/utils.py | 10 + autogl/module/model/__init__.py | 4 +- autogl/module/model/base.py | 25 +- autogl/module/model/gat.py | 18 + autogl/module/model/gcn.py | 126 +-- autogl/module/model/graph_saint.py | 11 +- .../model/{graph_sage.py => graphsage.py} | 185 +++-- autogl/module/train/__init__.py | 22 +- autogl/module/train/base.py | 17 + autogl/module/train/evaluation.py | 32 +- .../module/train/graph_classification_full.py | 11 +- autogl/module/train/link_prediction.py | 518 ++++++++++++ .../module/train/node_classification_full.py | 34 +- .../node_classification_sampled_trainer.py | 50 +- autogl/solver/__init__.py | 9 +- autogl/solver/classifier/__init__.py | 8 +- autogl/solver/classifier/graph_classifier.py | 13 +- autogl/solver/classifier/link_predictor.py | 747 ++++++++++++++++++ autogl/solver/classifier/node_classifier.py | 12 +- autogl/solver/utils.py | 137 ++-- configs/lp_benchmark.yml | 92 +++ configs/lp_gat_benchmark.yml | 61 ++ configs/lp_gcn_benchmark.yml | 61 ++ configs/lp_sage_benchmark.yml | 69 ++ configs/nodeclf_gat_benchmark_large.yml | 2 +- configs/nodeclf_ladies_gcn.yml | 84 -- configs/nodeclf_sage_benchmark_large.yml | 2 +- configs/nodeclf_sage_benchmark_small.yml | 2 +- 28 files changed, 2001 insertions(+), 361 deletions(-) rename autogl/module/model/{graph_sage.py => graphsage.py} (50%) create mode 100644 autogl/module/train/link_prediction.py create mode 100644 autogl/solver/classifier/link_predictor.py create mode 100644 configs/lp_benchmark.yml create mode 100644 configs/lp_gat_benchmark.yml create mode 100644 
configs/lp_gcn_benchmark.yml create mode 100644 configs/lp_sage_benchmark.yml delete mode 100644 configs/nodeclf_ladies_gcn.yml diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py index b0708db..4885ea0 100644 --- a/autogl/datasets/utils.py +++ b/autogl/datasets/utils.py @@ -2,9 +2,19 @@ from pdb import set_trace import torch import numpy as np from torch_geometric.data import DataLoader +from torch_geometric.utils import train_test_split_edges from sklearn.model_selection import StratifiedKFold, KFold +def split_edges(dataset, train_ratio, val_ratio): + datas = [data for data in dataset] + for i in range(len(datas)): + datas[i] = train_test_split_edges( + datas[i], val_ratio, 1 - train_ratio - val_ratio + ) + dataset.data, dataset.slices = dataset.collate(datas) + + def get_label_number(dataset): r"""Get the number of labels in this dataset as dict.""" label_num = {} diff --git a/autogl/module/model/__init__.py b/autogl/module/model/__init__.py index d559120..67d778c 100644 --- a/autogl/module/model/__init__.py +++ b/autogl/module/model/__init__.py @@ -1,7 +1,9 @@ from ._model_registry import MODEL_DICT, ModelUniversalRegistry, register_model from .base import BaseModel from .topkpool import AutoTopkpool -from .graph_sage import AutoSAGE + +# from .graph_sage import AutoSAGE +from .graphsage import AutoSAGE from .graph_saint import GraphSAINTAggregationModel from .gcn import AutoGCN from .gat import AutoGAT diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index b7469f6..e251468 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -380,19 +380,30 @@ class ClassificationModel(_BaseModel): self.__num_graph_features = 0 -class SequentialGraphNeuralNetwork(torch.nn.Module): +class _ClassificationModel(torch.nn.Module): def __init__(self): - super(SequentialGraphNeuralNetwork, self).__init__() + super(_ClassificationModel, self).__init__() - def decode(self, x: torch.Tensor) -> torch.Tensor: + def cls_encode(self, data) -> torch.Tensor: raise NotImplementedError - def encode(self, data) -> torch.Tensor: + def cls_decode(self, x: torch.Tensor) -> torch.Tensor: raise NotImplementedError + def cls_forward(self, data) -> torch.Tensor: + return self.cls_decode(self.cls_encode(data)) + + +class ClassificationSupportedSequentialModel(_ClassificationModel): + def __init__(self): + super(ClassificationSupportedSequentialModel, self).__init__() + @property - def encoder_sequential_modules(self) -> torch.nn.ModuleList: + def sequential_encoding_layers(self) -> torch.nn.ModuleList: raise NotImplementedError - def forward(self, data) -> torch.Tensor: - return self.decode(self.encode(data)) + def cls_encode(self, data) -> torch.Tensor: + raise NotImplementedError + + def cls_decode(self, x: torch.Tensor) -> torch.Tensor: + raise NotImplementedError diff --git a/autogl/module/model/gat.py b/autogl/module/model/gat.py index 92b5dff..07e7800 100644 --- a/autogl/module/model/gat.py +++ b/autogl/module/model/gat.py @@ -95,6 +95,24 @@ class GAT(torch.nn.Module): return F.log_softmax(x, dim=1) + def lp_encode(self, data): + x = data.x + for i in range(self.num_layer - 1): + x = self.convs[i](x, data.train_pos_edge_index) + if i != self.num_layer - 2: + x = activate_func(x, self.args["act"]) + # x = F.dropout(x, p=self.args["dropout"], training=self.training) + return x + + def lp_decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + 
return logits + + def lp_decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + @register_model("gat") class AutoGAT(BaseModel): diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index f3abd15..742da83 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -1,16 +1,17 @@ import torch import torch.nn.functional -from torch_geometric.nn.conv import GCNConv import typing as _typing + +from torch_geometric.nn.conv import GCNConv import autogl.data from . import register_model -from .base import activate_func, ClassificationModel, SequentialGraphNeuralNetwork +from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel from ...utils import get_logger LOGGER = get_logger("GCNModel") -class GCN(SequentialGraphNeuralNetwork): +class GCN(ClassificationSupportedSequentialModel): class _GCNLayer(torch.nn.Module): def __init__( self, input_channels: int, output_channels: int, @@ -72,10 +73,11 @@ class GCN(SequentialGraphNeuralNetwork): num_features: int, num_classes: int, hidden_features: _typing.Sequence[int], - dropout: _typing.Union[float, _typing.Sequence[_typing.Optional[float]]], activation_name: str, - add_self_loops: bool = True, - normalize: bool = True + dropout: _typing.Union[ + _typing.Optional[float], _typing.Sequence[_typing.Optional[float]] + ] = None, + add_self_loops: bool = True, normalize: bool = True ): if isinstance(dropout, _typing.Sequence): if len(dropout) != len(hidden_features) + 1: @@ -98,14 +100,18 @@ class GCN(SequentialGraphNeuralNetwork): dropout_list: _typing.Sequence[_typing.Optional[float]] = [ dropout for _ in range(len(hidden_features) + 1) ] + elif dropout in (None, Ellipsis, ...): + dropout_list: _typing.Sequence[_typing.Optional[float]] = [ + None for _ in range(len(hidden_features) + 1) + ] else: raise TypeError( - "The provided dropout argument must be a float " - "or a sequence in which each item is either float or None." + "The provided dropout argument must be a float number or None or " + "a sequence in which each item is either a float Number or None." 
) super().__init__() if len(hidden_features) == 0: - self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList( + self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList( ( self._GCNLayer( num_features, num_classes, add_self_loops, normalize, @@ -114,32 +120,29 @@ class GCN(SequentialGraphNeuralNetwork): ) ) else: - self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList() - self.__sequential_module_list.append(self._GCNLayer( + self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList() + self.__sequential_encoding_layers.append(self._GCNLayer( num_features, hidden_features[0], add_self_loops, normalize, activation_name, dropout_list[0] )) for hidden_feature_index in range(len(hidden_features)): if hidden_feature_index + 1 < len(hidden_features): - self.__sequential_module_list.append(self._GCNLayer( + self.__sequential_encoding_layers.append(self._GCNLayer( hidden_features[hidden_feature_index], hidden_features[hidden_feature_index + 1], add_self_loops, normalize, activation_name, dropout_list[hidden_feature_index + 1] )) else: - self.__sequential_module_list.append(self._GCNLayer( + self.__sequential_encoding_layers.append(self._GCNLayer( hidden_features[hidden_feature_index], num_classes, add_self_loops, normalize, dropout_list[-1] )) - def decode(self, x: torch.Tensor) -> torch.Tensor: - return torch.nn.functional.log_softmax(x, dim=1) - @property - def encoder_sequential_modules(self) -> torch.nn.ModuleList: - return self.__sequential_module_list + def sequential_encoding_layers(self) -> torch.nn.ModuleList: + return self.__sequential_encoding_layers def __extract_edge_indexes_and_weights(self, data) -> _typing.Union[ _typing.Sequence[_typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]], @@ -161,7 +164,7 @@ class GCN(SequentialGraphNeuralNetwork): if not ( hasattr(data, "edge_indexes") and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and - len(getattr(data, "edge_indexes")) == len(self.__sequential_module_list) + len(getattr(data, "edge_indexes")) == len(self.__sequential_encoding_layers) ): return __compose_edge_index_and_weight( getattr(data, "edge_index"), getattr(data, "edge_weight", None) @@ -175,7 +178,7 @@ class GCN(SequentialGraphNeuralNetwork): if ( hasattr(data, "edge_weights") and isinstance(getattr(data, "edge_weights"), _typing.Sequence) and - len(getattr(data, "edge_weights")) == len(self.__sequential_module_list) + len(getattr(data, "edge_weights")) == len(self.__sequential_encoding_layers) ): return [ __compose_edge_index_and_weight(_edge_index, _edge_weight) @@ -188,7 +191,7 @@ class GCN(SequentialGraphNeuralNetwork): for __edge_index in getattr(data, "edge_indexes") ] - def encode(self, data) -> torch.Tensor: + def cls_encode(self, data) -> torch.Tensor: edge_indexes_and_weights: _typing.Union[ _typing.Sequence[_typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]], _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] @@ -199,10 +202,10 @@ class GCN(SequentialGraphNeuralNetwork): and isinstance(edge_indexes_and_weights[0], tuple) ): """ edge_indexes_and_weights is sequence of (edge_index, edge_weight) """ - assert len(edge_indexes_and_weights) == len(self.__sequential_module_list) + assert len(edge_indexes_and_weights) == len(self.__sequential_encoding_layers) x: torch.Tensor = getattr(data, "x") for _edge_index_and_weight, gcn in zip( - edge_indexes_and_weights, self.__sequential_module_list + edge_indexes_and_weights, 
self.__sequential_encoding_layers ): _temp_data = autogl.data.Data(x=x, edge_index=_edge_index_and_weight[0]) _temp_data.edge_weight = _edge_index_and_weight[1] @@ -211,15 +214,32 @@ class GCN(SequentialGraphNeuralNetwork): else: """ edge_indexes_and_weights is (edge_index, edge_weight) """ x = getattr(data, "x") - for gcn in self.__sequential_module_list: + for gcn in self.__sequential_encoding_layers: _temp_data = autogl.data.Data(x=x, edge_index=edge_indexes_and_weights[0]) _temp_data.edge_weight = edge_indexes_and_weights[1] x = gcn(_temp_data) return x + def cls_decode(self, x: torch.Tensor) -> torch.Tensor: + return torch.nn.functional.log_softmax(x, dim=1) + + def lp_encode(self, data): + for i in range(len(self.__sequential_encoding_layers) - 1): + data.x = self.__sequential_encoding_layers[i](data) + return getattr(data, "x") + + def lp_decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + return logits + + def lp_decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + @register_model("gcn") -class AutoGCN(ClassificationModel): +class AutoGCN(BaseModel): r""" AutoGCN. The model used in this automodel is GCN, i.e., the graph convolutional network from the @@ -258,7 +278,16 @@ class AutoGCN(ClassificationModel): init: bool = False, **kwargs ) -> None: - default_hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [ + super().__init__() + self.num_features = num_features + self.num_classes = num_classes + self.device = device + + self.params = { + "features_num": self.num_features, + "num_class": self.num_classes, + } + self.space = [ { "parameterName": "add_self_loops", "type": "CATEGORICAL", @@ -298,29 +327,36 @@ class AutoGCN(ClassificationModel): "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], }, ] - default_hp = { - "add_self_loops": 1, - "normalize": 1, - "num_layers": 2, - "hidden": [16], - "dropout": 0.2, - "act": "leaky_relu", + + # initial point of hp search + # self.hyperparams = { + # "num_layers": 2, + # "hidden": [16], + # "dropout": 0.2, + # "act": "leaky_relu", + # } + + self.hyperparams = { + "num_layers": 3, + "hidden": [128, 64], + "dropout": 0, + "act": "relu", } - super(AutoGCN, self).__init__( - num_features, num_classes, device=device, - hyper_parameter_space=default_hp_space, - hyper_parameter=default_hp, init=init, - **kwargs - ) + self.initialized = False + if init is True: + self.initialize() - def _initialize(self): + def initialize(self): + if self.initialized: + return + self.initialized = True self.model = GCN( self.num_features, self.num_classes, - self.hyper_parameter.get("hidden"), - self.hyper_parameter.get("dropout"), - self.hyper_parameter.get("act"), - add_self_loops=bool(self.hyper_parameter.get("add_self_loops", True)), - normalize=bool(self.hyper_parameter.get("normalize", True)) + self.hyperparams.get("hidden"), + self.hyperparams.get("act"), + self.hyperparams.get("dropout"), + bool(self.hyperparams.get("add_self_loops", True)), + bool(self.hyperparams.get("normalize", True)) ).to(self.device) diff --git a/autogl/module/model/graph_saint.py b/autogl/module/model/graph_saint.py index 566e171..9ebbb04 100644 --- a/autogl/module/model/graph_saint.py +++ b/autogl/module/model/graph_saint.py @@ -4,7 +4,7 @@ from torch_geometric.nn.conv import MessagePassing from torch_sparse import SparseTensor, matmul from . 
import register_model -from .base import ClassificationModel, SequentialGraphNeuralNetwork +from .base import ClassificationModel, ClassificationSupportedSequentialModel class _GraphSAINTAggregationLayers: @@ -227,7 +227,7 @@ class _GraphSAINTAggregationLayers: raise TypeError -class GraphSAINTMultiOrderAggregationModel(SequentialGraphNeuralNetwork): +class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialModel): def __init__( self, num_features: int, num_classes: int, _output_dimension_for_each_order: int, @@ -289,13 +289,14 @@ class GraphSAINTMultiOrderAggregationModel(SequentialGraphNeuralNetwork): self.__linear_transform: torch.nn.Linear = torch.nn.Linear( self.__sequential_encoding_layers[-1].integral_output_dimension, num_classes, bias ) + self.__linear_transform.reset_parameters() - def decode(self, x: torch.Tensor) -> torch.Tensor: + def cls_decode(self, x: torch.Tensor) -> torch.Tensor: if self.__apply_normalize: x: torch.Tensor = torch.nn.functional.normalize(x, p=2, dim=1) return torch.nn.functional.log_softmax(self.__linear_transform(x), dim=1) - def encode(self, data) -> torch.Tensor: + def cls_encode(self, data) -> torch.Tensor: if type(getattr(data, "x")) != torch.Tensor: raise TypeError if type(getattr(data, "edge_index")) != torch.Tensor: @@ -310,7 +311,7 @@ class GraphSAINTMultiOrderAggregationModel(SequentialGraphNeuralNetwork): return getattr(data, "x") @property - def encoder_sequential_modules(self) -> torch.nn.ModuleList: + def sequential_encoding_layers(self) -> torch.nn.ModuleList: return self.__sequential_encoding_layers diff --git a/autogl/module/model/graph_sage.py b/autogl/module/model/graphsage.py similarity index 50% rename from autogl/module/model/graph_sage.py rename to autogl/module/model/graphsage.py index f7073b6..a990701 100644 --- a/autogl/module/model/graph_sage.py +++ b/autogl/module/model/graphsage.py @@ -1,17 +1,17 @@ -import typing as _typing import torch -import torch.nn.functional -from torch_geometric.nn.conv import SAGEConv +import typing as _typing +from torch_geometric.nn.conv import SAGEConv +import torch.nn.functional import autogl.data from . 
import register_model -from .base import ( - ClassificationModel, activate_func, - SequentialGraphNeuralNetwork -) +from .base import BaseModel, activate_func, ClassificationSupportedSequentialModel +from ...utils import get_logger + +LOGGER = get_logger("SAGEModel") -class GraphSAGE(SequentialGraphNeuralNetwork): +class GraphSAGE(ClassificationSupportedSequentialModel): class _SAGELayer(torch.nn.Module): def __init__( self, input_channels: int, output_channels: int, aggr: str, @@ -59,49 +59,81 @@ class GraphSAGE(SequentialGraphNeuralNetwork): return x def __init__( - self, - num_features: int, - num_classes: int, - hidden_features: _typing.Sequence[int], - dropout: float, - activation_name: str, - aggr: str = "mean" + self, num_features: int, num_classes: int, + hidden_features: _typing.Sequence[int], + activation_name: str, + layers_dropout: _typing.Union[ + _typing.Optional[float], _typing.Sequence[_typing.Optional[float]] + ] = None, + aggr: str = "mean" ): - super(GraphSAGE, self).__init__() - if type(aggr) != str: + super().__init__() + if not type(num_features) == type(num_classes) == int: + raise TypeError + if not isinstance(hidden_features, _typing.Sequence): + raise TypeError + for hidden_feature in hidden_features: + if type(hidden_feature) != int: + raise TypeError + elif hidden_feature <= 0: + raise ValueError + if isinstance(layers_dropout, _typing.Sequence): + if len(layers_dropout) != (len(hidden_features) + 1): + raise TypeError + for d in layers_dropout: + if d is not None and type(d) != float: + raise TypeError + _layers_dropout: _typing.Sequence[_typing.Optional[float]] = layers_dropout + elif layers_dropout is None or type(layers_dropout) == float: + _layers_dropout: _typing.Sequence[_typing.Optional[float]] = [ + layers_dropout for _ in range(len(hidden_features) + 1) + ] + else: + raise TypeError + if not type(activation_name) == type(aggr) == str: raise TypeError if aggr not in ("add", "max", "mean"): aggr = "mean" if len(hidden_features) == 0: - self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList( - (self._SAGELayer(num_features, num_classes, aggr),) - ) + self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList([ + self._SAGELayer( + num_features, num_classes, + aggr, activation_name, _layers_dropout[0] + ) + ]) else: - self.__sequential_module_list: torch.nn.ModuleList = torch.nn.ModuleList() - self.__sequential_module_list.append(self._SAGELayer( - num_features, hidden_features[0], aggr, activation_name, dropout - )) + self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList([ + self._SAGELayer( + num_features, hidden_features[0], + aggr, activation_name, _layers_dropout[0] + ) + ]) for i in range(len(hidden_features)): if i + 1 < len(hidden_features): - self.__sequential_module_list.append(self._SAGELayer( - hidden_features[i], hidden_features[i + 1], aggr, - activation_name, dropout - )) + self.__sequential_encoding_layers.append( + self._SAGELayer( + hidden_features[i], hidden_features[i + 1], + aggr, activation_name, _layers_dropout[i + 1] + ) + ) else: - self.__sequential_module_list.append(self._SAGELayer( - hidden_features[i], num_classes, aggr - )) + self.__sequential_encoding_layers.append( + self._SAGELayer( + hidden_features[i], num_classes, + aggr, activation_name, _layers_dropout[i + 1] + ) + ) @property - def encoder_sequential_modules(self) -> torch.nn.ModuleList: - return self.__sequential_module_list + def sequential_encoding_layers(self) -> torch.nn.ModuleList: + return 
self.__sequential_encoding_layers - def encode(self, data) -> torch.Tensor: + def cls_encode(self, data) -> torch.Tensor: if ( - hasattr(data, "edge_indexes") and - isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and - len(getattr(data, "edge_indexes")) == len(self.__sequential_module_list) + hasattr(data, "edge_indexes") and + isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and + len(getattr(data, "edge_indexes")) == len(self.__sequential_encoding_layers) ): for __edge_index in getattr(data, "edge_indexes"): if type(__edge_index) != torch.Tensor: @@ -112,19 +144,33 @@ class GraphSAGE(SequentialGraphNeuralNetwork): _intermediate_data: autogl.data.Data = autogl.data.Data( x=x, edge_index=__edge_index ) - x: torch.Tensor = self.encoder_sequential_modules[i](_intermediate_data) + x: torch.Tensor = self.__sequential_encoding_layers[i](_intermediate_data) return x else: - for i in range(len(self.encoder_sequential_modules)): - data.x = self.encoder_sequential_modules[i](data) + for i in range(len(self.__sequential_encoding_layers)): + data.x = self.__sequential_encoding_layers[i](data) return data.x - def decode(self, x: torch.Tensor) -> torch.Tensor: + def cls_decode(self, x: torch.Tensor) -> torch.Tensor: return torch.nn.functional.log_softmax(x, dim=1) + def lp_encode(self, data): + for i in range(len(self.__sequential_encoding_layers) - 1): + data.x = self.__sequential_encoding_layers[i](data) + return getattr(data, "x") + + def lp_decode(self, z, pos_edge_index, neg_edge_index): + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1) + return logits + + def lp_decode_all(self, z): + prob_adj = z @ z.t() + return (prob_adj > 0).nonzero(as_tuple=False).t() + @register_model("sage") -class AutoSAGE(ClassificationModel): +class AutoSAGE(BaseModel): r""" AutoSAGE. The model used in this automodel is GraphSAGE, i.e., the GraphSAGE from the `"Inductive Representation Learning on Large Graphs" `_ paper. The layer is @@ -147,17 +193,25 @@ class AutoSAGE(ClassificationModel): init: `bool`. If True(False), the model will (not) be initialized. 
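+
+    A minimal usage sketch (the concrete feature/class counts below are
+    assumptions for illustration, not defaults of the library):
+
+        model = AutoSAGE(num_features=1433, num_classes=7, device="cpu", init=True)
+        sage = model.model  # the underlying GraphSAGE instance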
+ """ def __init__( - self, - num_features: int = ..., - num_classes: int = ..., - device: _typing.Union[str, torch.device] = ..., - init: bool = False, - **kwargs + self, num_features=None, num_classes=None, device=None, init=False, **args ): - default_hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [ + + super(AutoSAGE, self).__init__() + + self.num_features = num_features if num_features is not None else 0 + self.num_classes = int(num_classes) if num_classes is not None else 0 + self.device = device if device is not None else "cpu" + self.init = True + + self.params = { + "features_num": self.num_features, + "num_class": self.num_classes, + } + self.space = [ { "parameterName": "num_layers", "type": "DISCRETE", @@ -187,31 +241,32 @@ class AutoSAGE(ClassificationModel): "feasiblePoints": ["leaky_relu", "relu", "elu", "tanh"], }, { - "parameterName": "aggr", + "parameterName": "agg", "type": "CATEGORICAL", "feasiblePoints": ["mean", "add", "max"], }, ] - default_hp = { + + self.hyperparams = { "num_layers": 3, "hidden": [64, 32], "dropout": 0.5, "act": "relu", "agg": "mean", } - super(AutoSAGE, self).__init__( - num_features, num_classes, device=device, - hyper_parameter_space=default_hp_space, - hyper_parameter=default_hp, init=init, **kwargs - ) - - def _initialize(self): - """ Initialize model """ + + self.initialized = False + if init is True: + self.initialize() + + def initialize(self): + if self.initialized: + return + self.initialized = True self.model = GraphSAGE( - self.num_features, - self.num_classes, - self.hyper_parameter.get("hidden"), - self.hyper_parameter.get("dropout"), - self.hyper_parameter.get("act"), - self.hyper_parameter.get("aggr") + self.num_features, self.num_classes, + self.hyperparams.get("hidden"), + self.hyperparams.get("act", "relu"), + self.hyperparams.get("dropout", None), + self.hyperparams.get("agg", "mean") ).to(self.device) diff --git a/autogl/module/train/__init__.py b/autogl/module/train/__init__.py index b10a057..33fa5ef 100644 --- a/autogl/module/train/__init__.py +++ b/autogl/module/train/__init__.py @@ -4,8 +4,8 @@ from .base import ( Evaluation, BaseNodeClassificationTrainer, BaseGraphClassificationTrainer, + BaseLinkPredictionTrainer, ) -from .evaluation import get_feval def register_trainer(name): @@ -24,5 +24,23 @@ def register_trainer(name): from .graph_classification_full import GraphClassificationFullTrainer from .node_classification_full import NodeClassificationFullTrainer +from .link_prediction import LinkPredictionTrainer from .node_classification_trainer import * -from .evaluation import get_feval, Acc, Auc, Logloss +from .evaluation import get_feval, Acc, Auc, Logloss, Mrr, MicroF1 + +__all__ = [ + "BaseTrainer", + "Evaluation", + "BaseGraphClassificationTrainer", + "BaseNodeClassificationTrainer", + "BaseLinkPredictionTrainer", + "GraphClassificationFullTrainer", + "NodeClassificationFullTrainer", + "LinkPredictionTrainer", + "Acc", + "Auc", + "Logloss", + "Mrr", + "MicroF1", + "get_feval", +] diff --git a/autogl/module/train/base.py b/autogl/module/train/base.py index eb71652..6c6ab65 100644 --- a/autogl/module/train/base.py +++ b/autogl/module/train/base.py @@ -401,3 +401,20 @@ class BaseGraphClassificationTrainer(_BaseClassificationTrainer): super(BaseGraphClassificationTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) + + +class BaseLinkPredictionTrainer(_BaseClassificationTrainer): + def __init__( + self, + model: _typing.Union[BaseModel, str], + num_features: int, + device: 
_typing.Union[torch.device, str, None] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Acc,), + loss: str = "nll_loss", + ): + super(BaseLinkPredictionTrainer, self).__init__( + model, num_features, 2, device, init, feval, loss + ) diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py index 1ebb324..d2a390f 100644 --- a/autogl/module/train/evaluation.py +++ b/autogl/module/train/evaluation.py @@ -27,8 +27,16 @@ class Evaluation: class EvaluatorUtility: + """ Auxiliary utilities for evaluation """ class PredictionBatchCumulativeBuilder: - """ Batch-cumulative builder for prediction """ + """ + Batch-cumulative builder for prediction. + For a large graph, it is infeasible to predict all the nodes + of the validation and test sets in a single batch, + while layer-wise prediction is a practical evaluation approach, + so the batch-cumulative prediction collector `PredictionBatchCumulativeBuilder` + is implemented to accumulate predictions in a mini-batch manner. + """ def __init__(self): self.__indexes_in_integral_data: _typing.Optional[np.ndarray] = None self.__prediction: _typing.Optional[np.ndarray] = None @@ -177,7 +185,13 @@ class Auc(Evaluation): """ Should return: the evaluation result (float) """ - pos_predict = predict[:, 1] + if len(predict.shape) == 1: + pos_predict = predict + else: + assert ( + predict.shape[1] == 2 + ), "Cannot use auc on given data with %d classes!" % (predict.shape[1]) + pos_predict = predict[:, 1] return roc_auc_score(label, pos_predict) @@ -199,7 +213,11 @@ class Acc(Evaluation): """ Should return: the evaluation result (float) """ - return accuracy_score(label, np.argmax(predict, axis=1)) + if len(predict.shape) == 2: + predict = np.argmax(predict, axis=1) + else: + predict = [1 if p > 0.5 else 0 for p in predict] + return accuracy_score(label, predict) @@ -220,7 +238,13 @@ class Mrr(Evaluation): """ Should return: the evaluation result (float) """ - pos_predict = predict[:, 1] + if len(predict.shape) == 2: + assert ( + predict.shape[1] == 2 + ), "Cannot use mrr on given data with %d classes!" % (predict.shape[1]) + pos_predict = predict[:, 1] + else: + pos_predict = predict return label_ranking_average_precision_score(label, pos_predict) diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index f9f548c..7504da8 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -397,19 +397,16 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): else: return self.valid_score, self.feval.is_higher_better() - def __repr__(self) -> dict: - return { + def __repr__(self) -> str: + import yaml + return yaml.dump({ "trainer_name": self.__class__.__name__, "optimizer": self.optimizer, "learning_rate": self.lr, "max_epoch": self.max_epoch, "early_stopping_round": self.early_stopping_round, "model": repr(self.model) - } - - def __str__(self) -> str: - import yaml - return yaml.dump(repr(self)) + }) def evaluate(self, dataset, mask="val", feval=None): """ diff --git a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py new file mode 100644 index 0000000..3462797 --- /dev/null +++ b/autogl/module/train/link_prediction.py @@ -0,0 +1,518 @@ +from .
import register_trainer, Evaluation +import torch +from torch.optim.lr_scheduler import StepLR +import torch.nn.functional as F +from ..model import MODEL_DICT, BaseModel +from .evaluation import Auc, EVALUATE_DICT +from .base import EarlyStopping, BaseLinkPredictionTrainer +from typing import Union +from copy import deepcopy +from torch_geometric.utils import negative_sampling + +from ...utils import get_logger + +LOGGER = get_logger("link prediction trainer") + + +def get_feval(feval): + if isinstance(feval, str): + return EVALUATE_DICT[feval] + if isinstance(feval, type) and issubclass(feval, Evaluation): + return feval + if isinstance(feval, list): + return [get_feval(f) for f in feval] + raise ValueError("feval argument of type", type(feval), "is not supported!") + + +@register_trainer("LinkPredictionFull") +class LinkPredictionTrainer(BaseLinkPredictionTrainer): + """ + The link prediction trainer. + + Used to automatically train models on the link prediction problem. + + Parameters + ---------- + model: ``BaseModel`` or ``str`` + The (name of) model used to train and predict. + + optimizer: ``Optimizer`` or ``str`` + The (name of) optimizer used to train and predict. + + lr: ``float`` + The learning rate of the link prediction task. + + max_epoch: ``int`` + The max number of epochs in training. + + early_stopping_round: ``int`` + The number of rounds for early stopping. + + device: ``torch.device`` or ``str`` + The device where model will be running on. + + init: ``bool`` + If True(False), the model will (not) be initialized. + """ + + space = None + + def __init__( + self, + model: Union[BaseModel, str] = None, + num_features=None, + optimizer=None, + lr=1e-4, + max_epoch=100, + early_stopping_round=101, + weight_decay=1e-4, + device="auto", + init=True, + feval=[Auc], + loss="binary_cross_entropy_with_logits", + *args, + **kwargs, + ): + super().__init__(model, num_features, device, init, feval, loss) + + if type(optimizer) == str and optimizer.lower() == "adam": + self.optimizer = torch.optim.Adam + elif type(optimizer) == str and optimizer.lower() == "sgd": + self.optimizer = torch.optim.SGD + else: + self.optimizer = torch.optim.Adam + + self.lr = lr + self.max_epoch = max_epoch + self.early_stopping_round = early_stopping_round + self.device = device + self.args = args + self.kwargs = kwargs + self.weight_decay = weight_decay + + self.early_stopping = EarlyStopping( + patience=early_stopping_round, verbose=False + ) + + self.valid_result = None + self.valid_result_prob = None + self.valid_score = None + + self.initialized = False + self.device = device + + self.space = [ + { + "parameterName": "max_epoch", + "type": "INTEGER", + "maxValue": 500, + "minValue": 10, + "scalingType": "LINEAR", + }, + { + "parameterName": "early_stopping_round", + "type": "INTEGER", + "maxValue": 30, + "minValue": 10, + "scalingType": "LINEAR", + }, + { + "parameterName": "lr", + "type": "DOUBLE", + "maxValue": 1e-1, + "minValue": 1e-4, + "scalingType": "LOG", + }, + { + "parameterName": "weight_decay", + "type": "DOUBLE", + "maxValue": 1e-2, + "minValue": 1e-4, + "scalingType": "LOG", + }, + ] + + LinkPredictionTrainer.space = self.space + + self.hyperparams = { + "max_epoch": self.max_epoch, + "early_stopping_round": self.early_stopping_round, + "lr": self.lr, + "weight_decay": self.weight_decay, + } + + if init is True: + self.initialize() + + def initialize(self): + # Initialize the auto model in trainer.
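+        # Deferred initialization: HPO or the solver may still adjust the
+        # number of input features and the (binary) link-existence target on
+        # the wrapped auto model, so weights are only created on first call.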
+ if self.initialized is True: + return + self.initialized = True + self.model.set_num_classes(self.num_classes) + self.model.set_num_features(self.num_features) + self.model.initialize() + + def get_model(self): + # Get auto model used in trainer. + return self.model + + @classmethod + def get_task_name(cls): + # Get task name, i.e., `LinkPrediction`. + return "LinkPrediction" + + def train_only(self, data, train_mask=None): + """ + The function of training on the given dataset and mask. + + Parameters + ---------- + data: The link prediction dataset to be trained on. It should contain the needed masks, e.g. train_mask. + train_mask: The mask used in the training stage. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A reference of current trainer. + + """ + + # data.train_mask = data.val_mask = data.test_mask = data.y = None + # data = train_test_split_edges(data) + data = data.to(self.device) + # mask = data.train_mask if train_mask is None else train_mask + optimizer = self.optimizer( + self.model.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + ) + scheduler = StepLR(optimizer, step_size=100, gamma=0.1) + for epoch in range(1, self.max_epoch): + self.model.model.train() + + neg_edge_index = negative_sampling( + edge_index=data.train_pos_edge_index, + num_nodes=data.num_nodes, + num_neg_samples=data.train_pos_edge_index.size(1), + ) + + optimizer.zero_grad() + # res = self.model.model.forward(data) + z = self.model.model.lp_encode(data) + link_logits = self.model.model.lp_decode( + z, data.train_pos_edge_index, neg_edge_index + ) + link_labels = self.get_link_labels( + data.train_pos_edge_index, neg_edge_index + ) + # loss = F.binary_cross_entropy_with_logits(link_logits, link_labels) + if hasattr(F, self.loss): + loss = getattr(F, self.loss)(link_logits, link_labels) + else: + raise TypeError( + "PyTorch does not support loss type {}".format(self.loss) + ) + + loss.backward() + optimizer.step() + scheduler.step() + + if type(self.feval) is list: + feval = self.feval[0] + else: + feval = self.feval + val_loss = self.evaluate([data], mask="val", feval=feval) + if feval.is_higher_better() is True: + val_loss = -val_loss + self.early_stopping(val_loss, self.model.model) + if self.early_stopping.early_stop: + LOGGER.debug("Early stopping at %d", epoch) + break + self.early_stopping.load_checkpoint(self.model.model) + + def predict_only(self, data, test_mask=None): + """ + The function of predicting on the given dataset and mask. + + Parameters + ---------- + data: The link prediction dataset to be predicted on. + test_mask: The mask used in the prediction stage. + + Returns + ------- + res: The result of predicting on the given dataset. + + """ + data = data.to(self.device) + self.model.model.eval() + with torch.no_grad(): + z = self.model.model.lp_encode(data) + return z + + def train(self, dataset, keep_valid_result=True): + """ + The function of training on the given dataset and keeping valid result. + + Parameters + ---------- + dataset: The link prediction dataset to be trained on. + + keep_valid_result: ``bool`` + If True(False), save the validation result after training. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A reference of current trainer.
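+
+        A minimal usage sketch (``dataset`` and the ``"gcn"`` model name are
+        assumptions for illustration; the single graph inside the dataset is
+        expected to already carry ``train_pos_edge_index`` and the related
+        split attributes, e.g. produced by ``train_test_split_edges``):
+
+            trainer = LinkPredictionTrainer(model="gcn", num_features=dataset.num_features)
+            trainer.train(dataset, keep_valid_result=True)
+            print(trainer.get_valid_score())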
+ + """ + data = dataset[0] + self.train_only(data) + if keep_valid_result: + self.valid_result = self.predict_only(data) + self.valid_result_prob = self.predict_proba(dataset, "val") + self.valid_score = self.evaluate(dataset, mask="val", feval=self.feval) + + def predict(self, dataset, mask=None): + """ + The function of predicting on the given dataset. + + Parameters + ---------- + dataset: The link prediction dataset used to be predicted. + + mask: ``train``, ``val``, or ``test``. + The dataset mask. + + Returns + ------- + The prediction result of ``predict_proba``. + """ + return self.predict_proba(dataset, mask=mask, in_log_format=False) + + def predict_proba(self, dataset, mask=None, in_log_format=False): + """ + The function of predicting the probability on the given dataset. + + Parameters + ---------- + dataset: The link prediction dataset used to be predicted. + + mask: ``train``, ``val``, or ``test``. + The dataset mask. + + in_log_format: ``bool``. + If True(False), the probability will (not) be log format. + + Returns + ------- + The prediction result. + """ + data = dataset[0] + data = data.to(self.device) + if mask in ["train", "val", "test"]: + pos_edge_index = data[f"{mask}_pos_edge_index"] + neg_edge_index = data[f"{mask}_neg_edge_index"] + else: + pos_edge_index = data[f"test_pos_edge_index"] + neg_edge_index = data[f"test_neg_edge_index"] + + self.model.model.eval() + with torch.no_grad(): + z = self.predict_only(data) + link_logits = self.model.model.lp_decode(z, pos_edge_index, neg_edge_index) + link_probs = link_logits.sigmoid() + + return link_probs + + def get_valid_predict(self): + # """Get the valid result.""" + return self.valid_result + + def get_valid_predict_proba(self): + # """Get the valid result (prediction probability).""" + return self.valid_result_prob + + def get_valid_score(self, return_major=True): + """ + The function of getting the valid score. + + Parameters + ---------- + return_major: ``bool``. + If True, the return only consists of the major result. + If False, the return consists of the all results. + + Returns + ------- + result: The valid score in training stage. + """ + if isinstance(self.feval, list): + if return_major: + return self.valid_score[0], self.feval[0].is_higher_better() + else: + return self.valid_score, [f.is_higher_better() for f in self.feval] + else: + return self.valid_score, self.feval.is_higher_better() + + def get_name_with_hp(self): + # """Get the name of hyperparameter.""" + name = "-".join( + [ + str(self.optimizer), + str(self.lr), + str(self.max_epoch), + str(self.early_stopping_round), + str(self.model), + str(self.device), + ] + ) + name = ( + name + + "|" + + "-".join( + [ + str(x[0]) + "-" + str(x[1]) + for x in self.model.get_hyper_parameter().items() + ] + ) + ) + return name + + def evaluate(self, dataset, mask=None, feval=None): + """ + The function of training on the given dataset and keeping valid result. + + Parameters + ---------- + dataset: The link prediction dataset used to be evaluated. + + mask: ``train``, ``val``, or ``test``. + The dataset mask. + + feval: ``str``. + The evaluation method used in this function. + + Returns + ------- + res: The evaluation result on the given dataset. 
+ + """ + data = dataset[0] + data = data.to(self.device) + test_mask = mask + if feval is None: + feval = self.feval + else: + feval = get_feval(feval) + + if mask in ["train", "val", "test"]: + pos_edge_index = data[f"{mask}_pos_edge_index"] + neg_edge_index = data[f"{mask}_neg_edge_index"] + else: + pos_edge_index = data[f"test_pos_edge_index"] + neg_edge_index = data[f"test_neg_edge_index"] + + self.model.model.eval() + with torch.no_grad(): + link_probs = self.predict_proba(dataset, mask) + link_labels = self.get_link_labels(pos_edge_index, neg_edge_index) + + if not isinstance(feval, list): + feval = [feval] + return_signle = True + else: + return_signle = False + + res = [] + for f in feval: + res.append(f.evaluate(link_probs.cpu().numpy(), link_labels.cpu().numpy())) + if return_signle: + return res[0] + return res + + def to(self, new_device): + assert isinstance(new_device, torch.device) + self.device = new_device + if self.model is not None: + self.model.to(self.device) + + def duplicate_from_hyper_parameter(self, hp: dict, model=None, restricted=True): + """ + The function of duplicating a new instance from the given hyperparameter. + + Parameters + ---------- + hp: ``dict``. + The hyperparameter used in the new instance. + + model: The model used in the new instance of trainer. + + restricted: ``bool``. + If False(True), the hyperparameter should (not) be updated from origin hyperparameter. + + Returns + ------- + self: ``autogl.train.LinkPredictionTrainer`` + A new instance of trainer. + + """ + if not restricted: + origin_hp = deepcopy(self.hyperparams) + origin_hp.update(hp) + hp = origin_hp + if model is None: + model = self.model + model.set_num_classes(self.num_classes) + model.set_num_features(self.num_features) + model = model.from_hyper_parameter( + dict( + [ + x + for x in hp.items() + if x[0] in [y["parameterName"] for y in model.space] + ] + ) + ) + + ret = self.__class__( + model=model, + num_features=self.num_features, + optimizer=self.optimizer, + lr=hp["lr"], + max_epoch=hp["max_epoch"], + early_stopping_round=hp["early_stopping_round"], + device=self.device, + weight_decay=hp["weight_decay"], + feval=self.feval, + init=True, + *self.args, + **self.kwargs, + ) + + return ret + + def set_feval(self, feval): + # """Set the evaluation metrics.""" + self.feval = get_feval(feval) + + @property + def hyper_parameter_space(self): + # """Get the space of hyperparameter.""" + return self.space + + @hyper_parameter_space.setter + def hyper_parameter_space(self, space): + # """Set the space of hyperparameter.""" + self.space = space + LinkPredictionTrainer.space = space + + def get_hyper_parameter(self): + # """Get the hyperparameter in this trainer.""" + return self.hyperparams + + def get_link_labels(self, pos_edge_index, neg_edge_index): + E = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(E, dtype=torch.float, device=self.device) + link_labels[: pos_edge_index.size(1)] = 1.0 + return link_labels diff --git a/autogl/module/train/node_classification_full.py b/autogl/module/train/node_classification_full.py index 0fca8c7..46315b5 100644 --- a/autogl/module/train/node_classification_full.py +++ b/autogl/module/train/node_classification_full.py @@ -14,6 +14,7 @@ from torch.optim.lr_scheduler import ( ) import torch.nn.functional as F from ..model import MODEL_DICT, BaseModel +from ..model.base import ClassificationSupportedSequentialModel from .evaluation import get_feval, Logloss from typing import Union from copy import deepcopy @@ -216,7 
+217,10 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): for epoch in range(1, self.max_epoch): self.model.model.train() optimizer.zero_grad() - res = self.model.model.forward(data) + if isinstance(self.model.model, ClassificationSupportedSequentialModel): + res = self.model.model.cls_forward(data) + else: + res = self.model.model.forward(data) if hasattr(F, self.loss): loss = getattr(F, self.loss)(res[mask], data.y[mask]) else: @@ -262,7 +266,10 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): data = data.to(self.device) self.model.model.eval() with torch.no_grad(): - res = self.model.model.forward(data) + if isinstance(self.model.model, ClassificationSupportedSequentialModel): + res = self.model.model.cls_forward(data) + else: + res = self.model.model.forward(data) return res def train(self, dataset, keep_valid_result=True): @@ -373,19 +380,18 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): else: return self.valid_score, self.feval.is_higher_better() - def __repr__(self) -> dict: - return { - "trainer_name": self.__class__.__name__, - "optimizer": self.optimizer, - "learning_rate": self.lr, - "max_epoch": self.max_epoch, - "early_stopping_round": self.early_stopping_round, - "model": repr(self.model) - } - - def __str__(self) -> str: + def __repr__(self) -> str: import yaml - return yaml.dump(repr(self)) + return yaml.dump( + { + "trainer_name": self.__class__.__name__, + "optimizer": self.optimizer, + "learning_rate": self.lr, + "max_epoch": self.max_epoch, + "early_stopping_round": self.early_stopping_round, + "model": repr(self.model) + } + ) def evaluate(self, dataset, mask=None, feval=None): """ diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 9c0fa7c..dac3792 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -16,7 +16,7 @@ from ..sampling.sampler.layer_dependent_importance_sampler import ( LayerDependentImportanceSampler ) from ...model import BaseModel -from ...model.base import SequentialGraphNeuralNetwork +from ...model.base import ClassificationSupportedSequentialModel LOGGER: logging.Logger = logging.getLogger("Node classification sampling trainer") @@ -266,9 +266,11 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ) ) else: - _sampler: torch_geometric.data.GraphSAINTEdgeSampler = ( - GraphSAINTSamplerFactory.create_edge_sampler( - integral_data, self.__num_graphs_per_epoch, self.__sampled_budget, + _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = ( + GraphSAINTSamplerFactory.create_random_walk_sampler( + integral_data, self.__num_graphs_per_epoch, + self.__sampled_budget, self.__walk_length, + self.__sample_coverage_factor, num_workers=self.__training_sampler_num_workers ) ) @@ -284,7 +286,10 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): getattr(sampled_data, "edge_norm") * getattr(sampled_data, "edge_weight") ) optimizer.zero_grad() - prediction: torch.Tensor = self.model.model(sampled_data) + if isinstance(self.model.model, ClassificationSupportedSequentialModel): + prediction: torch.Tensor = self.model.model.cls_forward(sampled_data) + else: + prediction: torch.Tensor = self.model.model(sampled_data) if not hasattr(torch.nn.functional, self.loss): 
raise TypeError( f"PyTorch does not support loss type {self.loss}" @@ -342,7 +347,10 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ) integral_data = integral_data.to(self.device) with torch.no_grad(): - prediction = self.model.model(integral_data) + if isinstance(self.model.model, ClassificationSupportedSequentialModel): + prediction: torch.Tensor = self.model.model.cls_forward(integral_data) + else: + prediction: torch.Tensor = self.model.model(integral_data) return prediction[mask_or_target_nodes_indexes] def predict_proba( @@ -703,7 +711,10 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi current_layer.edge_weight.to(self.device) for current_layer in sampled_data.sampled_edges_for_layers ] - prediction: torch.Tensor = self.model.model(sampled_graph) + if isinstance(self.model.model, ClassificationSupportedSequentialModel): + prediction: torch.Tensor = self.model.model.cls_forward(sampled_graph) + else: + prediction: torch.Tensor = self.model.model(sampled_graph) if not hasattr(torch.nn.functional, self.loss): raise TypeError( f"PyTorch does not support loss type {self.loss}" @@ -757,8 +768,8 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi self.model.model.eval() integral_data = integral_data.to(torch.device("cpu")) mask_or_target_nodes_indexes = mask_or_target_nodes_indexes.to(torch.device("cpu")) - if isinstance(self.model.model, SequentialGraphNeuralNetwork): - sequential_gnn_model: SequentialGraphNeuralNetwork = self.model.model + if isinstance(self.model.model, ClassificationSupportedSequentialModel): + sequential_gnn_model: ClassificationSupportedSequentialModel = self.model.model __num_layers: int = len(self.__sampled_node_sizes) x: torch.Tensor = getattr(integral_data, "x") @@ -798,7 +809,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi with torch.no_grad(): __sampled_graph_inferences: torch.Tensor = ( - sequential_gnn_model.encoder_sequential_modules[_current_layer_index](_sampled_graph) + sequential_gnn_model.sequential_encoding_layers[_current_layer_index](_sampled_graph) ) _sampled_target_nodes_inferences: torch.Tensor = __sampled_graph_inferences[ _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph @@ -848,8 +859,8 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi with torch.no_grad(): prediction_batch_cumulative_builder.add_batch( _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), - sequential_gnn_model.decode( - sequential_gnn_model.encoder_sequential_modules[-1](_sampled_graph) + sequential_gnn_model.cls_decode( + sequential_gnn_model.sequential_encoding_layers[-1](_sampled_graph) )[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph].cpu().numpy() ) return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) @@ -1219,7 +1230,10 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): current_layer.edge_index_for_sampled_graph.to(self.device) for current_layer in sampled_data.sampled_edges_for_layers ] - prediction: torch.Tensor = self.model.model(sampled_graph) + if isinstance(self.model.model, ClassificationSupportedSequentialModel): + prediction: torch.Tensor = self.model.model.cls_forward(sampled_graph) + else: + prediction: torch.Tensor = self.model.model(sampled_graph) if not hasattr(torch.nn.functional, self.loss): raise TypeError( f"PyTorch does not support loss 
type {self.loss}" @@ -1273,8 +1287,8 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self.model.model.eval() integral_data = integral_data.to(torch.device("cpu")) mask_or_target_nodes_indexes = mask_or_target_nodes_indexes.to(torch.device("cpu")) - if isinstance(self.model.model, SequentialGraphNeuralNetwork): - sequential_gnn_model: SequentialGraphNeuralNetwork = self.model.model + if isinstance(self.model.model, ClassificationSupportedSequentialModel): + sequential_gnn_model: ClassificationSupportedSequentialModel = self.model.model __num_layers: int = len(self.__sampling_sizes) x: torch.Tensor = getattr(integral_data, "x") @@ -1312,7 +1326,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): with torch.no_grad(): __sampled_graph_inferences: torch.Tensor = ( - sequential_gnn_model.encoder_sequential_modules[_current_layer_index](_sampled_graph) + sequential_gnn_model.sequential_encoding_layers[_current_layer_index](_sampled_graph) ) _sampled_target_nodes_inferences: torch.Tensor = __sampled_graph_inferences[ _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph @@ -1359,8 +1373,8 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): with torch.no_grad(): prediction_batch_cumulative_builder.add_batch( _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), - sequential_gnn_model.decode( - sequential_gnn_model.encoder_sequential_modules[-1](_sampled_graph) + sequential_gnn_model.cls_decode( + sequential_gnn_model.sequential_encoding_layers[-1](_sampled_graph) )[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph].cpu().numpy() ) return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) diff --git a/autogl/solver/__init__.py b/autogl/solver/__init__.py index 7fce9f4..1cba0b1 100644 --- a/autogl/solver/__init__.py +++ b/autogl/solver/__init__.py @@ -2,5 +2,12 @@ Auto solver for various graph tasks """ -from .classifier import AutoGraphClassifier, AutoNodeClassifier +from .classifier import AutoGraphClassifier, AutoNodeClassifier, AutoLinkPredictor from .utils import LeaderBoard + +__all__ = [ + "AutoNodeClassifier", + "AutoGraphClassifier", + "AutoLinkPredictor", + "LeaderBoard", +] diff --git a/autogl/solver/classifier/__init__.py b/autogl/solver/classifier/__init__.py index fc74cd6..e30c582 100644 --- a/autogl/solver/classifier/__init__.py +++ b/autogl/solver/classifier/__init__.py @@ -5,5 +5,11 @@ Auto classifier for classification problems. 
from .base import BaseClassifier from .graph_classifier import AutoGraphClassifier from .node_classifier import AutoNodeClassifier +from .link_predictor import AutoLinkPredictor -__all__ = ["BaseClassifier", "AutoGraphClassifier", "AutoNodeClassifier"] +__all__ = [ + "BaseClassifier", + "AutoGraphClassifier", + "AutoNodeClassifier", + "AutoLinkPredictor", +] diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index a4c0674..f204b32 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -111,7 +111,7 @@ class AutoGraphClassifier(BaseClassifier): ) -> "AutoGraphClassifier": # load graph network module self.graph_model_list = [] - if isinstance(graph_models, list): + if isinstance(graph_models, (list, tuple)): for model in graph_models: if isinstance(model, str): if model in MODEL_DICT: @@ -371,9 +371,8 @@ class AutoGraphClassifier(BaseClassifier): result_valid.append( optimized.get_valid_predict_proba().detach().cpu().numpy() ) - self.leaderboard.add_performance( + self.leaderboard.insert_model_performance( name, - repr(optimized), dict( zip( [e.get_eval_name() for e in evaluator_list], @@ -420,8 +419,8 @@ class AutoGraphClassifier(BaseClassifier): .cpu() .numpy() ) - self.leaderboard.add_performance( - name, repr(optimized), + self.leaderboard.insert_model_performance( + name, dict( zip( [e.get_eval_name() for e in evaluator_list], @@ -440,8 +439,8 @@ class AutoGraphClassifier(BaseClassifier): evaluator_list, n_classes=dataset.num_classes, ) - self.leaderboard.add_performance( - "ensemble", "ensemble", + self.leaderboard.insert_model_performance( + "ensemble", dict(zip([e.get_eval_name() for e in evaluator_list], performance)), ) diff --git a/autogl/solver/classifier/link_predictor.py b/autogl/solver/classifier/link_predictor.py new file mode 100644 index 0000000..17d6228 --- /dev/null +++ b/autogl/solver/classifier/link_predictor.py @@ -0,0 +1,747 @@ +""" +Auto Classifier for Link Prediction +""" +import time +import json + +from copy import deepcopy + +import torch +import numpy as np +import yaml + +from .base import BaseClassifier +from ..base import _parse_hp_space, _initialize_single_model +from ...module.feature import FEATURE_DICT +from ...module.model import MODEL_DICT, BaseModel +from ...module.train import TRAINER_DICT, BaseLinkPredictionTrainer +from ...module.train import get_feval +from ..utils import LeaderBoard, set_seed +from ...datasets import utils +from ...utils import get_logger + +LOGGER = get_logger("LinkPredictor") + + +class AutoLinkPredictor(BaseClassifier): + """ + Auto Link Predictor. + + Used to automatically solve the link prediction problem. + + Parameters + ---------- + feature_module: autogl.module.feature.BaseFeatureEngineer or str or None + The (name of) auto feature engineer used to process the given dataset. Default ``deepgl``. + Disable feature engineer by setting it to ``None``. + + graph_models: list of autogl.module.model.BaseModel or list of str + The (name of) models to be optimized as backbone. Default ``['gat', 'gcn']``. + + hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None + The (name of) hpo module used to search for best hyper parameters. Default ``anneal``. + Disable hpo by setting it to ``None``. + + ensemble_module: autogl.module.ensemble.BaseEnsembler or str or None + The (name of) ensemble module used to ensemble the multi-models found. Default ``voting``. + Disable ensemble by setting it to ``None``.
+ + max_evals: int (Optional) + If given, will set the number of evaluation times the hpo module will use. + Only effective when hpo_module is ``str``. Default ``None``. + + trainer_hp_space: list of dict (Optional) + trainer hp space or list of trainer hp spaces configuration. + If a single trainer hp is given, will specify the hp space of trainer for every model. + If a list of trainer hp is given, will specify every model with its corresponding + trainer hp space. + Default ``None``. + + model_hp_spaces: list of list of dict (Optional) + model hp space configuration. + If given, will specify every hp space of every passed model. Default ``None``. + + size: int (Optional) + The maximum number of models the ensemble module will use. Default ``None``. + + device: torch.device or str + The device where model will be running on. If set to ``auto``, will use gpu when available. + You can also specify the device by directly giving ``gpu`` or ``cuda:0``, etc. + Default ``auto``. + """ + + def __init__( + self, + feature_module=None, + graph_models=("gat", "gcn"), + hpo_module="anneal", + ensemble_module="voting", + max_evals=50, + default_trainer=None, + trainer_hp_space=None, + model_hp_spaces=None, + size=4, + device="auto", + ): + + super().__init__( + feature_module=feature_module, + graph_models=graph_models, + hpo_module=hpo_module, + ensemble_module=ensemble_module, + max_evals=max_evals, + default_trainer=default_trainer or "LinkPredictionFull", + trainer_hp_space=trainer_hp_space, + model_hp_spaces=model_hp_spaces, + size=size, + device=device, + ) + + # data to be kept when fit + self.dataset = None + + def _init_graph_module( + self, graph_models, num_features, feval, device, loss + ) -> "AutoLinkPredictor": + # load graph network module + self.graph_model_list = [] + if isinstance(graph_models, (list, tuple)): + for model in graph_models: + if isinstance(model, str): + if model in MODEL_DICT: + self.graph_model_list.append( + MODEL_DICT[model]( + num_classes=1, + num_features=num_features, + device=device, + init=False, + ) + ) + else: + raise KeyError("cannot find model %s" % (model)) + elif isinstance(model, type) and issubclass(model, BaseModel): + self.graph_model_list.append( + model( + num_classes=1, + num_features=num_features, + device=device, + init=False, + ) + ) + elif isinstance(model, BaseModel): + # setup the hp of num_classes and num_features + model.set_num_classes(1) + model.set_num_features(num_features) + self.graph_model_list.append(model.to(device)) + elif isinstance(model, BaseLinkPredictionTrainer): + # receive a trainer list, put trainer to list + assert ( + model.get_model() is not None + ), "Passed trainer should contain a model" + model.model.set_num_classes(1) + model.model.set_num_features(num_features) + model.update_parameters( + num_classes=1, + num_features=num_features, + loss=loss, + feval=feval, + device=device, + ) + self.graph_model_list.append(model) + else: + raise KeyError("cannot find graph network %s."
% (model)) + else: + raise ValueError( + "need graph network to be (list of) str or a BaseModel class/instance, got", + graph_models, + "instead.", + ) + + # wrap all model_cls with specified trainer + for i, model in enumerate(self.graph_model_list): + # set model hp space + if self._model_hp_spaces is not None: + if self._model_hp_spaces[i] is not None: + if isinstance(model, BaseLinkPredictionTrainer): + model.model.hyper_parameter_space = self._model_hp_spaces[i] + else: + model.hyper_parameter_space = self._model_hp_spaces[i] + # initialize trainer if needed + if isinstance(model, BaseModel): + name = ( + self._default_trainer + if isinstance(self._default_trainer, str) + else self._default_trainer[i] + ) + model = TRAINER_DICT[name]( + model=model, + num_features=num_features, + loss=loss, + feval=feval, + device=device, + init=False, + ) + # set trainer hp space + if self._trainer_hp_space is not None: + if isinstance(self._trainer_hp_space[0], list): + current_hp_for_trainer = self._trainer_hp_space[i] + else: + current_hp_for_trainer = self._trainer_hp_space + model.hyper_parameter_space = current_hp_for_trainer + self.graph_model_list[i] = model + + return self + + def _to_prob(self, sig_prob: np.ndarray): + nelements = len(sig_prob) + prob = np.zeros([nelements, 2]) + prob[:, 0] = 1 - sig_prob + prob[:, 1] = sig_prob + return prob + + # pylint: disable=arguments-differ + def fit( + self, + dataset, + time_limit=-1, + inplace=False, + train_split=None, + val_split=None, + evaluation_method="infer", + seed=None, + ) -> "AutoLinkPredictor": + """ + Fit current solver on given dataset. + + Parameters + ---------- + dataset: torch_geometric.data.dataset.Dataset + The dataset needed to fit on. This dataset must have only one graph. + + time_limit: int + The time limit of the whole fit process (in seconds). If set below 0, + will ignore time limit. Default ``-1``. + + inplace: bool + Whether we process the given dataset in inplace manner. Default ``False``. + Set it to True if you want to save memory by modifying the given dataset directly. + + train_split: float or int (Optional) + The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to + use default train/val/test split in dataset, please set this to ``None``. + Default ``None``. + + val_split: float or int (Optional) + The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want + to use default train/val/test split in dataset, please set this to ``None``. + Default ``None``. + + evaluation_method: (list of) str or autogl.module.train.evaluation + A (list of) evaluation method for current solver. If ``infer``, will automatically + determine. Default ``infer``. + + seed: int (Optional) + The random seed. If set to ``None``, will run everything at random. + Default ``None``. + + Returns + ------- + self: autogl.solver.AutoLinkPredictor + A reference of current solver.
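+
+        A minimal usage sketch (the ``dataset`` name is an assumption; any
+        single-graph dataset with node features should work):
+
+            solver = AutoLinkPredictor(max_evals=10)
+            solver.fit(dataset, time_limit=3600, train_split=0.85, val_split=0.05)
+            print(solver.leaderboard.get_best_model())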
+ """ + set_seed(seed) + + if time_limit < 0: + time_limit = 3600 * 24 + time_begin = time.time() + + # initialize leaderboard + if evaluation_method == "infer": + if hasattr(dataset, "metric"): + evaluation_method = [dataset.metric] + else: + num_of_label = dataset.num_classes + if num_of_label == 2: + evaluation_method = ["auc"] + else: + evaluation_method = ["acc"] + assert isinstance(evaluation_method, list) + evaluator_list = get_feval(evaluation_method) + + self.leaderboard = LeaderBoard( + [e.get_eval_name() for e in evaluator_list], + {e.get_eval_name(): e.is_higher_better() for e in evaluator_list}, + ) + + # set up the dataset + if train_split is not None and val_split is not None: + utils.split_edges(dataset, train_split, val_split) + else: + assert all( + [ + hasattr(dataset.data, f"{name}") + for name in [ + "train_pos_edge_index", + "train_neg_adj_mask", + "val_pos_edge_index", + "val_neg_edge_index", + "test_pos_edge_index", + "test_neg_edge_index", + ] + ] + ), ( + "The dataset has no default train/val split! Please manually pass " + "train and val ratio." + ) + LOGGER.info("Use the default train/val/test ratio in given dataset") + + # feature engineering + if self.feature_module is not None: + dataset = self.feature_module.fit_transform(dataset, inplace=inplace) + + self.dataset = dataset + assert self.dataset[0].x is not None, ( + "Does not support fit on non node-feature dataset!" + " Please add node features to dataset or specify feature engineers that generate" + " node features." + ) + + # initialize graph networks + self._init_graph_module( + self.gml, + num_features=self.dataset[0].x.shape[1], + feval=evaluator_list, + device=self.runtime_device, + loss="binary_cross_entropy_with_logits" + if not hasattr(dataset, "loss") + else dataset.loss, + ) + + # train the models and tune hpo + result_valid = [] + names = [] + for idx, model in enumerate(self.graph_model_list): + time_for_each_model = (time_limit - time.time() + time_begin) / ( + len(self.graph_model_list) - idx + ) + if self.hpo_module is None: + model.initialize() + model.train(self.dataset, True) + optimized = model + else: + optimized, _ = self.hpo_module.optimize( + trainer=model, dataset=self.dataset, time_limit=time_for_each_model + ) + # to save memory, all the trainer derived will be mapped to cpu + optimized.to(torch.device("cpu")) + name = optimized.get_name_with_hp() + "_idx%d" % (idx) + names.append(name) + performance_on_valid, _ = optimized.get_valid_score(return_major=False) + result_valid.append( + self._to_prob(optimized.get_valid_predict_proba().cpu().numpy()) + ) + self.leaderboard.insert_model_performance( + name, + dict( + zip( + [e.get_eval_name() for e in evaluator_list], + performance_on_valid, + ) + ), + ) + self.trained_models[name] = optimized + + # fit the ensemble model + if self.ensemble_module is not None: + pos_edge_index, neg_edge_index = ( + self.dataset[0].val_pos_edge_index, + self.dataset[0].val_neg_edge_index, + ) + E = pos_edge_index.size(1) + neg_edge_index.size(1) + link_labels = torch.zeros(E, dtype=torch.float) + link_labels[: pos_edge_index.size(1)] = 1.0 + + performance = self.ensemble_module.fit( + result_valid, + link_labels.detach().cpu().numpy(), + names, + evaluator_list, + n_classes=dataset.num_classes, + ) + self.leaderboard.insert_model_performance( + "ensemble", + dict(zip([e.get_eval_name() for e in evaluator_list], performance)), + ) + + return self + + def fit_predict( + self, + dataset, + time_limit=-1, + inplace=False, + train_split=None, + 
val_split=None, + evaluation_method="infer", + use_ensemble=True, + use_best=True, + name=None, + ) -> np.ndarray: + """ + Fit current solver on given dataset and return the predicted value. + + Parameters + ---------- + dataset: torch_geometric.data.dataset.Dataset + The dataset needed to fit on. This dataset must have only one graph. + + time_limit: int + The time limit of the whole fit process (in seconds). + If set below 0, will ignore time limit. Default ``-1``. + + inplace: bool + Whether we process the given dataset in inplace manner. Default ``False``. + Set it to True if you want to save memory by modifying the given dataset directly. + + train_split: float or int (Optional) + The train ratio (in ``float``) or number (in ``int``) of dataset. If you want to + use default train/val/test split in dataset, please set this to ``None``. + Default ``None``. + + val_split: float or int (Optional) + The validation ratio (in ``float``) or number (in ``int``) of dataset. If you want + to use default train/val/test split in dataset, please set this to ``None``. + Default ``None``. + + evaluation_method: (list of) str or autogl.module.train.evaluation + A (list of) evaluation method for current solver. If ``infer``, will automatically + determine. Default ``infer``. + + use_ensemble: bool + Whether to use ensemble to make the prediction. Default ``True``. + + use_best: bool + Whether to use the best single model to make the prediction. Will only be effective when + ``use_ensemble`` is ``False``. + Default ``True``. + + name: str or None + The name of model used to predict. Will only be effective when ``use_ensemble`` and + ``use_best`` both are ``False``. + Default ``None``. + + Returns + ------- + result: np.ndarray + An array of shape ``(N,)``, where ``N`` is the number of test edges. The prediction + on given dataset. + """ + self.fit( + dataset=dataset, + time_limit=time_limit, + inplace=inplace, + train_split=train_split, + val_split=val_split, + evaluation_method=evaluation_method, + ) + return self.predict( + dataset=dataset, + inplaced=inplace, + inplace=inplace, + use_ensemble=use_ensemble, + use_best=use_best, + name=name, + ) + + def predict_proba( + self, + dataset=None, + inplaced=False, + inplace=False, + use_ensemble=True, + use_best=True, + name=None, + mask="test", + ) -> np.ndarray: + """ + Predict the link probability. + + Parameters + ---------- + dataset: torch_geometric.data.dataset.Dataset or None + The dataset needed to predict. If ``None``, will use the processed dataset passed + to ``fit()`` instead. Default ``None``. + + inplaced: bool + Whether the given dataset is processed. Only be effective when ``dataset`` + is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``, and + you pass the dataset again to this method, you should set this argument to ``True``. + Otherwise ``False``. Default ``False``. + + inplace: bool + Whether we process the given dataset in inplace manner. Default ``False``. Set it to + True if you want to save memory by modifying the given dataset directly. + + use_ensemble: bool + Whether to use ensemble to make the prediction. Default ``True``. + + use_best: bool + Whether to use the best single model to make the prediction. Will only be effective when + ``use_ensemble`` is ``False``. Default ``True``.
+ + name: str or None + The name of model used to predict. Will only be effective when ``use_ensemble`` and + ``use_best`` both are ``False``. Default ``None``. + + mask: str + The data split to give prediction on. Default ``test``. + + Returns + ------- + result: np.ndarray + An array of shape ``(N,)``, where ``N`` is the number of queried edges. + The predicted probability that each edge exists. + """ + if dataset is None: + dataset = self.dataset + assert dataset is not None, ( + "Please execute fit() first before" " predicting on remembered dataset" + ) + elif not inplaced and self.feature_module is not None: + dataset = self.feature_module.transform(dataset, inplace=inplace) + + if use_ensemble: + LOGGER.info("Ensemble argument on, will try using ensemble model.") + + if not use_ensemble and use_best: + LOGGER.info( + "Ensemble argument off and best argument on, will try using best model." + ) + + if (use_ensemble and self.ensemble_module is not None) or ( + not use_best and name == "ensemble" + ): + # we need to get all the prediction of every model trained + predict_result = [] + names = [] + for model_name in self.trained_models: + predict_result.append( + self._to_prob( + self._predict_proba_by_name(dataset, model_name, mask) + ) + ) + names.append(model_name) + return self.ensemble_module.ensemble(predict_result, names)[:, 1] + + if use_ensemble and self.ensemble_module is None: + LOGGER.warning( + "Cannot use ensemble because no ensemble module is given. " + "Will use best model instead." + ) + + if use_best or (use_ensemble and self.ensemble_module is None): + # just return the best model we have found + name = self.leaderboard.get_best_model() + return self._predict_proba_by_name(dataset, name, mask) + + if name is not None: + # return model performance by name + return self._predict_proba_by_name(dataset, name, mask) + + LOGGER.error( + "No model name is given while ensemble and best arguments are off." + ) + raise ValueError( + "You need to specify a model name if you do not want to use the ensemble and best model." + ) + + def _predict_proba_by_name(self, dataset, name, mask="test"): + self.trained_models[name].to(self.runtime_device) + predicted = ( + self.trained_models[name].predict_proba(dataset, mask=mask).cpu().numpy() + ) + self.trained_models[name].to(torch.device("cpu")) + return predicted + + def predict( + self, + dataset=None, + inplaced=False, + inplace=False, + use_ensemble=True, + use_best=True, + name=None, + mask="test", + threshold=0.5, + ) -> np.ndarray: + """ + Predict whether the queried links exist. + + Parameters + ---------- + dataset: torch_geometric.data.dataset.Dataset or None + The dataset needed to predict. If ``None``, will use the processed dataset passed + to ``fit()`` instead. Default ``None``. + + inplaced: bool + Whether the given dataset is processed. Only be effective when ``dataset`` + is not ``None``. If you pass the dataset to ``fit()`` with ``inplace=True``, + and you pass the dataset again to this method, you should set this argument + to ``True``. Otherwise ``False``. Default ``False``. + + inplace: bool + Whether we process the given dataset in inplace manner. Default ``False``. + Set it to True if you want to save memory by modifying the given dataset directly. + + use_ensemble: bool + Whether to use ensemble to make the prediction. Default ``True``. + + use_best: bool + Whether to use the best single model to make the prediction. Will only be effective + when ``use_ensemble`` is ``False``. Default ``True``.
+ + name: str or None + The name of model used to predict. Will only be effective when ``use_ensemble`` + and ``use_best`` both are ``False``. Default ``None``. + + mask: str + The data split to give prediction on. Default ``test``. + + threshold: float + The threshold to judge whether the edges are positive or not. + + Returns + ------- + result: np.ndarray + An array of shape ``(N,)``, where ``N`` is the number of queried edges. + The binary (0/1) prediction of link existence on the given dataset. + """ + proba = self.predict_proba( + dataset, inplaced, inplace, use_ensemble, use_best, name, mask + ) + return (proba > threshold).astype("int") + + @classmethod + def from_config(cls, path_or_dict, filetype="auto") -> "AutoLinkPredictor": + """ + Load solver from config file. + + You can use this function to directly load a solver from predefined config dict + or config file path. Currently, only support file type of ``json`` or ``yaml``, + if you pass a path. + + Parameters + ---------- + path_or_dict: str or dict + The path to the config file or the config dictionary object + + filetype: str + The filetype of the given file if the path is specified. Currently only support + ``json`` or ``yaml``. You can set to ``auto`` to automatically detect the file + type (from file name). Default ``auto``. + + Returns + ------- + solver: autogl.solver.AutoLinkPredictor + The solver that is created from given file or dictionary. + """ + assert filetype in ["auto", "yaml", "json"], ( + "currently only support yaml file or json file type, but got type " + + filetype + ) + if isinstance(path_or_dict, str): + if filetype == "auto": + if path_or_dict.endswith(".yaml") or path_or_dict.endswith(".yml"): + filetype = "yaml" + elif path_or_dict.endswith(".json"): + filetype = "json" + else: + LOGGER.error( + "cannot parse the type of the given file name, " + "please manually set the file type" + ) + raise ValueError( + "cannot parse the type of the given file name, " + "please manually set the file type" + ) + if filetype == "yaml": + path_or_dict = yaml.load( + open(path_or_dict, "r").read(), Loader=yaml.FullLoader + ) + else: + path_or_dict = json.load(open(path_or_dict, "r")) + + path_or_dict = deepcopy(path_or_dict) + solver = cls(None, [], None, None) + fe_list = path_or_dict.pop("feature", None) + if fe_list is not None: + fe_list_ele = [] + for feature_engineer in fe_list: + name = feature_engineer.pop("name") + if name is not None: + fe_list_ele.append(FEATURE_DICT[name](**feature_engineer)) + if fe_list_ele != []: + solver.set_feature_module(fe_list_ele) + + models = path_or_dict.pop("models", [{"name": "gcn"}, {"name": "gat"}]) + model_hp_space = [ + _parse_hp_space(model.pop("hp_space", None)) for model in models + ] + model_list = [ + _initialize_single_model(model.pop("name"), model) for model in models + ] + + trainer = path_or_dict.pop("trainer", None) + default_trainer = "LinkPredictionFull" + trainer_space = None + if isinstance(trainer, dict): + # global default + default_trainer = trainer.pop("name", "LinkPredictionFull") + trainer_space = _parse_hp_space(trainer.pop("hp_space", None)) + default_kwargs = {"num_features": None} + default_kwargs.update(trainer) + default_kwargs["init"] = False + for i in range(len(model_list)): + model = model_list[i] + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap + elif isinstance(trainer, list): + # sequential trainer definition + assert len(trainer) == len( + model_list + ), "The number of trainers and models does not match" +
trainer_space = [] + for i in range(len(model_list)): + train, model = trainer[i], model_list[i] + default_trainer = train.pop("name", "LinkPredictionFull") + trainer_space.append(_parse_hp_space(train.pop("hp_space", None))) + default_kwargs = {"num_features": None} + default_kwargs.update(train) + default_kwargs["init"] = False + trainer_wrap = TRAINER_DICT[default_trainer]( + model=model, **default_kwargs + ) + model_list[i] = trainer_wrap + + solver.set_graph_models( + model_list, default_trainer, trainer_space, model_hp_space + ) + + hpo_dict = path_or_dict.pop("hpo", {"name": "anneal"}) + if hpo_dict is not None: + name = hpo_dict.pop("name") + solver.set_hpo_module(name, **hpo_dict) + + ensemble_dict = path_or_dict.pop("ensemble", {"name": "voting"}) + if ensemble_dict is not None: + name = ensemble_dict.pop("name") + solver.set_ensemble_module(name, **ensemble_dict) + + return solver diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index 18ffe6e..538978c 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -105,7 +105,7 @@ class AutoNodeClassifier(BaseClassifier): ) -> "AutoNodeClassifier": # load graph network module self.graph_model_list = [] - if isinstance(graph_models, list): + if isinstance(graph_models, (list, tuple)): for model in graph_models: if isinstance(model, str): if model in MODEL_DICT: @@ -340,12 +340,12 @@ class AutoNodeClassifier(BaseClassifier): ) # to save memory, all the trainer derived will be mapped to cpu optimized.to(torch.device("cpu")) - name = str(optimized) + name = str(optimized) + "_idx%d" % (idx) names.append(name) performance_on_valid, _ = optimized.get_valid_score(return_major=False) result_valid.append(optimized.get_valid_predict_proba().cpu().numpy()) - self.leaderboard.add_performance( - name, repr(optimized), + self.leaderboard.insert_model_performance( + name, dict( zip( [e.get_eval_name() for e in evaluator_list], @@ -364,8 +364,8 @@ class AutoNodeClassifier(BaseClassifier): evaluator_list, n_classes=dataset.num_classes, ) - self.leaderboard.add_performance( - "ensemble", "ensemble", + self.leaderboard.insert_model_performance( + "ensemble", dict(zip([e.get_eval_name() for e in evaluator_list], performance)), ) diff --git a/autogl/solver/utils.py b/autogl/solver/utils.py index 0dae7e0..03fc36b 100644 --- a/autogl/solver/utils.py +++ b/autogl/solver/utils.py @@ -17,7 +17,7 @@ LOGGER = get_logger("LeaderBoard") class LeaderBoard: """ - The LeaderBoard that can be used to store / sort the model performance automatically. + The LeaderBoard that can be used to store / sort the model performance automatically. Parameters ---------- @@ -25,38 +25,16 @@ class LeaderBoard: A list of field name that shows the model performance. The first field is used as the major field for sorting the model performances. - is_higher_better: list of `bool` - A list of indicator that whether the field score is higher better. + is_higher_better: `dict` of *field* -> `bool` + A mapping from each field to whether a higher value of that field is better.
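+
+    A minimal sketch (the field name ``auc`` is an assumption):
+
+        board = LeaderBoard(["auc"], {"auc": True})
+        board.insert_model_performance("gcn", {"auc": 0.91})
+        print(board.get_best_model())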
""" - def __init__( - self, fields: _typing.Sequence[str], - is_higher_better: _typing.Union[ - _typing.Sequence[bool], - _typing.Dict[str, bool] - ] - ): - if not isinstance(fields, _typing.Sequence): - raise TypeError - for _field in fields: - if type(_field) != str: - raise TypeError - if isinstance(is_higher_better, dict): - self.__is_higher_better: _typing.Sequence[bool] = [ - bool(is_higher_better[field]) for field in fields - ] - elif isinstance(is_higher_better, _typing.Sequence): - self.__is_higher_better: _typing.Sequence[bool] = [ - bool(item) for item in is_higher_better - ] - else: - raise TypeError - self.__fields: _typing.Sequence[str] = fields - self.__major_field: str = fields[0] - - self.__performance_data_frame: pd.DataFrame = pd.DataFrame( - columns=["name", "representation"] + list(fields) - ) + def __init__(self, fields, is_higher_better): + assert isinstance(fields, list) + self.keys = ["name"] + fields + self.perform_dict = pd.DataFrame(columns=self.keys) + self.is_higher_better = is_higher_better + self.major_field = fields[0] def set_major_field(self, field) -> None: """ @@ -71,74 +49,62 @@ class LeaderBoard: ------- None """ - if field in self.__fields: - self.__major_field = field + if field in self.keys and not field == "name": + self.major_field = field else: - LOGGER.warning( - "do not find major field %s in the current LeaderBoard, will ignore.", field - ) + LOGGER.warning(f"Field [{field}] NOT found in the current LeaderBoard, will ignore.") - def add_performance( - self, name: str, - representation: _typing.Union[str, _typing.Dict[str, _typing.Any]], - performance: _typing.Dict[str, float] - ) -> 'LeaderBoard': + def insert_model_performance(self, name, performance) -> None: """ - Add a record of model performance. + Add/Override a record of model performance. If name given is already in the leaderboard, + will overrride the slot. Parameters ---------- name: `str` The model name/identifier that identifies the model. - representation: `str` or `dict` - The representation of the corresponding methodology. - performance: `dict` The performance dict. The key inside the dict should be the fields when initialized. The value of the dict should be the corresponding scores. Returns ------- - self: - this `LeaderBoard` instance for chained call + None """ - import yaml - if isinstance(representation, dict): - __representation: str = yaml.dump(representation) - elif isinstance(representation, str): - __representation: str = representation + if name not in self.perform_dict["name"]: + # we just add a new row + performance["name"] = name + new = pd.DataFrame(performance, index=[0]) + self.perform_dict = self.perform_dict.append(new, ignore_index=True) else: - raise TypeError - - __dict = {"name": name, "representation": __representation} - __dict.update(performance) - self.__performance_data_frame = self.__performance_data_frame.append( - pd.DataFrame(__dict, index=[0]), ignore_index=True - ) - return self + LOGGER.warning( + "model already in the leaderboard, will override current result." + ) + self.remove_model_performance(name) + self.insert_model_performance(name, performance) - def insert_model_performance( - self, name: str, performance: _typing.Dict[str, _typing.Any] - ) -> None: + def remove_model_performance(self, name) -> None: """ - Add a record of model performance. - todo: This method will be deprecated + Remove the record of given models. Parameters ---------- name: `str` - The model name/identifier that identifies the model. 
-
-        performance: `dict`
-            The performance dict. The key inside the dict should be the fields when initialized.
+            The model name/identifier that needs to be removed.
 
         Returns
         -------
         None
         """
-        self.add_performance(name, name, performance)
+        if name not in self.perform_dict["name"].values:
+            LOGGER.warning(
+                "no such model detected in the current leaderboard, will ignore the removing action."
+            )
+            return
+        index = self.perform_dict["name"][self.perform_dict["name"] == name].index
+        self.perform_dict.drop(self.perform_dict.index[index], inplace=True)
+        return
 
     def get_best_model(self, index=0) -> str:
         """
@@ -154,14 +120,10 @@ class LeaderBoard:
         name: `str`
             The name/identifier of the required model.
         """
-        sorted_performance_df = self.__performance_data_frame.sort_values(
-            self.__major_field,
-            ascending=not (
-                dict(zip(self.__fields, self.__is_higher_better))[self.__major_field]
-                if self.__major_field in self.__fields else True
-            )
+        sorted_df = self.perform_dict.sort_values(
+            by=self.major_field, ascending=not self.is_higher_better[self.major_field]
         )
-        name_list = sorted_performance_df["name"].tolist()
+        name_list = sorted_df["name"].tolist()
         if "ensemble" in name_list:
             name_list.remove("ensemble")
         return name_list[index]
@@ -180,30 +142,23 @@ class LeaderBoard:
         -------
         None
         """
-        top_k: int = top_k if top_k > 0 else len(self.__performance_data_frame)
+        top_k: int = top_k if top_k > 0 else len(self.perform_dict)
         '''
         reindex self.__performance_data_frame
         to ensure the columns of name and representation are in left-side of the data frame
         '''
-        _columns = self.__performance_data_frame.columns.tolist()
+        _columns = self.perform_dict.columns.tolist()
         maxcolwidths: _typing.List[_typing.Optional[int]] = []
-        if "representation" in _columns:
-            _columns.remove("representation")
-            _columns.insert(0, "representation")
-            maxcolwidths.append(40)
         if "name" in _columns:
             _columns.remove("name")
             _columns.insert(0, "name")
             maxcolwidths.append(40)
-        self.__performance_data_frame = self.__performance_data_frame[_columns]
+        self.perform_dict = self.perform_dict[_columns]
 
-        sorted_performance_df: pd.DataFrame = self.__performance_data_frame.sort_values(
-            self.__major_field,
-            ascending=not (
-                dict(zip(self.__fields, self.__is_higher_better))[self.__major_field]
-                if self.__major_field in self.__fields else True
-            )
+        sorted_performance_df: pd.DataFrame = self.perform_dict.sort_values(
+            self.major_field,
+            ascending=not self.is_higher_better[self.major_field]
        )
         sorted_performance_df = sorted_performance_df.head(top_k)
diff --git a/configs/lp_benchmark.yml b/configs/lp_benchmark.yml
new file mode 100644
index 0000000..f1ca373
--- /dev/null
+++ b/configs/lp_benchmark.yml
@@ -0,0 +1,92 @@
+ensemble:
+  name: voting
+feature:
+- name: PYGNormalizeFeatures
+hpo:
+  max_evals: 10
+  name: random
+models:
+- hp_space:
+  - feasiblePoints: 2,3
+    parameterName: num_layers
+    type: DISCRETE
+  - cutFunc: lambda x:x[0] - 1
+    cutPara:
+    - num_layers
+    length: 2
+    maxValue:
+
- 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/lp_gat_benchmark.yml b/configs/lp_gat_benchmark.yml new file mode 100644 index 0000000..abf8e28 --- /dev/null +++ b/configs/lp_gat_benchmark.yml @@ -0,0 +1,61 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- name: gat + hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/lp_gcn_benchmark.yml b/configs/lp_gcn_benchmark.yml new file mode 100644 index 0000000..8fb9d8a --- /dev/null +++ b/configs/lp_gcn_benchmark.yml @@ -0,0 +1,61 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- hp_space: + - feasiblePoints: 2,3 + parameterName: num_layers + type: DISCRETE + - cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + numericalType: INTEGER + parameterName: hidden + scalingType: LOG + type: NUMERICAL_LIST + - maxValue: 0.2 + minValue: 0.0 + parameterName: dropout + scalingType: LINEAR + type: DOUBLE + - feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + parameterName: act + type: CATEGORICAL + name: gcn +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/lp_sage_benchmark.yml b/configs/lp_sage_benchmark.yml new file mode 100644 index 0000000..de0b6ab --- /dev/null +++ b/configs/lp_sage_benchmark.yml @@ -0,0 +1,69 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 10 + name: random +models: +- name: sage + hp_space: + - parameterName: num_layers + 
type: DISCRETE + feasiblePoints: 2,3 + + - parameterName: hidden + type: NUMERICAL_LIST + scalingType: LOG + numericalType: INTEGER + cutFunc: lambda x:x[0] - 1 + cutPara: + - num_layers + length: 2 + maxValue: + - 256 + - 256 + minValue: + - 64 + - 64 + + - parameterName: dropout + type: DOUBLE + scalingType: LINEAR + maxValue: 0.2 + minValue: 0.0 + + - parameterName: act + type: CATEGORICAL + feasiblePoints: + - leaky_relu + - relu + - elu + - tanh + + - parameterName: agg + type: CATEGORICAL + feasiblePoints: ["mean", "add", "max"] + +trainer: + hp_space: + - maxValue: 150 + minValue: 50 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 40 + minValue: 25 + parameterName: early_stopping_round + scalingType: LINEAR + type: INTEGER + - maxValue: 0.05 + minValue: 0.005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 1.0E-7 + minValue: 1.0E-10 + parameterName: weight_decay + scalingType: LOG + type: DOUBLE diff --git a/configs/nodeclf_gat_benchmark_large.yml b/configs/nodeclf_gat_benchmark_large.yml index 1b5933f..fe47281 100644 --- a/configs/nodeclf_gat_benchmark_large.yml +++ b/configs/nodeclf_gat_benchmark_large.yml @@ -39,7 +39,7 @@ models: - tanh parameterName: act type: CATEGORICAL - name: gcn + name: gat trainer: hp_space: - maxValue: 400 diff --git a/configs/nodeclf_ladies_gcn.yml b/configs/nodeclf_ladies_gcn.yml deleted file mode 100644 index c61ce3b..0000000 --- a/configs/nodeclf_ladies_gcn.yml +++ /dev/null @@ -1,84 +0,0 @@ -ensemble: - name: null -feature: -- name: PYGNormalizeFeatures -hpo: - max_evals: 10 - name: random -models: -- hp_space: - - parameterName: num_layers - type: FIXED - value: 5 - - parameterName: hidden - type: CATEGORICAL_LIST - cutFunc: lambda x:x[0] - 1 - cutPara: - - num_layers - length: 4 - feasiblePoints: - - 128 - - 256 - - 512 - - maxValue: 0.8 - minValue: 0.2 - parameterName: dropout - scalingType: LINEAR - type: DOUBLE - - feasiblePoints: - - leaky_relu - - relu - - elu - - tanh - parameterName: act - type: CATEGORICAL - - parameterName: add_self_loops - type: FIXED - value: 0 - - parameterName: normalize - type: FIXED - value: 0 - name: gcn -trainer: - name: NodeClassificationLayerDependentImportanceSamplingTrainer - hp_space: - - parameterName: sampled_node_sizes - type: CATEGORICAL_LIST - length: 5 - feasiblePoints: - - 128 - - 256 - - 512 - - 1024 - cutFunc: lambda x:x[0] - cutPara: - - num_layers - - maxValue: 128 - minValue: 64 - parameterName: max_epoch - scalingType: LINEAR - type: INTEGER - - maxValue: 16 - minValue: 8 - parameterName: early_stopping_round - scalingType: LINEAR - type: INTEGER - - parameterName: training_batch_size - type: FIXED - value: 1024 - - parameterName: predicting_batch_size - type: FIXED - value: 1024 - - parameterName: predicting_sampler_num_workers - type: FIXED - value: 0 - - maxValue: 0.05 - minValue: 0.01 - parameterName: lr - scalingType: LOG - type: DOUBLE - - maxValue: 0.0005 - minValue: 0.0001 - parameterName: weight_decay - scalingType: LOG - type: DOUBLE diff --git a/configs/nodeclf_sage_benchmark_large.yml b/configs/nodeclf_sage_benchmark_large.yml index 2cdf556..eb43eea 100644 --- a/configs/nodeclf_sage_benchmark_large.yml +++ b/configs/nodeclf_sage_benchmark_large.yml @@ -31,7 +31,7 @@ models: type: DOUBLE - feasiblePoints: - mean - parameterName: aggr + parameterName: agg type: CATEGORICAL - feasiblePoints: - leaky_relu diff --git a/configs/nodeclf_sage_benchmark_small.yml b/configs/nodeclf_sage_benchmark_small.yml index 9bd7aaa..7bc18fb 100644 --- 
a/configs/nodeclf_sage_benchmark_small.yml +++ b/configs/nodeclf_sage_benchmark_small.yml @@ -33,7 +33,7 @@ models: - mean - add - max - parameterName: aggr + parameterName: agg type: CATEGORICAL - feasiblePoints: - leaky_relu From ff02f3a951d55e796e385682eb94c50212f885c2 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Wed, 23 Jun 2021 08:41:30 +0000 Subject: [PATCH 104/144] complete solver, refine operations --- autogl/module/nas/space/graph_nas.py | 101 ++++++++++++----------- autogl/module/nas/space/operation.py | 110 +++++++++++-------------- autogl/module/nas/space/single_path.py | 6 +- autogl/solver/base.py | 1 + 4 files changed, 108 insertions(+), 110 deletions(-) diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 92c48b7..7583bfd 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -8,12 +8,30 @@ from nni.nas.pytorch import mutables from . import register_nas_space from .base import BaseSpace from ...model import BaseModel -from ....utils import get_logger -from ...model import AutoGCN from torch import nn -from .operation import gnn_list, act_list, act_map, gnn_map -from torch.autograd import Function +from .operation import act_map, gnn_map + +GRAPHNAS_DEFAULT_GNN_OPS = [ + "gat_8", # GAT with 8 heads + "gat_6", # GAT with 6 heads + "gat_4", # GAT with 4 heads + "gat_2", # GAT with 2 heads + "gat_1", # GAT with 1 heads + "gcn", # GCN + "cheb", # chebnet + "sage", # sage + "arma", + "sg", # simplifying gcn + "linear", # skip connection + "zero", # skip connection +] + +GRAPHNAS_DEFAULT_ACT_OPS = [ + # "sigmoid", "tanh", "relu", "linear", + # "softplus", "leaky_relu", "relu6", "elu" + "sigmoid", "tanh", "relu", "linear", "elu" +] class LambdaModule(nn.Module): def __init__(self, lambd): @@ -43,15 +61,6 @@ def act_map_nn(act): def map_nn(l): return [StrModule(x) for x in l] -class ZeroConvFunc(Function): - @staticmethod - def forward(ctx,x): - return x - - @staticmethod - def backward(ctx, grad_output): - return 0 - @register_nas_space("graphnas") class GraphNasNodeClassificationSpace(BaseSpace): def __init__( @@ -61,43 +70,44 @@ class GraphNasNodeClassificationSpace(BaseSpace): dropout: _typ.Optional[float] = 0.9, input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, - ops: _typ.Tuple = None, - search_act_con=False + gnn_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = GRAPHNAS_DEFAULT_GNN_OPS, + act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = GRAPHNAS_DEFAULT_ACT_OPS ): super().__init__() self.layer_number = layer_number self.hidden_dim = hidden_dim self.input_dim = input_dim self.output_dim = output_dim - self.ops = ops + self.gnn_ops = gnn_ops + self.act_ops = act_ops self.dropout = dropout - self.search_act_con=search_act_con - + def _instantiate( self, hidden_dim: _typ.Optional[int] = None, layer_number: _typ.Optional[int] = None, + dropout: _typ.Optional[float] = None, input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, - ops: _typ.Tuple = None, - dropout = None + gnn_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None, + act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None ): + self.dropout = dropout or self.dropout self.hidden_dim = hidden_dim or self.hidden_dim self.layer_number = layer_number or self.layer_number self.input_dim = input_dim or self.input_dim self.output_dim = output_dim or self.output_dim - self.ops = ops or self.ops - self.dropout = dropout or self.dropout + self.gnn_ops = gnn_ops or self.gnn_ops + self.act_ops = 
act_ops or self.act_ops self.preproc0 = nn.Linear(self.input_dim, self.hidden_dim) self.preproc1 = nn.Linear(self.input_dim, self.hidden_dim) node_labels = [mutables.InputChoice.NO_KEY, mutables.InputChoice.NO_KEY] for layer in range(2,self.layer_number+2): node_labels.append(f"op_{layer}") setattr(self,f"in_{layer}",self.setInputChoice(layer,choose_from=node_labels[:-1], n_chosen=1, return_mask=False,key=f"in_{layer}")) - setattr(self,f"op_{layer}",self.setLayerChoice(layer,[gnn_map(op,self.hidden_dim,self.hidden_dim)for op in gnn_list],key=f"op_{layer}")) - if self.search_act_con: - setattr(self,f"act",self.setLayerChoice(2*layer,[act_map_nn(a)for a in act_list],key=f"act")) - setattr(self,f"concat",self.setLayerChoice(2*layer+1,map_nn(["add", "product", "concat"]) ,key=f"concat")) + setattr(self,f"op_{layer}",self.setLayerChoice(layer,[gnn_map(op,self.hidden_dim,self.hidden_dim)for op in self.gnn_ops],key=f"op_{layer}")) + setattr(self,"act",self.setLayerChoice(2*layer,[act_map_nn(a)for a in self.act_ops],key="act")) + setattr(self,"concat",self.setLayerChoice(2*layer+1,map_nn(["add", "product", "concat"]) ,key="concat")) self._initialized = True self.classifier1 = nn.Linear(self.hidden_dim*self.layer_number, self.output_dim) self.classifier2 = nn.Linear(self.hidden_dim, self.output_dim) @@ -111,29 +121,24 @@ class GraphNasNodeClassificationSpace(BaseSpace): node_in = getattr(self, f"in_{layer}")(prev_nodes_out) node_out= getattr(self, f"op_{layer}")(node_in,edges) prev_nodes_out.append(node_out) - if not self.search_act_con: - x = torch.cat(prev_nodes_out[2:],dim=1) - x = F.leaky_relu(x) - x = self.classifier1(x) + act=getattr(self, "act") + con=getattr(self, "concat")() + states=prev_nodes_out + if con == "concat": + x=torch.cat(states[2:], dim=1) + else: + tmp = states[2] + for i in range(2,len(states)): + if con == "add": + tmp = torch.add(tmp, states[i]) + elif con == "product": + tmp = torch.mul(tmp, states[i]) + x=tmp + x = act(x) + if con=='concat': + x=self.classifier1(x) else: - act=getattr(self, f"act") - con=getattr(self, f"concat")() - states=prev_nodes_out - if con == "concat": - x=torch.cat(states[2:], dim=1) - else: - tmp = states[2] - for i in range(2,len(states)): - if con == "add": - tmp = torch.add(tmp, states[i]) - elif con == "product": - tmp = torch.mul(tmp, states[i]) - x=tmp - x = act(x) - if con=='concat': - x=self.classifier1(x) - else: - x=self.classifier2(x) + x=self.classifier2(x) return F.log_softmax(x, dim=1) def parse_model(self, selection, device) -> BaseModel: diff --git a/autogl/module/nas/space/operation.py b/autogl/module/nas/space/operation.py index 493cb83..e642928 100644 --- a/autogl/module/nas/space/operation.py +++ b/autogl/module/nas/space/operation.py @@ -1,28 +1,49 @@ # codes in this file are reproduced from https://github.com/GraphNAS/GraphNAS with some changes. 
-from torch_geometric.nn.conv import * + +from torch_geometric.nn import ( + GATConv, + GCNConv, + ChebConv, + SAGEConv, + GatedGraphConv, + ARMAConv, + SGConv, +) +import torch_geometric.nn import torch from torch import nn import torch.nn.functional as F -gnn_list = [ - "gat_8", # GAT with 8 heads - "gat_6", # GAT with 6 heads - "gat_4", # GAT with 4 heads - "gat_2", # GAT with 2 heads - "gat_1", # GAT with 1 heads - "gcn", # GCN - "cheb", # chebnet - "sage", # sage - "arma", - "sg", # simplifying gcn - "linear", # skip connection - "zero", # skip connection -] -act_list = [ - # "sigmoid", "tanh", "relu", "linear", - # "softplus", "leaky_relu", "relu6", "elu" - "sigmoid", "tanh", "relu", "linear", "elu" -] +class LinearConv(nn.Module): + def __init__(self, in_channels, out_channels, bias=True): + super(LinearConv, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.linear = torch.nn.Linear(in_channels, out_channels, bias) + + def forward(self, x, edge_index, edge_weight=None): + return self.linear(x) + + def __repr__(self): + return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, + self.out_channels) + +class ZeroConv(nn.Module): + + def forward(self, x, edge_index, edge_weight=None): + out = torch.zeros_like(x) + out.requires_grad = True + return out + + def __repr__(self): + return 'ZeroConv()' + +class Identity(nn.Module): + def forward(self, x, edge_index, edge_weight=None): + return x + def __repr__(self): + return 'Identity()' def act_map(act): if act == "linear": @@ -78,44 +99,13 @@ def gnn_map(gnn_name, in_dim, out_dim, concat=False, bias=True) -> nn.Module: elif gnn_name == "linear": return LinearConv(in_dim, out_dim, bias=bias) elif gnn_name == "zero": - # return ZeroConv(in_dim, out_dim, bias=bias) + return ZeroConv() + elif gnn_name == 'identity': return Identity() - -class Identity(nn.Module): - def forward(self, x, edge_index, edge_weight=None): - return x - -class LinearConv(nn.Module): - def __init__(self, - in_channels, - out_channels, - bias=True): - super(LinearConv, self).__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.linear = torch.nn.Linear(in_channels, out_channels, bias) - - def forward(self, x, edge_index, edge_weight=None): - return self.linear(x) - - def __repr__(self): - return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, - self.out_channels) - -class ZeroConv(nn.Module): - def __init__(self, - in_channels, - out_channels, - bias=True): - super(ZeroConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.out_dim = out_channels - - def forward(self, x, edge_index, edge_weight=None): - return ZeroConvFunc.apply(torch.zeros([x.size(0), self.out_dim]).to(x.device)) - - def __repr__(self): - return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, - self.out_channels) \ No newline at end of file + elif hasattr(torch_geometric.nn, gnn_name): + cls = getattr(torch_geometric.nn, gnn_name) + assert isinstance(cls, type), "Only support modules, get %s" % (gnn_name) + kwargs = {'in_channels': in_dim, 'out_channels': out_dim, 'concat': concat, 'bias': bias} + kwargs = {key: kwargs[key] for key in cls.__init__.__code__.co_varnames if key in kwargs} + return cls(**kwargs) + raise KeyError("Cannot parse key %s" % (gnn_name)) diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py index 3bbd983..00b3a0f 100644 --- a/autogl/module/nas/space/single_path.py +++ 
b/autogl/module/nas/space/single_path.py @@ -1,3 +1,4 @@ +from autogl.module.nas.space.operation import gnn_map import typing as _typ import torch @@ -20,7 +21,7 @@ class SinglePathNodeClassificationSpace(BaseSpace): dropout: _typ.Optional[float] = 0.2, input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, - ops: _typ.Tuple = None, + ops: _typ.Tuple = ['GCNConv', 'GATConv'], ): super().__init__() self.layer_number = layer_number @@ -57,7 +58,8 @@ class SinglePathNodeClassificationSpace(BaseSpace): self.output_dim if layer == self.layer_number - 1 else self.hidden_dim, - ) + ) if isinstance(op, type) else gnn_map(op, self.input_dim if layer == 0 else self.hidden_dim, + self.output_dim if layer == self.layer_number - 1 else self.hidden_dim) for op in self.ops ], ), diff --git a/autogl/solver/base.py b/autogl/solver/base.py index 056de2d..4bc92c9 100644 --- a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -296,6 +296,7 @@ class BaseSolver: A reference of current solver. """ if nas_algorithms is None and nas_estimators is None and nas_spaces is None: + self.nas_algorithms = self.nas_estimators = self.nas_spaces = None return assert None not in [nas_algorithms, nas_estimators, nas_spaces], "The algorithms, estimators and spaces should all be set" From 9ed6ec594d8f7d729afe96ddb7965c6827bd6a25 Mon Sep 17 00:00:00 2001 From: lihy96 Date: Wed, 23 Jun 2021 16:59:47 +0800 Subject: [PATCH 105/144] doc --- docs/docfile/tutorial/t_model.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/docfile/tutorial/t_model.rst b/docs/docfile/tutorial/t_model.rst index dad0fb9..41f6b8e 100644 --- a/docs/docfile/tutorial/t_model.rst +++ b/docs/docfile/tutorial/t_model.rst @@ -18,6 +18,8 @@ Define your own model and automodel If you want to add your own model and automodel for some task, the only thing you should do is add a new model where the forward function should be fulfilled and a new automodel inherited from the basemodel. +For new models used in link prediction tasks, you should fulfill the lp_encode and lp_decode function. The difference between lp_encode and forward function is that there is not classification layer in lp_encode. + Firstly, you should define your model if it does not belong to the models above. Secondly, you should define your corresponding automodel. 
From 37a9f44d43b823514f9f9a5ccb0c8c9fff344346 Mon Sep 17 00:00:00 2001
From: generall
Date: Wed, 23 Jun 2021 17:35:41 +0800
Subject: [PATCH 106/144] fix some bugs

---
 autogl/module/nas/algorithm/darts.py         |  2 +-
 autogl/module/nas/estimator/one_shot.py      |  1 -
 autogl/module/nas/estimator/train_scratch.py |  3 ++
 autogl/module/nas/space/base.py              | 40 +++++++++++++++++++-
 autogl/module/nas/space/graph_nas.py         |  3 +-
 autogl/module/nas/space/graph_nas_macro.py   |  3 +-
 autogl/module/nas/space/single_path.py       |  3 +-
 7 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/autogl/module/nas/algorithm/darts.py b/autogl/module/nas/algorithm/darts.py
index 3894b3d..924ab7f 100644
--- a/autogl/module/nas/algorithm/darts.py
+++ b/autogl/module/nas/algorithm/darts.py
@@ -121,5 +121,5 @@ class Darts(BaseNAS):
         result = dict()
         for name, module in nas_modules:
             if name not in result:
-                result[name] = module.parse_model()
+                result[name] = module.export()
         return result
diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py
index 4df9d50..d08171c 100644
--- a/autogl/module/nas/estimator/one_shot.py
+++ b/autogl/module/nas/estimator/one_shot.py
@@ -4,7 +4,6 @@ import torch.nn.functional as F
 from . import register_nas_estimator
 from ..space import BaseSpace
 from .base import BaseEstimator
-import torch
 
 @register_nas_estimator("oneshot")
 class OneShotEstimator(BaseEstimator):
diff --git a/autogl/module/nas/estimator/train_scratch.py b/autogl/module/nas/estimator/train_scratch.py
index be61cea..160f9bd 100644
--- a/autogl/module/nas/estimator/train_scratch.py
+++ b/autogl/module/nas/estimator/train_scratch.py
@@ -11,6 +11,9 @@ from autogl.module.train import NodeClassificationFullTrainer
 
 @register_nas_estimator("scratch")
 class TrainEstimator(BaseEstimator):
+    """
+    An estimator which trains the sampled architecture from scratch
+    """
     def __init__(self):
         self.estimator=OneShotEstimator()
diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py
index d632796..9218ece 100644
--- a/autogl/module/nas/space/base.py
+++ b/autogl/module/nas/space/base.py
@@ -12,6 +12,14 @@ from ....utils import get_logger
 from ...model import AutoGCN
 
 class OrderedMutable():
+    """
+    An abstract class with an order attribute, enabling mutables to be sorted by a certain rank.
+
+    Parameters
+    ----------
+    order : int
+        The order of the mutable
+    """
     def __init__(self, order):
         self.order = order
 
@@ -28,6 +36,16 @@ class OrderedInputChoice(OrderedMutable, mutables.InputChoice):
             reduction, return_mask, key)
 
 class BoxModel(BaseModel):
+    """
+    The box wrapping a space so that it can be passed to later procedures or trainers
+
+    Parameters
+    ----------
+    space_model : BaseSpace
+        The space which should be wrapped
+    device : str or torch.device
+        The device to place the model
+    """
     _logger = get_logger("space model")
 
     def __init__(self, space_model, device=torch.device("cuda")):
@@ -42,6 +60,14 @@ class BoxModel(BaseModel):
         self.device = device
 
     def fix(self, selection):
+        """
+        Fix ``self._model`` with a selection
+
+        Parameters
+        ----------
+        selection : dict
+            A selection indicating the choices of mutables
+        """
         self.selection = selection
         self._model.instantiate()
         apply_fixed_architecture(self._model, selection, verbose=False)
@@ -120,9 +146,11 @@ class BaseSpace(nn.Module):
         """
         raise NotImplementedError()
 
-    def instantiate(self, *args, **kwargs):
+    def instantiate(self):
+        """
+        Instantiate the space and reset the default key for the mutables here.
+        """
         self._default_key = 0
-        self._instantiate(*args, **kwargs)
 
         if not self._initialized:
             self._initialized = True
@@ -161,6 +189,14 @@ class BaseSpace(nn.Module):
         return BoxModel(self, device)
 
 class FixedInputChoice(nn.Module):
+    """
+    Used to replace the `InputChoice` mutable in the fix process
+
+    Parameters
+    ----------
+    mask : list
+        The mask indicating which input to choose
+    """
     def __init__(self, mask):
         self.mask_len = len(mask)
         for i in range(self.mask_len):
diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py
index 7583bfd..42ce952 100644
--- a/autogl/module/nas/space/graph_nas.py
+++ b/autogl/module/nas/space/graph_nas.py
@@ -82,7 +82,7 @@ class GraphNasNodeClassificationSpace(BaseSpace):
         self.act_ops = act_ops
         self.dropout = dropout
 
-    def _instantiate(
+    def instantiate(
         self,
         hidden_dim: _typ.Optional[int] = None,
         layer_number: _typ.Optional[int] = None,
@@ -92,6 +92,7 @@ class GraphNasNodeClassificationSpace(BaseSpace):
         gnn_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None,
         act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None
     ):
+        super().instantiate()
         self.dropout = dropout or self.dropout
         self.hidden_dim = hidden_dim or self.hidden_dim
         self.layer_number = layer_number or self.layer_number
diff --git a/autogl/module/nas/space/graph_nas_macro.py b/autogl/module/nas/space/graph_nas_macro.py
index cc91534..cf15849 100644
--- a/autogl/module/nas/space/graph_nas_macro.py
+++ b/autogl/module/nas/space/graph_nas_macro.py
@@ -409,7 +409,7 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace):
         self.dropout = dropout
         self.search_act_con=search_act_con
 
-    def _instantiate(
+    def instantiate(
         self,
         hidden_dim: _typ.Optional[int] = None,
         layer_number: _typ.Optional[int] = None,
@@ -418,6 +418,7 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace):
         ops: _typ.Tuple = None,
         dropout = None
     ):
+        super().instantiate()
         self.hidden_dim = hidden_dim or self.hidden_dim
         self.layer_number = layer_number or self.layer_number
         self.input_dim = input_dim or self.input_dim
diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py
index 00b3a0f..3475b42 100644
--- a/autogl/module/nas/space/single_path.py
+++ b/autogl/module/nas/space/single_path.py
@@ -31,7 +31,7 @@ class SinglePathNodeClassificationSpace(BaseSpace):
         self.ops = ops
         self.dropout = dropout
 
-    def _instantiate(
+    def instantiate(
         self,
         hidden_dim: _typ.Optional[int] = None,
         layer_number: _typ.Optional[int] = None,
@@ -40,6 +40,7 @@ class SinglePathNodeClassificationSpace(BaseSpace):
         ops: _typ.Tuple = None,
         dropout = None
     ):
+        super().instantiate()
         self.hidden_dim = hidden_dim or self.hidden_dim
         self.layer_number = layer_number or self.layer_number
         self.input_dim = input_dim or self.input_dim
From b27c5e4590e39bc12e0bcf2de06e0e1d7d58fbfd Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Thu, 24 Jun 2021 03:23:28 +0000
Subject: [PATCH 107/144] fix nas on clf, rename example

---
 autogl/solver/classifier/node_classifier.py |  4 +--
 configs/nodeclf_nas_benchmark.yml           | 40 +++++++++++++++++++
 examples/graphnas.py                        | 20 +++++++++++
 examples/test_nas.py                        | 40 ---------------------
 4 files changed, 62 insertions(+), 42 deletions(-)
 create mode 100644 configs/nodeclf_nas_benchmark.yml
 create mode 100644 examples/graphnas.py
 delete mode 100644 examples/test_nas.py

diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py
index 92474ae..8738fe9 100644
--- a/autogl/solver/classifier/node_classifier.py
+++ b/autogl/solver/classifier/node_classifier.py
@@ -826,9 +826,9 @@ class AutoNodeClassifier(BaseClassifier):
             configs = nas_dict[k]
             if isinstance(configs, list):
                 for item in configs:
-                    container.append(indexer[item.pop('name')](**item, init=False))
+                    container.append(indexer[item.pop('name')](**item))
             else:
-                container.append(indexer[configs.pop('name')](**configs, init=False))
+                container.append(indexer[configs.pop('name')](**configs))
 
         solver.set_nas_module(algorithms, spaces, estimators)
 
diff --git a/configs/nodeclf_nas_benchmark.yml b/configs/nodeclf_nas_benchmark.yml
new file mode 100644
index 0000000..bb757c8
--- /dev/null
+++ b/configs/nodeclf_nas_benchmark.yml
@@ -0,0 +1,40 @@
+ensemble:
+  name: null
+feature:
+- name: PYGNormalizeFeatures
+hpo:
+  max_evals: 10
+  name: random
+nas:
+  space:
+    name: graphnas
+    hidden_dim: 64
+    layer_number: 4
+  algorithm:
+    name: rl
+    num_epochs: 200
+  estimator:
+    name: scratch
+models: []
+trainer:
+  hp_space:
+  - maxValue: 300
+    minValue: 100
+    parameterName: max_epoch
+    scalingType: LINEAR
+    type: INTEGER
+  - maxValue: 30
+    minValue: 10
+    parameterName: early_stopping_round
+    scalingType: LINEAR
+    type: INTEGER
+  - maxValue: 0.05
+    minValue: 0.01
+    parameterName: lr
+    scalingType: LOG
+    type: DOUBLE
+  - maxValue: 0.0005
+    minValue: 5.0e-05
+    parameterName: weight_decay
+    scalingType: LOG
+    type: DOUBLE
diff --git a/examples/graphnas.py b/examples/graphnas.py
new file mode 100644
index 0000000..d0028be
--- /dev/null
+++ b/examples/graphnas.py
@@ -0,0 +1,20 @@
+import sys
+sys.path.append('../')
+from autogl.datasets import build_dataset_from_name
+from autogl.solver import AutoNodeClassifier
+from autogl.module.train import Acc
+import argparse
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config', type=str, default='../configs/nodeclf_nas_benchmark.yml')
+    parser.add_argument('--dataset', choices=['cora', 'citeseer', 'pubmed'], default='cora', type=str)
+
+    args = parser.parse_args()
+
+    dataset = build_dataset_from_name(args.dataset)
+    solver = AutoNodeClassifier.from_config(args.config)
+    solver.fit(dataset)
+    solver.get_leaderboard().show()
+    out = solver.predict_proba()
+    print('acc on %s' % args.dataset, Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy()))
diff --git a/examples/test_nas.py b/examples/test_nas.py
deleted file mode 100644
index c013ce4..0000000
---
a/examples/test_nas.py +++ /dev/null @@ -1,40 +0,0 @@ -import sys -sys.path.append('../') -from torch_geometric.nn import GCNConv -import torch -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoNodeClassifier -from autogl.module.train import NodeClassificationFullTrainer -from autogl.module.nas import Darts, OneShotEstimator, SinglePathNodeClassificationSpace -from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace -from autogl.module.train import Acc -from autogl.module.nas.algorithm.enas import Enas - -if __name__ == '__main__': - dataset = build_dataset_from_name('cora') - solver = AutoNodeClassifier( - feature_module='PYGNormalizeFeatures', - graph_models=[], - hpo_module=None, - ensemble_module=None, - default_trainer=NodeClassificationFullTrainer( - optimizer=torch.optim.Adam, - lr=0.01, - max_epoch=300, - early_stopping_round=300, - weight_decay=1e-4, - device="auto", - init=False, - feval=['acc'], - loss="nll_loss", - lr_scheduler_type=None,), - #nas_algorithms=[Enas()], - nas_algorithms=[Darts(num_epochs=200)], - #nas_spaces=[SinglePathNodeClassificationSpace(hidden_dim=32, ops=[GCNConv, GCNConv])], - nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=64,search_act_con=False,layer_number=4)], - nas_estimators=[OneShotEstimator()] - ) - solver.fit(dataset) - solver.get_leaderboard().show() - out = solver.predict_proba() - print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) From b90b8576da8e22867c5b7af619502ee9acbde203 Mon Sep 17 00:00:00 2001 From: lihy96 Date: Thu, 24 Jun 2021 13:51:47 +0800 Subject: [PATCH 108/144] fix typos --- docs/docfile/tutorial/t_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docfile/tutorial/t_model.rst b/docs/docfile/tutorial/t_model.rst index 41f6b8e..a188280 100644 --- a/docs/docfile/tutorial/t_model.rst +++ b/docs/docfile/tutorial/t_model.rst @@ -18,7 +18,7 @@ Define your own model and automodel If you want to add your own model and automodel for some task, the only thing you should do is add a new model where the forward function should be fulfilled and a new automodel inherited from the basemodel. -For new models used in link prediction tasks, you should fulfill the lp_encode and lp_decode function. The difference between lp_encode and forward function is that there is not classification layer in lp_encode. +For new models used in link prediction tasks, you should fulfill the lp_encode and lp_decode function. The difference between lp_encode and forward function is that there is no classification layer in lp_encode. Firstly, you should define your model if it does not belong to the models above. 
From 844ca6138e3ab64e9afbfdd3f6e0b55c025a9a02 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 24 Jun 2021 06:23:02 +0000 Subject: [PATCH 109/144] fix bugs in rl --- autogl/module/nas/algorithm/rl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py index ae8b5ff..364b594 100644 --- a/autogl/module/nas/algorithm/rl.py +++ b/autogl/module/nas/algorithm/rl.py @@ -312,7 +312,7 @@ class RL(BaseNAS): bar.set_postfix(reward_controller=l2) selection=self.export() - arch=space.export(selection,self.device) + arch=space.parse_model(selection,self.device) #print(selection,arch) return arch From e5af115712b09d054c9165faa22876fb4f4fe57d Mon Sep 17 00:00:00 2001 From: generall Date: Fri, 25 Jun 2021 10:46:12 +0800 Subject: [PATCH 110/144] add some messages --- autogl/module/nas/algorithm/enas.py | 2 +- autogl/module/nas/algorithm/random_search.py | 2 +- autogl/module/nas/algorithm/rl.py | 4 +- autogl/module/nas/estimator/base.py | 24 +- autogl/module/nas/estimator/one_shot.py | 8 +- autogl/module/nas/space/base.py | 31 ++ autogl/module/nas/space/graph_nas_macro.py | 16 +- docs/docfile/tutorial/t_nas.rst | 344 +++++++++++-------- 8 files changed, 269 insertions(+), 162 deletions(-) diff --git a/autogl/module/nas/algorithm/enas.py b/autogl/module/nas/algorithm/enas.py index 16ed39a..7e09dfb 100644 --- a/autogl/module/nas/algorithm/enas.py +++ b/autogl/module/nas/algorithm/enas.py @@ -176,4 +176,4 @@ class Enas(BaseNAS): def _infer(self,mask='train'): metric, loss = self.estimator.infer(self.model, self.dataset,mask=mask) - return metric, loss + return metric[0], loss diff --git a/autogl/module/nas/algorithm/random_search.py b/autogl/module/nas/algorithm/random_search.py index c307122..b3286ac 100644 --- a/autogl/module/nas/algorithm/random_search.py +++ b/autogl/module/nas/algorithm/random_search.py @@ -74,4 +74,4 @@ class RandomSearch(BaseNAS): def _infer(self,mask='train'): metric, loss = self.estimator.infer(self.arch._model, self.dataset, mask=mask) - return metric, loss + return metric[0], loss diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py index 364b594..a8fdbcf 100644 --- a/autogl/module/nas/algorithm/rl.py +++ b/autogl/module/nas/algorithm/rl.py @@ -361,7 +361,7 @@ class RL(BaseNAS): def _infer(self,mask='train'): metric, loss = self.estimator.infer(self.arch._model, self.dataset,mask=mask) - return metric, loss + return metric[0], loss @register_nas_algo("graphnas") class GraphNasRL(BaseNAS): @@ -527,4 +527,4 @@ class GraphNasRL(BaseNAS): def _infer(self,mask='train'): metric, loss = self.estimator.infer(self.arch._model, self.dataset,mask=mask) - return metric, loss \ No newline at end of file + return metric[0], loss \ No newline at end of file diff --git a/autogl/module/nas/estimator/base.py b/autogl/module/nas/estimator/base.py index 388a951..89d7dfe 100644 --- a/autogl/module/nas/estimator/base.py +++ b/autogl/module/nas/estimator/base.py @@ -5,13 +5,31 @@ Base estimator of NAS from abc import abstractmethod from ..space import BaseSpace from typing import Tuple +from ...train.evaluation import Evaluation, Acc +import torch.nn.functional as F import torch - class BaseEstimator: """ The estimator of NAS model. 
+ + Parameters + ---------- + loss_f: callable + Default loss function for evaluation + + evaluation: list of autogl.module.train.evaluation.Evaluation + Default evaluation metric """ + def __init__(self, loss_f = F.nll_loss, evaluation = [Acc()]): + self.loss_f = loss_f + self.evaluation = evaluation + + def setLossFunction(self, loss_f): + self.loss_f = loss_f + + def setEvaluation(self, evaluation): + self.evaluation = evaluation @abstractmethod def infer( @@ -34,8 +52,8 @@ class BaseEstimator: Return ------ - metric: torch.Tensor - the metric on given datasets. + metrics: list of float + the metrics on given datasets. loss: torch.Tensor the loss on given datasets. Note that loss should be differentiable. """ diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py index d08171c..c6ed2df 100644 --- a/autogl/module/nas/estimator/one_shot.py +++ b/autogl/module/nas/estimator/one_shot.py @@ -18,6 +18,8 @@ class OneShotEstimator(BaseEstimator): dset = dataset[0].to(device) pred = model(dset)[getattr(dset, f"{mask}_mask")] y = dset.y[getattr(dset, f'{mask}_mask')] - loss = F.nll_loss(pred, y) - acc=sum(pred.max(1)[1]==y).item()/y.size(0) - return acc, loss + loss = self.loss_f(pred, y) + #acc=sum(pred.max(1)[1]==y).item()/y.size(0) + probs = F.softmax(pred, dim = 1) + metrics = [eva.evaluate(probs, y) for eva in self.evaluation] + return metrics, loss diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py index 9218ece..a4c247c 100644 --- a/autogl/module/nas/space/base.py +++ b/autogl/module/nas/space/base.py @@ -35,6 +35,37 @@ class OrderedInputChoice(OrderedMutable, mutables.InputChoice): mutables.InputChoice.__init__(self, n_candidates, choose_from, n_chosen, reduction, return_mask, key) +class StrModule(nn.Module): + """ + A shell used to wrap choices as nn.Module for non-one-shot space definition + You can use ``map_nn`` function + + Parameters + ---------- + name : anything + the name of module, can be any type + """ + def __init__(self, name): + super().__init__() + self.str = name + + def forward(self, *args,**kwargs): + return self.str + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__,self.str) + +def map_nn(names): + """ + A function used to wrap choices as nn.Module for non-one-shot space definition + + Parameters + ---------- + name : list of anything + the names of module, can be any type + """ + return [StrModule(x) for x in names] + class BoxModel(BaseModel): """ The box wrapping a space, can be passed to later procedure or trainer diff --git a/autogl/module/nas/space/graph_nas_macro.py b/autogl/module/nas/space/graph_nas_macro.py index cf15849..ddca04a 100644 --- a/autogl/module/nas/space/graph_nas_macro.py +++ b/autogl/module/nas/space/graph_nas_macro.py @@ -4,7 +4,7 @@ import torch.nn as nn import torch.nn.functional as F from . 
import register_nas_space
-from .base import BaseSpace
+from .base import BaseSpace, map_nn
 from ...model import BaseModel
 from .operation import act_map
 
@@ -374,20 +374,6 @@ class GeoLayer(MessagePassing):
         if agg_key in params and hasattr(self, "pool_layer"):
             self.pool_layer.load_state_dict(params[agg_key])
 
-class StrModule(nn.Module):
-    def __init__(self, lambd):
-        super().__init__()
-        self.str = lambd
-
-    def forward(self, *args,**kwargs):
-        return self.str
-
-    def __repr__(self):
-        return '{}({})'.format(self.__class__.__name__,self.str)
-
-def map_nn(l):
-    return [StrModule(x) for x in l]
-
 @register_nas_space("graphnasmacro")
 class GraphNasMacroNodeClassificationSpace(BaseSpace):
     def __init__(
diff --git a/docs/docfile/tutorial/t_nas.rst b/docs/docfile/tutorial/t_nas.rst
index c31b8f9..5922e5f 100644
--- a/docs/docfile/tutorial/t_nas.rst
+++ b/docs/docfile/tutorial/t_nas.rst
@@ -11,147 +11,217 @@ If you want to design your own NAS process, you can change any of those parts ac
 
 Search Space
 ------------
-The space definition is base on mutable fashion used in NNI.
-There are mainly two ways ti define your search space, one can be performed with one-shot fashion while the other cannot.
-If you need one-shot fashion, you should use
+The space definition is based on the mutable fashion used in NNI; a search space is defined as a model inheriting ``BaseSpace``.
+There are mainly two ways to define your search space: one can be searched in the one-shot fashion while the other cannot.
+If you need the one-shot fashion, you should use the functions ``setLayerChoice`` and ``setInputChoice`` to construct the super network.
+Here is an example.
 
 .. code-block:: python
 
+    # For example, create an NAS search space by yourself
+    from autogl.module.nas.space.base import BaseSpace
+    from autogl.module.nas.space.operation import gnn_map
+    class YourOneShotSpace(BaseSpace):
+        # Get essential parameters at initialization
+        def __init__(self, input_dim = None, output_dim = None):
+            super().__init__()
+            # must contain input_dim and output_dim in the space, or you can initialize these two parameters in function `instantiate`
+            self.input_dim = input_dim
+            self.output_dim = output_dim
+
+        # Instantiate the super network
+        def instantiate(self, input_dim, output_dim):
+            # must call super in this function
+            super().instantiate()
+            self.input_dim = input_dim or self.input_dim
+            self.output_dim = output_dim or self.output_dim
+            # define two layers with order 0 and 1
+            self.layer0 = self.setLayerChoice(0, [gnn_map(op, self.input_dim, self.output_dim) for op in ['gcn', 'gat']])
+            self.layer1 = self.setLayerChoice(1, [gnn_map(op, self.input_dim, self.output_dim) for op in ['gcn', 'gat']])
+            # define an input choice to choose from the results of the two layers
+            self.input_layer = self.setInputChoice(order = 2, n_candidates = 2)
+
+        # Define the forward process
+        def forward(self, data):
+            x, edges = data.x, data.edge_index
+            x_0 = self.layer0(x, edges)
+            x_1 = self.layer1(x, edges)
+            y = self.input_layer([x_0, x_1])
+            return y
+
+        # For the one-shot fashion, you can directly use the following scheme in ``parse_model``
+        def parse_model(self, selection, device) -> BaseModel:
+            return self.wrap(device).fix(selection)
+
+Also, you can define the space in a way that does not support the one-shot fashion.
+In this way, you can directly reuse your model with few changes.
+But then you can only use sample-based search strategies.
 
 .. code-block:: python
 
-    # numerical search space:
-    {
-        "parameterName": "xxx",
-        "type": "DOUBLE" / "INTEGER",
-        "minValue": xx,
-        "maxValue": xx,
-        "scalingType": "LINEAR" / "LOG"
-    }
-
-    # numerical list search space:
-    {
-        "parameterName": "xxx",
-        "type": "NUMERICAL_LIST",
-        "numericalType": "DOUBLE" / "INTEGER",
-        "length": 3,
-        "cutPara": ("para_a", "para_b"),
-        "cutFunc": lambda x: x[0] - 1,
-        "minValue": [xx,xx,xx],
-        "maxValue": [xx,xx,xx],
-        "scalingType": "LINEAR" / "LOG"
-    }
-
-    # categorical search space:
-    {
-        "parameterName": xxx,
-        "type": "CATEGORICAL"
-        "feasiblePoints": [a,b,c]
-    }
-
-    # fixed parameter as search space:
-    {
-        "parameterName": xxx,
-        "type": "FIXED",
-        "value": xxx
-    }
-
-How given HPO algorithms support search space is listed as follows:
-
-+------------+------------+--------------+-----------+------------+
-| Algorithm  | numerical  |numerical list|categorical| fixed      |
-+============+============+==============+===========+============+
-| Grid       |            |              | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-| Random     | ✓          | ✓            | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-| Anneal     | ✓          | ✓            | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-| Bayes      | ✓          | ✓            | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-| TPE        | ✓          | ✓            | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-| CMAES      | ✓          | ✓            | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-| MOCMAES    | ✓          | ✓            | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-|Quasi random| ✓          | ✓            | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-| AutoNE     | ✓          | ✓            | ✓         | ✓          |
-+------------+------------+--------------+-----------+------------+
-
-Here, TPE is from [1], CMAES is from [2], MOCMAES is from [3], quasi random is from [4], AutoNE is from [5].
-
-[1] Bergstra, James S., et al. "Algorithms for hyper-parameter optimization." Advances in neural information processing systems. 2011.
-[2] Arnold, Dirk V., and Nikolaus Hansen. "Active covariance matrix adaptation for the (1+ 1)-CMA-ES." Proceedings of the 12th annual conference on Genetic and evolutionary computation. 2010.
-[3] Voß, Thomas, Nikolaus Hansen, and Christian Igel. "Improved step size adaptation for the MO-CMA-ES." Proceedings of the 12th annual conference on Genetic and evolutionary computation. 2010.
-[4] Bratley, Paul, Bennett L. Fox, and Harald Niederreiter. "Programs to generate Niederreiter's low-discrepancy sequences." ACM Transactions on Mathematical Software (TOMS) 20.4 (1994): 494-495.
-[5] Tu, Ke, et al. "Autone: Hyperparameter optimization for massive network embedding." Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 2019.
- -Add Your HPOptimizer --------------------- - -If you want to add your own HPOptimizer, the only thing you should do is finishing ``optimize`` function in you HPOptimizer: + # For example, create an NAS search space by yourself + from autogl.module.nas.space.base import BaseSpace + from autogl.module.nas.space.operation import gnn_map + class YourOneShotSpace(BaseSpace): + # Get essential parameters at initialization + def __init__(self, input_dim = None, output_dim = None): + super().__init__() + # must contain input_dim and output_dim in space, or you can initialize these two parameters in function `instantiate` + self.input_dim = input_dim + self.output_dim = output_dim + + # Instantiate the super network + def instantiate(self, input_dim, output_dim): + # must call super in this function + super().instantiate() + self.input_dim = input_dim or self.input_dim + self.output_dim = output_dim or self.output_dim + # define two layers with order 0 and 1 + self.layer0 = self.setLayerChoice(order = 0, [gnn_map(op,self.input_dim,self.output_dim)for op in ['gcn', 'gat']]) + self.layer1 = self.setLayerChoice(order = 1, [gnn_map(op,self.input_dim,self.output_dim)for op in ['gcn', 'gat']]) + # define an input choice two choose from the result of the two layer + self.input_layer = self.setInputChoice(order = 2, n_candidates = 2) + + # Define the forward process + def forward(self, data): + x, edges = data.x, data.edge_index + x_0 = self.layer0(x, edges) + x_1 = self.layer1(x, edges) + y = self.input_layer([x_0, x_1]) + return y + + # For one-shot fashion, you can directly use following scheme in ``parse_model`` + def parse_model(self, selection, device) -> BaseModel: + return self.wrap(device).fix(selection) + +Also, you can use the way which does not support one shot fashion. +In this way, you can directly copy you model with few changes. +But you can only use sample-based search strategy. .. code-block:: python - # For example, create a random HPO by yourself - import random - from autogl.module.hpo.base import BaseHPOptimizer - class RandomOptimizer(BaseHPOptimizer): + # For example, create an NAS search space by yourself + from autogl.module.nas.space.base import BaseSpace, map_nn + from autogl.module.nas.space.operation import gnn_map + from torch_geometric.nn import GATConv, GATv2Conv, SuperGATConv + class YourNonOneShotSpace(BaseSpace): # Get essential parameters at initialization - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.max_evals = kwargs.get("max_evals", 2) - - # The most important thing you should do is completing optimization function - def optimize(self, trainer, dataset, time_limit=None, memory_limit=None): - # 1. Get the search space from trainer. - space = trainer.hyper_parameter_space + trainer.model.hyper_parameter_space - # optional: use self._encode_para (in BaseOptimizer) to pretreat the space - # If you use _encode_para, the NUMERICAL_LIST will be spread to DOUBLE or INTEGER, LOG scaling type will be changed to LINEAR, feasible points in CATEGORICAL will be changed to discrete numbers. - # You should also use _decode_para to transform the types of parameters back. - current_space = self._encode_para(space) - - # 2. Define your function to get the performance. 
- def fn(dset, para): - current_trainer = trainer.duplicate_from_hyper_parameter(para) - current_trainer.train(dset) - loss, self.is_higher_better = current_trainer.get_valid_score(dset) - # For convenience, we change the score which is higher better to negative, then we should only minimize the score. - if self.is_higher_better: - loss = -loss - return current_trainer, loss - - # 3. Define the how to get HP suggestions, it should return a parameter dict. You can use history trials to give new suggestions - def get_random(history_trials): - hps = {} - for para in current_space: - # Because we use _encode_para function before, we should only deal with DOUBLE, INTEGER and DISCRETE - if para["type"] == "DOUBLE" or para["type"] == "INTEGER": - hp = random.random() * (para["maxValue"] - para["minValue"]) + para["minValue"] - if para["type"] == "INTEGER": - hp = round(hp) - hps[para["parameterName"]] = hp - elif para["type"] == "DISCRETE": - feasible_points = para["feasiblePoints"].split(",") - hps[para["parameterName"]] = random.choice(feasible_points) - return hps - - # 4. Run your algorithm. For each turn, get a set of parameters according to history information and evaluate it. - best_trainer, best_para, best_perf = None, None, None - self.trials = [] - for i in range(self.max_evals): - # in this example, we don't need history trails. Since we pass None to history_trails - new_hp = get_random(None) - # optional: if you use _encode_para, use _decode_para as well. para_for_trainer undos all transformation in _encode_para, and turns double parameter to interger if needed. para_for_hpo only turns double parameter to interger. - para_for_trainer, para_for_hpo = self._decode_para(new_hp) - current_trainer, perf = fn(dataset, para_for_trainer) - self.trials.append((para_for_hpo, perf)) - if not best_perf or perf < best_perf: - best_perf = perf - best_trainer = current_trainer - best_para = para_for_trainer - - # 5. Return the best trainer and parameter. - return best_trainer, best_para \ No newline at end of file + def __init__(self, input_dim = None, output_dim = None): + super().__init__() + # must contain input_dim and output_dim in space, or you can initialize these two parameters in function `instantiate` + self.input_dim = input_dim + self.output_dim = output_dim + + # Instantiate the super network + def instantiate(self, input_dim, output_dim): + # must call super in this function + super().instantiate() + self.input_dim = input_dim or self.input_dim + self.output_dim = output_dim or self.output_dim + # set your choices as LayerChoices + self.choice0 = self.setLayerChoice(0, map_nn(["gat", "gatv2", "supergat"]), key="conv") + self.choice1 = self.setLayerChoice(1, map_nn([1, 2, 4, 8]), key="head") + + # You do not need to define forward process here + # For non-one-shot fashion, you can directly return your model based on the choices + # ``YourModel`` must inherit BaseSpace. 
+        def parse_model(self, selection, device) -> BaseModel:
+            model = YourModel(selection, self.input_dim, self.output_dim).wrap(device)
+            return model
+
+    # YourModel can be defined as follows
+    class YourModel(BaseSpace):
+        def __init__(self, selection, input_dim, output_dim):
+            super().__init__()
+            self.input_dim = input_dim
+            self.output_dim = output_dim
+            if selection["conv"] == "gat":
+                conv = GATConv
+            elif selection["conv"] == "gatv2":
+                conv = GATv2Conv
+            elif selection["conv"] == "supergat":
+                conv = SuperGATConv
+            self.layer = conv(input_dim, output_dim, selection["head"])
+
+        def forward(self, data):
+            x, edges = data.x, data.edge_index
+            y = self.layer(x, edges)
+            return y
+
+Performance Estimator
+---------------------
+
+The performance estimator estimates the performance of an architecture.
+Here is an example of estimating an architecture without training (used in one-shot spaces).
+
+.. code-block:: python
+
+    # For example, create an NAS estimator by yourself
+    from autogl.module.nas.estimator.base import BaseEstimator
+    class YourOneShotEstimator(BaseEstimator):
+        # The only thing you should do is to define the ``infer`` function
+        def infer(self, model: BaseSpace, dataset, mask="train"):
+            device = next(model.parameters()).device
+            dset = dataset[0].to(device)
+            # Forward the architecture
+            pred = model(dset)[getattr(dset, f"{mask}_mask")]
+            y = dset.y[getattr(dset, f'{mask}_mask')]
+            # Use the default loss function and metrics to evaluate the architecture
+            loss = self.loss_f(pred, y)
+            probs = F.softmax(pred, dim = 1)
+            metrics = [eva.evaluate(probs, y) for eva in self.evaluation]
+            return metrics, loss
+
+Search Strategy
+---------------
+
+The search strategy defines how to find an architecture.
+
+Sample-based strategies without weight sharing are simpler than strategies with weight sharing.
+We show how to define your strategy here with Random Search as an example.
+If you want to define a more complex strategy, you can refer to Darts, Enas or other strategies in NNI.
+
+.. code-block:: python
code-block:: python

    from autogl.module.nas.algorithm.base import BaseNAS
    class RandomSearch(BaseNAS):
        # Get the number of samples at initialization
        def __init__(self, n_sample):
            super().__init__()
            self.n_sample = n_sample

        # The key process in NAS algorithm, search for an architecture given space, dataset and estimator
        def search(self, space: BaseSpace, dset, estimator):
            self.estimator=estimator
            self.dataset=dset
            self.space=space

            self.nas_modules = []
            k2o = get_module_order(self.space)
            # collect all mutables in the space
            replace_layer_choice(self.space, PathSamplingLayerChoice, self.nas_modules)
            replace_input_choice(self.space, PathSamplingInputChoice, self.nas_modules)
            # sort all mutables with given orders
            self.nas_modules = sort_replaced_module(k2o, self.nas_modules)
            # get a dict containing all choices
            selection_range={}
            for k,v in self.nas_modules:
                selection_range[k]=len(v)
            self.selection_dict=selection_range

            arch_perfs=[]
            cache={}

            # sample architectures one by one
            for i in range(self.n_sample):
                selection=self.sample()
                vec=tuple(list(selection.values()))
                if vec not in cache:
                    self.arch=space.parse_model(selection,self.device)
                    metric,loss=self._infer(mask='val')
                    arch_perfs.append([metric,selection])
                    cache[vec]=metric

            # get the architecture with the best performance
            selection=arch_perfs[np.argmax([x[0] for x in arch_perfs])][1]
            arch=space.parse_model(selection,self.device)
            return arch

        # Sample an architecture from the space
        def sample(self):
            selection={}
            for k,v in self.selection_dict.items():
                selection[k]=np.random.choice(range(v))
            return selection

Different search strategies should be combined with different search spaces and estimators in usage.

+------------+-------------+-------------+------------------+
| Sapce      | single path | GraphNAS[1] | GraphNAS-macro[1]|
+============+=============+=============+==================+
| Random     | ✓           | ✓           | ✓                |
+------------+-------------+-------------+------------------+
| RL         | ✓           | ✓           | ✓                |
+------------+-------------+-------------+------------------+
| GraphNAS[1]| ✓           | ✓           | ✓                |
+------------+-------------+-------------+------------------+
| ENAS[2]    | ✓           |             |                  |
+------------+-------------+-------------+------------------+
| DARTS[3]   | ✓           |             |                  |
+------------+-------------+-------------+------------------+

+------------+-------------+-------------+
| Estimator  | one-shot    | Train       |
+============+=============+=============+
| Random     |             | ✓           |
+------------+-------------+-------------+
| RL         |             | ✓           |
+------------+-------------+-------------+
| GraphNAS[1]|             | ✓           |
+------------+-------------+-------------+
| ENAS[2]    | ✓           |             |
+------------+-------------+-------------+
| DARTS[3]   | ✓           |             |
+------------+-------------+-------------+

[1] Gao, Yang, et al. "Graph neural architecture search." IJCAI. Vol. 20. 2020.
[2] Pham, Hieu, et al. "Efficient neural architecture search via parameters sharing." International Conference on Machine Learning. PMLR, 2018.
[3] Liu, Hanxiao, Karen Simonyan, and Yiming Yang. "DARTS: Differentiable Architecture Search." International Conference on Learning Representations. 2018.
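To make the pieces above concrete, they can be wired together roughly as follows. This is only a sketch: ``dataset`` is assumed to be a PyG-style dataset exposing ``num_features`` and ``num_classes``, and, per the tables above, a sample-based strategy such as this random search is normally paired with a training-based estimator rather than a one-shot one.

.. code-block:: python

    # Sketch only: combines the space, estimator and strategy defined above.
    space = YourNonOneShotSpace()
    space.instantiate(input_dim=dataset.num_features, output_dim=dataset.num_classes)
    estimator = YourOneShotEstimator()  # substitute a training-based estimator in practice
    strategy = RandomSearch(n_sample=10)
    # search() samples architectures, scores each on the validation mask,
    # and returns the best-performing one
    best_arch = strategy.search(space, dataset, estimator)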
From 2a168b7f0dcbf5cab3e1c5c5984328de96794900 Mon Sep 17 00:00:00 2001
From: null
Date: Fri, 25 Jun 2021 15:15:00 +0800
Subject: [PATCH 111/144] Fix bugs detected through tests for refactoring

Merge from the refactor_test branch, which already fixed the bugs detected
through tests for the refactor, and temporarily override the dataset method
provided by PyTorch-Geometric to maintain compatibility with
PyTorch-Geometric>=1.7.0

---
 autogl/datasets/modelnet.py | 8 +++++
 autogl/datasets/ogb.py | 30 ++++++++++++++++++
 autogl/datasets/pyg.py | 44 ++++++++++++++++++++++++++
 autogl/module/model/gcn.py | 24 ++++++++------
 autogl/module/model/graphsage.py | 42 ++++++++++++++----------
 autogl/module/train/link_prediction.py | 2 ++
 docs/docfile/tutorial/t_model.rst | 2 ++
 docs/docfile/tutorial/t_trainer.rst | 4 ++-
 8 files changed, 129 insertions(+), 27 deletions(-)

diff --git a/autogl/datasets/modelnet.py b/autogl/datasets/modelnet.py
index 67c958f..7348246 100644
--- a/autogl/datasets/modelnet.py
+++ b/autogl/datasets/modelnet.py
@@ -26,6 +26,8 @@ class ModelNet10Train(ModelNet):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(ModelNet10Train, self).get(idx)
 
 
@@ -37,6 +39,8 @@ class ModelNet10Test(ModelNet):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(ModelNet10Test, self).get(idx)
 
 
@@ -48,6 +52,8 @@ class ModelNet40Train(ModelNet):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(ModelNet40Train, self).get(idx)
 
 
@@ -59,4 +65,6 @@ class ModelNet40Test(ModelNet):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(ModelNet40Test, self).get(idx)
diff --git a/autogl/datasets/ogb.py b/autogl/datasets/ogb.py
index a27ea8e..b133b85 100644
--- a/autogl/datasets/ogb.py
+++ b/autogl/datasets/ogb.py
@@ -39,6 +39,8 @@ class OGBNproductsDataset(PygNodePropPredDataset):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(OGBNproductsDataset, self).get(idx)
 
 
@@ -72,6 +74,8 @@ class OGBNproteinsDataset(PygNodePropPredDataset):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(OGBNproteinsDataset, self).get(idx)
 
 
@@ -97,6 +101,8 @@ class OGBNarxivDataset(PygNodePropPredDataset):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(OGBNarxivDataset, self).get(idx)
 
 
@@ -121,6 +127,8 @@ class OGBNpapers100MDataset(PygNodePropPredDataset):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(OGBNpapers100MDataset, self).get(idx)
 
 
@@ -159,6 +167,8 @@ class OGBNmagDataset(PygNodePropPredDataset):
     def get(self, idx):
         if hasattr(self, "__data_list__"):
             delattr(self, "__data_list__")
+        if hasattr(self, "_data_list"):
+            delattr(self, "_data_list")
         return super(OGBNmagDataset, self).get(idx)
 
 
@@ -178,6 +188,8 @@ class OGBGmolhivDataset(PygGraphPropPredDataset):
     def
get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBGmolhivDataset, self).get(idx) @@ -194,6 +206,8 @@ class OGBGmolpcbaDataset(PygGraphPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBGmolpcbaDataset, self).get(idx) @@ -210,6 +224,8 @@ class OGBGppaDataset(PygGraphPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBGppaDataset, self).get(idx) @@ -226,6 +242,8 @@ class OGBGcodeDataset(PygGraphPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBGcodeDataset, self).get(idx) @@ -245,6 +263,8 @@ class OGBLppaDataset(PygLinkPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBLppaDataset, self).get(idx) @@ -261,6 +281,8 @@ class OGBLcollabDataset(PygLinkPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBLcollabDataset, self).get(idx) @@ -277,6 +299,8 @@ class OGBLddiDataset(PygLinkPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBLddiDataset, self).get(idx) @@ -293,6 +317,8 @@ class OGBLcitationDataset(PygLinkPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBLcitationDataset, self).get(idx) @@ -309,6 +335,8 @@ class OGBLwikikgDataset(PygLinkPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBLwikikgDataset, self).get(idx) @@ -325,4 +353,6 @@ class OGBLbiokgDataset(PygLinkPropPredDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(OGBLbiokgDataset, self).get(idx) diff --git a/autogl/datasets/pyg.py b/autogl/datasets/pyg.py index 22eaf3f..df9f480 100644 --- a/autogl/datasets/pyg.py +++ b/autogl/datasets/pyg.py @@ -27,6 +27,8 @@ class AmazonComputersDataset(Amazon): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(AmazonComputersDataset, self).get(idx) @@ -41,6 +43,8 @@ class AmazonPhotoDataset(Amazon): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(AmazonPhotoDataset, self).get(idx) @@ -55,6 +59,8 @@ class CoauthorPhysicsDataset(Coauthor): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(CoauthorPhysicsDataset, self).get(idx) @@ -69,6 +75,8 @@ class CoauthorCSDataset(Coauthor): def 
get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(CoauthorCSDataset, self).get(idx) @@ -83,6 +91,8 @@ class CoraDataset(Planetoid): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(CoraDataset, self).get(idx) @@ -97,6 +107,8 @@ class CiteSeerDataset(Planetoid): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(CiteSeerDataset, self).get(idx) @@ -111,6 +123,8 @@ class PubMedDataset(Planetoid): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(PubMedDataset, self).get(idx) @@ -125,6 +139,8 @@ class RedditDataset(Reddit): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(RedditDataset, self).get(idx) @@ -137,6 +153,8 @@ class FlickrDataset(Flickr): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(FlickrDataset, self).get(idx) @@ -151,6 +169,8 @@ class MUTAGDataset(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(MUTAGDataset, self).get(idx) @@ -165,6 +185,8 @@ class IMDBBinaryDataset(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(IMDBBinaryDataset, self).get(idx) @@ -179,6 +201,8 @@ class IMDBMultiDataset(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(IMDBMultiDataset, self).get(idx) @@ -193,6 +217,8 @@ class CollabDataset(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(CollabDataset, self).get(idx) @@ -207,6 +233,8 @@ class ProteinsDataset(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(ProteinsDataset, self).get(idx) @@ -221,6 +249,8 @@ class REDDITBinary(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(REDDITBinary, self).get(idx) @@ -235,6 +265,8 @@ class REDDITMulti5K(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(REDDITMulti5K, self).get(idx) @@ -249,6 +281,8 @@ class REDDITMulti12K(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(REDDITMulti12K, self).get(idx) @@ -263,6 +297,8 @@ class PTCMRDataset(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, 
"_data_list"): + delattr(self, "_data_list") return super(PTCMRDataset, self).get(idx) @@ -277,6 +313,8 @@ class NCI1Dataset(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(NCI1Dataset, self).get(idx) @@ -291,6 +329,8 @@ class NCI109Dataset(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(NCI109Dataset, self).get(idx) @@ -316,6 +356,8 @@ class ENZYMES(TUDataset): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(ENZYMES, self).get(idx) @@ -360,4 +402,6 @@ class QM9Dataset(QM9): def get(self, idx): if hasattr(self, "__data_list__"): delattr(self, "__data_list__") + if hasattr(self, "_data_list"): + delattr(self, "_data_list") return super(QM9Dataset, self).get(idx) diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index bd088be..6862036 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -48,10 +48,10 @@ class GCN(ClassificationSupportedSequentialModel): else: self._dropout: _typing.Optional[torch.nn.Dropout] = None - def forward(self, data) -> torch.Tensor: + def forward(self, data, enable_activation: bool = True) -> torch.Tensor: x: torch.Tensor = getattr(data, "x") edge_index: torch.LongTensor = getattr(data, "edge_index") - edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight") + edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight", None) """ Validate the arguments """ if not type(x) == type(edge_index) == torch.Tensor: raise TypeError @@ -62,7 +62,7 @@ class GCN(ClassificationSupportedSequentialModel): edge_weight: _typing.Optional[torch.Tensor] = None x: torch.Tensor = self._convolution.forward(x, edge_index, edge_weight) - if self._activation_name is not None: + if self._activation_name is not None and enable_activation: x: torch.Tensor = activate_func(x, self._activation_name) if self._dropout is not None: x: torch.Tensor = self._dropout.forward(x) @@ -97,9 +97,9 @@ class GCN(ClassificationSupportedSequentialModel): dropout = 0 if dropout > 1: dropout = 1 - dropout_list: _typing.Sequence[_typing.Optional[float]] = [ - dropout for _ in range(len(hidden_features) + 1) - ] + dropout_list: _typing.Sequence[_typing.Optional[float]] = ( + [dropout for _ in range(len(hidden_features))] + [None] + ) elif dropout in (None, Ellipsis, ...): dropout_list: _typing.Sequence[_typing.Optional[float]] = [ None for _ in range(len(hidden_features) + 1) @@ -224,9 +224,15 @@ class GCN(ClassificationSupportedSequentialModel): return torch.nn.functional.log_softmax(x, dim=1) def lp_encode(self, data): - for i in range(len(self.__sequential_encoding_layers) - 1): - data.x = self.__sequential_encoding_layers[i](data) - return getattr(data, "x") + x: torch.Tensor = getattr(data, "x") + for i in range(len(self.__sequential_encoding_layers) - 2): + x = self.__sequential_encoding_layers[i]( + autogl.data.Data(x, getattr(data, "edge_index")) + ) + x = self.__sequential_encoding_layers[-2]( + autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False + ) + return x def lp_decode(self, z, pos_edge_index, neg_edge_index): edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) diff --git a/autogl/module/model/graphsage.py 
b/autogl/module/model/graphsage.py index a990701..727b9d0 100644 --- a/autogl/module/model/graphsage.py +++ b/autogl/module/model/graphsage.py @@ -45,14 +45,14 @@ class GraphSAGE(ClassificationSupportedSequentialModel): else: self._dropout: _typing.Optional[torch.nn.Dropout] = None - def forward(self, data) -> torch.Tensor: + def forward(self, data, enable_activation: bool = True) -> torch.Tensor: x: torch.Tensor = getattr(data, "x") edge_index: torch.Tensor = getattr(data, "edge_index") if type(x) != torch.Tensor or type(edge_index) != torch.Tensor: raise TypeError x: torch.Tensor = self._convolution.forward(x, edge_index) - if self._activation_name is not None: + if self._activation_name is not None and enable_activation: x: torch.Tensor = activate_func(x, self._activation_name) if self._dropout is not None: x: torch.Tensor = self._dropout.forward(x) @@ -85,9 +85,9 @@ class GraphSAGE(ClassificationSupportedSequentialModel): raise TypeError _layers_dropout: _typing.Sequence[_typing.Optional[float]] = layers_dropout elif layers_dropout is None or type(layers_dropout) == float: - _layers_dropout: _typing.Sequence[_typing.Optional[float]] = [ - layers_dropout for _ in range(len(hidden_features) + 1) - ] + _layers_dropout: _typing.Sequence[_typing.Optional[float]] = ( + [layers_dropout for _ in range(len(hidden_features))] + [None] + ) else: raise TypeError if not type(activation_name) == type(aggr) == str: @@ -113,15 +113,15 @@ class GraphSAGE(ClassificationSupportedSequentialModel): if i + 1 < len(hidden_features): self.__sequential_encoding_layers.append( self._SAGELayer( - hidden_features[i], hidden_features[i + 1], - aggr, activation_name, _layers_dropout[i + 1] + hidden_features[i], hidden_features[i + 1], aggr, + activation_name, _layers_dropout[i + 1] ) ) else: self.__sequential_encoding_layers.append( self._SAGELayer( - hidden_features[i], num_classes, - aggr, activation_name, _layers_dropout[i + 1] + hidden_features[i], num_classes, aggr, + _layers_dropout[i + 1] ) ) @@ -141,23 +141,31 @@ class GraphSAGE(ClassificationSupportedSequentialModel): """ Layer-wise encode """ x: torch.Tensor = getattr(data, "x") for i, __edge_index in enumerate(getattr(data, "edge_indexes")): - _intermediate_data: autogl.data.Data = autogl.data.Data( - x=x, edge_index=__edge_index + x: torch.Tensor = self.__sequential_encoding_layers[i]( + autogl.data.Data(x=x, edge_index=__edge_index) ) - x: torch.Tensor = self.__sequential_encoding_layers[i](_intermediate_data) return x else: + x: torch.Tensor = getattr(data, "x") for i in range(len(self.__sequential_encoding_layers)): - data.x = self.__sequential_encoding_layers[i](data) - return data.x + x = self.__sequential_encoding_layers[i]( + autogl.data.Data(x, getattr(data, "edge_index")) + ) + return x def cls_decode(self, x: torch.Tensor) -> torch.Tensor: return torch.nn.functional.log_softmax(x, dim=1) def lp_encode(self, data): - for i in range(len(self.__sequential_encoding_layers) - 1): - data.x = self.__sequential_encoding_layers[i](data) - return getattr(data, "x") + x: torch.Tensor = getattr(data, "x") + for i in range(len(self.__sequential_encoding_layers) - 2): + x = self.__sequential_encoding_layers[i]( + autogl.data.Data(x, getattr(data, "edge_index")) + ) + x = self.__sequential_encoding_layers[-2]( + autogl.data.Data(x, getattr(data, "edge_index")), enable_activation=False + ) + return x def lp_decode(self, z, pos_edge_index, neg_edge_index): edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) diff --git 
a/autogl/module/train/link_prediction.py b/autogl/module/train/link_prediction.py
index 3462797..4020481 100644
--- a/autogl/module/train/link_prediction.py
+++ b/autogl/module/train/link_prediction.py
@@ -267,6 +267,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
         """
         data = dataset[0]
+        data.edge_index = data.train_pos_edge_index
         self.train_only(data)
         if keep_valid_result:
             self.valid_result = self.predict_only(data)
@@ -309,6 +310,7 @@ class LinkPredictionTrainer(BaseLinkPredictionTrainer):
             The prediction result.
         """
         data = dataset[0]
+        data.edge_index = data.train_pos_edge_index
         data = data.to(self.device)
         if mask in ["train", "val", "test"]:
             pos_edge_index = data[f"{mask}_pos_edge_index"]
diff --git a/docs/docfile/tutorial/t_model.rst b/docs/docfile/tutorial/t_model.rst
index dad0fb9..41f6b8e 100644
--- a/docs/docfile/tutorial/t_model.rst
+++ b/docs/docfile/tutorial/t_model.rst
@@ -18,6 +18,8 @@ Define your own model and automodel
 If you want to add your own model and automodel for some task, the only thing you should do is add a new model where the forward function should be fulfilled and a new automodel inherited from the basemodel.
 
+For new models used in link prediction tasks, you should implement the lp_encode and lp_decode functions. The difference between lp_encode and the forward function is that there is no classification layer in lp_encode.
+
 Firstly, you should define your model if it does not belong to the models above.
 Secondly, you should define your corresponding automodel.
diff --git a/docs/docfile/tutorial/t_trainer.rst b/docs/docfile/tutorial/t_trainer.rst
index 70dc7b4..da59681 100644
--- a/docs/docfile/tutorial/t_trainer.rst
+++ b/docs/docfile/tutorial/t_trainer.rst
@@ -7,6 +7,8 @@ AutoGL project use ``trainer`` to handle the auto-training of tasks. Currently, 
 * ``NodeClassificationTrainer`` for semi-supervised node classification
 * ``GraphClassificationTrainer`` for supervised graph classification
+* ``LinkPredictionTrainer`` for link prediction
+
 Initialization
 --------------
@@ -41,7 +43,7 @@ Train and Predict
 -----------------
 After initializing a trainer, you can train it on the given datasets.
-We have given the training and testing functions for the tasks of node classification and graph classification up to now. You can also create your tasks following the similar patterns with ours. For training, you need to define ``train_only()`` and use it in ``train()``. For testing, you need to define ``predict_proba()`` and use it in ``predict()``.
+We have given the training and testing functions for the tasks of node classification, graph classification, and link prediction up to now. You can also create your own tasks following similar patterns to ours. For training, you need to define ``train_only()`` and use it in ``train()``. For testing, you need to define ``predict_proba()`` and use it in ``predict()``.
 The evaluation function is defined in ``evaluate()``; you can use your own evaluation metrics and methods.
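As a companion to the ``t_model.rst`` note above, a minimal model that supports link prediction might look like the sketch below. The class name and the dot-product decoder are illustrative assumptions rather than AutoGL's API; only the ``lp_encode``/``lp_decode`` contract mirrors the models patched above.

.. code-block:: python

    import torch
    import torch.nn.functional as F
    from torch_geometric.nn import GCNConv

    class TinyLinkPredictor(torch.nn.Module):
        # hypothetical minimal model, not part of AutoGL
        def __init__(self, num_features, hidden=64):
            super().__init__()
            self.conv1 = GCNConv(num_features, hidden)
            self.conv2 = GCNConv(hidden, hidden)

        def lp_encode(self, data):
            # unlike forward, there is no classification layer or log_softmax here
            x = F.relu(self.conv1(data.x, data.edge_index))
            return self.conv2(x, data.edge_index)

        def lp_decode(self, z, pos_edge_index, neg_edge_index):
            # score candidate edges; a dot-product decoder is assumed here
            edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
            return (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)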
From db4479e0a8cc4149b3d099aa3ee6e2cf96d5248e Mon Sep 17 00:00:00 2001 From: generall Date: Fri, 25 Jun 2021 17:05:23 +0800 Subject: [PATCH 112/144] fix bugs in doc --- autogl/module/nas/estimator/one_shot.py | 3 +- docs/docfile/tutorial/t_nas.rst | 60 +++++++++++++------------ 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py index c6ed2df..206f539 100644 --- a/autogl/module/nas/estimator/one_shot.py +++ b/autogl/module/nas/estimator/one_shot.py @@ -20,6 +20,7 @@ class OneShotEstimator(BaseEstimator): y = dset.y[getattr(dset, f'{mask}_mask')] loss = self.loss_f(pred, y) #acc=sum(pred.max(1)[1]==y).item()/y.size(0) - probs = F.softmax(pred, dim = 1) + probs = F.softmax(pred, dim = 1).cpu().numpy() + y = y.cpu() metrics = [eva.evaluate(probs, y) for eva in self.evaluation] return metrics, loss diff --git a/docs/docfile/tutorial/t_nas.rst b/docs/docfile/tutorial/t_nas.rst index 5922e5f..e5770c3 100644 --- a/docs/docfile/tutorial/t_nas.rst +++ b/docs/docfile/tutorial/t_nas.rst @@ -36,10 +36,10 @@ Here is an example. self.input_dim = input_dim or self.input_dim self.output_dim = output_dim or self.output_dim # define two layers with order 0 and 1 - self.layer0 = self.setLayerChoice(order = 0, [gnn_map(op,self.input_dim,self.output_dim)for op in ['gcn', 'gat']]) - self.layer1 = self.setLayerChoice(order = 1, [gnn_map(op,self.input_dim,self.output_dim)for op in ['gcn', 'gat']]) + self.layer0 = self.setLayerChoice(0, [gnn_map(op,self.input_dim,self.output_dim)for op in ['gcn', 'gat']]) + self.layer1 = self.setLayerChoice(1, [gnn_map(op,self.input_dim,self.output_dim)for op in ['gcn', 'gat']]) # define an input choice two choose from the result of the two layer - self.input_layer = self.setInputChoice(order = 2, n_candidates = 2) + self.input_layer = self.setInputChoice(2, n_candidates = 2) # Define the forward process def forward(self, data): @@ -62,7 +62,9 @@ But you can only use sample-based search strategy. # For example, create an NAS search space by yourself from autogl.module.nas.space.base import BaseSpace, map_nn from autogl.module.nas.space.operation import gnn_map - from torch_geometric.nn import GATConv, GATv2Conv, SuperGATConv + # here we search from three types of graph convolution with `head` as a parameter + # we should search `heads` at the same time with the convolution + from torch_geometric.nn import GATConv, FeaStConv, TransformerConv class YourNonOneShotSpace(BaseSpace): # Get essential parameters at initialization def __init__(self, input_dim = None, output_dim = None): @@ -78,7 +80,7 @@ But you can only use sample-based search strategy. self.input_dim = input_dim or self.input_dim self.output_dim = output_dim or self.output_dim # set your choices as LayerChoices - self.choice0 = self.setLayerChoice(0, map_nn(["gat", "gatv2", "supergat"]), key="conv") + self.choice0 = self.setLayerChoice(0, map_nn(["gat", "feast", "transformer"]), key="conv") self.choice1 = self.setLayerChoice(1, map_nn([1, 2, 4, 8]), key="head") # You do not need to define forward process here @@ -95,10 +97,10 @@ But you can only use sample-based search strategy. 
self.output_dim = output_dim if selection["conv"] == "gat": conv = GATConv - elif selection["conv"] == "gatv2": - conv = GATv2Conv - elif selection["conv"] == "supergat": - conv = SuperGATConv + elif selection["conv"] == "feast": + conv = FeaStConv + elif selection["conv"] == "transformer": + conv = TransformerConv self.layer = conv(input_dim, output_dim, selection["head"]) def forward(self, data): @@ -136,7 +138,7 @@ Search Strategy The space strategy defines how to find an architecture. Sample-based strategy without weight sharing is simpler than strategies with weight sharing. -We show how to define your strategy here with Random Search as an example. +We show how to define your strategy here with DFS as an example. If you want to define more complex strategy, you can refer to Darts, Enas or other strategies in NNI. .. code-block:: python @@ -168,30 +170,30 @@ If you want to define more complex strategy, you can refer to Darts, Enas or oth self.selection_dict=selection_range arch_perfs=[] - cache={} - - # sample architectures one by one - for i in range(self.n_sample): - selection=self.sample() - vec=tuple(list(selection.values())) - if vec not in cache: - self.arch=space.parse_model(selection,self.device) - metric,loss=self._infer(mask='val') - arch_perfs.append([metric,selection]) - cache[vec]=metric - + # define DFS process + self.selection = {} + last_k = list(self.selection_dict.keys())[-1] + def dfs(): + for k,v in self.selection_dict.items(): + if not k in self.selection: + for i in range(v): + self.selection[k] = i + if k == last_k: + # evaluate an architecture + self.arch=space.parse_model(self.selection,self.device) + metric,loss=self._infer(mask='val') + arch_perfs.append([metric, self.selection.copy()]) + else: + dfs() + del self.selection[k] + break + dfs() + # get the architecture with the best performance selection=arch_perfs[np.argmax([x[0] for x in arch_perfs])][1] arch=space.parse_model(selection,self.device) return arch - # Sample an architecture from the space - def sample(self): - selection={} - for k,v in self.selection_dict.items(): - selection[k]=np.random.choice(range(v)) - return selection - Different search strategies should be combined with different search spaces and estimators in usage. 
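The DFS strategy introduced in the patch above enumerates the full Cartesian product of all choices, so every architecture is evaluated exactly once. A tiny standalone illustration of that enumeration (plain Python, independent of AutoGL; the keys and candidate counts are made up):

.. code-block:: python

    from itertools import product

    # mirrors `self.selection_dict` built by the strategy: key -> number of candidates
    selection_range = {"conv": 3, "head": 4}
    keys = list(selection_range)
    all_selections = [
        dict(zip(keys, combo))
        for combo in product(*(range(v) for v in selection_range.values()))
    ]
    print(len(all_selections))  # 12 = 3 * 4, each architecture visited exactly once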
From eb6afdfc17c50fc8687c8fa62da77fc78b201771 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sat, 26 Jun 2021 08:24:04 +0000
Subject: [PATCH 113/144] fix device error

---
 autogl/module/nas/estimator/one_shot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py
index 206f539..21d296c 100644
--- a/autogl/module/nas/estimator/one_shot.py
+++ b/autogl/module/nas/estimator/one_shot.py
@@ -20,7 +20,7 @@ class OneShotEstimator(BaseEstimator):
         y = dset.y[getattr(dset, f'{mask}_mask')]
         loss = self.loss_f(pred, y)
         #acc=sum(pred.max(1)[1]==y).item()/y.size(0)
-        probs = F.softmax(pred, dim = 1).cpu().numpy()
+        probs = F.softmax(pred, dim = 1).detach().cpu().numpy()
         y = y.cpu()
         metrics = [eva.evaluate(probs, y) for eva in self.evaluation]
         return metrics, loss

From ec429ec457c59b11d626b99156fbd134094d4df0 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sat, 26 Jun 2021 10:43:46 +0000
Subject: [PATCH 114/144] revise document of nas

---
 autogl/datasets/utils.py | 4 ++--
 autogl/module/feature/auto_feature.py | 6 ++----
 autogl/module/feature/generators/eigen.py | 4 ++--
 autogl/module/feature/generators/graphlet.py | 4 ++--
 autogl/module/feature/subgraph/netlsd.py | 4 ++--
 docs/docfile/documentation/module.rst | 14 ++++++++++++++
 docs/docfile/tutorial/t_nas.rst | 4 ++--
 docs/index.rst | 1 +
 8 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/autogl/datasets/utils.py b/autogl/datasets/utils.py
index 4885ea0..d35f887 100644
--- a/autogl/datasets/utils.py
+++ b/autogl/datasets/utils.py
@@ -91,11 +91,11 @@ def random_splits_mask_class(
     num_test=None,
     seed=None,
 ):
-    r"""If the data has masks for train/val/test, return the splits with specific number of samples from every class for training as suggested in Pitfalls of graph neural network evaluation [1]_ for semi-supervised learning.
+    r"""If the data has masks for train/val/test, return the splits with specific number of samples from every class for training as suggested in Pitfalls of graph neural network evaluation [#]_ for semi-supervised learning.
 
     References
     ----------
-    .. [1] Shchur, O., Mumme, M., Bojchevski, A., & Günnemann, S. (2018).
+    .. [#] Shchur, O., Mumme, M., Bojchevski, A., & Günnemann, S. (2018).
         Pitfalls of graph neural network evaluation.
         arXiv preprint arXiv:1811.05868.
 
diff --git a/autogl/module/feature/auto_feature.py b/autogl/module/feature/auto_feature.py
index 345447c..a1f4015 100644
--- a/autogl/module/feature/auto_feature.py
+++ b/autogl/module/feature/auto_feature.py
@@ -120,14 +120,12 @@ class Timer:
 class AutoFeatureEngineer(BaseFeatureEngineer):
     r"""
-    Notes
-    -----
-    An implementation of auto feature engineering method Deepgl [1]_ ,which iteratively generates features by aggregating neighbour features
+    An implementation of the auto feature engineering method Deepgl [#]_, which iteratively generates features by aggregating neighbour features
     and selects a fixed number of features to automatically add important graph-aware features.
 
     References
     ----------
-    .. [1] Rossi, R. A., Zhou, R., & Ahmed, N. K. (2020).
+    .. [#] Rossi, R. A., Zhou, R., & Ahmed, N. K. (2020).
         Deep Inductive Graph Representation Learning.
         IEEE Transactions on Knowledge and Data Engineering, 32(3), 438–452.
         https://doi.org/10.1109/TKDE.2018.2878247
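For intuition about the Deepgl-style generator documented in the hunk above: one aggregation round appends each node's mean neighbour features to its own feature vector. A toy sketch in plain NumPy (not the AutoGL implementation, which additionally scores and prunes the generated features):

.. code-block:: python

    import numpy as np

    def aggregate_once(x, neighbors):
        # x: (num_nodes, dim) feature matrix; neighbors: list of neighbour index lists
        agg = np.stack([
            x[nbrs].mean(axis=0) if len(nbrs) > 0 else np.zeros(x.shape[1])
            for nbrs in neighbors
        ])
        return np.concatenate([x, agg], axis=1)  # (num_nodes, 2 * dim)

    x = np.eye(3)                   # 3 nodes with one-hot features
    neighbors = [[1], [0, 2], [1]]  # a path graph 0 - 1 - 2
    print(aggregate_once(x, neighbors).shape)  # (3, 6)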
https://doi.org/10.1109/TKDE.2018.2878247 diff --git a/autogl/module/feature/generators/eigen.py b/autogl/module/feature/generators/eigen.py index ca5c311..f135330 100644 --- a/autogl/module/feature/generators/eigen.py +++ b/autogl/module/feature/generators/eigen.py @@ -50,11 +50,11 @@ class GeEigen(BaseGenerator): Notes ----- - An implementation of [1]_ + An implementation of [#]_ References ---------- - .. [1] Ziwei Zhang, Peng Cui, Jian Pei, Xin Wang, Wenwu Zhu: + .. [#] Ziwei Zhang, Peng Cui, Jian Pei, Xin Wang, Wenwu Zhu: Eigen-GNN: A Graph Structure Preserving Plug-in for GNNs. CoRR abs/2006.04330 (2020) https://arxiv.org/abs/2006.04330 diff --git a/autogl/module/feature/generators/graphlet.py b/autogl/module/feature/generators/graphlet.py index 02fc06c..1dfac40 100644 --- a/autogl/module/feature/generators/graphlet.py +++ b/autogl/module/feature/generators/graphlet.py @@ -272,11 +272,11 @@ class Graphlet: @register_feature("graphlet") class GeGraphlet(BaseGenerator): - r"""generate local graphlet numbers as features. The implementation refers to [1]_ . + r"""generate local graphlet numbers as features. The implementation refers to [#]_ . References ---------- - .. [1] Ahmed, N. K., Willke, T. L., & Rossi, R. A. (2016). + .. [#] Ahmed, N. K., Willke, T. L., & Rossi, R. A. (2016). Estimation of local subgraph counts. Proceedings - 2016 IEEE International Conference on Big Data, Big Data 2016, 586–595. https://doi.org/10.1109/BigData.2016.7840651 diff --git a/autogl/module/feature/subgraph/netlsd.py b/autogl/module/feature/subgraph/netlsd.py index 7fc83dd..52a946e 100644 --- a/autogl/module/feature/subgraph/netlsd.py +++ b/autogl/module/feature/subgraph/netlsd.py @@ -10,11 +10,11 @@ class SgNetLSD(BaseSubgraph): r""" Notes ----- - a subgraph feature generation method. This is a simple wrapper of NetLSD [1]_. + a subgraph feature generation method. This is a simple wrapper of NetLSD [#]_. References ---------- - .. [1] A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein, and E. Müller, “NetLSD: Hearing the shape of a graph,” + .. [#] A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein, and E. Müller, “NetLSD: Hearing the shape of a graph,” Proc. ACM SIGKDD Int. Conf. Knowl. Discov. Data Min., pp. 2347–2356, 2018. """ diff --git a/docs/docfile/documentation/module.rst b/docs/docfile/documentation/module.rst index 3a90727..719198a 100644 --- a/docs/docfile/documentation/module.rst +++ b/docs/docfile/documentation/module.rst @@ -44,6 +44,20 @@ hyper parameter optimization .. automodule:: autogl.module.hpo :members: +.. _neural architecture search: + +neural architecture search +-------------------------- + +.. automodule:: autogl.module.nas.algorithm + :members: + +.. automodule:: autogl.module.nas.space + :members: + +.. automodule:: autogl.module.nas.estimator + :members: + .. _ensemble documentation: ensemble diff --git a/docs/docfile/tutorial/t_nas.rst b/docs/docfile/tutorial/t_nas.rst index e5770c3..6c300d0 100644 --- a/docs/docfile/tutorial/t_nas.rst +++ b/docs/docfile/tutorial/t_nas.rst @@ -109,7 +109,7 @@ But you can only use sample-based search strategy. return y Performance Estimator ------------- +--------------------- The performance estimator estimates the performance of an architecture. Here is an example of estimating an architecture without training (used in one-shot space). 
@@ -133,7 +133,7 @@ Here is an example of estimating an architecture without training (used in one-s return metrics, loss Search Strategy ------------- +--------------- The space strategy defines how to find an architecture. diff --git a/docs/index.rst b/docs/index.rst index 96e6021..3e7e565 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -89,6 +89,7 @@ In AutoGL, the tasks are solved by corresponding learners, which in general do t docfile/tutorial/t_model docfile/tutorial/t_trainer docfile/tutorial/t_hpo + docfile/tutorial/t_nas docfile/tutorial/t_ensemble docfile/tutorial/t_solver From cc9c4a0fddf4232080a34b0fba2b40a3f374bc99 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sat, 26 Jun 2021 10:53:20 +0000 Subject: [PATCH 115/144] change reference style --- docs/docfile/tutorial/t_nas.rst | 62 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/docs/docfile/tutorial/t_nas.rst b/docs/docfile/tutorial/t_nas.rst index 6c300d0..cf65a7d 100644 --- a/docs/docfile/tutorial/t_nas.rst +++ b/docs/docfile/tutorial/t_nas.rst @@ -196,34 +196,34 @@ If you want to define more complex strategy, you can refer to Darts, Enas or oth Different search strategies should be combined with different search spaces and estimators in usage. -+------------+-------------+-------------+------------------+ -| Sapce | single path | GraphNAS[1] | GraphNAS-macro[1]| -+============+=============+=============+==================+ -| Random | ✓ | ✓ | ✓ | -+------------+-------------+-------------+------------------+ -| RL | ✓ | ✓ | ✓ | -+------------+-------------+-------------+------------------+ -| GraphNAS[1]| ✓ | ✓ | ✓ | -+------------+-------------+-------------+------------------+ -| ENAS[2] | ✓ | | | -+------------+-------------+-------------+------------------+ -| DARTS[3] | ✓ | | | -+------------+-------------+-------------+------------------+ - -+------------+-------------+-------------+ -| Estimator | one-shot | Train | -+============+=============+=============+ -| Random | | ✓ | -+------------+-------------+-------------+ -| RL | | ✓ | -+------------+-------------+-------------+ -| GraphNAS[1]| | ✓ | -+------------+-------------+-------------+ -| ENAS[2] | ✓ | | -+------------+-------------+-------------+ -| DARTS[3] | ✓ | | -+------------+-------------+-------------+ - -[1] Gao, Yang, et al. "Graph neural architecture search." IJCAI. Vol. 20. 2020. -[2] Pham, Hieu, et al. "Efficient neural architecture search via parameters sharing." International Conference on Machine Learning. PMLR, 2018. -[3] Liu, Hanxiao, Karen Simonyan, and Yiming Yang. "DARTS: Differentiable Architecture Search." International Conference on Learning Representations. 2018. 
++----------------+-------------+-------------+------------------+
+| Space          | single path | GraphNAS[1] | GraphNAS-macro[1]|
++================+=============+=============+==================+
+| Random         | ✓           | ✓           | ✓                |
++----------------+-------------+-------------+------------------+
+| RL             | ✓           | ✓           | ✓                |
++----------------+-------------+-------------+------------------+
+| GraphNAS [1]_  | ✓           | ✓           | ✓                |
++----------------+-------------+-------------+------------------+
+| ENAS [2]_      | ✓           |             |                  |
++----------------+-------------+-------------+------------------+
+| DARTS [3]_     | ✓           |             |                  |
++----------------+-------------+-------------+------------------+
+
++----------------+-------------+-------------+
+| Estimator      | one-shot    | Train       |
++================+=============+=============+
+| Random         |             | ✓           |
++----------------+-------------+-------------+
+| RL             |             | ✓           |
++----------------+-------------+-------------+
+| GraphNAS [1]_  |             | ✓           |
++----------------+-------------+-------------+
+| ENAS [2]_      | ✓           |             |
++----------------+-------------+-------------+
+| DARTS [3]_     | ✓           |             |
++----------------+-------------+-------------+
+
+.. [1] Gao, Yang, et al. "Graph neural architecture search." IJCAI. Vol. 20. 2020.
+.. [2] Pham, Hieu, et al. "Efficient neural architecture search via parameters sharing." International Conference on Machine Learning. PMLR, 2018.
+.. [3] Liu, Hanxiao, Karen Simonyan, and Yiming Yang. "DARTS: Differentiable Architecture Search." International Conference on Learning Representations. 2018.

From caf8815d1d9e809c6cf2077bcac7772a63dd8feb Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sun, 27 Jun 2021 02:12:23 +0000
Subject: [PATCH 116/144] change default loss argument to string

---
 autogl/module/nas/estimator/base.py | 4 ++--
 autogl/module/nas/estimator/one_shot.py | 2 +-
 autogl/module/nas/estimator/train_scratch.py | 11 ++++++-----
 autogl/solver/classifier/node_classifier.py | 15 +++++++++------
 4 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/autogl/module/nas/estimator/base.py b/autogl/module/nas/estimator/base.py
index 89d7dfe..b846dc1 100644
--- a/autogl/module/nas/estimator/base.py
+++ b/autogl/module/nas/estimator/base.py
@@ -21,11 +21,11 @@ class BaseEstimator:
     evaluation: list of autogl.module.train.evaluation.Evaluation
         Default evaluation metric
     """
-    def __init__(self, loss_f = F.nll_loss, evaluation = [Acc()]):
+    def __init__(self, loss_f: str = 'nll_loss', evaluation = [Acc()]):
         self.loss_f = loss_f
         self.evaluation = evaluation
 
-    def setLossFunction(self, loss_f):
+    def setLossFunction(self, loss_f: str):
         self.loss_f = loss_f
 
     def setEvaluation(self, evaluation):
diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py
index 21d296c..5a49dca 100644
--- a/autogl/module/nas/estimator/one_shot.py
+++ b/autogl/module/nas/estimator/one_shot.py
@@ -18,7 +18,7 @@ class OneShotEstimator(BaseEstimator):
         dset = dataset[0].to(device)
         pred = model(dset)[getattr(dset, f"{mask}_mask")]
         y = dset.y[getattr(dset, f'{mask}_mask')]
-        loss = self.loss_f(pred, y)
+        loss = getattr(F, self.loss_f)(pred, y)
         #acc=sum(pred.max(1)[1]==y).item()/y.size(0)
         probs = F.softmax(pred, dim = 1).detach().cpu().numpy()
         y = y.cpu()
         metrics = [eva.evaluate(probs, y) for eva in self.evaluation]
         return metrics, loss
diff --git a/autogl/module/nas/estimator/train_scratch.py b/autogl/module/nas/estimator/train_scratch.py
index 160f9bd..6767ca3 100644
--- a/autogl/module/nas/estimator/train_scratch.py
+++ b/autogl/module/nas/estimator/train_scratch.py
@@ -7,15 +7,16 @@ from .base import BaseEstimator
 from .one_shot import OneShotEstimator
 import torch
-from autogl.module.train import NodeClassificationFullTrainer
+from autogl.module.train import NodeClassificationFullTrainer, Acc
 
 @register_nas_estimator("scratch")
 class TrainEstimator(BaseEstimator):
     """
    An estimator which trains from scratch
    """
-    def __init__(self):
-        self.estimator=OneShotEstimator()
+    def __init__(self, loss_f = "nll_loss", evaluation = [Acc()]):
+        super().__init__(loss_f, evaluation)
+        self.estimator=OneShotEstimator(self.loss_f, self.evaluation)
 
     def infer(self, model: BaseSpace, dataset, mask="train"):
         # self.trainer.model=model
@@ -30,8 +31,8 @@
             weight_decay=5e-4,
             device="auto",
             init=False,
-            feval=['acc'],
-            loss="nll_loss",
+            feval=self.evaluation,
+            loss=self.loss_f,
             lr_scheduler_type=None)
         self.trainer.train(dataset)
         with torch.no_grad():
diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py
index aab2f86..da676bc 100644
--- a/autogl/solver/classifier/node_classifier.py
+++ b/autogl/solver/classifier/node_classifier.py
@@ -7,6 +7,7 @@ import json
 from copy import deepcopy
 
 import torch
+import torch.nn.functional as F
 import numpy as np
 import yaml
 
@@ -18,7 +19,7 @@ from ...module.train import TRAINER_DICT, BaseNodeClassificationTrainer
 from ...module.train import get_feval
 from ...module.nas.space import NAS_SPACE_DICT
 from ...module.nas.algorithm import NAS_ALGO_DICT
-from ...module.nas.estimator import NAS_ESTIMATOR_DICT
+from ...module.nas.estimator import NAS_ESTIMATOR_DICT, BaseEstimator
 from ..utils import LeaderBoard, set_seed
 from ...datasets import utils
 from ...utils import get_logger
@@ -218,9 +219,11 @@ class AutoNodeClassifier(BaseClassifier):
         for algo, space, estimator in zip(
             self.nas_algorithms, self.nas_spaces, self.nas_estimators
         ):
-            # TODO: initialize important parameters
+            estimator: BaseEstimator
             algo.to(device)
             space.instantiate(input_dim=num_features, output_dim=num_classes)
+            estimator.setEvaluation(feval)
+            estimator.setLossFunction(loss)
 
     # pylint: disable=arguments-differ
     def fit(
@@ -349,7 +352,7 @@
             num_classes=dataset.num_classes,
             feval=evaluator_list,
             device=self.runtime_device,
-            loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss,
+            loss="nll_loss" if not hasattr(dataset, "loss") else dataset.loss,
         )
 
         if self.nas_algorithms is not None:
@@ -359,7 +362,7 @@
                 num_classes=self.dataset.num_classes,
                 feval=evaluator_list,
                 device=self.runtime_device,
-                loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss,
+                loss="nll_loss" if not hasattr(dataset, "loss") else dataset.loss,
             )
 
         assert not isinstance(self._default_trainer, list) or len(self.nas_algorithms) == len(self._default_trainer) - len(self.graph_model_list), "length of default trainer should match total graph models and nas models passed"
@@ -381,7 +384,7 @@
                     model=model,
                     num_features=self.dataset[0].x.shape[1],
                     num_classes=self.dataset.num_classes,
-                    loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss,
+                    loss="nll_loss" if not hasattr(dataset, "loss") else dataset.loss,
                     feval=evaluator_list,
                     device=self.runtime_device,
                     init=False,
@@ -392,7 +395,7 @@
                 trainer.update_parameters(
                     num_classes=self.dataset.num_classes,
                     num_features=self.dataset[0].x.shape[1],
-                    loss="cross_entropy" if not hasattr(dataset, "loss") else dataset.loss,
+                    loss="nll_loss" if not hasattr(dataset, "loss") else
dataset.loss,
                     feval=evaluator_list,
                     device=self.runtime_device,

From 68f61e018164c8a957bd52b9416e63776957e596 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sun, 27 Jun 2021 02:14:12 +0000
Subject: [PATCH 117/144] fix bug in doc

---
 docs/docfile/tutorial/t_nas.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docfile/tutorial/t_nas.rst b/docs/docfile/tutorial/t_nas.rst
index cf65a7d..7d60674 100644
--- a/docs/docfile/tutorial/t_nas.rst
+++ b/docs/docfile/tutorial/t_nas.rst
@@ -127,7 +127,7 @@ Here is an example of estimating an architecture without training (used in one-s
         pred = model(dset)[getattr(dset, f"{mask}_mask")]
         y = dset.y[getattr(dset, f'{mask}_mask')]
         # Use default loss function and metrics to evaluate the architecture
-        loss = self.loss_f(pred, y)
+        loss = getattr(F, self.loss_f)(pred, y)
         probs = F.softmax(pred, dim = 1)
         metrics = [eva.evaluate(probs, y) for eva in self.evaluation]
         return metrics, loss

From 55c3c6bd552b1c1baeac4907c04d50b1e3a3139c Mon Sep 17 00:00:00 2001
From: generall
Date: Mon, 28 Jun 2021 14:41:12 +0800
Subject: [PATCH 118/144] add example

---
 autogl/module/nas/space/base.py | 4 ++-
 configs/nodeclf_nas_darts_benchmark.yml | 42 +++++++++++++++++++++++++
 configs/nodeclf_nas_enas_benchmark.yml | 42 +++++++++++++++++++++++++
 configs/nodeclf_nas_macro_benchmark.yml | 40 +++++++++++++++++++++++
 examples/test_nas.py | 22 +++++++++++++
 5 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 configs/nodeclf_nas_darts_benchmark.yml
 create mode 100644 configs/nodeclf_nas_enas_benchmark.yml
 create mode 100644 configs/nodeclf_nas_macro_benchmark.yml
 create mode 100644 examples/test_nas.py

diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py
index a4c247c..433157d 100644
--- a/autogl/module/nas/space/base.py
+++ b/autogl/module/nas/space/base.py
@@ -89,6 +89,7 @@ class BoxModel(BaseModel):
         self.num_classes = self._model.output_dim
         self.params = {"num_class": self.num_classes, "features_num": self.num_features}
         self.device = device
+        self.selection = None
 
     def fix(self, selection):
         """
@@ -119,7 +120,8 @@ class BoxModel(BaseModel):
 
         ret_self = deepcopy(self)
         ret_self._model.instantiate()
-        apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False)
+        if ret_self.selection:
+            apply_fixed_architecture(ret_self._model, ret_self.selection, verbose=False)
         ret_self.to(self.device)
         return ret_self
 
diff --git a/configs/nodeclf_nas_darts_benchmark.yml b/configs/nodeclf_nas_darts_benchmark.yml
new file mode 100644
index 0000000..154bffa
--- /dev/null
+++ b/configs/nodeclf_nas_darts_benchmark.yml
@@ -0,0 +1,42 @@
+ensemble:
+  name: null
+feature:
+- name: PYGNormalizeFeatures
+hpo:
+  max_evals: 10
+  name: random
+nas:
+  space:
+    name: singlepath
+    hidden_dim: 64
+    layer_number: 2
+    dropout: 0.8
+    ops: ['gcn', 'gat', 'linear']
+  algorithm:
+    name: darts
+    num_epochs: 200
+  estimator:
+    name: oneshot
+models: []
+trainer:
+  hp_space:
+  - maxValue: 300
+    minValue: 100
+    parameterName: max_epoch
+    scalingType: LINEAR
+    type: INTEGER
+  - maxValue: 30
+    minValue: 10
+    parameterName: early_stopping_round
+    scalingType: LINEAR
+    type: INTEGER
+  - maxValue: 0.05
+    minValue: 0.01
+    parameterName: lr
+    scalingType: LOG
+    type: DOUBLE
+  - maxValue: 0.0005
+    minValue: 5.0e-05
+    parameterName: weight_decay
+    scalingType: LOG
+    type: DOUBLE
diff --git a/configs/nodeclf_nas_enas_benchmark.yml b/configs/nodeclf_nas_enas_benchmark.yml
new file mode 100644
index 0000000..59d0767
--- /dev/null
+++
b/configs/nodeclf_nas_enas_benchmark.yml
@@ -0,0 +1,42 @@
+ensemble:
+  name: null
+feature:
+- name: PYGNormalizeFeatures
+hpo:
+  max_evals: 10
+  name: random
+nas:
+  space:
+    name: singlepath
+    hidden_dim: 64
+    layer_number: 2
+    dropout: 0.8
+    ops: ['gcn', 'gat', 'linear']
+  algorithm:
+    name: enas
+    num_epochs: 200
+  estimator:
+    name: oneshot
+models: []
+trainer:
+  hp_space:
+  - maxValue: 300
+    minValue: 100
+    parameterName: max_epoch
+    scalingType: LINEAR
+    type: INTEGER
+  - maxValue: 30
+    minValue: 10
+    parameterName: early_stopping_round
+    scalingType: LINEAR
+    type: INTEGER
+  - maxValue: 0.05
+    minValue: 0.01
+    parameterName: lr
+    scalingType: LOG
+    type: DOUBLE
+  - maxValue: 0.0005
+    minValue: 5.0e-05
+    parameterName: weight_decay
+    scalingType: LOG
+    type: DOUBLE
diff --git a/configs/nodeclf_nas_macro_benchmark.yml b/configs/nodeclf_nas_macro_benchmark.yml
new file mode 100644
index 0000000..f6e9fc2
--- /dev/null
+++ b/configs/nodeclf_nas_macro_benchmark.yml
@@ -0,0 +1,40 @@
+ensemble:
+  name: null
+feature:
+- name: PYGNormalizeFeatures
+hpo:
+  max_evals: 10
+  name: random
+nas:
+  space:
+    name: graphnasmacro
+    hidden_dim: 64
+    layer_number: 2
+  algorithm:
+    name: rl
+    num_epochs: 200
+  estimator:
+    name: scratch
+models: []
+trainer:
+  hp_space:
+  - maxValue: 300
+    minValue: 100
+    parameterName: max_epoch
+    scalingType: LINEAR
+    type: INTEGER
+  - maxValue: 30
+    minValue: 10
+    parameterName: early_stopping_round
+    scalingType: LINEAR
+    type: INTEGER
+  - maxValue: 0.05
+    minValue: 0.01
+    parameterName: lr
+    scalingType: LOG
+    type: DOUBLE
+  - maxValue: 0.0005
+    minValue: 5.0e-05
+    parameterName: weight_decay
+    scalingType: LOG
+    type: DOUBLE
diff --git a/examples/test_nas.py b/examples/test_nas.py
new file mode 100644
index 0000000..57dad32
--- /dev/null
+++ b/examples/test_nas.py
@@ -0,0 +1,22 @@
+import sys
+sys.path.append('../')
+from autogl.datasets import build_dataset_from_name
+from autogl.solver import AutoNodeClassifier
+from autogl.module.train import Acc
+from autogl.solver.utils import set_seed
+import argparse
+
+if __name__ == '__main__':
+    set_seed(202106)
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config', type=str, default='../configs/nodeclf_nas_darts_benchmark.yml')
+    parser.add_argument('--dataset', choices=['cora', 'citeseer', 'pubmed'], default='cora', type=str)
+
+    args = parser.parse_args()
+
+    dataset = build_dataset_from_name(args.dataset)
+    solver = AutoNodeClassifier.from_config(args.config)
+    solver.fit(dataset)
+    solver.get_leaderboard().show()
+    out = solver.predict_proba()
+    print('acc on dataset', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy()))

From 342849cfba8bcd3045a9d62015f8b55f8b42db70 Mon Sep 17 00:00:00 2001
From: CoreLeader
Date: Wed, 30 Jun 2021 15:32:00 +0800
Subject: [PATCH 119/144] Revise example and documents for sampling

---
 autogl/module/train/__init__.py | 3 +
 .../node_classification_sampled_trainer.py | 371 +++++++++++++++++-
 .../sampling/sampler/graphsaint_sampler.py | 88 +++++
 .../layer_dependent_importance_sampler.py | 63 ++-
 .../sampling/sampler/neighbor_sampler.py | 53 +++
 .../sampler/target_dependant_sampler.py | 97 ++++-
 configs/nodeclf_ladies_reproduction.yml | 78 ++++
 examples/node_classification.py | 11 +-
 examples/nodeclf_reproducing_ladies.py | 64 +++
 9 files changed, 779 insertions(+), 49 deletions(-)
 create mode 100644 configs/nodeclf_ladies_reproduction.yml
 create mode 100644 examples/nodeclf_reproducing_ladies.py

diff --git a/autogl/module/train/__init__.py
b/autogl/module/train/__init__.py
index 33fa5ef..cacdc8d 100644
--- a/autogl/module/train/__init__.py
+++ b/autogl/module/train/__init__.py
@@ -36,6 +36,9 @@ __all__ = [
     "BaseLinkPredictionTrainer",
     "GraphClassificationFullTrainer",
     "NodeClassificationFullTrainer",
+    "NodeClassificationGraphSAINTTrainer",
+    "NodeClassificationLayerDependentImportanceSamplingTrainer",
+    "NodeClassificationNeighborSamplingTrainer",
     "LinkPredictionTrainer",
     "Acc",
     "Auc",
diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py
index dac3792..9ccdfce 100644
--- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py
+++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py
@@ -61,6 +61,34 @@ class _DeterministicNeighborSamplerStore:
 
 @register_trainer("NodeClassificationGraphSAINTTrainer")
 class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer):
+    """
+    The node classification trainer utilizing the GraphSAINT technique.
+
+    Parameters
+    ------------
+    model: ``BaseModel`` or ``str``
+        The name or class of model adopted
+    num_features: ``int``
+        number of features for each node provided by dataset
+    num_classes: ``int``
+        number of classes to classify
+    optimizer: ``Optimizer`` or ``str``
+        The (name of) optimizer used to train and predict.
+    lr: ``float``
+        The learning rate of node classification task.
+    max_epoch: ``int``
+        The max number of epochs in training.
+    early_stopping_round: ``int``
+        The round of early stop.
+    weight_decay: ``float``
+        The weight decay argument for optimizer
+    device: ``torch.device`` or ``str``
+        The device where model will be running on.
+    init: ``bool``
+        If True(False), the model will (not) be initialized.
+    feval: ``str``.
+        The evaluation method adopted in this function.
+    """
     def __init__(
         self,
         model: _typing.Union[BaseModel, str],
@@ -359,10 +387,21 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer):
     ):
         """
         The function of predicting the probability on the given dataset.
-        :param dataset: The node classification dataset used to be predicted.
-        :param mask:
-        :param in_log_format:
-        :return:
+
+        Parameters
+        ----------
+        dataset:
+            The dataset containing conventional data of integral graph
+            adopted to train for node classification.
+        mask: ``str``
+            The `str` of ``train``, ``val``, or ``test``,
+            representing the identifier for specific dataset mask.
+        in_log_format: ``bool``.
+            If True(False), the probability will (not) be log format.
+
+        Returns
+        -------
+        The prediction result.
         """
         data = dataset[0].to(torch.device("cpu"))
         if mask is not None and type(mask) == str:
@@ -380,6 +419,22 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer):
         return result if in_log_format else torch.exp(result)
 
     def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor:
+        """
+        The function of predicting on the given dataset.
+
+        Parameters
+        ----------
+        dataset:
+            The dataset containing conventional data of integral graph
+            adopted to train for node classification.
+        mask: ``str``
+            The `str` of ``train``, ``val``, or ``test``,
+            representing the identifier for specific dataset mask.
+
+        Returns
+        -------
+        The prediction result of ``predict_proba``.
+        """
         return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1]
 
     def evaluate(
         self,
         dataset,
         mask: _typing.Optional[str] = None,
         feval: _typing.Union[
             None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
         ] = None,
     ) -> _typing.Sequence[float]:
+        """
+        The function of evaluating the model on the given dataset.
+
+        Parameters
+        ------------
+        dataset:
+            The dataset containing conventional data of integral graph
+            adopted to train for node classification.
+        mask: ``str``
+            The `str` of ``train``, ``val``, or ``test``,
+            representing the identifier for specific dataset mask.
+        feval: ``str``.
+            The evaluation method adopted in this function.
+
+        Returns
+        -------
+        result: The evaluation result on the given dataset.
+        """
         data = dataset[0]
         data = data.to(self.device)
         if feval is None:
@@ -447,8 +520,18 @@
     def train(self, dataset, keep_valid_result: bool = True):
         """
         The function of training on the given dataset and keeping valid result.
-        :param dataset:
-        :param keep_valid_result: Whether to save the validation result after training
+
+        Parameters
+        ------------
+        dataset:
+            The dataset containing conventional data of integral graph
+            adopted to train for node classification.
+        keep_valid_result: ``bool``
+            If True(False), save the validation result after training.
+
+        Returns
+        --------
+        None
         """
         import gc
         gc.collect()
@@ -472,6 +555,19 @@
         _typing.Tuple[float, bool],
         _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]]
     ]:
+        """
+        The function of getting the valid score.
+
+        Parameters
+        ----------
+        return_major: ``bool``.
+            If True, the return only consists of the major result.
+            If False, the return consists of all the results.
+
+        Returns
+        -------
+        result: The valid score.
+        """
         if return_major:
             return self._valid_score[0], self.feval[0].is_higher_better()
         else:
@@ -509,6 +605,21 @@
         hp: _typing.Dict[str, _typing.Any],
         model: _typing.Optional[BaseModel] = None,
     ) -> "NodeClassificationGraphSAINTTrainer":
+        """
+        The function of duplicating a new instance from the given hyper-parameter.
+
+        Parameters
+        ------------
+        hp: ``dict``.
+            The hyper-parameter settings for the new instance.
+        model: ``BaseModel``
+            The name or class of model adopted
+
+        Returns
+        --------
+        instance: ``NodeClassificationGraphSAINTTrainer``
+            A new instance of trainer.
+        """
         if model is None or not isinstance(model, BaseModel):
             model: BaseModel = self.model
         model = model.from_hyper_parameter(
@@ -536,6 +647,34 @@
 
 @register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer")
 class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer):
+    """
+    The node classification trainer utilizing the layer-dependent importance sampling technique.
+
+    Parameters
+    ------------
+    model: ``BaseModel`` or ``str``
+        The name or class of model adopted
+    num_features: ``int``
+        number of features for each node provided by dataset
+    num_classes: ``int``
+        number of classes to classify
+    optimizer: ``Optimizer`` or ``str``
+        The (name of) optimizer used to train and predict.
+    lr: ``float``
+        The learning rate of node classification task.
+    max_epoch: ``int``
+        The max number of epochs in training.
+ early_stopping_round: ``int`` + The round of early stop. + weight_decay: ``float`` + The weight decay argument for optimizer + device: ``torch.device`` or ``str`` + The device where model will be running on. + init: ``bool`` + If True(False), the model will (not) be initialized. + feval: ``str``. + The evaluation method adopted in this function. + """ def __init__( self, model: _typing.Union[BaseModel, str], @@ -912,10 +1051,21 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi ): """ The function of predicting the probability on the given dataset. - :param dataset: The node classification dataset used to be predicted. - :param mask: - :param in_log_format: - :return: + + Parameters + ---------- + dataset: + The dataset containing conventional data of integral graph + adopted to train for node classification. + mask: . + The `str` of ``train``, ``val``, or ``test``, + representing the identifier for specific dataset mask. + in_log_format: ``bool``. + If True(False), the probability will (not) be log format. + + Returns + ------- + The prediction result. """ data = dataset[0].to(torch.device("cpu")) if mask is not None and type(mask) == str: @@ -933,6 +1083,22 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi return result if in_log_format else torch.exp(result) def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: + """ + The function of predicting on the given dataset. + + Parameters + ---------- + dataset: + The dataset containing conventional data of integral graph + adopted to train for node classification. + mask: . + The `str` of ``train``, ``val``, or ``test``, + representing the identifier for specific dataset mask. + + Returns + ------- + The prediction result of ``predict_proba``. + """ return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] def evaluate( @@ -943,6 +1109,24 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] ] = None, ) -> _typing.Sequence[float]: + """ + The function of training on the given dataset and keeping valid result. + + Parameters + ------------ + dataset: + The dataset containing conventional data of integral graph + adopted to train for node classification. + mask: . + The `str` of ``train``, ``val``, or ``test``, + representing the identifier for specific dataset mask. + feval: ``str``. + The evaluation method adopted in this function. + + Returns + ------- + result: The evaluation result on the given dataset. + """ data = dataset[0] data = data.to(self.device) if feval is None: @@ -973,8 +1157,18 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi def train(self, dataset, keep_valid_result: bool = True): """ The function of training on the given dataset and keeping valid result. - :param dataset: - :param keep_valid_result: Whether to save the validation result after training + + Parameters + ------------ + dataset: + The dataset containing conventional data of integral graph + adopted to train for node classification. + keep_valid_result: ``bool`` + If True(False), save the validation result after training. + + Returns + -------- + None """ import gc gc.collect() @@ -999,6 +1193,19 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi _typing.Tuple[float, bool], _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] ]: + """ + The function of getting the valid score. 
+ + Parameters + ---------- + return_major: ``bool``. + If True, the return only consists of the major result. + If False, the return consists of the all results. + + Returns + ------- + result: The valid score. + """ if return_major: return self._valid_score[0], self.feval[0].is_higher_better() else: @@ -1035,6 +1242,21 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi hp: _typing.Dict[str, _typing.Any], model: _typing.Optional[BaseModel] = None, ) -> "NodeClassificationLayerDependentImportanceSamplingTrainer": + """ + The function of duplicating a new instance from the given hyper-parameter. + + Parameters + ------------ + hp: ``dict``. + The hyper-parameter settings for the new instance. + model: ``BaseModel`` + The name or class of model adopted + + Returns + -------- + instance: ``NodeClassificationLayerDependentImportanceSamplingTrainer`` + A new instance of trainer. + """ if model is None or not isinstance(model, BaseModel): model: BaseModel = self.model model = model.from_hyper_parameter( @@ -1060,8 +1282,36 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi ) -@register_trainer("NodeClassificationNeighborSampling") +@register_trainer("NodeClassificationNeighborSamplingTrainer") class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): + """ + The node classification trainer utilizing Layer dependent importance sampling technique. + + Parameters + ------------ + model: ``BaseModel`` or ``str`` + The name or class of model adopted + num_features: ``int`` + number of features for each node provided by dataset + num_classes: ``int`` + number of classes to classify + optimizer: ``Optimizer`` of ``str`` + The (name of) optimizer used to train and predict. + lr: ``float`` + The learning rate of link prediction task. + max_epoch: ``int`` + The max number of epochs in training. + early_stopping_round: ``int`` + The round of early stop. + weight_decay: ``float`` + The weight decay argument for optimizer + device: ``torch.device`` or ``str`` + The device where model will be running on. + init: ``bool`` + If True(False), the model will (not) be initialized. + feval: ``str``. + The evaluation method adopted in this function. + """ def __init__( self, model: _typing.Union[BaseModel, str], @@ -1422,10 +1672,21 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ): """ The function of predicting the probability on the given dataset. - :param dataset: The node classification dataset used to be predicted. - :param mask: - :param in_log_format: - :return: + + Parameters + ---------- + dataset: + The dataset containing conventional data of integral graph + adopted to train for node classification. + mask: . + The `str` of ``train``, ``val``, or ``test``, + representing the identifier for specific dataset mask. + in_log_format: ``bool``. + If True(False), the probability will (not) be log format. + + Returns + ------- + The prediction result. """ data = dataset[0].to(torch.device("cpu")) if mask is not None and type(mask) == str: @@ -1443,6 +1704,22 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): return result if in_log_format else torch.exp(result) def predict(self, dataset, mask: _typing.Optional[str] = None) -> torch.Tensor: + """ + The function of predicting on the given dataset. + + Parameters + ---------- + dataset: + The dataset containing conventional data of integral graph + adopted to train for node classification. + mask: . 
+ The `str` of ``train``, ``val``, or ``test``, + representing the identifier for specific dataset mask. + + Returns + ------- + The prediction result of ``predict_proba``. + """ return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] def evaluate( @@ -1453,6 +1730,24 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] ] = None, ) -> _typing.Sequence[float]: + """ + The function of training on the given dataset and keeping valid result. + + Parameters + ------------ + dataset: + The dataset containing conventional data of integral graph + adopted to train for node classification. + mask: . + The `str` of ``train``, ``val``, or ``test``, + representing the identifier for specific dataset mask. + feval: ``str``. + The evaluation method adopted in this function. + + Returns + ------- + result: The evaluation result on the given dataset. + """ data = dataset[0] data = data.to(self.device) if feval is None: @@ -1486,8 +1781,18 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): def train(self, dataset, keep_valid_result: bool = True): """ The function of training on the given dataset and keeping valid result. - :param dataset: - :param keep_valid_result: Whether to save the validation result after training + + Parameters + ------------ + dataset: + The dataset containing conventional data of integral graph + adopted to train for node classification. + keep_valid_result: ``bool`` + If True(False), save the validation result after training. + + Returns + -------- + None """ import gc gc.collect() @@ -1512,6 +1817,19 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): _typing.Tuple[float, bool], _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] ]: + """ + The function of getting the valid score. + + Parameters + ---------- + return_major: ``bool``. + If True, the return only consists of the major result. + If False, the return consists of the all results. + + Returns + ------- + result: The valid score. + """ if return_major: return self._valid_score[0], self.feval[0].is_higher_better() else: @@ -1548,6 +1866,21 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): hp: _typing.Dict[str, _typing.Any], model: _typing.Optional[BaseModel] = None, ) -> "NodeClassificationNeighborSamplingTrainer": + """ + The function of duplicating a new instance from the given hyper-parameter. + + Parameters + ------------ + hp: ``dict``. + The hyper-parameter settings for the new instance. + model: ``BaseModel`` + The name or class of model adopted + + Returns + -------- + instance: ``NodeClassificationLayerDependentImportanceSamplingTrainer`` + A new instance of trainer. + """ if model is None or not isinstance(model, BaseModel): model: BaseModel = self.model model = model.from_hyper_parameter( diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py index 19936c0..8948990 100644 --- a/autogl/module/train/sampling/sampler/graphsaint_sampler.py +++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py @@ -2,11 +2,51 @@ import torch_geometric class GraphSAINTSamplerFactory: + """ + A simple factory class for creating varieties of + :class:`torch_geometric.data.GraphSAINTSampler`. 
+    There exists a potential sampling performance issue in
+    the implementation of :class:`torch_geometric.data.GraphSAINTEdgeSampler`
+    provided by PyTorch Geometric. According to the original literature
+    `"GraphSAINT: Graph Sampling Based Inductive Learning Method"
+    `_ which introduces the GraphSAINT approach,
+    the ultimate performance of the GraphSAINT Edge Sampler and the
+    GraphSAINT Random Walk Sampler is similar; moreover, when the walk length
+    for the GraphSAINT Random Walk Sampler is specified as `2`,
+    the random walk operation effectively selects edges,
+    so an efficient reimplementation of the GraphSAINT Edge Sampler
+    is not urgently needed.
+    Meanwhile, the varieties of Subgraph-wise sampling are scheduled to be
+    redesigned and refactored, with the aim of abstracting a unified sampling
+    module for the representative mainstream varieties of
+    Node-wise Sampling, Layer-wise Sampling, and Subgraph-wise Sampling.
+    """
     @classmethod
     def create_node_sampler(
             cls, data, num_graphs_per_epoch: int, node_budget: int,
             sample_coverage_factor: int = 50, **kwargs
     ) -> torch_geometric.data.GraphSAINTNodeSampler:
+        """
+        A simple factory method for instantiating :class:`torch_geometric.data.GraphSAINTNodeSampler`
+        with more explicit arguments.
+
+        Arguments
+        ------------
+        data:
+            The conventional data of integral graph for sampling.
+        num_graphs_per_epoch:
+            number of subgraphs to sample per epoch.
+        node_budget:
+            budget of nodes to sample for one sampled subgraph.
+        sample_coverage_factor:
+            The average number of samples per node used to
+            compute normalization statistics.
+        **kwargs:
+            Additional optional arguments of :class:`torch.utils.data.DataLoader`,
+            including :obj:`batch_size` or :obj:`num_workers`.
+
+        Returns
+        --------
+        Instance of :class:`torch_geometric.data.GraphSAINTNodeSampler`.
+        """
         return torch_geometric.data.GraphSAINTNodeSampler(
             data, node_budget, num_graphs_per_epoch, sample_coverage_factor,
             log=False, **kwargs
@@ -17,6 +57,29 @@ class GraphSAINTSamplerFactory:
             cls, data, num_graphs_per_epoch: int, edge_budget: int,
             sample_coverage_factor: int = 50, **kwargs
     ) -> torch_geometric.data.GraphSAINTEdgeSampler:
+        """
+        A simple factory method for instantiating :class:`torch_geometric.data.GraphSAINTEdgeSampler`
+        with more explicit arguments.
+
+        Arguments
+        ------------
+        data:
+            The conventional data of integral graph for sampling.
+        num_graphs_per_epoch:
+            number of subgraphs to sample per epoch.
+        edge_budget:
+            budget of edges to sample for one sampled subgraph.
+        sample_coverage_factor:
+            The average number of samples per node used to
+            compute normalization statistics.
+        **kwargs:
+            Additional optional arguments of :class:`torch.utils.data.DataLoader`,
+            including :obj:`batch_size` or :obj:`num_workers`.
+
+        Returns
+        --------
+        Instance of :class:`torch_geometric.data.GraphSAINTEdgeSampler`.
+        """
         return torch_geometric.data.GraphSAINTEdgeSampler(
             data, edge_budget, num_graphs_per_epoch, sample_coverage_factor,
             log=False, **kwargs
@@ -28,6 +91,31 @@ class GraphSAINTSamplerFactory:
             num_walks: int, walk_length: int,
             sample_coverage_factor: int = 50, **kwargs
     ) -> torch_geometric.data.GraphSAINTRandomWalkSampler:
+        """
+        A simple factory method for instantiating :class:`torch_geometric.data.GraphSAINTRandomWalkSampler`
+        with more explicit arguments.
+
+        Arguments
+        ------------
+        data:
+            The conventional data of integral graph for sampling.
+        num_graphs_per_epoch:
+            number of subgraphs to sample per epoch.
+        num_walks:
+            The number of random walks for sampling.
+        walk_length:
+            The length of each random walk.
+        sample_coverage_factor:
+            The average number of samples per node used to
+            compute normalization statistics.
+        **kwargs:
+            Additional optional arguments of :class:`torch.utils.data.DataLoader`,
+            including :obj:`batch_size` or :obj:`num_workers`.
+
+        Returns
+        --------
+        Instance of :class:`torch_geometric.data.GraphSAINTRandomWalkSampler`.
+        """
         return torch_geometric.data.GraphSAINTRandomWalkSampler(
             data, num_walks, walk_length, num_graphs_per_epoch,
             sample_coverage_factor, log=False, **kwargs
diff --git a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py
index 45c8f69..ffb7df5 100644
--- a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py
+++ b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py
@@ -8,6 +8,9 @@ from . import target_dependant_sampler
 
 
 class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTargetDependantSampler):
+    """
+    Obsolete implementation, unused
+    """
     class _Utility:
         @classmethod
         def compute_edge_weights(cls, __all_edge_index_with_self_loops: torch.Tensor) -> torch.Tensor:
@@ -198,6 +201,38 @@ class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTa
 
 
 class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTargetDependantSampler):
+    """
+    The layer-dependent importance sampler from the
+    `"Layer-Dependent Importance Sampling for Training Deep and Large Graph Convolutional Networks"
+    `_ literature, which allows
+    for mini-batch training of GNNs on large-scale graphs where full-batch training is not feasible.
+
+    Arguments
+    ------------
+    edge_index:
+        A :obj:`torch.LongTensor` that defines the underlying graph
+        connectivity/message passing flow.
+        :obj:`edge_index` holds the indices of a (sparse) adjacency matrix.
+        If :obj:`edge_index` is of type :obj:`torch.LongTensor`, its shape
+        must be defined as :obj:`[2, num_edges]`, where messages from nodes
+        :obj:`edge_index[0]` are sent to nodes in :obj:`edge_index[1]`
+        (in case :obj:`flow="source_to_target"`).
+    target_nodes_indexes:
+        indexes of target nodes to learn representation.
+    layer_wise_arguments:
+        The number of nodes to sample for each layer.
+        It is noteworthy that the target nodes for a specific layer
+        are always preserved as source nodes for that layer,
+        such that the self loops for those target nodes
+        are generally preserved for representation learning.
+    batch_size:
+        number of target nodes for each mini-batch.
+    num_workers:
+        num_workers argument for the inner :class:`torch.utils.data.DataLoader`
+    shuffle:
+        whether to shuffle target nodes for mini-batches.
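+
+    Example
+    --------
+    A minimal usage sketch, for illustration only; within AutoGL the
+    ``NodeClassificationLayerDependentImportanceSamplingTrainer`` constructs
+    this sampler automatically, and the budgets below are placeholder values
+    assuming a PyG-style ``data`` object with ``edge_index`` and ``train_mask``:
+
+    .. code-block:: python
+
+        target_nodes = data.train_mask.nonzero().flatten()
+        # sample a budget of 512 source nodes for each of the two layers
+        sampler = LayerDependentImportanceSampler(
+            data.edge_index, target_nodes, [512, 512],
+            batch_size=512, num_workers=0, shuffle=True,
+        )
+        for sampled_data in sampler:
+            ...  # one TargetDependantSampledData per mini-batch of targets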
+ """ + @classmethod def __compute_edge_weight(cls, edge_index: torch.Tensor) -> torch.Tensor: __num_nodes: int = max(int(edge_index[0].max()), int(edge_index[1].max())) + 1 @@ -274,13 +309,27 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar layer_argument: _typing.Any, *args, **kwargs ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: """ - Sample edges for specific layer - :param __current_layer_target_nodes_indexes: target nodes for current layer - :param __top_layer_target_nodes_indexes: target nodes for top layer - :param layer_argument: sampled_source_nodes_budget - :param args: remaining positional arguments - :param kwargs: remaining keyword arguments - :return: (edge_id_in_integral_graph, edge_weight) + Sample edges for one specific layer, expected to be implemented in subclass. + + Parameters + ------------ + __current_layer_target_nodes_indexes: + target nodes for current layer + __top_layer_target_nodes_indexes: + target nodes for top layer + layer_argument: + argument for current layer + args: + remaining positional arguments + kwargs: + remaining keyword arguments + + Returns + -------- + edge_id_in_integral_graph: + the corresponding positional indexes for the `edge_index` of integral graph + edge_weight: + the optional `edge_weight` for aggregation """ __wrapped_result: _typing.Tuple[np.ndarray, np.ndarray, np.ndarray] = self.__sample_edges( __current_layer_target_nodes_indexes.numpy(), diff --git a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index 540a36a..9d98b88 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -5,6 +5,34 @@ from .target_dependant_sampler import TargetDependantSampler, TargetDependantSam class NeighborSampler(TargetDependantSampler, _typing.Iterable): + """ + The neighbor sampler from the `"Inductive Representation Learning on + Large Graphs" `_ literature, which allows + for mini-batch training of GNNs on large-scale graphs where full-batch + training is not feasible. + + Arguments + ------------ + edge_index: + A :obj:`torch.LongTensor` that defines the underlying graph + connectivity/message passing flow. + :obj:`edge_index` holds the indices of a (sparse) adjacency matrix. + If :obj:`edge_index` is of type :obj:`torch.LongTensor`, its shape + must be defined as :obj:`[2, num_edges]`, where messages from nodes + :obj:`edge_index[0]` are sent to nodes in :obj:`edge_index[1]` + (in case :obj:`flow="source_to_target"`). + target_nodes_indexes: + indexes of target nodes to learn representation. + sampling_sizes: + The number of neighbors to sample for each node in each layer. + If set to :obj:`sampling_sizes[l] = -1`, all neighbors are included in layer :obj:`l`. + batch_size: + number of target nodes for each mini-batch. + num_workers: + num_workers argument for inner :class:`torch.utils.data.DataLoader` + shuffle: + whether to shuffle target nodes for mini-batches. 
+ """ class _SequenceDataset(torch.utils.data.Dataset): def __init__(self, sequence): self.__sequence = sequence @@ -118,6 +146,31 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): batch_size: int = 1, num_workers: int = 1, shuffle: bool = True, *args, **kwargs ) -> TargetDependantSampler: + """ + A static factory method to create instance of :class:`NeighborSampler` + + Arguments + ------------ + edge_index: + A :obj:`torch.LongTensor` that defines the underlying graph + connectivity/message passing flow. + :obj:`edge_index` holds the indices of a (sparse) adjacency matrix. + If :obj:`edge_index` is of type :obj:`torch.LongTensor`, its shape + must be defined as :obj:`[2, num_edges]`, where messages from nodes + :obj:`edge_index[0]` are sent to nodes in :obj:`edge_index[1]` + (in case :obj:`flow="source_to_target"`). + target_nodes_indexes: + indexes of target nodes to learn representation. + layer_wise_arguments: + The number of neighbors to sample for each node in each layer. + If set to :obj:`sampling_sizes[l] = -1`, all neighbors are included in layer :obj:`l`. + batch_size: + number of target nodes for each mini-batch. + num_workers: + num_workers argument for inner :class:`torch.utils.data.DataLoader` + shuffle: + whether to shuffle target nodes for mini-batches. + """ return cls( edge_index, target_nodes_indexes, layer_wise_arguments, batch_size, num_workers, shuffle, **kwargs diff --git a/autogl/module/train/sampling/sampler/target_dependant_sampler.py b/autogl/module/train/sampling/sampler/target_dependant_sampler.py index a506323..f6b3547 100644 --- a/autogl/module/train/sampling/sampler/target_dependant_sampler.py +++ b/autogl/module/train/sampling/sampler/target_dependant_sampler.py @@ -3,6 +3,41 @@ import typing as _typing class TargetDependantSampledData: + """ + A uniform aggregation of sampled data for one mini-batch, + generally sampler by target-dependent sampler. + Node-wise Sampling and Layer-wise Sampling techniques are definitely target-dependent, + for which each sampled subgraph depends on the corresponding target nodes. + Besides, the Subgraph-wise Sampling mechanism can also be treated as target-dependent, + however, each set of target nodes for Subgraph-wise Sampling is determined by the sampled graph. + + Parameters + ------------ + sampled_edges_for_layers: + A sequence of tuple denoted as + `( edge_index_for_sampled_graph, edge_id_in_integral_graph, (optional)edge_weight )`, + where the `edge_index_for_sampled_graph` represents the sampled `edge_index` for sampled subgraph, + the `edge_id_in_integral_graph` represents + the corresponding positional indexes for the `edge_index` of integral graph, + and the optional `edge_weight` for aggregation can also be provided. + target_nodes_indexes: + A tuple consists of (`torch.Tensor`, `torch.Tensor`), + in which the first element represents the indexes of target nodes in sampled subgraph, + and the second element represents the indexes of target nodes in the integral graph. + all_sampled_nodes_indexes: + Indexes of all sampled nodes for mini-batch. + + Attributes + ------------ + target_nodes_indexes: + A combined aggregation composed of + `indexes_in_sampled_graph` and `indexes_in_integral_graph` + all_sampled_nodes_indexes: + Indexes of all sampled nodes for mini-batch. + sampled_edges_for_layers: + The stored sequence of tuple + `( edge_index_for_sampled_graph, edge_id_in_integral_graph, (optional)edge_weight )`. 
+ """ class _LayerSampledEdgeData: def __init__( self, edge_index_for_sampled_graph: torch.Tensor, @@ -77,16 +112,6 @@ class TargetDependantSampledData: target_nodes_indexes: _typing.Tuple[torch.Tensor, torch.Tensor], all_sampled_nodes_indexes: torch.Tensor ): - """ - - :param sampled_edges_for_layers: Sequence of tuple ( - edge_index_for_sampled_graph, - edge_id_in_integral_graph, - optional edge_weight - ) - :param target_nodes_indexes: (indexes_in_sampled_data, indexes_in_integral_data) - :param all_sampled_nodes_indexes: torch.Tensor - """ self.__sampled_edges_for_layers: _typing.Sequence[ TargetDependantSampledData._LayerSampledEdgeData ] = [ @@ -100,6 +125,9 @@ class TargetDependantSampledData: class TargetDependantSampler(torch.utils.data.DataLoader, _typing.Iterable): + """ + An abstract base class for various target-dependent sampler + """ @classmethod def create_basic_sampler( cls, edge_index: torch.LongTensor, @@ -126,6 +154,27 @@ class TargetDependantSampler(torch.utils.data.DataLoader, _typing.Iterable): class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): + """ + The base class for various Layer-wise Sampling techniques, + providing basic functionality of composing sampled data for mini-batches. + + Parameters + ------------ + edge_index: + edge index of integral graph + target_nodes_indexes: + indexes of target nodes in the integral graph + layer_wise_arguments: + layer-wise arguments for sampling + batch_size: + batch size for target nodes + num_workers: + number of workers + shuffle: + flag for shuffling, default to True + kwargs: + remaining keyword arguments + """ def __init__( self, edge_index: torch.LongTensor, target_nodes_indexes: torch.LongTensor, @@ -173,13 +222,27 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): layer_argument: _typing.Any, *args, **kwargs ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: """ - Sample edges for one layer - :param __current_layer_target_nodes_indexes: target nodes for current layer - :param __top_layer_target_nodes_indexes: target nodes for top layer - :param layer_argument: argument for current layer - :param args: remaining positional arguments - :param kwargs: remaining keyword arguments - :return: (edge_id_in_integral_graph, edge_weight) + Sample edges for one specific layer, expected to be implemented in subclass. 
+ + Parameters + ------------ + __current_layer_target_nodes_indexes: + target nodes for current layer + __top_layer_target_nodes_indexes: + target nodes for top layer + layer_argument: + argument for current layer + args: + remaining positional arguments + kwargs: + remaining keyword arguments + + Returns + -------- + edge_id_in_integral_graph: + the corresponding positional indexes for the `edge_index` of integral graph + edge_weight: + the optional `edge_weight` for aggregation """ raise NotImplementedError diff --git a/configs/nodeclf_ladies_reproduction.yml b/configs/nodeclf_ladies_reproduction.yml new file mode 100644 index 0000000..e8989b2 --- /dev/null +++ b/configs/nodeclf_ladies_reproduction.yml @@ -0,0 +1,78 @@ +ensemble: + name: null +feature: +- name: PYGNormalizeFeatures +hpo: + max_evals: 20 + name: random +models: +- hp_space: + - feasiblePoints: [5] + parameterName: num_layers + type: CATEGORICAL + - parameterName: hidden + type: CATEGORICAL_LIST + cutFunc: lambda x:x[0] - 1 + cutPara: [num_layers] + length: 4 + feasiblePoints: [256] + - cutFunc: lambda x:x[0] + cutPara: [num_layers] + length: 5 + maxValue: [0.2, 0.2, 0.2, 0.2, 0.6] + minValue: [0.2, 0.2, 0.2, 0.2, 0.6] + numericalType: DOUBLE + parameterName: dropout + scalingType: LINEAR + type: NUMERICAL_LIST + - feasiblePoints: [elu] + parameterName: act + type: CATEGORICAL + - parameterName: add_self_loops + type: FIXED + value: 0 + - parameterName: normalize + type: FIXED + value: 0 + name: gcn +trainer: + name: NodeClassificationLayerDependentImportanceSamplingTrainer + hp_space: + - parameterName: sampled_node_sizes + type: CATEGORICAL_LIST + length: 5 + feasiblePoints: [512] + cutFunc: lambda x:x[0] + cutPara: [num_layers] + - maxValue: 128 + minValue: 64 + parameterName: max_epoch + scalingType: LINEAR + type: INTEGER + - maxValue: 12 + minValue: 6 + parameterName: early_stopping_round + scalingType: LOG + type: INTEGER + - parameterName: training_batch_size + type: FIXED + value: 512 + - parameterName: predicting_batch_size + type: FIXED + value: 1024 + - parameterName: training_sampler_num_workers + type: FIXED + value: 0 + - parameterName: predicting_sampler_num_workers + type: FIXED + value: 0 + - maxValue: 0.001 + minValue: 0.0005 + parameterName: lr + scalingType: LOG + type: DOUBLE + - maxValue: 0 + minValue: 0 + parameterName: weight_decay + scalingType: LINEAR + type: DOUBLE diff --git a/examples/node_classification.py b/examples/node_classification.py index f60950b..7a1c631 100644 --- a/examples/node_classification.py +++ b/examples/node_classification.py @@ -1,13 +1,12 @@ import sys - -sys.path.append("../") -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoNodeClassifier -from autogl.module import Acc import yaml import random -import torch +import torch.backends.cudnn import numpy as np +from autogl.datasets import build_dataset_from_name +from autogl.solver import AutoNodeClassifier +from autogl.module import Acc +sys.path.append("../") if __name__ == "__main__": diff --git a/examples/nodeclf_reproducing_ladies.py b/examples/nodeclf_reproducing_ladies.py new file mode 100644 index 0000000..fd79c6c --- /dev/null +++ b/examples/nodeclf_reproducing_ladies.py @@ -0,0 +1,64 @@ +import sys +import yaml +import random +import torch.backends.cudnn +import numpy as np +from autogl.datasets import build_dataset_from_name +from autogl.solver import AutoNodeClassifier +from autogl.module import MicroF1 +sys.path.append("../") + + 
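+# Use the file-system sharing strategy so that tensors exchanged between
+# sampler worker processes do not exhaust open-file descriptors, a known
+# limitation of the default file-descriptor strategy in torch.multiprocessing.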
+torch.multiprocessing.set_sharing_strategy('file_system')
+
+
+if __name__ == '__main__':
+    import argparse
+    argument_parser: argparse.ArgumentParser = argparse.ArgumentParser()
+    argument_parser.add_argument(
+        "--dataset",
+        default="pubmed",
+        type=str,
+        help="dataset to use",
+        choices=[
+            "cora",
+            "pubmed",
+            "citeseer",
+            "reddit"
+        ],
+    )
+    argument_parser.add_argument(
+        "--configs",
+        type=str,
+        default="../configs/nodeclf_ladies_reproduction.yml",
+        help="configuration file to adopt",
+    )
+    argument_parser.add_argument("--seed", type=int, default=0, help="random seed")
+    argument_parser.add_argument("--device", default=0, type=int, help="GPU device")
+
+    arguments = argument_parser.parse_args()
+    if torch.cuda.is_available():
+        torch.cuda.set_device(arguments.device)
+    seed = arguments.seed
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+    dataset = build_dataset_from_name(arguments.dataset)
+    configs = yaml.load(
+        open(arguments.configs, "r").read(),
+        Loader=yaml.FullLoader
+    )
+    autoClassifier = AutoNodeClassifier.from_config(configs)
+    # The running time is likely to exceed 1 hour when the CiteSeer or Reddit dataset is adopted
+    autoClassifier.fit(dataset, time_limit=24 * 3600, evaluation_method=[MicroF1])
+    autoClassifier.get_leaderboard().show()
+    predict_result = autoClassifier.predict_proba()
+    print(
+        "test micro-f1: %.4f"
+        % (MicroF1.evaluate(predict_result, dataset.data.y[dataset.data.test_mask].numpy()))
+    )
From a240af0ce5b93cb7f5c82c8113245a0d48c790b1 Mon Sep 17 00:00:00 2001
From: CoreLeader
Date: Wed, 30 Jun 2021 20:00:00 +0800
Subject: [PATCH 120/144] Revise example and tutorial for sampling

---
 docs/docfile/tutorial/t_trainer.rst    | 49 +++++++++++++++++++++++++-
 examples/node_classification.py        |  2 +-
 examples/nodeclf_reproducing_ladies.py |  2 +-
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/docs/docfile/tutorial/t_trainer.rst b/docs/docfile/tutorial/t_trainer.rst
index da59681..86979e5 100644
--- a/docs/docfile/tutorial/t_trainer.rst
+++ b/docs/docfile/tutorial/t_trainer.rst
@@ -45,6 +45,53 @@ After initializing a trainer, you can train it on the given datasets.
 We have given the training and testing functions for the tasks of node classification, graph classification, and link prediction up to now. You can also create your tasks following the similar patterns with ours. For training, you need to define ``train_only()`` and use it in ``train()``. For testing, you need to define ``predict_proba()`` and use it in ``predict()``.
 
-The evaluation funtion is defined in ``evaluate()``, you can use your our evaluation metrics and methods.
+The evaluation function is defined in ``evaluate()``, where you can use your own evaluation metrics and methods.
+
+Node Classification with Sampling
+------------------------------------
+According to various existing studies, training with spatial sampling has been demonstrated
+as an efficient technique for representation learning on large-scale graphs.
+We provide implementations for various representative sampling mechanisms including
+Neighbor Sampling, Layer Dependent Importance Sampling (LADIES), and GraphSAINT.
+With the leverage of various efficient sampling mechanisms,
+users can utilize this library on large-scale graph datasets, e.g. Reddit.
+
+Specifically, as various sampling techniques generally require the model to support
+some layer-wise processing in forwarding, currently only the provided GCN and GraphSAGE
+models are ready for Node-wise Sampling (Neighbor Sampling) and Layer-wise Sampling (LADIES).
+More models and more tasks are scheduled to support sampling in future versions.
+
+* Node-wise Sampling (GraphSAGE)
+    Both ``GCN`` and ``GraphSAGE`` models are supported.
+
+* Layer-wise Sampling (Layer Dependent Importance Sampling)
+    Only the ``GCN`` model is supported in the current version.
+
+* Subgraph-wise Sampling (GraphSAINT)
+    As the GraphSAINT sampling technique has no specific requirements for the model to adopt,
+    most of the available models are feasible for adopting the GraphSAINT technique.
+    However, the prediction process is a potential bottleneck or even obstacle
+    when the GraphSAINT technique is actually applied on a large-scale graph;
+    thus the model to adopt had better support layer-wise prediction,
+    and the provided ``GCN`` model already meets that enhanced requirement.
+    According to empirical experiments,
+    the current implementation of GraphSAINT is able to support
+    an integral graph smaller than the *Flickr* graph data.
+
+The sampling techniques can be utilized by adopting the corresponding trainer:
+``NodeClassificationGraphSAINTTrainer``,
+``NodeClassificationLayerDependentImportanceSamplingTrainer``,
+or ``NodeClassificationNeighborSamplingTrainer``.
+You can either specify the corresponding name of the trainer in a YAML configuration file
+or instantiate the solver ``AutoNodeClassifier``
+with the instance of a specific trainer passed via the ``graph_models`` argument.
+
+A brief example is demonstrated as follows:
+
+.. code-block:: python
+
+    ladies_sampling_trainer = NodeClassificationLayerDependentImportanceSamplingTrainer(
+        model='gcn', num_features=dataset.num_features, num_classes=dataset.num_classes,
+        ...
+    )
+    AutoNodeClassifier(graph_models=(ladies_sampling_trainer,), ...)
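+
+A fuller sketch is shown below. It is illustrative only: the hyper-parameter
+values are placeholders rather than recommended settings, and the trainer
+classes are assumed to be importable from ``autogl.module.train``, where
+they are exported:
+
+.. code-block:: python
+
+    from autogl.datasets import build_dataset_from_name
+    from autogl.solver import AutoNodeClassifier
+    from autogl.module.train import NodeClassificationNeighborSamplingTrainer
+
+    dataset = build_dataset_from_name('reddit')
+    sampling_trainer = NodeClassificationNeighborSamplingTrainer(
+        model='sage',
+        num_features=dataset.num_features,
+        num_classes=dataset.num_classes,
+        lr=0.01,
+        max_epoch=100,
+        early_stopping_round=10,
+        weight_decay=5e-4,
+        init=True,
+    )
+    solver = AutoNodeClassifier(graph_models=(sampling_trainer,))
+    solver.fit(dataset, time_limit=3600)
+    solver.get_leaderboard().show()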
diff --git a/examples/node_classification.py b/examples/node_classification.py index 7a1c631..f850cb5 100644 --- a/examples/node_classification.py +++ b/examples/node_classification.py @@ -3,10 +3,10 @@ import yaml import random import torch.backends.cudnn import numpy as np +sys.path.append("../") from autogl.datasets import build_dataset_from_name from autogl.solver import AutoNodeClassifier from autogl.module import Acc -sys.path.append("../") if __name__ == "__main__": diff --git a/examples/nodeclf_reproducing_ladies.py b/examples/nodeclf_reproducing_ladies.py index fd79c6c..34b3382 100644 --- a/examples/nodeclf_reproducing_ladies.py +++ b/examples/nodeclf_reproducing_ladies.py @@ -3,10 +3,10 @@ import yaml import random import torch.backends.cudnn import numpy as np +sys.path.append("../") from autogl.datasets import build_dataset_from_name from autogl.solver import AutoNodeClassifier from autogl.module import MicroF1 -sys.path.append("../") torch.multiprocessing.set_sharing_strategy('file_system') From c19b31e62565b78b17f90dcad6de0242a53f35e7 Mon Sep 17 00:00:00 2001 From: generall Date: Mon, 5 Jul 2021 10:58:46 +0800 Subject: [PATCH 121/144] use graphnas instead of rl --- configs/nodeclf_nas_benchmark.yml | 2 +- configs/nodeclf_nas_macro_benchmark.yml | 2 +- examples/test_nas.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/nodeclf_nas_benchmark.yml b/configs/nodeclf_nas_benchmark.yml index bb757c8..dd52a7f 100644 --- a/configs/nodeclf_nas_benchmark.yml +++ b/configs/nodeclf_nas_benchmark.yml @@ -11,7 +11,7 @@ nas: hidden_dim: 64 layer_number: 4 algorithm: - name: rl + name: graphnas num_epochs: 200 estimator: name: scratch diff --git a/configs/nodeclf_nas_macro_benchmark.yml b/configs/nodeclf_nas_macro_benchmark.yml index f6e9fc2..2ddb04f 100644 --- a/configs/nodeclf_nas_macro_benchmark.yml +++ b/configs/nodeclf_nas_macro_benchmark.yml @@ -11,7 +11,7 @@ nas: hidden_dim: 64 layer_number: 2 algorithm: - name: rl + name: graphnas num_epochs: 200 estimator: name: scratch diff --git a/examples/test_nas.py b/examples/test_nas.py index 57dad32..0211403 100644 --- a/examples/test_nas.py +++ b/examples/test_nas.py @@ -9,7 +9,7 @@ import argparse if __name__ == '__main__': set_seed(202106) parser = argparse.ArgumentParser() - parser.add_argument('--config', type=str, default='../configs/nodeclf_nas_darts_benchmark.yml') + parser.add_argument('--config', type=str, default='../configs/nodeclf_nas_macro_benchmark.yml') parser.add_argument('--dataset', choices=['cora', 'citeseer', 'pubmed'], default='cora', type=str) args = parser.parse_args() From ed9597d6b080f586db7e940690b79ad084d42bdc Mon Sep 17 00:00:00 2001 From: frozenmad Date: Thu, 8 Jul 2021 11:08:41 +0800 Subject: [PATCH 122/144] revise model tutorial --- docs/docfile/tutorial/t_model.rst | 291 +++++++++++++++++++++++++----- 1 file changed, 243 insertions(+), 48 deletions(-) diff --git a/docs/docfile/tutorial/t_model.rst b/docs/docfile/tutorial/t_model.rst index b1ee81e..73ae421 100644 --- a/docs/docfile/tutorial/t_model.rst +++ b/docs/docfile/tutorial/t_model.rst @@ -3,68 +3,263 @@ AutoGL Model ============ -AutoGL project uses ``model`` to define the common graph nerual networks and ``automodel`` to denote the relative class that includes some auto functions. Currently, we support the following models and automodels: +In AutoGL, we use ``model`` and ``automodel`` to define the logic of graph nerual networks and make it compatible with hyper parameter optimization. 
Currently we support the following models for given tasks. -* ``GCN`` and ``AutoGCN`` : graph convolutional network from https://arxiv.org/abs/1609.02907 -* ``GAT`` and ``AutoGAT`` : graph attentional network from https://arxiv.org/abs/1710.10903 -* ``GraphSAGE`` and ``AutoGraphSAGE`` : from the "Inductive Representation Learning on Large Graphs" https://arxiv.org/abs/1706.02216 ++----------------------+----------------------------+ +| Tasks | Models | ++======================+============================+ +| Node Classification | ``gcn``, ``gat``, ``sage`` | ++----------------------+----------------------------+ +| Graph Classification | ``gin``, ``topk`` | ++----------------------+----------------------------+ +| Link Prediction | ``gcn``, ``gat``, ``sage`` | ++----------------------+----------------------------+ -And we also support the following models and automodels for graph classification tasks: -* ``GIN`` and ``AutoGIN`` : graph isomorphism network from https://arxiv.org/abs/1810.00826 -* ``Topkpool`` and ``AutoTopkpool`` : graph U-Net from https://arxiv.org/abs/1905.05178, https://arxiv.org/abs/1905.02850 +Lazy Initialization +------------------- + +In current AutoGL pipeline, some important hyper-parameters related with model cannot be set outside before the pipeline (e.g. input dimensions, which can only be caluclated during running after feature engineered). Therefore, in ``automodel``, we use lazy initialization to initialize the core ``model``. When the ``automodel`` initialization method ``__init__()`` is called with argument ``init`` be ``False``, only (part of) the hyper-parameters will be set. The ``automodel`` will have its core ``model`` only after ``initialize()`` is explicitly called, which will be done automatically in ``solver`` and ``from_hyper_parameter()``, after all the hyper-parameters are set properly. Define your own model and automodel ----------------------------------- -If you want to add your own model and automodel for some task, the only thing you should do is add a new model where the forward function should be fulfilled and a new automodel inherited from the basemodel. +We highly recommend you to define both ``model`` and ``automodel``, although you only need your ``automodel`` to communicate with ``solver`` and ``trainer``. The ``model`` will be responsible for the parameters initialization and forward logic declaration, while the ``automodel`` will be responsible for the hyper-parameter definiton and organization. + +General customization +^^^^^^^^^^^^^^^^^^^^^ -For new models used in link prediction tasks, you should fulfill the lp_encode and lp_decode function. The difference between lp_encode and forward function is that there is not classification layer in lp_encode. +Let's say you want to implement a simple MLP for node classification and want to let AutoGL find the best hyper-parameters for you. You can first define the logics assuming all the hyper-parameters are given. + +.. 
code-block:: python + import torch + + # define mlp model, need to inherit from torch.nn.Module + class MyMLP(torch.nn.Module): + # assume you already get all the hyper-parameters + def __init__(self, in_channels, num_classes, layer_num, dim): + super().__init__() + if layer_num == 1: + ops = [torch.nn.Linear(in_channels, num_classes)] + else: + ops = [torch.nn.Linear(in_channels, dim)] + for i in range(layer_num - 2): + ops.append(torch.nn.Linear(dim, dim)) + ops.append(torch.nn.Linear(dim, num_classes)) + + self.core = torch.nn.Sequential(*ops) + + # this method is mendatory to have + def forward(self, data): + # data: torch_geometric.data.Data + assert hasattr(data, 'x'), 'MLP only support graph data with features' + x = data.x + return torch.nn.functional.log_softmax(self.core(x)) -Firstly, you should define your model if it does not belong to the models above. -Secondly, you should define your corresponding automodel. +After you define the logic of ``model``, you can now define your ``automodel`` to manage the hyper-parameters. .. code-block:: python - # 1. define your search space to self.space of your automodel instance - [ - {'parameterName': 'num_layers', 'type': 'DISCRETE', 'feasiblePoints': '2,3,4'}, - {"parameterName": 'hidden', "type": "NUMERICAL_LIST", "numericalType": "INTEGER", "length": 3, "minValue": [8, 8, 8], "maxValue": [64, 64, 64], "scalingType": "LOG"}, - {'parameterName': 'dropout', 'type': 'DOUBLE', 'maxValue': 0.9, 'minValue': 0.1, 'scalingType': 'LINEAR'}, - {'parameterName': 'act', 'type': 'CATEGORICAL_LIST', "feasiblePoints": ['leaky_relu', 'relu', 'elu', 'tanh']}, - ] - # 2. define the default point to self.hyperparams of your automodel instance - { - 'num_layers': 2, - 'hidden': [16], - 'dropout': 0.2, - 'act': 'leaky_relu' - } - -Where ``self.space`` is a list of dictionary indicating the name, type, feasible point, min/max value and some properties of the parameter. ``self.hyperparams`` is a dictionary indicating the hyper-parameters used in this model. - -Finally, you can use the defined model and automodel for the specific need. + from autogl.module.model import BaseModel + + # define your automodel, need to inherit from BaseModel + class MyAutoMLP(BaseModel): + def __init__(self): + # (mendatory) make sure you call __init__ of super with init argument properly set. + # if you do not want to initialize inside __init__, please pass False. + super().__init__(init=False) + + # (mendatory) define the search space + self.space = [ + {'parameterName': 'layer_num', 'type': 'INTEGER', 'minValue': 1, 'maxValue': 5, 'scalingType': 'LINEAR'}, + {'parameterName': 'dim', 'type': 'INTEGER', 'minValue': 64, 'maxValue': 128, 'scalingType': 'LINEAR'} + ] + + # set default hyper-parameters + self.layer_num = 2 + self.dim = 72 + + # for the hyper-parameters that are related with dataset, you can just set them to None + self.num_classes = None + self.num_features = None + + # (mendatory) since we don't know the num_classes and num_features until we see the dataset, + # we cannot initialize the models when instantiated. the initialized will be set to False. 
+ self.initialized = False + + # (mendatory) set the device of current auto model + self.device = torch.device('cuda') + + # (mendatory) get current hyper-parameters of this automodel + # need to return a dictionary whose keys are the same with self.space + def get_hyper_parameter(self): + return { + 'layer_num': self.layer_num, + 'dim': self.dim + } + + # (mendatory) override to interact with num_classes + def get_num_classes(self): + return self.num_classes + + # (mendatory) override to interact with num_classes + def set_num_classes(self, n_classes): + self.num_classes = n_classes + + # (mendatory) override to interact with num_features + def get_num_features(self): + return self.num_features + + # (mendatory) override to interact with num_features + def set_num_features(self, n_features): + self.num_features = n_features + + # (mendatory) create the core MLP model + def initialize(self): + # (mendatory) you need to make sure the core model is named as `self.model` + self.model = MyMLP( + in_channels = self.num_features, + num_classes = self.num_classes, + layer_num = self.layer_num, + dim = self.dim + ).to(self.device) + + self.initialized = True + + # (mendatory) override to create a copy of model using current hyper-parameters + def from_hyper_parameter(self, hp): + # hp is a dictionary that contains keys and values corrsponding to your self.space + # in this case, it will be in form {'layer_num': XX, 'dim': XX} + + # create a new instance + ret = self.__class__() + + # set the hyper-parameters related to dataset and device + ret.num_classes = self.num_classes + ret.num_features = self.num_features + ret.device = self.device + + # set the hyper-parameters according to hp + ret.layer_num = hp['layer_num'] + ret.dim = hp['dim'] + + # initialize it before returning + ret.initialize() + + return ret + + +Then, you can use this node classification model as part of AutoNodeClassifier ``solver``. .. code-block :: python - # for example - import torch - from .base import BaseModel - class YourGNN(torch.nn.Module): + from autogl.solver import AutoNodeClassifier + + solver = AutoNodeClassifier(graph_models=(MyAutoMLP(),)) + + +The model for graph classification is generally the same, except that you can now also receive the ``num_graph_features`` (the dimension of the graph-level feature) through overriding ``set_num_graph_features(self, n_graph_features)`` of ``BaseModel``. Also, please remember to return graph-level logits instead of node-level one in ``forward()`` of ``model``. + +Model for link prediction +^^^^^^^^^^^^^^^^^^^^^^^^^ + +For link prediction, the definition of model is a bit different with the common forward definition. You need to implement the ``lp_encode(self, data)`` and ``lp_decode(self, x, pos_edge_index, neg_edge_index)`` to interact with ``LinkPredictionTrainer`` and ``AutoLinkPredictor``. Taking the class ``MyMLP`` defined above for example, if you want to perform link prediction: + +.. 
code-block:: python + + class MyMLPForLP(torch.nn.Module): + # num_classes is removed since it is invalid for link prediction + def __init__(self, in_channels, layer_num, dim): + super().__init__() + ops = [torch.nn.Linear(in_channels, dim)] + for i in range(layer_num - 1): + ops.append(torch.nn.Linear(dim, dim)) + + self.core = torch.nn.Sequential(*ops) + + # (mendatory) for interaction with link prediction trainer and solver + def lp_encode(self, data): + return self.core(data.x) + + # (mendatory) for interaction with link prediction trainer and solver + def lp_decode(self, x, pos_edge_index, neg_edge_index): + # first, get all the edge_index need calculated + edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) + # then, use dot-products to calculate logits, you can use whatever decode method you want + logits = (x[edge_index[0]] * x[edge_index[1]]).sum(dim=-1) + return logits + + class MyAutoMLPForLP(MyAutoMLP): + def initialize(self): + # init MyMLPForLP instead of MyMLP + self.model = MyMLPForLP( + in_channels = self.num_features, + layer_num = self.layer_num, + dim = self.dim + ).to(self.device) + + self.initialized = True + + +Model with sampling support +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +AutoGL now support sampling training for large-scale dataset. Currently, node-wise sampling, layer-wise sampling and graph-wise sampling are supported for node classification. See more about sampling in :ref:`trainer`. + +If you want to use the corresponding sampling methods on your customized dataset, you may need to make further adaptation. Currently, sampling trainer only support **Sequential Model**, which means it only consists of a sequence of layers, with each layer taking a ``Data`` as an example. The Data has the same organization structure with ``torch_geometric.data.Data``, which contains keywords ``x``, ``edge_index`` and ``edge_weight`` for you to use. + +You need to make your sampling ``model`` inherit from ``ClassificationSupportedSequentialModel``: + +.. 
code-block:: python + + import autogl + from autogl.module.model.base import ClassificationSupportedSequentialModel + + # override Linear so that it can take graph data as input + class Linear(torch.nn.Linear): def forward(self, data): - pass # Your forward function - - class YourAutoGNN(BaseModel): - def __init__(self, num_features=None, num_classes=None, device=None, init=True, **args): - """ - num_features: the number of features - num_classes: the number of classes - device: your device to run code - init: if True, the model will be initialize - """ - self.space = XXX # Define your search space - self.hyperparams = XXX # Define your hyper-parameters - self.initialized = False - if init is True: - self.initialize() + return super().forward(data.x) + + class MyMLPSampling(ClassificationSupportedSequentialModel): + def __init__(self, in_channels, num_classes, layer_num, dim): + super().__init__() + if layer_num == 1: + ops = [Linear(in_channels, num_classes)] + else: + ops = [Linear(in_channels, dim)] + for i in range(layer_num - 2): + ops.append(Linear(dim, dim)) + ops.append(Linear(dim, num_classes)) + + self.core = torch.nn.ModuleList(ops) + + # (mendatory) override sequential_encoding_layers property to interact with sampling + @property + def sequential_encoding_layers(self) -> torch.nn.ModuleList: + return self.core + + # (mendatory) define the encode logic of classification for sampling + def cls_encode(self, data): + # if you use sampling, the data will be passed in two possible ways, + # you can judge it use following rules + if hasattr(data, 'edge_indexes'): + # the edge_indexes are a list of edge_index, one for each layer + edge_indexes = data.edge_indexes + edge_weights = [None] * len(self.core) if getattr(data, 'edge_weights', None) is None else data.edge_weights + else: + # the edge_index and edge_weight will stay the same as default + edge_indexes = [data.edge_index] * len(self.core) + edge_weights = [getattr(data, 'edge_weight', None)] * len(self.core) + + x = data.x + for i in range(len(self.core)): + data = autogl.data.Data(x=x, edge_index=edge_indexes[i]) + data.edge_weight = edge_weight + x = self.sequential_encoding_layers[i](data) + return x + + # (mendatory) define the decode logic of classification for sampling + def cls_decode(self, x): + return torch.nn.functional.log_softmax(x) + From 89be0a29228b3aa3d948b558596d680d85424012 Mon Sep 17 00:00:00 2001 From: generall Date: Fri, 9 Jul 2021 09:31:57 +0800 Subject: [PATCH 123/144] fix graphnasmacro oom bug --- autogl/module/nas/estimator/train_scratch.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/autogl/module/nas/estimator/train_scratch.py b/autogl/module/nas/estimator/train_scratch.py index 6767ca3..2077b7a 100644 --- a/autogl/module/nas/estimator/train_scratch.py +++ b/autogl/module/nas/estimator/train_scratch.py @@ -16,6 +16,7 @@ class TrainEstimator(BaseEstimator): """ def __init__(self, loss_f = "nll_loss", evaluation = [Acc()]): super().__init__(loss_f, evaluation) + self.evaluation = evaluation self.estimator=OneShotEstimator(self.loss_f, self.evaluation) def infer(self, model: BaseSpace, dataset, mask="train"): @@ -34,6 +35,14 @@ class TrainEstimator(BaseEstimator): feval=self.evaluation, loss=self.loss_f, lr_scheduler_type=None) - self.trainer.train(dataset) - with torch.no_grad(): - return self.estimator.infer(boxmodel.model, dataset, mask) + try: + self.trainer.train(dataset) + with torch.no_grad(): + return self.estimator.infer(boxmodel.model, dataset, mask) + 
except RuntimeError as e:
+            if "cuda" in str(e) or "CUDA" in str(e):
+                INF = 100
+                fin = [-INF if eva.is_higher_better() else INF for eva in self.evaluation]
+                return fin, 0
+            else:
+                raise e
From 20a50992847d240df634287496e60ff78c517f03 Mon Sep 17 00:00:00 2001
From: CoreLeader
Date: Sat, 10 Jul 2021 13:24:00 +0800
Subject: [PATCH 124/144] Revise tutorial of model, correct typo

The word *mendatory* is not standard English and is presumably a typo for
*mandatory*; take a look at a dictionary for more details. I replaced
*mendatory* with *required*, since the pair `optional <-> required` is more
commonly used in programming.
---
 docs/docfile/tutorial/t_model.rst | 49 ++++++++++++++++---------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/docs/docfile/tutorial/t_model.rst b/docs/docfile/tutorial/t_model.rst
index 73ae421..ce9c4b6 100644
--- a/docs/docfile/tutorial/t_model.rst
+++ b/docs/docfile/tutorial/t_model.rst
@@ -49,7 +49,7 @@ Let's say you want to implement a simple MLP for node classification and want to
         self.core = torch.nn.Sequential(*ops)
 
-        # this method is mendatory to have
+        # this method is required
         def forward(self, data):
             # data: torch_geometric.data.Data
             assert hasattr(data, 'x'), 'MLP only support graph data with features'
             x = data.x
             return torch.nn.functional.log_softmax(self.core(x))
@@ -66,11 +66,11 @@ After you define the logic of ``model``, you can now define your ``automodel`` t
     # define your automodel, need to inherit from BaseModel
     class MyAutoMLP(BaseModel):
         def __init__(self):
-            # (mendatory) make sure you call __init__ of super with init argument properly set.
+            # (required) make sure you call __init__ of super with init argument properly set.
             # if you do not want to initialize inside __init__, please pass False.
            super().__init__(init=False)
 
-            # (mendatory) define the search space
+            # (required) define the search space
            self.space = [
                 {'parameterName': 'layer_num', 'type': 'INTEGER', 'minValue': 1, 'maxValue': 5, 'scalingType': 'LINEAR'},
                 {'parameterName': 'dim', 'type': 'INTEGER', 'minValue': 64, 'maxValue': 128, 'scalingType': 'LINEAR'}
             ]
@@ -84,14 +84,14 @@ After you define the logic of ``model``, you can now define your ``automodel`` t
             self.num_classes = None
             self.num_features = None
 
-            # (mendatory) since we don't know the num_classes and num_features until we see the dataset,
+            # (required) since we don't know the num_classes and num_features until we see the dataset,
             # we cannot initialize the models when instantiated. the initialized will be set to False.
self.initialized = False - # (mendatory) set the device of current auto model + # (required) set the device of current auto model self.device = torch.device('cuda') - # (mendatory) get current hyper-parameters of this automodel + # (required) get current hyper-parameters of this automodel # need to return a dictionary whose keys are the same with self.space def get_hyper_parameter(self): return { @@ -99,25 +99,25 @@ After you define the logic of ``model``, you can now define your ``automodel`` t 'dim': self.dim } - # (mendatory) override to interact with num_classes + # (required) override to interact with num_classes def get_num_classes(self): return self.num_classes - # (mendatory) override to interact with num_classes + # (required) override to interact with num_classes def set_num_classes(self, n_classes): self.num_classes = n_classes - # (mendatory) override to interact with num_features + # (required) override to interact with num_features def get_num_features(self): return self.num_features - # (mendatory) override to interact with num_features + # (required) override to interact with num_features def set_num_features(self, n_features): self.num_features = n_features - # (mendatory) create the core MLP model + # (required) instantiate the core MLP model using corresponding hyper-parameters def initialize(self): - # (mendatory) you need to make sure the core model is named as `self.model` + # (required) you need to make sure the core model is named as `self.model` self.model = MyMLP( in_channels = self.num_features, num_classes = self.num_classes, @@ -127,7 +127,7 @@ After you define the logic of ``model``, you can now define your ``automodel`` t self.initialized = True - # (mendatory) override to create a copy of model using current hyper-parameters + # (required) override to create a copy of model using provided hyper-parameters def from_hyper_parameter(self, hp): # hp is a dictionary that contains keys and values corrsponding to your self.space # in this case, it will be in form {'layer_num': XX, 'dim': XX} @@ -178,11 +178,11 @@ For link prediction, the definition of model is a bit different with the common self.core = torch.nn.Sequential(*ops) - # (mendatory) for interaction with link prediction trainer and solver + # (required) for interaction with link prediction trainer and solver def lp_encode(self, data): return self.core(data.x) - # (mendatory) for interaction with link prediction trainer and solver + # (required) for interaction with link prediction trainer and solver def lp_decode(self, x, pos_edge_index, neg_edge_index): # first, get all the edge_index need calculated edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) @@ -205,11 +205,14 @@ For link prediction, the definition of model is a bit different with the common Model with sampling support ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -AutoGL now support sampling training for large-scale dataset. Currently, node-wise sampling, layer-wise sampling and graph-wise sampling are supported for node classification. See more about sampling in :ref:`trainer`. +Towards efficient representation learning on large-scale graph, AutoGL currently support node classification using sampling techniques including node-wise sampling, layer-wise sampling, and graph-wise sampling. See more about sampling in :ref:`trainer`. -If you want to use the corresponding sampling methods on your customized dataset, you may need to make further adaptation. 
Currently, sampling trainer only support **Sequential Model**, which means it only consists of a sequence of layers, with each layer taking a ``Data`` as an example. The Data has the same organization structure with ``torch_geometric.data.Data``, which contains keywords ``x``, ``edge_index`` and ``edge_weight`` for you to use.
-
-You need to make your sampling ``model`` inherit from ``ClassificationSupportedSequentialModel``:
+In order to conduct node classification using sampling techniques with your custom model, further adaptation and modification are generally required.
+According to the message passing mechanism of Graph Neural Networks (GNNs), numerous nodes in the multi-hop neighborhood of the evaluation or test set are potentially involved in evaluating the GNN model on a large-scale graph dataset.
+As the representations for those numerous nodes are likely to occupy a large amount of computational resources, the common forward process is generally infeasible for model evaluation on large-scale graphs.
+An iterative representation learning mechanism is a practical and feasible way to evaluate a **Sequential Model**,
+which consists only of multiple sequential layers, with each layer taking a ``Data`` aggregate as input. The input ``Data`` has the same functionality as ``torch_geometric.data.Data``, which conventionally provides properties ``x``, ``edge_index``, and optionally ``edge_weight``.
+If your custom model is composed of concatenated layers, you had better make your model inherit ``ClassificationSupportedSequentialModel`` to utilize the layer-wise representation learning mechanism and efficiently conduct representation learning for your custom sequential model.
 
 .. code-block:: python
 
@@ -223,7 +226,7 @@ You need to make your sampling ``model`` inherit from ``ClassificationSupportedS
 
     class MyMLPSampling(ClassificationSupportedSequentialModel):
         def __init__(self, in_channels, num_classes, layer_num, dim):
-            super().__init__()
+            super().__init__()
             if layer_num == 1:
                 ops = [Linear(in_channels, num_classes)]
             else:
@@ -234,12 +237,12 @@ You need to make your sampling ``model`` inherit from ``ClassificationSupportedS
 
             self.core = torch.nn.ModuleList(ops)
 
-        # (mendatory) override sequential_encoding_layers property to interact with sampling
+        # (required) override sequential_encoding_layers property to interact with sampling
         @property
         def sequential_encoding_layers(self) -> torch.nn.ModuleList:
             return self.core
 
-        # (mendatory) define the encode logic of classification for sampling
+        # (required) define the encode logic of classification for sampling
         def cls_encode(self, data):
             # if you use sampling, the data will be passed in two possible ways,
             # you can judge it use following rules
@@ -259,7 +262,7 @@ You need to make your sampling ``model`` inherit from ``ClassificationSupportedS
                 x = self.sequential_encoding_layers[i](data)
             return x
 
-        # (mendatory) define the decode logic of classification for sampling
+        # (required) define the decode logic of classification for sampling
         def cls_decode(self, x):
             return torch.nn.functional.log_softmax(x)

From b6c85ced8d509a2f86f1cc4a0dea57bff54bafec Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sun, 11 Jul 2021 03:15:56 +0000
Subject: [PATCH 125/144] fix typo
---
 docs/docfile/tutorial/t_model.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docfile/tutorial/t_model.rst b/docs/docfile/tutorial/t_model.rst
index ce9c4b6..e46a94c 100644
--- a/docs/docfile/tutorial/t_model.rst
+++ b/docs/docfile/tutorial/t_model.rst
@@ -258,7 +258,7
@@ If your custom model is composed of concatenated layers, you had better make y
                 x = data.x
                 for i in range(len(self.core)):
                     data = autogl.data.Data(x=x, edge_index=edge_indexes[i])
-                    data.edge_weight = edge_weight
+                    data.edge_weight = edge_weights[i]
                     x = self.sequential_encoding_layers[i](data)
                 return x

From cdded157c00f112476ea74aea4246935518310aa Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sun, 11 Jul 2021 10:17:57 +0000
Subject: [PATCH 126/144] add default parameters for budget of SAINT, set
 num_workers to 0.
---
 .../node_classification_sampled_trainer.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py
index 9ccdfce..415e9bd 100644
--- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py
+++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py
@@ -166,7 +166,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer):
         self.__num_graphs_per_epoch: int = num_graphs_per_epoch
 
         " Set sampled_budget "
-        sampled_budget: int = kwargs.get("sampled_budget")
+        sampled_budget: int = kwargs.get("sampled_budget", 10000)
         # todo: This is a version caused by current unreasonable initialization process
         # todo: Refactor the framework for trainer to fix in future version
         # if type(sampled_budget) != int:
@@ -197,11 +197,16 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer):
             __cpu_count: _typing.Optional[int] = os.cpu_count()
             return __cpu_count if __cpu_count else 0
 
-        self.__training_sampler_num_workers: int = kwargs.get(
-            "training_sampler_num_workers", _cpu_count()
-        )
-        if not 0 <= self.__training_sampler_num_workers <= _cpu_count():
-            self.__training_sampler_num_workers: int = _cpu_count()
+        # self.__training_sampler_num_workers: int = kwargs.get(
+        #     "training_sampler_num_workers", _cpu_count()
+        # )
+
+        # if not 0 <= self.__training_sampler_num_workers <= _cpu_count():
+        #     self.__training_sampler_num_workers: int = _cpu_count()
+
+        # force to be 0 to be compatible with current pyg solution.
+        self.__training_sampler_num_workers: int = 0
+
         super(NodeClassificationGraphSAINTTrainer, self).__init__(
             model, num_features, num_classes, device, init, feval, loss
         )

From b7def7c676acf4a607ac9a80f17b9c00276801a6 Mon Sep 17 00:00:00 2001
From: Frozenmad
Date: Sun, 11 Jul 2021 10:18:14 +0000
Subject: [PATCH 127/144] update trainer tutorials.
---
 docs/docfile/tutorial/t_trainer.rst | 157 ++++++++++++++++++++++------
 1 file changed, 125 insertions(+), 32 deletions(-)

diff --git a/docs/docfile/tutorial/t_trainer.rst b/docs/docfile/tutorial/t_trainer.rst
index 86979e5..36f9888 100644
--- a/docs/docfile/tutorial/t_trainer.rst
+++ b/docs/docfile/tutorial/t_trainer.rst
@@ -10,34 +10,10 @@ AutoGL project use ``trainer`` to handle the auto-training of tasks. Currently,
 
 * ``LinkPredictionTrainer`` for link prediction
 
-Initialization
---------------
+Lazy Initialization
+-------------------
+For reasons similar to those in :ref:`model`, we also use lazy initialization for all trainers. Only (part of) the hyper-parameters will be set when ``__init__()`` is called. The ``trainer`` will have its core ``model`` only after ``initialize()`` is explicitly called, which will be done automatically in ``solver`` and ``duplicate_from_hyper_parameter()``, after all the hyper-parameters are set properly.
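+
+A minimal sketch of this life cycle is shown below (the specific trainer class and
+the hyper-parameter used here are only illustrative; substitute the trainer and
+search space you actually work with):
+
+.. code-block:: python
+
+    trainer = NodeClassificationGraphSAINTTrainer(
+        model='gcn',
+        num_features=dataset.num_features,
+        num_classes=dataset.num_classes,
+        init=False,   # defer building the core model
+    )
+    # the core model only exists after explicit initialization ...
+    trainer.initialize()
+    # ... or after an initialized copy is derived from a set of hyper-parameters,
+    # which is what the solver and the HPO module do internally
+    new_trainer = trainer.duplicate_from_hyper_parameter({'max_epoch': 100})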
-A trainer can either be initialized from its ``__init__()``. If you want to build a trainer by ``__init__()``, you need to pass the following parameters to it, namely as ``model``, ``num_features``, and ``num_classes`` and ``auto ensemble``. You can also define some parameters alternatively, including ``optimizer``, ``lr``, ``max_epoch``, ``early_stopping_round``, ``weight_decay`` and etc.
-
-In the ``__init__()``, you need to define the space and hyperparameter of your trainer:
-
-.. code-block:: python
-
-    # 1. define your search space of trainer
-    self.space = [
-        {'parameterName': 'max_epoch', 'type': 'INTEGER', 'maxValue': 300, 'minValue': 10, 'scalingType': 'LINEAR'},
-        {'parameterName': 'early_stopping_round', 'type': 'INTEGER', 'maxValue': 30, 'minValue': 10,
-        'scalingType': 'LINEAR'},
-        {'parameterName': 'lr', 'type': 'DOUBLE', 'maxValue': 1e-3, 'minValue': 1e-4, 'scalingType': 'LOG'},
-        {'parameterName': 'weight_decay', 'type': 'DOUBLE', 'maxValue': 5e-3, 'minValue': 5e-4,
-        'scalingType': 'LOG'},
-    ]
-
-    # 2. define the initial point of hyperparameter search of your trainer
-    self.hyperparams = {
-        'max_epoch': self.max_epoch,
-        'early_stopping_round': self.early_stopping_round,
-        'lr': self.lr,
-        'weight_decay': self.weight_decay
-    }
-
-Where ``self.space`` is a list of dictionary indicating the name, type, and some properties of the parameter. ``self.hyperparams`` is a dictionary indicating the hyper-parameters used in this trainer.
 
 Train and Predict
 -----------------
@@ -48,7 +24,7 @@ We have given the training and testing functions for the tasks of node classific
 
 The evaluation function is defined in ``evaluate()``, you can use your our evaluation metrics and methods.
 
 Node Classification with Sampling
-------------------------------------
+---------------------------------
 According to various present studies, training with spatial sampling has been demonstrated
 as an efficient technique for representation learning on large-scale graph.
 We provide implementations for various representative sampling mechanisms including
@@ -84,14 +60,131 @@ The sampling techniques can be utilized by adopting corresponding trainer
 ``NodeClassificationNeighborSamplingTrainer``. You can either specify the
 corresponding name of trainer in YAML configuration file or instantiate
 the solver ``AutoNodeClassifier``
-with the instance of specific trainer as ``model`` argument.
+with an instance of the specific trainer. However, please make sure to manage some key
+hyper-parameters properly inside the hyper-parameter space. Specifically:
+
+For ``NodeClassificationLayerDependentImportanceSamplingTrainer``, you need to set the
+hyper-parameter ``sampled_node_sizes`` properly. The space of ``sampled_node_sizes`` should
+be a list of the same length as the layer number of your **Sequential Model**. For example, if you have a
+model with 4 layers, you need to pass the hyper-parameter space properly:
+
+.. code-block:: python
+
+    solver = AutoNodeClassifier(
+        graph_models=(A_MODEL_WITH_4_LAYERS,),
+        default_trainer='NodeClassificationLayerDependentImportanceSamplingTrainer',
+        trainer_hp_space=[
+            # (required) you need to set the trainer_hp_space properly.
+            {
+                'parameterName': 'sampled_node_sizes',
+                'type': 'NUMERICAL_LIST',
+                "numericalType": "INTEGER",
+                "length": 4, # same with the layer number of your model
+                "minValue": [200,200,200,200],
+                "maxValue": [1000,1000,1000,1000],
+                "scalingType": "LOG"
+            },
+            ...
+        ]
+    )
+
+If the layer number of your model is a searchable hyper-parameter, you can also set ``cutPara``
+and ``cutFunc`` properly, to connect it with the layer-number hyper-parameter of your model.
+
+.. code-block:: python
+
+    '''
+    Suppose the layer number of your model is of the following form:
+    {
+        'parameterName': 'layer_number',
+        'type': 'INTEGER',
+        'minValue': 2,
+        'maxValue': 4,
+        'scalingType': 'LOG'
+    }
+    '''
+
+    solver = AutoNodeClassifier(
+        graph_models=(A_MODEL_WITH_DYNAMIC_LAYERS,),
+        default_trainer='NodeClassificationLayerDependentImportanceSamplingTrainer',
+        trainer_hp_space=[
+            # (required) you need to set the trainer_hp_space properly.
+            {
+                'parameterName': 'sampled_node_sizes',
+                'type': 'NUMERICAL_LIST',
+                "numericalType": "INTEGER",
+                "length": 4, # max length
+                "cutPara": ("layer_number", ), # link with layer_number
+                "cutFunc": lambda x:x[0], # link with layer_number
+                "minValue": [200,200,200,200],
+                "maxValue": [1000,1000,1000,1000],
+                "scalingType": "LOG"
+            },
+            ...
+        ]
+    )
+
+
+Similarly, if you want to use ``NodeClassificationNeighborSamplingTrainer``, you need to
+make sure to set the hyper-parameter ``sampling_sizes`` to the same length as the layer number
+of your model. For example:
+
+.. code-block:: python
+
+    '''
+    Suppose the layer number of your model is of the following form:
+    {
+        'parameterName': 'layer_number',
+        'type': 'INTEGER',
+        'minValue': 2,
+        'maxValue': 4,
+        'scalingType': 'LOG'
+    }
+    '''
+
+    solver = AutoNodeClassifier(
+        graph_models=(A_MODEL_WITH_DYNAMIC_LAYERS,),
+        default_trainer='NodeClassificationNeighborSamplingTrainer',
+        trainer_hp_space=[
+            # (required) you need to set the trainer_hp_space properly.
+            {
+                'parameterName': 'sampling_sizes',
+                'type': 'NUMERICAL_LIST',
+                "numericalType": "INTEGER",
+                "length": 4, # max length
+                "cutPara": ("layer_number", ), # link with layer_number
+                "cutFunc": lambda x:x[0], # link with layer_number
+                "minValue": [20,20,20,20],
+                "maxValue": [100,100,100,100],
+                "scalingType": "LOG"
+            },
+            ...
+        ]
+    )
+
+
+You can also pass a trainer inside the model list directly. A brief example is demonstrated as follows:
 
 .. code-block:: python
 
     ladies_sampling_trainer = NodeClassificationLayerDependentImportanceSamplingTrainer(
-        model='gcn', num_features=dataset.num_features, num_classes=dataset.num_classes,
-        ...
+        model='gcn', num_features=dataset.num_features, num_classes=dataset.num_classes, ...
     )
+
+    ladies_sampling_trainer.hyper_parameter_space = [
+        # (required) you need to set the trainer_hp_space properly.
+        {
+            'parameterName': 'sampled_node_sizes',
+            'type': 'NUMERICAL_LIST',
+            "numericalType": "INTEGER",
+            "length": 4, # max length
+            "cutPara": ("num_layers", ), # link with layer_number
+            "cutFunc": lambda x:x[0], # link with layer_number
+            "minValue": [200,200,200,200],
+            "maxValue": [1000,1000,1000,1000],
+            "scalingType": "LOG"
+        },
+        ...
+    ]
+
+    AutoNodeClassifier(graph_models=(ladies_sampling_trainer,), ...)
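+
+Once the constructed classifier is assigned to ``solver``, fitting works the same
+way as with ordinary trainers. A brief sketch follows (the ``time_limit`` value is
+only an example):
+
+.. code-block:: python
+
+    solver = AutoNodeClassifier(graph_models=(ladies_sampling_trainer,), ...)
+    solver.fit(dataset, time_limit=3600)
+    prediction = solver.predict(dataset)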
From 14c146fa4af654f2489fc01cbcef06e7f00b6f79 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Thu, 24 Jun 2021 15:29:42 +0000 Subject: [PATCH 128/144] add version check --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index db1b30a..34cccfe 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,9 @@ try: import torch_cluster import torch_spline_conv import torch_geometric + PYG_VER = torch_geometric.__version__.split('.') + PYG_VER = [int(PYG_VER[0]), int(PYG_VER[1])] + assert PYG_VER >= [1, 7], "torch geometric version should be at least 1.7.0" except ModuleNotFoundError: raise ModuleNotFoundError( "PyTorch-Geometric not fully installed. " From 0ceb1f76fbe6cf537751c0daa21e2045e913c34f Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Fri, 25 Jun 2021 07:46:43 +0000 Subject: [PATCH 129/144] optimize check for newest pyg installation --- setup.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 34cccfe..6fb7c9e 100644 --- a/setup.py +++ b/setup.py @@ -9,12 +9,10 @@ except ModuleNotFoundError: try: import torch_scatter import torch_sparse - import torch_cluster - import torch_spline_conv import torch_geometric PYG_VER = torch_geometric.__version__.split('.') PYG_VER = [int(PYG_VER[0]), int(PYG_VER[1])] - assert PYG_VER >= [1, 7], "torch geometric version should be at least 1.7.0" + assert PYG_VER >= [1, 7], "PyTorch-Geometric version should be at least 1.7.0" except ModuleNotFoundError: raise ModuleNotFoundError( "PyTorch-Geometric not fully installed. " @@ -22,6 +20,17 @@ except ModuleNotFoundError: "see https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html for installation." ) +if torch.__version__.startswith('1.8.'): + try: + import torch_cluster + import torch_spline_conv + except ModuleNotFoundError: + raise ModuleNotFoundError( + "PyTorch-Geometric not fully installed. " + "For PyTorch version 1.8.x, you should also install torch_cluster and torch_spline_conv " + "see https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html for installation." 
+ ) + from setuptools import setup, find_packages with open("README.md", 'r') as fh: From 178b69bc305375629597e4805ea0f73682dba7e1 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Fri, 25 Jun 2021 08:32:14 +0000 Subject: [PATCH 130/144] update version number, info of project --- autogl/__init__.py | 2 +- pyproject.toml | 7 +++++++ setup.py | 17 +++++++---------- 3 files changed, 15 insertions(+), 11 deletions(-) create mode 100644 pyproject.toml diff --git a/autogl/__init__.py b/autogl/__init__.py index 485f44a..d3ec452 100644 --- a/autogl/__init__.py +++ b/autogl/__init__.py @@ -1 +1 @@ -__version__ = "0.1.1" +__version__ = "0.2.0" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..91c6895 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[build-system] +requires = [ + "torch>=1.6.0", + "torch-geometric>=1.7.0", + "torch-scatter", + "torch-sparse" +] diff --git a/setup.py b/setup.py index 6fb7c9e..9d9458f 100644 --- a/setup.py +++ b/setup.py @@ -39,11 +39,11 @@ with open("README.md", 'r') as fh: ''' https://packaging.python.org/guides/distributing-packages-using-setuptools/ ''' ''' https://setuptools.readthedocs.io/en/latest/ ''' setup( - name='auto-graph-learning', - version='0.1.1', + name='autogl', + version='0.2.0', author='THUMNLab/aglteam', maintainer='THUMNLab/aglteam', - author_email='xin_wang@tsinghua.edu.cn', + author_email='autogl@tsinghua.edu.cn', description='AutoML tools for graph-structure dataset', long_description=long_description, long_description_content_type='text/markdown', @@ -53,11 +53,10 @@ setup( python_requires='~=3.6', # https://pypi.org/classifiers/ classifiers=[ - "Development Status :: 2 - Pre-Alpha", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9" + "Development Status :: 4 - Beta", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.6" ], # https://setuptools.readthedocs.io/en/latest/userguide/dependency_management.html # note that setup_requires and tests_require are deprecated @@ -81,8 +80,6 @@ setup( 'torch-geometric', 'torch-scatter', 'torch-sparse', - 'torch-cluster', - 'torch-spline-conv', 'tqdm' ] ) \ No newline at end of file From d0205f53e0ebdfb61cb5d1bfa1343b57be09614c Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Fri, 25 Jun 2021 08:34:33 +0000 Subject: [PATCH 131/144] update to apache license --- LICENSE | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 202 insertions(+), 21 deletions(-) diff --git a/LICENSE b/LICENSE index 9e82d05..7a4a3ea 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,202 @@ -MIT License - -Copyright (c) 2020 THUMNLab aglteam - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file From c7499ae390d484db7d6fe6f7bd8195389e95200a Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Fri, 25 Jun 2021 08:46:10 +0000 Subject: [PATCH 132/144] remove pyproject, revise readme --- README.md | 2 +- pyproject.toml | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) delete mode 100644 pyproject.toml diff --git a/README.md b/README.md index dc0fe10..fd04576 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ AutoGL is developed for researchers and developers to quickly conduct autoML on The workflow below shows the overall framework of AutoGL. - + AutoGL uses `datasets` to maintain dataset for graph-based machine learning, which is based on Dataset in PyTorch Geometric with some support added to corporate with the auto solver framework. diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 91c6895..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[build-system] -requires = [ - "torch>=1.6.0", - "torch-geometric>=1.7.0", - "torch-scatter", - "torch-sparse" -] From 7d25cde54db9fbcf17bfdac4f30fe627cbb2823c Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sat, 26 Jun 2021 05:50:37 +0000 Subject: [PATCH 133/144] add all the modules --- autogl/__init__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/autogl/__init__.py b/autogl/__init__.py index d3ec452..97b5085 100644 --- a/autogl/__init__.py +++ b/autogl/__init__.py @@ -1 +1,18 @@ +from . 
import ( + data, + datasets, + module, + solver, + utils, +) + +from .module import ( + ensemble, + feature, + hpo, + model, + nas, + train, +) + __version__ = "0.2.0" From d6d64fe870400faaedea6fab22347b8094c71fc6 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 27 Jun 2021 06:36:25 +0000 Subject: [PATCH 134/144] adjust imports --- autogl/datasets/__init__.py | 61 ++++++++++++++- autogl/module/__init__.py | 9 +++ autogl/module/feature/__init__.py | 78 ++++++++++++++++++-- autogl/module/feature/generators/__init__.py | 15 +++- autogl/module/feature/selectors/__init__.py | 2 - autogl/module/feature/subgraph/__init__.py | 43 ++++++++++- autogl/module/feature/subgraph/netlsd.py | 1 - autogl/module/feature/subgraph/nx.py | 4 - autogl/module/feature/subgraph/stats.py | 1 - autogl/module/model/__init__.py | 12 +++ autogl/module/nas/__init__.py | 12 ++- 11 files changed, 215 insertions(+), 23 deletions(-) delete mode 100644 autogl/module/feature/subgraph/stats.py diff --git a/autogl/datasets/__init__.py b/autogl/datasets/__init__.py index 6b6919a..363b5d1 100644 --- a/autogl/datasets/__init__.py +++ b/autogl/datasets/__init__.py @@ -136,10 +136,6 @@ __all__ = [ "register_dataset", "build_dataset", "build_dataset_from_name", - "GatneDataset", - "GTNDataset", - "HANDataset", - "MatlabMatrix", "get_label_number", "random_splits_mask", "random_splits_mask_class", @@ -147,4 +143,61 @@ __all__ = [ "graph_set_fold_id", "graph_random_splits", "graph_get_split", + "AmazonComputersDataset", + "AmazonPhotoDataset", + "CoauthorPhysicsDataset", + "CoauthorCSDataset", + "CoraDataset", + "CiteSeerDataset", + "PubMedDataset", + "RedditDataset", + "MUTAGDataset", + "IMDBBinaryDataset", + "IMDBMultiDataset", + "CollabDataset", + "ProteinsDataset", + "REDDITBinary", + "REDDITMulti5K", + "REDDITMulti12K", + "PTCMRDataset", + "NCI1Dataset", + "ENZYMES", + "QM9Dataset", + "OGBNproductsDataset", + "OGBNproteinsDataset", + "OGBNarxivDataset", + "OGBNpapers100MDataset", + "OGBNmagDataset", + "OGBGmolhivDataset", + "OGBGmolpcbaDataset", + "OGBGppaDataset", + "OGBGcodeDataset", + "OGBLppaDataset", + "OGBLcollabDataset", + "OGBLddiDataset", + "OGBLcitationDataset", + "OGBLwikikgDataset", + "OGBLbiokgDataset", + "GatneDataset", + "AmazonDataset", + "TwitterDataset", + "YouTubeDataset", + "GTNDataset", + "ACM_GTNDataset", + "DBLP_GTNDataset", + "IMDB_GTNDataset", + "HANDataset", + "ACM_HANDataset", + "DBLP_HANDataset", + "IMDB_HANDataset", + "MatlabMatrix", + "BlogcatalogDataset", + "WikipediaDataset", + "PPIDataset", + "ModelNet10", + "ModelNet40", + "ModelNet10Train", + "ModelNet10Test", + "ModelNet40Train", + "ModelNet40Test", ] diff --git a/autogl/module/__init__.py b/autogl/module/__init__.py index bf9a71b..80f94da 100644 --- a/autogl/module/__init__.py +++ b/autogl/module/__init__.py @@ -1,3 +1,12 @@ +from . 
import ( + feature, + model, + train, + hpo, + nas, + ensemble +) + from .ensemble import * from .feature import * from .hpo import * diff --git a/autogl/module/feature/__init__.py b/autogl/module/feature/__init__.py index 20738f7..ec789a1 100644 --- a/autogl/module/feature/__init__.py +++ b/autogl/module/feature/__init__.py @@ -1,5 +1,3 @@ -import importlib -import os from .base import BaseFeatureAtom from .base import BaseFeatureEngineer @@ -28,13 +26,83 @@ def register_feature(name): from .auto_feature import AutoFeatureEngineer from .base import BaseFeatureEngineer -from .generators import BaseGenerator -from .selectors import BaseSelector +from .generators import ( + BaseGenerator, + GeGraphlet, + GeEigen, + GePageRank, + register_pyg, + pygfunc, + PYGGenerator, + PYGLocalDegreeProfile, + PYGNormalizeFeatures, + PYGOneHotDegree +) -from .subgraph import BaseSubgraph +from .selectors import ( + BaseSelector, + SeFilterConstant, + SeGBDT +) + +from .subgraph import ( + BaseSubgraph, + SgNetLSD, + register_nx, + NxSubgraph, + nxfunc, + NxLargeCliqueSize, + NxAverageClusteringApproximate, + NxDegreeAssortativityCoefficient, + NxDegreePearsonCorrelationCoefficient, + NxHasBridge, + NxGraphCliqueNumber, + NxGraphNumberOfCliques, + NxTransitivity, + NxAverageClustering, + NxIsConnected, + NxNumberConnectedComponents, + NxIsDistanceRegular, + NxLocalEfficiency, + NxGlobalEfficiency, + NxIsEulerian, +) __all__ = [ "BaseFeatureEngineer", "AutoFeatureEngineer", "BaseFeatureAtom", + "BaseGenerator", + "GeGraphlet", + "GeEigen", + "GePageRank", + "register_pyg", + "pygfunc", + "PYGGenerator", + "PYGLocalDegreeProfile", + "PYGNormalizeFeatures", + "PYGOneHotDegree", + "BaseSelector", + "SeFilterConstant", + "SeGBDT", + "BaseSubgraph", + "SgNetLSD", + "register_nx", + "NxSubgraph", + "nxfunc", + "NxLargeCliqueSize", + "NxAverageClusteringApproximate", + "NxDegreeAssortativityCoefficient", + "NxDegreePearsonCorrelationCoefficient", + "NxHasBridge", + "NxGraphCliqueNumber", + "NxGraphNumberOfCliques", + "NxTransitivity", + "NxAverageClustering", + "NxIsConnected", + "NxNumberConnectedComponents", + "NxIsDistanceRegular", + "NxLocalEfficiency", + "NxGlobalEfficiency", + "NxIsEulerian", ] diff --git a/autogl/module/feature/generators/__init__.py b/autogl/module/feature/generators/__init__.py index 3c3e2da..c15a9b3 100644 --- a/autogl/module/feature/generators/__init__.py +++ b/autogl/module/feature/generators/__init__.py @@ -2,6 +2,17 @@ from .base import BaseGenerator from .graphlet import GeGraphlet from .eigen import GeEigen from .page_rank import GePageRank -from .pyg import * +from .pyg import register_pyg, PYGGenerator, pygfunc, PYGLocalDegreeProfile, PYGNormalizeFeatures, PYGOneHotDegree -__all__ = ["BaseGenerator", "GeGraphlet", "GeEigen", "GePageRank"] +__all__ = [ + "BaseGenerator", + "GeGraphlet", + "GeEigen", + "GePageRank", + "register_pyg", + "pygfunc", + "PYGGenerator", + "PYGLocalDegreeProfile", + "PYGNormalizeFeatures", + "PYGOneHotDegree" +] diff --git a/autogl/module/feature/selectors/__init__.py b/autogl/module/feature/selectors/__init__.py index 8faf9aa..8a8d689 100644 --- a/autogl/module/feature/selectors/__init__.py +++ b/autogl/module/feature/selectors/__init__.py @@ -1,5 +1,3 @@ -import importlib -import os from .base import BaseSelector from .se_filter_constant import SeFilterConstant from .se_gbdt import SeGBDT diff --git a/autogl/module/feature/subgraph/__init__.py b/autogl/module/feature/subgraph/__init__.py index 3058285..abf1b99 100644 --- 
a/autogl/module/feature/subgraph/__init__.py +++ b/autogl/module/feature/subgraph/__init__.py @@ -1,4 +1,45 @@ from .netlsd import SgNetLSD from .base import BaseSubgraph +from .nx import ( + register_nx, + NxSubgraph, + nxfunc, + NxLargeCliqueSize, + NxAverageClusteringApproximate, + NxDegreeAssortativityCoefficient, + NxDegreePearsonCorrelationCoefficient, + NxHasBridge, + NxGraphCliqueNumber, + NxGraphNumberOfCliques, + NxTransitivity, + NxAverageClustering, + NxIsConnected, + NxNumberConnectedComponents, + NxIsDistanceRegular, + NxLocalEfficiency, + NxGlobalEfficiency, + NxIsEulerian, +) -__all__ = ["SgNetLSD", "BaseSubgraph"] +__all__ = [ + "SgNetLSD", + "BaseSubgraph", + "register_nx", + "NxSubgraph", + "nxfunc", + "NxLargeCliqueSize", + "NxAverageClusteringApproximate", + "NxDegreeAssortativityCoefficient", + "NxDegreePearsonCorrelationCoefficient", + "NxHasBridge", + "NxGraphCliqueNumber", + "NxGraphNumberOfCliques", + "NxTransitivity", + "NxAverageClustering", + "NxIsConnected", + "NxNumberConnectedComponents", + "NxIsDistanceRegular", + "NxLocalEfficiency", + "NxGlobalEfficiency", + "NxIsEulerian", +] diff --git a/autogl/module/feature/subgraph/netlsd.py b/autogl/module/feature/subgraph/netlsd.py index 52a946e..65b3644 100644 --- a/autogl/module/feature/subgraph/netlsd.py +++ b/autogl/module/feature/subgraph/netlsd.py @@ -1,6 +1,5 @@ import netlsd from .base import BaseSubgraph -import numpy as np import torch from .. import register_feature diff --git a/autogl/module/feature/subgraph/nx.py b/autogl/module/feature/subgraph/nx.py index ff2ba16..0653cc9 100644 --- a/autogl/module/feature/subgraph/nx.py +++ b/autogl/module/feature/subgraph/nx.py @@ -4,7 +4,6 @@ from networkx.algorithms.efficiency_measures import local_efficiency from networkx.algorithms.distance_regular import is_distance_regular from networkx.algorithms.components import number_connected_components from networkx.algorithms.components import is_connected -from networkx.algorithms.cluster import average_clustering from networkx.algorithms.cluster import transitivity from networkx.algorithms.clique import graph_number_of_cliques from networkx.algorithms.clique import graph_clique_number @@ -13,11 +12,8 @@ from networkx.algorithms.assortativity import degree_pearson_correlation_coeffic from networkx.algorithms.assortativity import degree_assortativity_coefficient from networkx.algorithms.approximation.clustering_coefficient import average_clustering from networkx.algorithms.approximation.clique import large_clique_size -import netlsd from .base import BaseSubgraph -import numpy as np import torch -from functools import wraps from .. 
import register_feature NX_EXTRACTORS = [] diff --git a/autogl/module/feature/subgraph/stats.py b/autogl/module/feature/subgraph/stats.py deleted file mode 100644 index a22b22b..0000000 --- a/autogl/module/feature/subgraph/stats.py +++ /dev/null @@ -1 +0,0 @@ -import numpy as np diff --git a/autogl/module/model/__init__.py b/autogl/module/model/__init__.py index 67d778c..0a816a2 100644 --- a/autogl/module/model/__init__.py +++ b/autogl/module/model/__init__.py @@ -8,3 +8,15 @@ from .graph_saint import GraphSAINTAggregationModel from .gcn import AutoGCN from .gat import AutoGAT from .gin import AutoGIN + +__all__ = [ + "ModelUniversalRegistry", + "register_model", + "BaseModel", + "AutoTopkpool", + "AutoSAGE", + "GraphSAINTAggregationModel", + "AutoGCN", + "AutoGAT", + "AutoGIN", +] diff --git a/autogl/module/nas/__init__.py b/autogl/module/nas/__init__.py index b31ae1f..be2b683 100644 --- a/autogl/module/nas/__init__.py +++ b/autogl/module/nas/__init__.py @@ -1,3 +1,9 @@ -from .algorithm import * -from .estimator import * -from .space import * +from . import ( + algorithm, + estimator, + space +) + +from .algorithm import NAS_ALGO_DICT +from .estimator import NAS_ESTIMATOR_DICT +from .space import NAS_SPACE_DICT From 99ae9a3828a49870b8ae3f6cacbe0dd6cdacbc26 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 27 Jun 2021 07:03:28 +0000 Subject: [PATCH 135/144] reorganize document files --- docs/docfile/documentation/data.rst | 4 +- docs/docfile/documentation/dataset.rst | 4 +- docs/docfile/documentation/ensemble.rst | 7 +++ docs/docfile/documentation/feature.rst | 9 ++++ docs/docfile/documentation/hpo.rst | 7 +++ docs/docfile/documentation/model.rst | 7 +++ docs/docfile/documentation/module.rst | 67 ------------------------- docs/docfile/documentation/nas.rst | 13 +++++ docs/docfile/documentation/solver.rst | 4 +- docs/docfile/documentation/train.rst | 7 +++ docs/docfile/tutorial/t_hpo.rst | 59 +++++++++++----------- docs/index.rst | 7 ++- 12 files changed, 91 insertions(+), 104 deletions(-) create mode 100644 docs/docfile/documentation/ensemble.rst create mode 100644 docs/docfile/documentation/feature.rst create mode 100644 docs/docfile/documentation/hpo.rst create mode 100644 docs/docfile/documentation/model.rst delete mode 100644 docs/docfile/documentation/module.rst create mode 100644 docs/docfile/documentation/nas.rst create mode 100644 docs/docfile/documentation/train.rst diff --git a/docs/docfile/documentation/data.rst b/docs/docfile/documentation/data.rst index 3d11e7a..ebf5a32 100644 --- a/docs/docfile/documentation/data.rst +++ b/docs/docfile/documentation/data.rst @@ -1,7 +1,7 @@ .. _data documentation: -data -==== +autogl.data +=========== .. automodule:: autogl.data :members: \ No newline at end of file diff --git a/docs/docfile/documentation/dataset.rst b/docs/docfile/documentation/dataset.rst index c212ae3..c1314ec 100644 --- a/docs/docfile/documentation/dataset.rst +++ b/docs/docfile/documentation/dataset.rst @@ -1,7 +1,7 @@ .. _dataset documentation: -dataset -======= +autogl.datasets +=============== We integrate the datasets from `PyTorch Geometric `_, `CogDL `_ and `OGB `_. We also list some datasets from `CogDL` for simplicity. diff --git a/docs/docfile/documentation/ensemble.rst b/docs/docfile/documentation/ensemble.rst new file mode 100644 index 0000000..ebfffb5 --- /dev/null +++ b/docs/docfile/documentation/ensemble.rst @@ -0,0 +1,7 @@ +.. _ensemble documentation: + +autogl.module.ensemble +---------------------- + +.. 
automodule:: autogl.module.ensemble + :members: diff --git a/docs/docfile/documentation/feature.rst b/docs/docfile/documentation/feature.rst new file mode 100644 index 0000000..5f7ebbf --- /dev/null +++ b/docs/docfile/documentation/feature.rst @@ -0,0 +1,9 @@ +.. _feature documentation: + +autogl.module.feature +===================== + +Several feature engineering operations are collected manually, or from PyTorch Geometric, NetworkX, etc. + +.. automodule:: autogl.module.feature + :members: diff --git a/docs/docfile/documentation/hpo.rst b/docs/docfile/documentation/hpo.rst new file mode 100644 index 0000000..13991fa --- /dev/null +++ b/docs/docfile/documentation/hpo.rst @@ -0,0 +1,7 @@ +.. _hpo documentation: + +autogl.module.hpo +----------------- + +.. automodule:: autogl.module.hpo + :members: diff --git a/docs/docfile/documentation/model.rst b/docs/docfile/documentation/model.rst new file mode 100644 index 0000000..502697f --- /dev/null +++ b/docs/docfile/documentation/model.rst @@ -0,0 +1,7 @@ +.. _model documentation: + +autogl.module.model +------------------- + +.. automodule:: autogl.module.model + :members: diff --git a/docs/docfile/documentation/module.rst b/docs/docfile/documentation/module.rst deleted file mode 100644 index 719198a..0000000 --- a/docs/docfile/documentation/module.rst +++ /dev/null @@ -1,67 +0,0 @@ -module -====== - -The four main modules for auto graph learning are listed here. - -.. _feature documentation: - -feature -------- - -.. automodule:: autogl.module.feature - :members: - -.. automodule:: autogl.module.feature.generators - :members: - -.. automodule:: autogl.module.feature.selectors - :members: - -.. automodule:: autogl.module.feature.subgraph - :members: - -.. _model documentation: - -model ------ - -.. automodule:: autogl.module.model - :members: - -.. _train documentation: - -train ------ - -.. automodule:: autogl.module.train - :members: - -.. _hpo documentation: - -hyper parameter optimization ----------------------------- - -.. automodule:: autogl.module.hpo - :members: - -.. _neural architecture search: - -neural architecture search --------------------------- - -.. automodule:: autogl.module.nas.algorithm - :members: - -.. automodule:: autogl.module.nas.space - :members: - -.. automodule:: autogl.module.nas.estimator - :members: - -.. _ensemble documentation: - -ensemble --------- - -.. automodule:: autogl.module.ensemble - :members: diff --git a/docs/docfile/documentation/nas.rst b/docs/docfile/documentation/nas.rst new file mode 100644 index 0000000..929d069 --- /dev/null +++ b/docs/docfile/documentation/nas.rst @@ -0,0 +1,13 @@ +.. _neural architecture search: + +autogl.module.nas +----------------- + +.. automodule:: autogl.module.nas.algorithm + :members: + +.. automodule:: autogl.module.nas.space + :members: + +.. automodule:: autogl.module.nas.estimator + :members: diff --git a/docs/docfile/documentation/solver.rst b/docs/docfile/documentation/solver.rst index f059c13..e1fc22e 100644 --- a/docs/docfile/documentation/solver.rst +++ b/docs/docfile/documentation/solver.rst @@ -1,7 +1,7 @@ .. _solver documentation: -solver -====== +autogl.solver +============= .. automodule:: autogl.solver :members: \ No newline at end of file diff --git a/docs/docfile/documentation/train.rst b/docs/docfile/documentation/train.rst new file mode 100644 index 0000000..f92fec7 --- /dev/null +++ b/docs/docfile/documentation/train.rst @@ -0,0 +1,7 @@ +.. _train documentation: + +autogl.module.train +------------------- + +.. 
automodule:: autogl.module.train + :members: diff --git a/docs/docfile/tutorial/t_hpo.rst b/docs/docfile/tutorial/t_hpo.rst index 9f28075..5fbf8ca 100644 --- a/docs/docfile/tutorial/t_hpo.rst +++ b/docs/docfile/tutorial/t_hpo.rst @@ -52,35 +52,27 @@ Or you can let HPO cut the list to a certain length which is dependent on other How given HPO algorithms support search space is listed as follows: -+------------+------------+--------------+-----------+------------+ -| Algorithm | numerical |numerical list|categorical| fixed | -+============+============+==============+===========+============+ -| Grid | | | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ -| Random | ✓ | ✓ | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ -| Anneal | ✓ | ✓ | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ -| Bayes | ✓ | ✓ | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ -| TPE | ✓ | ✓ | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ -| CMAES | ✓ | ✓ | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ -| MOCMAES | ✓ | ✓ | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ -|Quasi random| ✓ | ✓ | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ -| AutoNE | ✓ | ✓ | ✓ | ✓ | -+------------+------------+--------------+-----------+------------+ - -Here, TPE is from [1], CMAES is from [2], MOCMAES is from [3], quasi random is from [4], AutoNE is from [5]. - -[1] Bergstra, James S., et al. "Algorithms for hyper-parameter optimization." Advances in neural information processing systems. 2011. -[2] Arnold, Dirk V., and Nikolaus Hansen. "Active covariance matrix adaptation for the (1+ 1)-CMA-ES." Proceedings of the 12th annual conference on Genetic and evolutionary computation. 2010. -[3] Voß, Thomas, Nikolaus Hansen, and Christian Igel. "Improved step size adaptation for the MO-CMA-ES." Proceedings of the 12th annual conference on Genetic and evolutionary computation. 2010. -[4] Bratley, Paul, Bennett L. Fox, and Harald Niederreiter. "Programs to generate Niederreiter's low-discrepancy sequences." ACM Transactions on Mathematical Software (TOMS) 20.4 (1994): 494-495. -[5] Tu, Ke, et al. "Autone: Hyperparameter optimization for massive network embedding." Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 2019. 
++------------------+------------+--------------+-----------+------------+ +| Algorithm | numerical |numerical list|categorical| fixed | ++==================+============+==============+===========+============+ +| Grid | | | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ +| Random | ✓ | ✓ | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ +| Anneal | ✓ | ✓ | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ +| Bayes | ✓ | ✓ | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ +| TPE [1]_ | ✓ | ✓ | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ +| CMAES [2]_ | ✓ | ✓ | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ +| MOCMAES [3]_ | ✓ | ✓ | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ +|Quasi random [4]_ | ✓ | ✓ | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ +| AutoNE [5]_ | ✓ | ✓ | ✓ | ✓ | ++------------------+------------+--------------+-----------+------------+ Add Your HPOptimizer -------------------- @@ -148,4 +140,11 @@ If you want to add your own HPOptimizer, the only thing you should do is finishi best_para = para_for_trainer # 5. Return the best trainer and parameter. - return best_trainer, best_para \ No newline at end of file + return best_trainer, best_para + + +.. [1] Bergstra, James S., et al. "Algorithms for hyper-parameter optimization." Advances in neural information processing systems. 2011. +.. [2] Arnold, Dirk V., and Nikolaus Hansen. "Active covariance matrix adaptation for the (1+ 1)-CMA-ES." Proceedings of the 12th annual conference on Genetic and evolutionary computation. 2010. +.. [3] Voß, Thomas, Nikolaus Hansen, and Christian Igel. "Improved step size adaptation for the MO-CMA-ES." Proceedings of the 12th annual conference on Genetic and evolutionary computation. 2010. +.. [4] Bratley, Paul, Bennett L. Fox, and Harald Niederreiter. "Programs to generate Niederreiter's low-discrepancy sequences." ACM Transactions on Mathematical Software (TOMS) 20.4 (1994): 494-495. +.. [5] Tu, Ke, et al. "Autone: Hyperparameter optimization for massive network embedding." Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 2019. 
diff --git a/docs/index.rst b/docs/index.rst index 3e7e565..e2c0a60 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -99,7 +99,12 @@ In AutoGL, the tasks are solved by corresponding learners, which in general do t docfile/documentation/data docfile/documentation/dataset - docfile/documentation/module + docfile/documentation/feature + docfile/documentation/model + docfile/documentation/train + docfile/documentation/hpo + docfile/documentation/nas + docfile/documentation/ensemble docfile/documentation/solver Indices and tables From eade801d88503384a9ab1faccc3791c2ddd296fc Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 27 Jun 2021 10:36:13 +0000 Subject: [PATCH 136/144] update documents for new version --- README.md | 24 ++++++-- docs/docfile/tutorial/t_nas.rst | 77 +++++++++++++++++++++++++- docs/docfile/tutorial/t_quickstart.rst | 2 +- docs/docfile/tutorial/t_solver.rst | 24 +++----- docs/index.rst | 14 +++-- 5 files changed, 110 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index fd04576..205fb43 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ The workflow below shows the overall framework of AutoGL. AutoGL uses `datasets` to maintain dataset for graph-based machine learning, which is based on Dataset in PyTorch Geometric with some support added to corporate with the auto solver framework. -Different graph-based machine learning tasks are solved by different `AutoGL solvers`, which make use of four main modules to automatically solve given tasks, namely `auto feature engineer`, `auto model`, `hyperparameter optimization`, and `auto ensemble`. +Different graph-based machine learning tasks are solved by different `AutoGL solvers`, which make use of five main modules to automatically solve given tasks, namely `auto feature engineer`, `neural architecture search`, `auto model`, `hyperparameter optimization`, and `auto ensemble`. Currently, the following algorithms are supported in AutoGL: @@ -33,6 +33,7 @@ Currently, the following algorithms are supported in AutoGL: Feature Engineer Model + NAS HPO Ensemble @@ -40,13 +41,26 @@ Currently, the following algorithms are supported in AutoGL: Generators
graphlet
eigen
more ...

Selectors
SeFilterConstant
gbdt

Subgraph
netlsd
NxAverageClustering
more ... Node Classification
GCN
GAT
GraphSAGE

Graph Classification
GIN
TopKPool + + Algorithms
+ Random
+ RL
+ more ...

+ Spaces
+ SinglePath
+ GraphNas
+ more ...

+ Estimators
+ Oneshot
+ Scratch
+ Grid
Random
Anneal
Bayes
CAMES
MOCAMES
Quasi random
TPE
AutoNE Voting
Stacking
 
-This toolkit also serves as a platform for users to implement and test their own autoML or graph-based machine learning models.
+This toolkit also serves as a framework for users to implement and test their own autoML or graph-based machine learning models.
 
 ## Installation
 
 ### Requirements
 
 Please make sure you meet the following requirements before installing AutoGL.
 
 1. Python >= 3.6.0
 
-2. PyTorch (>=1.5.1)
+2. PyTorch (>=1.6.0)
 
 see for installation.
 
-3. PyTorch Geometric
+3. PyTorch Geometric (>=1.7.0)
 
 see for installation.
 
 Run the following command to install this package through `pip`.
 
 ```
-pip install auto-graph-learning
+pip install autogl
 ```
 
 #### Install from source
diff --git a/docs/docfile/tutorial/t_nas.rst b/docs/docfile/tutorial/t_nas.rst
index 7d60674..10ed15d 100644
--- a/docs/docfile/tutorial/t_nas.rst
+++ b/docs/docfile/tutorial/t_nas.rst
@@ -8,11 +8,59 @@ To be more flexible, we modulize NAS process with three part: algorithm, space a
 Different models in different parts can be composed in some certain constrains. If you want to design your own NAS process, you can change any of those parts according to your demand.
 
+Usage
+-----
+
+You can directly enable architecture search for node classification tasks by passing the algorithms, spaces and estimators to
+the solver. The following shows an example:
+
+.. code-block:: python
+
+    # Use graphnas to solve cora
+    from autogl.datasets import build_dataset_from_name
+    from autogl.solver import AutoNodeClassifier
+
+    solver = AutoNodeClassifier(
+        feature = 'PYGNormalizeFeatures',
+        graph_models = (),
+        hpo = 'tpe',
+        ensemble = None,
+        nas_algorithms='rl',
+        nas_spaces='graphnasmacro',
+        nas_estimators='scratch'
+    )
+
+    cora = build_dataset_from_name('cora')
+    solver.fit(cora)
+
+The code above will first find the best architecture in space ``graphnasmacro`` using the ``rl`` search algorithm.
+Then the searched architecture will be further optimized through hyper-parameter optimization with ``tpe``.
+
+.. note:: The ``graph_models`` argument does not conflict with the nas module. You can set ``graph_models`` to
+    other hand-crafted models besides the ones found by nas. Once the architectures are derived from the nas module,
+    they act in the same way as hand-crafted models directly passed through ``graph_models``.
+
 Search Space
 ------------
 
 The space definition is base on mutable fashion used in NNI, which is defined as a model inheriting BaseSpace
 There are mainly two ways to define your search space, one can be performed with one-shot fashion while the other cannot.
diff --git a/docs/docfile/tutorial/t_nas.rst b/docs/docfile/tutorial/t_nas.rst
index 7d60674..10ed15d 100644
--- a/docs/docfile/tutorial/t_nas.rst
+++ b/docs/docfile/tutorial/t_nas.rst
@@ -8,11 +8,59 @@ To be more flexible, we modulize NAS process with three part: algorithm, space a
 Different models in different parts can be composed under certain constraints.
 If you want to design your own NAS process, you can change any of those parts according to your demand.

+Usage
+-----
+
+You can directly enable architecture search for node classification tasks by passing the algorithms, spaces and estimators to
+the solver. The following shows an example:
+
+.. code-block:: python
+
+    # Use graphnas to solve cora
+    from autogl.datasets import build_dataset_from_name
+    from autogl.solver import AutoNodeClassifier
+
+    solver = AutoNodeClassifier(
+        feature = 'PYGNormalizeFeatures',
+        graph_models = (),
+        hpo = 'tpe',
+        ensemble = None,
+        nas_algorithms='rl',
+        nas_spaces='graphnasmacro',
+        nas_estimators='scratch'
+    )
+
+    cora = build_dataset_from_name('cora')
+    solver.fit(cora)
+
+The code above will first find the best architecture in the space ``graphnasmacro`` using the ``rl`` search algorithm.
+The searched architecture will then be further optimized through hyperparameter optimization with ``tpe``.
+
+.. note:: The ``graph_models`` argument does not conflict with the nas module. You can set ``graph_models`` to
+    other hand-crafted models besides the ones found by nas. Once the architectures are derived from the nas module,
+    they act in the same way as hand-crafted models directly passed through ``graph_models``.
+
 Search Space
 ------------
 The space definition is based on the mutable fashion used in NNI, and is defined as a model inheriting BaseSpace.
 There are mainly two ways to define your search space; one can be performed in one-shot fashion while the other cannot.

+Currently, we support the following search spaces:
+
++------------------------+------------------------------------------------------------------+
+| Space                  | Description                                                      |
++========================+==================================================================+
+| ``singlepath`` [4]_    | Architectures with several sequential layers, with each layer   |
+|                        | choosing only one path                                           |
++------------------------+------------------------------------------------------------------+
+| ``graphnas`` [1]_      | The graph nas micro search space designed for fully supervised  |
+|                        | node classification models                                       |
++------------------------+------------------------------------------------------------------+
+| ``graphnasmacro`` [1]_ | The graph nas macro search space designed for semi-supervised   |
+|                        | node classification models                                       |
++------------------------+------------------------------------------------------------------+
+
+You can also define your own nas search space.
 If you need one-shot fashion, you should use the function ``setLayerChoice`` and ``setInputChoice`` to construct the super network.
 Here is an example.
@@ -111,8 +159,17 @@ But you can only use sample-based search strategy.

 Performance Estimator
 ---------------------
-The performance estimator estimates the performance of an architecture.
-Here is an example of estimating an architecture without training (used in one-shot space).
+The performance estimator estimates the performance of an architecture. Currently, we support the following estimators:
+
++-------------------------+--------------------------------------------------------+
+| Estimator               | Description                                            |
++=========================+========================================================+
+| ``oneshot``             | Directly evaluating the given models without training  |
++-------------------------+--------------------------------------------------------+
+| ``scratch``             | Train the models from scratch and then evaluate them   |
++-------------------------+--------------------------------------------------------+
+
+You can also write your own estimator. Here is an example of estimating an architecture without training (used in one-shot space).

 .. code-block:: python
@@ -135,7 +192,20 @@ Here is an example of estimating an architecture without training (used in one-s

 Search Strategy
 ---------------
-The space strategy defines how to find an architecture.
+The search strategy defines how to find an architecture. We currently support the following search strategies:
+
++-------------------------+--------------------------------------------------------+
+| Strategy                | Description                                            |
++=========================+========================================================+
+| ``random``              | Random search by uniform sampling                      |
++-------------------------+--------------------------------------------------------+
+| ``rl`` [1]_             | Use RL as the architecture generator agent             |
++-------------------------+--------------------------------------------------------+
+| ``enas`` [2]_           | Efficient neural architecture search                   |
++-------------------------+--------------------------------------------------------+
+| ``darts`` [3]_          | Differentiable neural architecture search              |
++-------------------------+--------------------------------------------------------+
+
 Sample-based strategy without weight sharing is simpler than strategies with weight sharing.
 We show how to define your strategy here with DFS as an example.
@@ -227,3 +297,4 @@ Different search strategies should be combined with different search spaces and
 .. [1] Gao, Yang, et al. "Graph neural architecture search." IJCAI. Vol. 20. 2020.
 .. [2] Pham, Hieu, et al. "Efficient neural architecture search via parameters sharing." International Conference on Machine Learning. PMLR, 2018.
 .. [3] Liu, Hanxiao, Karen Simonyan, and Yiming Yang. "DARTS: Differentiable Architecture Search." International Conference on Learning Representations. 2018.
+.. [4] Guo, Zichao, et al. “Single Path One-Shot Neural Architecture Search with Uniform Sampling.” European Conference on Computer Vision, 2019, pp. 544–560.
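The body of the "estimating an architecture without training" example mentioned above falls outside the diff context, so it does not appear in this patch. A rough sketch of what such a one-shot estimator can look like; the ``infer(model, dataset)`` hook name and the use of ``val_mask`` are assumptions about the estimator interface, while ``Acc`` is the evaluator used throughout this series:

```python
import torch.nn.functional as F
from autogl.module.train import Acc

class SketchOneShotEstimator:
    """Score a sampled architecture by a single forward pass, without training it."""
    def infer(self, model, dataset):
        data = dataset[0].to(next(model.parameters()).device)
        logits = model(data)                 # forward pass only, reusing supernet weights
        mask = data.val_mask                 # judge on the validation split
        probs = F.softmax(logits[mask], dim=1).detach().cpu().numpy()
        return Acc.evaluate(probs, data.y[mask].cpu().numpy())
```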
"Graph neural architecture search." IJCAI. Vol. 20. 2020. .. [2] Pham, Hieu, et al. "Efficient neural architecture search via parameters sharing." International Conference on Machine Learning. PMLR, 2018. .. [3] Liu, Hanxiao, Karen Simonyan, and Yiming Yang. "DARTS: Differentiable Architecture Search." International Conference on Learning Representations. 2018. +.. [4] Guo, Zichao, et al. “Single Path One-Shot Neural Architecture Search with Uniform Sampling.” European Conference on Computer Vision, 2019, pp. 544–560. diff --git a/docs/docfile/tutorial/t_quickstart.rst b/docs/docfile/tutorial/t_quickstart.rst index cd27ad0..78a6765 100644 --- a/docs/docfile/tutorial/t_quickstart.rst +++ b/docs/docfile/tutorial/t_quickstart.rst @@ -12,7 +12,7 @@ Based on the concept of autoML, auto graph learning aims at **automatically** so The diagram below describes the workflow of AutoGL framework. -To reach the aim of autoML, our proposed auto graph learning framework is organized as follows. We have ``dataset`` to maintain the graph datasets given by users. A ``solver`` object needs to be built for specifying the target tasks. Inside ``solver``, there are four submodules to help complete the auto graph tasks, namely ``auto feature engineer``, ``auto model``, ``hyperparameter optimization`` and ``auto ensemble``, which will automatically preprocess/enhance your data, choose and optimize deep models and ensemble them in the best way for you. +To reach the aim of autoML, our proposed auto graph learning framework is organized as follows. We have ``dataset`` to maintain the graph datasets given by users. A ``solver`` object needs to be built for specifying the target tasks. Inside ``solver``, there are five submodules to help complete the auto graph tasks, namely ``auto feature engineer``, ``auto model``, ``neural architecture search``, ``hyperparameter optimization`` and ``auto ensemble``, which will automatically preprocess/enhance your data, choose and optimize deep models and ensemble them in the best way for you. Let's say you want to conduct an auto graph learning on dataset ``Cora``. First, you can easily get the ``Cora`` dataset using the ``dataset`` module: diff --git a/docs/docfile/tutorial/t_solver.rst b/docs/docfile/tutorial/t_solver.rst index 59c6ccf..028e36c 100644 --- a/docs/docfile/tutorial/t_solver.rst +++ b/docs/docfile/tutorial/t_solver.rst @@ -3,10 +3,11 @@ AutoGL Solver ============= -Our AutoGL project use ``solver`` to handle the auto-solvation of tasks. Currently, we support the following tasks: +AutoGL project use ``solver`` to handle the auto-solvation of tasks. Currently, we support the following tasks: * ``AutoNodeClassifier`` for semi-supervised node classification * ``AutoGraphClassifier`` for supervised graph classification +* ``AutoLinkPredictor`` for link prediction Initialization -------------- @@ -16,7 +17,7 @@ A solver can either be initialized from its ``__init__()`` or from a config dict Initialize from ``__init__()`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you want to build a solver by ``__init__()``, you need to pass the four key modules to it, namely as ``auto feature engineer``, ``auto model list``, ``hyperparameter optimizer`` and ``auto ensemble``. You can either pass the keywords of corresponding modules or the initialized instances: +If you want to build a solver by ``__init__()``, you need to pass the key modules to it. You can either pass the keywords of corresponding modules or the initialized instances: .. 
diff --git a/docs/docfile/tutorial/t_solver.rst b/docs/docfile/tutorial/t_solver.rst
index 59c6ccf..028e36c 100644
--- a/docs/docfile/tutorial/t_solver.rst
+++ b/docs/docfile/tutorial/t_solver.rst
@@ -3,10 +3,11 @@
 AutoGL Solver
 =============

-Our AutoGL project use ``solver`` to handle the auto-solvation of tasks. Currently, we support the following tasks:
+The AutoGL project uses a ``solver`` to handle the auto-solving of tasks. Currently, we support the following tasks:

 * ``AutoNodeClassifier`` for semi-supervised node classification
 * ``AutoGraphClassifier`` for supervised graph classification
+* ``AutoLinkPredictor`` for link prediction

 Initialization
 --------------

 A solver can either be initialized from its ``__init__()`` or from a config dict.

 Initialize from ``__init__()``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-If you want to build a solver by ``__init__()``, you need to pass the four key modules to it, namely as ``auto feature engineer``, ``auto model list``, ``hyperparameter optimizer`` and ``auto ensemble``. You can either pass the keywords of corresponding modules or the initialized instances:
+If you want to build a solver by ``__init__()``, you need to pass the key modules to it. You can either pass the keywords of corresponding modules or the initialized instances:

 .. code-block:: python
@@ -43,13 +44,13 @@ If you want to build a solver by ``__init__()``, you need to pass the four key m

 Here, the argument ``device`` specifies where to perform the training and searching; when set to ``auto``, ``cuda`` is used whenever it is available.

-If you want to disable one module (except graphModuleList), you can set it to ``None``:
+If you want to disable one module, you can set it to ``None``:

 .. code-block:: python

     solver = AutoNodeClassifier(feature_module=None, hpo_module=None, ensemble_module=None)

-You can also pass some important arguments of modules directly to solver, which will automatically set them for you:
+You can also pass some important arguments of modules directly to the solver, which will automatically be set for you:

 .. code-block:: python
@@ -89,9 +90,9 @@ You can use ``fit()`` or ``fit_predict()`` to perform optimization, which shares

     # load your dataset here
     dataset = some_dataset()
-    solver.fit(dataset, inplace=True, time_limit=3600)
+    solver.fit(dataset, inplace=True)

-The inplace argument is used for saving memory if set to ``True``. It will modify your dataset in an inplace manner during feature engineering. You can also set ``time_limit`` to limit the time cost of the whole auto process.
+The ``inplace`` argument is used for saving memory if set to ``True``. It will modify your dataset in an inplace manner during feature engineering. You can also specify the ``train_split`` and ``val_split`` arguments to let the solver auto-split the given dataset. If these arguments are given, the split dataset will be used instead of the default split specified by the dataset provided. All the models will be trained on ``train dataset``. Their hyperparameters will be optimized based on the performance of ``valid dataset``, as well as the final ensemble method. For example:
@@ -107,16 +108,7 @@ You can also specify the ``train_split`` and ``val_split`` arguments to let solv

 For the node classification problem, we also support balanced sampling of train and valid: force the number of sampled nodes in different classes to be the same. The balanced mode can be turned on by setting ``balanced=True`` in ``fit()``, which is by default set to ``True``.

-For the graph classification problem, we also provide a way to conduct cross-validation. You can enable cross-validation by specifying ``cross_validation=True``. ``cv_fold`` is also provided to determine the number of folds. Then, the ``train dataset`` will be further split into ``cv_fold`` folds for each model to be trained and optimized hyperparameters on. The auto ensemble will base on the model performance of ``valid dataset``.
-
-.. note::
-
-   If you want to use cross validation, please make sure the ``dataset`` receives train/val/test split before cross validated. By default, the graph dataset derived directly from ``build_dataset_from_name`` is not splitted yet. To split the dataset, you can:
-
-   * use ``autogl.datasets.utils.graph_random_split`` to pre-split ``dataset`` outside of ``solver``.
-   * pass ``train_split`` and ``val_split`` directly to ``solver``, which will pre-split the ``dataset`` for you.
-
-.. note:: Solver will maintain the models with the best hyper-parameter of each model architecture you pass to solver (the ``graphModelList`` argument when initialized). The maintained models will then be ensembled by ensemble module. When cross-validation is used, solver will maintain ``cv_fold`` models of each model architecture for each fold.
+.. note:: Solver will maintain the models with the best hyper-parameters of each model architecture you pass to the solver (the ``graph_models`` argument when initialized). The maintained models will then be ensembled by the ensemble module.

 After ``fit()``, the solver maintains the performances of every single model and the ensemble model in one leaderboard instance. You can output the performances on the valid dataset by:
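The leaderboard snippet that this colon introduces is outside the diff context. The same calls appear verbatim in the example scripts touched later in this series (``examples/graphnas.py`` and the feature-engineering tests), so the post-``fit()`` flow is roughly the sketch below; the ``use_best``/``use_ensemble`` flags are taken from those scripts, not from this tutorial:

```python
# Inspect per-model validation scores, then predict with the best model.
solver.get_leaderboard().show()
prediction = solver.predict_proba(use_best=True, use_ensemble=False)
```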
diff --git a/docs/index.rst b/docs/index.rst
index e2c0a60..e203f08 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -6,7 +6,7 @@ AutoGL

 *Actively under development by @THUMNLab*

-AutoGL is developed for researchers and developers to quickly conduct autoML on the graph datasets & tasks. See our documentation for detailed information!
+AutoGL is developed for researchers and developers to quickly conduct autoML on the graph datasets & tasks.

 The workflow below shows the overall framework of AutoGL.
@@ -15,7 +15,7 @@ The workflow below shows the overall framework of AutoGL.

 AutoGL uses ``AutoGL Dataset`` to maintain datasets for graph-based machine learning, which is based on the dataset in PyTorch Geometric with some support added to cooperate with the auto solver framework.

-Different graph-based machine learning tasks are solved by different ``AutoGL Solvers``, which make use of four main modules to automatically solve given tasks, namely ``Auto Feature Engineer``, ``Auto Model``, ``HyperParameter Optimization``, and ``Auto Ensemble``.
+Different graph-based machine learning tasks are solved by different ``AutoGL Solvers``, which make use of five main modules to automatically solve given tasks, namely ``Auto Feature Engineer``, ``Auto Model``, ``Neural Architecture Search``, ``HyperParameter Optimization``, and ``Auto Ensemble``.

 Installation
 ------------
@@ -27,11 +27,11 @@

 Please make sure you meet the following requirements before installing AutoGL.

 1. Python >= 3.6.0

-2. PyTorch (>=1.5.1)
+2. PyTorch (>=1.6.0)

    see `PyTorch `_ for installation.

-3. PyTorch Geometric
+3. PyTorch Geometric (>=1.7.0)

    see `PyTorch Geometric `_ for installation.
@@ -45,7 +45,7 @@

 Run the following command to install this package through pip.

 .. code-block:: shell

-   pip install auto-graph-learning
+   pip install autogl

 Install from source
 ^^^^^^^^^^^^^^^^^^^
@@ -71,10 +71,12 @@ If you are a developer of the AutoGL project, please use the following command t

 Modules
 -------

-In AutoGL, the tasks are solved by corresponding learners, which in general do the following things:
+In AutoGL, the tasks are solved by corresponding solvers, which in general do the following things:

 1. Preprocess and feature engineer the given datasets. This is done by the module named **auto feature engineer**, which can automatically add/delete useful/useless attributes in the given datasets. Some topological features may also be extracted & combined to form stronger features for current tasks.

+2. Find the most suitable model architectures through neural architecture search. This is done by the module named **nas**. AutoGL provides several search spaces, algorithms and estimators for finding the best architectures.
+
 3. Automatically train and tune popular models specified by users. This is done by modules named **auto model** and **hyperparameter optimization**. In the auto model, several commonly used graph deep models are provided, together with their hyperparameter spaces. These kinds of models can be tuned using the **hyperparameter optimization** module to find the best hyperparameter for the current task.

 4. 
Find the best way to ensemble models found and trained in the last step. This is done by the module named **auto ensemble**. The suitable models available are ensembled here to form a more powerful learner. From fb6254e0055f6ace58995ee931606dda4cb3b2dd Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 27 Jun 2021 10:41:51 +0000 Subject: [PATCH 137/144] remove cross validation --- autogl/solver/classifier/graph_classifier.py | 136 ++++--------------- 1 file changed, 30 insertions(+), 106 deletions(-) diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index 5c4b35e..e77d00d 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -231,8 +231,6 @@ class AutoGraphClassifier(BaseClassifier): inplace=False, train_split=None, val_split=None, - cross_validation=False, - cv_split=10, evaluation_method="infer", seed=None, ) -> "AutoGraphClassifier": @@ -262,13 +260,6 @@ class AutoGraphClassifier(BaseClassifier): use default train/val/test split in dataset, please set this to ``None``. Default ``None``. - cross_validation: bool - Whether to use cross validation to fit on train dataset. Default ``False``. - - cv_split: int - The cross validation split number. Only be effective when ``cross_validation=True``. - Default ``10``. - evaluation_method: (list of) str autogl.module.train.evaluation A (list of) evaluation method for current solver. If ``infer``, will automatically determine. Default ``infer``. @@ -319,13 +310,6 @@ class AutoGraphClassifier(BaseClassifier): elif train_split is not None and val_split is not None: utils.graph_random_splits(dataset, train_split, val_split, seed=seed) - if cross_validation: - assert ( - val_split > 0 - ), "You should set val_split > 0 to use cross_validation" - utils.graph_cross_validation( - dataset.train_split, cv_split, random_seed=seed - ) else: LOGGER.error( "Please set both train_split and val_split explicitly. 
Detect %s is None.", @@ -388,88 +372,39 @@ class AutoGraphClassifier(BaseClassifier): # train the models and tune hpo result_valid = [] names = [] - if not cross_validation: - for idx, model in enumerate(self.graph_model_list): - if time_limit < 0: - time_for_each_model = None - else: - time_for_each_model = (time_limit - time.time() + time_begin) / ( - len(self.graph_model_list) - idx - ) - if self.hpo_module is None: - model.initialize() - model.train(dataset, True) - optimized = model - else: - optimized, _ = self.hpo_module.optimize( - trainer=model, dataset=dataset, time_limit=time_for_each_model - ) - # to save memory, all the trainer derived will be mapped to cpu - optimized.to(torch.device("cpu")) - name = str(optimized) - names.append(name) - performance_on_valid, _ = optimized.get_valid_score(return_major=False) - result_valid.append( - optimized.get_valid_predict_proba().detach().cpu().numpy() + for idx, model in enumerate(self.graph_model_list): + if time_limit < 0: + time_for_each_model = None + else: + time_for_each_model = (time_limit - time.time() + time_begin) / ( + len(self.graph_model_list) - idx ) - self.leaderboard.insert_model_performance( - name, - dict( - zip( - [e.get_eval_name() for e in evaluator_list], - performance_on_valid, - ) - ), + if self.hpo_module is None: + model.initialize() + model.train(dataset, True) + optimized = model + else: + optimized, _ = self.hpo_module.optimize( + trainer=model, dataset=dataset, time_limit=time_for_each_model ) - self.trained_models[name] = optimized - else: - for i in range(dataset.train_split.n_splits): - utils.graph_set_fold_id(dataset.train_split, i) - if time_limit < 0: - time_for_each_cv = None - else: - time_for_each_cv = (time_limit - time.time() + time_begin) / ( - dataset.train_split.n_splits - i - ) - time_cv_begin = time.time() - for idx, model in enumerate(self.graph_model_list): - if time_for_each_cv is None: - time_for_each_model = None - else: - time_for_each_model = ( - time_for_each_cv - time.time() + time_cv_begin - ) / (len(self.graph_model_list) - idx) - if self.hpo_module is None: - model.train(dataset.train_split, False) - optimized = model - else: - optimized, _ = self.hpo_module.optimize( - trainer=model, - dataset=dataset.train_split, - time_limit=time_for_each_model, - ) - # to save memory, all the trainer derived will be mapped to cpu - optimized.to(torch.device("cpu")) - name = str(optimized) + "_cv%d_idx%d" % (i, idx) - names.append(name) - # evaluate on val_split of input dataset - performance_on_valid = optimized.evaluate(dataset, mask="val") - result_valid.append( - optimized.predict_proba(dataset, mask="val") - .detach() - .cpu() - .numpy() - ) - self.leaderboard.insert_model_performance( - name, - dict( - zip( - [e.get_eval_name() for e in evaluator_list], - performance_on_valid, - ) - ), + # to save memory, all the trainer derived will be mapped to cpu + optimized.to(torch.device("cpu")) + name = str(optimized) + names.append(name) + performance_on_valid, _ = optimized.get_valid_score(return_major=False) + result_valid.append( + optimized.get_valid_predict_proba().detach().cpu().numpy() + ) + self.leaderboard.insert_model_performance( + name, + dict( + zip( + [e.get_eval_name() for e in evaluator_list], + performance_on_valid, ) - self.trained_models[name] = optimized + ), + ) + self.trained_models[name] = optimized # fit the ensemble model if self.ensemble_module is not None: @@ -494,8 +429,6 @@ class AutoGraphClassifier(BaseClassifier): inplace=False, train_split=None, val_split=None, - 
cross_validation=True, - cv_split=10, evaluation_method="infer", seed=None, use_ensemble=True, @@ -528,13 +461,6 @@ class AutoGraphClassifier(BaseClassifier): to use default train/val/test split in dataset, please set this to ``None``. Default ``None``. - cross_validation: bool - Whether to use cross validation to fit on train dataset. Default ``True``. - - cv_split: int - The cross validation split number. Only be effective when ``cross_validation=True``. - Default ``10``. - evaluation_method: (list of) str or autogl.module.train.evaluation A (list of) evaluation method for current solver. If ``infer``, will automatically determine. Default ``infer``. @@ -566,8 +492,6 @@ class AutoGraphClassifier(BaseClassifier): inplace=inplace, train_split=train_split, val_split=val_split, - cross_validation=cross_validation, - cv_split=cv_split, evaluation_method=evaluation_method, seed=seed, ) From 6574b7286dcb751a294c40da030b3d7ad8212285 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 11 Jul 2021 12:05:24 +0000 Subject: [PATCH 138/144] update workflow --- README.md | 2 +- resources/workflow.svg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 205fb43..511818d 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ AutoGL is developed for researchers and developers to quickly conduct autoML on The workflow below shows the overall framework of AutoGL. - + AutoGL uses `datasets` to maintain dataset for graph-based machine learning, which is based on Dataset in PyTorch Geometric with some support added to corporate with the auto solver framework. diff --git a/resources/workflow.svg b/resources/workflow.svg index 080bd64..9fb5973 100644 --- a/resources/workflow.svg +++ b/resources/workflow.svg @@ -1 +1 @@ -AutoGLDatasetDataAuto FeatureEngineeringModel TrainingHyper ParameterOptimizationAutoEnsembleAutoGLSolver \ No newline at end of file +AutoGLDatasetAuto Feature EngineeringModel TrainingHyper-Parameter OptimizationAutoEnsembleAutoGLSolverDataNeural Architecture Search \ No newline at end of file From a663fea7b28cb928850317d683e05b272082cdea Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Thu, 15 Apr 2021 02:29:04 +0000 Subject: [PATCH 139/144] change name subgraph to graph --- README.md | 2 +- autogl/module/feature/__init__.py | 17 +++--- autogl/module/feature/auto_feature.py | 15 ++--- autogl/module/feature/base.py | 22 +++---- autogl/module/feature/generators/base.py | 6 +- autogl/module/feature/generators/pyg.py | 4 +- .../feature/{subgraph => graph}/__init__.py | 8 +-- .../feature/{subgraph => graph}/base.py | 10 ++-- .../feature/{subgraph => graph}/netlsd.py | 11 ++-- .../module/feature/{subgraph => graph}/nx.py | 58 ++++++++++--------- autogl/module/feature/selectors/base.py | 6 +- autogl/module/hpo/autone.py | 6 +- autogl/solver/base.py | 8 +-- docs/docfile/tutorial/t_fe.rst | 37 ++++++------ examples/fe_gcl_test.py | 33 ++--------- examples/fe_ncl_test.py | 21 +------ 16 files changed, 102 insertions(+), 162 deletions(-) rename autogl/module/feature/{subgraph => graph}/__init__.py (91%) rename autogl/module/feature/{subgraph => graph}/base.py (70%) rename autogl/module/feature/{subgraph => graph}/netlsd.py (79%) rename autogl/module/feature/{subgraph => graph}/nx.py (72%) diff --git a/README.md b/README.md index 511818d..dd22169 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ Currently, the following algorithms are supported in AutoGL: - Generators
graphlet
eigen
more ...

Selectors
SeFilterConstant
gbdt

Subgraph
netlsd
NxAverageClustering
more ... + Generators
graphlet
eigen
more ...

Selectors
SeFilterConstant
gbdt

Graph
netlsd
NxAverageClustering
more ... Node Classification
GCN
GAT
GraphSAGE

Graph Classification
GIN
TopKPool Algorithms
diff --git a/autogl/module/feature/__init__.py b/autogl/module/feature/__init__.py index ec789a1..5af47c5 100644 --- a/autogl/module/feature/__init__.py +++ b/autogl/module/feature/__init__.py @@ -1,4 +1,4 @@ -from .base import BaseFeatureAtom +from .base import BaseFeature from .base import BaseFeatureEngineer FEATURE_DICT = {} @@ -11,7 +11,7 @@ def register_feature(name): "Cannot register duplicate feature engineer ({})".format(name) ) # if not issubclass(cls, BaseFeatureEngineer): - if not issubclass(cls, BaseFeatureAtom): + if not issubclass(cls, BaseFeature): raise ValueError( "Trainer ({}: {}) must extend BaseFeatureEngineer".format( name, cls.__name__ @@ -24,7 +24,6 @@ def register_feature(name): from .auto_feature import AutoFeatureEngineer -from .base import BaseFeatureEngineer from .generators import ( BaseGenerator, @@ -45,11 +44,11 @@ from .selectors import ( SeGBDT ) -from .subgraph import ( - BaseSubgraph, +from .graph import ( + BaseGraph, SgNetLSD, register_nx, - NxSubgraph, + NxGraph, nxfunc, NxLargeCliqueSize, NxAverageClusteringApproximate, @@ -71,7 +70,7 @@ from .subgraph import ( __all__ = [ "BaseFeatureEngineer", "AutoFeatureEngineer", - "BaseFeatureAtom", + "BaseFeature", "BaseGenerator", "GeGraphlet", "GeEigen", @@ -85,10 +84,10 @@ __all__ = [ "BaseSelector", "SeFilterConstant", "SeGBDT", - "BaseSubgraph", + "BaseGraph", "SgNetLSD", "register_nx", - "NxSubgraph", + "NxGraph", "nxfunc", "NxLargeCliqueSize", "NxAverageClusteringApproximate", diff --git a/autogl/module/feature/auto_feature.py b/autogl/module/feature/auto_feature.py index a1f4015..d04c98c 100644 --- a/autogl/module/feature/auto_feature.py +++ b/autogl/module/feature/auto_feature.py @@ -6,7 +6,7 @@ from tqdm import tqdm from tabulate import tabulate import time -from .base import BaseFeatureAtom, BaseFeatureEngineer +from .base import BaseFeature, BaseFeatureEngineer from .selectors import SeGBDT from . import register_feature @@ -119,35 +119,30 @@ class Timer: @register_feature("deepgl") class AutoFeatureEngineer(BaseFeatureEngineer): r""" - - An implementation of auto feature engineering method Deepgl [#]_ , which iteratively generates features by aggregating neighbour features + Notes + ----- + An implementation of auto feature engineering method Deepgl [#]_ ,which iteratively generates features by aggregating neighbour features and select a fixed number of features to automatically add important graph-aware features. - References ---------- .. [#] Rossi, R. A., Zhou, R., & Ahmed, N. K. (2020). Deep Inductive Graph Representation Learning. IEEE Transactions on Knowledge and Data Engineering, 32(3), 438–452. https://doi.org/10.1109/TKDE.2018.2878247 - Parameters ---------- fixlen : int fixed number of features for every epoch. The final number of features added will be ``fixlen`` \times ``max_epoch``, 200 \times 5 in default. - max_epoch : int number of epochs in total process. - timebudget : int timebudget(seconds) for the feature engineering process, None for no time budget . Note that this time budget is a soft budget ,which is obtained by rough time estimation through previous iterations and may finally exceed the actual timebudget - y_sel_func : Callable feature selector function object for selection at each iteration ,lightgbm in default. Note that in original paper, connected components of feature graph is used , and you may implement it by yourself if you want. 
- verbosity : int hide any infomation except error and fatal if ``verbosity`` < 1 """ @@ -237,4 +232,4 @@ class AutoFeatureEngineer(BaseFeatureEngineer): gx = gx[:, sel] x = np.concatenate([x, gx], axis=1) data.x = x - return data + return data \ No newline at end of file diff --git a/autogl/module/feature/base.py b/autogl/module/feature/base.py index 94ac3bb..9d1e9cc 100644 --- a/autogl/module/feature/base.py +++ b/autogl/module/feature/base.py @@ -10,8 +10,8 @@ from ...utils import get_logger LOGGER = get_logger("Feature") -class BaseFeatureAtom: - r"""Any feature funcion object should inherit BaseFeatureAtom, +class BaseFeature: + r"""Any feature funcion object should inherit BaseFeature, which provides basic transformations and composing operation for feature engineering. Basic transformations include data type adjusting(tensor or numpy), complementing necessary attributes for future transform. Any subclass needs @@ -22,19 +22,15 @@ class BaseFeatureAtom: Parameters ---------- pipe : list - stores pipeline of ``BaseFeatureAtom``. - + stores pipeline of ``BaseFeature``. data_t: str represents the data type needed for this transform, where 'tensor' accounts for ``torch.Tensor``, 'np' for ``numpy.array`` and 'nx' for ``networkx``. When ``data_t`` values 'nx', then a ``networkx.DiGraph`` will be added to data as data.G . - multigraph : bool determine whether it supports dataset with multiple graphs - subgraph : bool determine whether it extracts subgraph features. - """ def __init__(self, pipe=None, data_t="tensor", multigraph=True, subgraph=False): @@ -50,7 +46,7 @@ class BaseFeatureAtom: r"""enable and operation to support feature engineering pipeline syntax like SeFilterConstant()&GeEigen()&... """ - return BaseFeatureAtom(self._pipe + o._pipe) + return BaseFeature(self._pipe + o._pipe) def _rebuild(self, dataset, datalist): dataset.__indices__ = None @@ -143,14 +139,14 @@ class BaseFeatureAtom: @staticmethod def compose(trans_list): - r"""put a list of ``BaseFeatureAtom`` into feature engineering pipeline""" - res = BaseFeatureAtom() + r"""put a list of ``BaseFeature`` into feature engineering pipeline""" + res = BaseFeature() for tran in trans_list: res = res & tran return res -class BaseFeatureEngineer(BaseFeatureAtom): +class BaseFeatureEngineer(BaseFeature): def __init__(self, data_t="np", multigraph=False, *args, **kwargs): super(BaseFeatureEngineer, self).__init__( data_t=data_t, multigraph=multigraph, *args, **kwargs @@ -159,7 +155,7 @@ class BaseFeatureEngineer(BaseFeatureAtom): self.kwargs = kwargs -class TransformWrapper(BaseFeatureAtom): +class TransformWrapper(BaseFeature): def __init__(self, cls, *args, **kwargs): super(TransformWrapper, self).__init__(data_t="tensor", *args, **kwargs) self._cls = cls @@ -173,4 +169,4 @@ class TransformWrapper(BaseFeatureAtom): return self def _transform(self, data=None): - return self._func(data) + return self._func(data) \ No newline at end of file diff --git a/autogl/module/feature/generators/base.py b/autogl/module/feature/generators/base.py index 1b7bab1..0ab2024 100644 --- a/autogl/module/feature/generators/base.py +++ b/autogl/module/feature/generators/base.py @@ -1,9 +1,9 @@ import numpy as np from .. 
import register_feature -from ..base import BaseFeatureAtom +from ..base import BaseFeature -class BaseGenerator(BaseFeatureAtom): +class BaseGenerator(BaseFeature): def __init__(self, data_t="np", multigraph=True, **kwargs): super(BaseGenerator, self).__init__( data_t=data_t, multigraph=multigraph, **kwargs @@ -15,4 +15,4 @@ class GeOnehot(BaseGenerator): def _transform(self, data): fe = np.eye(data.x.shape[0]) data.x = np.concatenate([data.x, fe], axis=1) - return data + return data \ No newline at end of file diff --git a/autogl/module/feature/generators/pyg.py b/autogl/module/feature/generators/pyg.py index f7919ef..c3c5e8e 100644 --- a/autogl/module/feature/generators/pyg.py +++ b/autogl/module/feature/generators/pyg.py @@ -36,13 +36,11 @@ class PYGGenerator(BaseGenerator): def pygfunc(func): r"""A decorator for pyg transforms. You may want to use it to quickly wrap a feature transform function object. - Examples -------- @register_pyg @pygfunc(local_degree_profile) class PYGLocalDegreeProfile(local_degree_profile):pass - """ def decorator_func(cls): @@ -88,4 +86,4 @@ class PYGOneHotDegree(PYGGenerator): dsc = self.extract(data) data.x = torch.cat([data.x, dsc], dim=1) return data - """ + """ \ No newline at end of file diff --git a/autogl/module/feature/subgraph/__init__.py b/autogl/module/feature/graph/__init__.py similarity index 91% rename from autogl/module/feature/subgraph/__init__.py rename to autogl/module/feature/graph/__init__.py index abf1b99..d0870e4 100644 --- a/autogl/module/feature/subgraph/__init__.py +++ b/autogl/module/feature/graph/__init__.py @@ -1,8 +1,8 @@ from .netlsd import SgNetLSD -from .base import BaseSubgraph +from .base import BaseGraph from .nx import ( register_nx, - NxSubgraph, + NxGraph, nxfunc, NxLargeCliqueSize, NxAverageClusteringApproximate, @@ -23,9 +23,9 @@ from .nx import ( __all__ = [ "SgNetLSD", - "BaseSubgraph", + "BaseGraph", "register_nx", - "NxSubgraph", + "NxGraph", "nxfunc", "NxLargeCliqueSize", "NxAverageClusteringApproximate", diff --git a/autogl/module/feature/subgraph/base.py b/autogl/module/feature/graph/base.py similarity index 70% rename from autogl/module/feature/subgraph/base.py rename to autogl/module/feature/graph/base.py index a85d77c..1daace9 100644 --- a/autogl/module/feature/subgraph/base.py +++ b/autogl/module/feature/graph/base.py @@ -1,13 +1,13 @@ -from ..base import BaseFeatureAtom +from ..base import BaseFeature import numpy as np import torch from .. import register_feature -@register_feature("subgraph") -class BaseSubgraph(BaseFeatureAtom): +@register_feature("graph") +class BaseGraph(BaseFeature): def __init__(self, data_t="np", multigraph=True, **kwargs): - super(BaseSubgraph, self).__init__( + super(BaseGraph, self).__init__( data_t=data_t, multigraph=multigraph, subgraph=True, **kwargs ) @@ -16,4 +16,4 @@ class BaseSubgraph(BaseFeatureAtom): data.gf = torch.FloatTensor([[]]) def _postprocess(self, data): - pass + pass \ No newline at end of file diff --git a/autogl/module/feature/subgraph/netlsd.py b/autogl/module/feature/graph/netlsd.py similarity index 79% rename from autogl/module/feature/subgraph/netlsd.py rename to autogl/module/feature/graph/netlsd.py index 65b3644..22859e4 100644 --- a/autogl/module/feature/subgraph/netlsd.py +++ b/autogl/module/feature/graph/netlsd.py @@ -1,21 +1,20 @@ import netlsd -from .base import BaseSubgraph +from .base import BaseGraph +import numpy as np import torch from .. 
import register_feature @register_feature("netlsd") -class SgNetLSD(BaseSubgraph): +class SgNetLSD(BaseGraph): r""" Notes ----- - a subgraph feature generation method. This is a simple wrapper of NetLSD [#]_. - + a graph feature generation method. This is a simple wrapper of NetLSD [#]_. References ---------- .. [#] A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein, and E. Müller, “NetLSD: Hearing the shape of a graph,” Proc. ACM SIGKDD Int. Conf. Knowl. Discov. Data Min., pp. 2347–2356, 2018. - """ def __init__(self, *args, **kwargs): @@ -26,4 +25,4 @@ class SgNetLSD(BaseSubgraph): def _transform(self, data): dsc = torch.FloatTensor([netlsd.heat(data.G, *self._args, **self._kwargs)]) data.gf = torch.cat([data.gf, dsc], dim=1) - return data + return data \ No newline at end of file diff --git a/autogl/module/feature/subgraph/nx.py b/autogl/module/feature/graph/nx.py similarity index 72% rename from autogl/module/feature/subgraph/nx.py rename to autogl/module/feature/graph/nx.py index 0653cc9..70042b4 100644 --- a/autogl/module/feature/subgraph/nx.py +++ b/autogl/module/feature/graph/nx.py @@ -4,6 +4,7 @@ from networkx.algorithms.efficiency_measures import local_efficiency from networkx.algorithms.distance_regular import is_distance_regular from networkx.algorithms.components import number_connected_components from networkx.algorithms.components import is_connected +from networkx.algorithms.cluster import average_clustering from networkx.algorithms.cluster import transitivity from networkx.algorithms.clique import graph_number_of_cliques from networkx.algorithms.clique import graph_clique_number @@ -12,8 +13,11 @@ from networkx.algorithms.assortativity import degree_pearson_correlation_coeffic from networkx.algorithms.assortativity import degree_assortativity_coefficient from networkx.algorithms.approximation.clustering_coefficient import average_clustering from networkx.algorithms.approximation.clique import large_clique_size -from .base import BaseSubgraph +import netlsd +from .base import BaseGraph +import numpy as np import torch +from functools import wraps from .. import register_feature NX_EXTRACTORS = [] @@ -26,9 +30,9 @@ def register_nx(cls): @register_nx -class NxSubgraph(BaseSubgraph): +class NxGraph(BaseGraph): def __init__(self, *args, **kwargs): - super(NxSubgraph, self).__init__(data_t="nx") + super(NxGraph, self).__init__(data_t="nx") self._args = args self._kwargs = kwargs @@ -43,14 +47,12 @@ class NxSubgraph(BaseSubgraph): def nxfunc(func): - r"""A decorator for networkx subgraph transforms. You may want to use it to quickly wrap a nx subgraph feature function object. - + r"""A decorator for networkx Graph transforms. You may want to use it to quickly wrap a nx Graph feature function object. 
Examples -------- @register_nx @nxfunc(large_clique_size) - class NxLargeCliqueSize(NxSubgraph):pass - + class NxLargeCliqueSize(NxGraph):pass """ def decorator_func(cls): @@ -62,118 +64,118 @@ def nxfunc(func): @register_nx @nxfunc(large_clique_size) -class NxLargeCliqueSize(NxSubgraph): +class NxLargeCliqueSize(NxGraph): pass @register_nx @nxfunc(average_clustering) -class NxAverageClusteringApproximate(NxSubgraph): +class NxAverageClusteringApproximate(NxGraph): pass @register_nx @nxfunc(degree_assortativity_coefficient) -class NxDegreeAssortativityCoefficient(NxSubgraph): +class NxDegreeAssortativityCoefficient(NxGraph): pass @register_nx @nxfunc(degree_pearson_correlation_coefficient) -class NxDegreePearsonCorrelationCoefficient(NxSubgraph): +class NxDegreePearsonCorrelationCoefficient(NxGraph): pass @register_nx @nxfunc(has_bridges) -class NxHasBridge(NxSubgraph): +class NxHasBridge(NxGraph): pass @register_nx @nxfunc(graph_clique_number) -class NxGraphCliqueNumber(NxSubgraph): +class NxGraphCliqueNumber(NxGraph): pass @register_nx @nxfunc(graph_number_of_cliques) -class NxGraphNumberOfCliques(NxSubgraph): +class NxGraphNumberOfCliques(NxGraph): pass @register_nx @nxfunc(transitivity) -class NxTransitivity(NxSubgraph): +class NxTransitivity(NxGraph): pass @register_nx @nxfunc(average_clustering) -class NxAverageClustering(NxSubgraph): +class NxAverageClustering(NxGraph): pass @register_nx @nxfunc(is_connected) -class NxIsConnected(NxSubgraph): +class NxIsConnected(NxGraph): pass @register_nx @nxfunc(number_connected_components) -class NxNumberConnectedComponents(NxSubgraph): +class NxNumberConnectedComponents(NxGraph): pass # from networkx.algorithms.components import is_attracting_component # @register_nx # @nxfunc(is_attracting_component) -# class NxIsAttractingComponent(NxSubgraph):pass +# class NxIsAttractingComponent(NxGraph):pass # from networkx.algorithms.components import number_attracting_components # @register_nx # @nxfunc(number_attracting_components) -# class NxNumberAttractingComponents(NxSubgraph):pass +# class NxNumberAttractingComponents(NxGraph):pass # from networkx.algorithms.connectivity.connectivity import average_node_connectivity # @register_nx # @nxfunc(average_node_connectivity) -# class NxAverageNodeConnectivity(NxSubgraph):pass +# class NxAverageNodeConnectivity(NxGraph):pass # from networkx.algorithms.distance_measures import diameter # @register_nx # @nxfunc(diameter) -# class NxDiameter(NxSubgraph):pass +# class NxDiameter(NxGraph):pass # from networkx.algorithms.distance_measures import radius # @register_nx # @nxfunc(radius) -# class NxRadius(NxSubgraph):pass +# class NxRadius(NxGraph):pass @register_nx @nxfunc(is_distance_regular) -class NxIsDistanceRegular(NxSubgraph): +class NxIsDistanceRegular(NxGraph): pass @register_nx @nxfunc(local_efficiency) -class NxLocalEfficiency(NxSubgraph): +class NxLocalEfficiency(NxGraph): pass @register_nx @nxfunc(global_efficiency) -class NxGlobalEfficiency(NxSubgraph): +class NxGlobalEfficiency(NxGraph): pass @register_nx @nxfunc(is_eulerian) -class NxIsEulerian(NxSubgraph): +class NxIsEulerian(NxGraph): pass -# till algorithms.flows +# till algorithms.flows \ No newline at end of file diff --git a/autogl/module/feature/selectors/base.py b/autogl/module/feature/selectors/base.py index 01806ee..2553836 100644 --- a/autogl/module/feature/selectors/base.py +++ b/autogl/module/feature/selectors/base.py @@ -1,8 +1,8 @@ -from ..base import BaseFeatureAtom +from ..base import BaseFeature import numpy as np -class 
BaseSelector(BaseFeatureAtom): +class BaseSelector(BaseFeature): def __init__(self, data_t="np", multigraph=False, **kwargs): super(BaseSelector, self).__init__( data_t=data_t, multigraph=multigraph, **kwargs @@ -12,4 +12,4 @@ class BaseSelector(BaseFeatureAtom): def _transform(self, data): if self._sel is not None: data.x = data.x[:, self._sel] - return data + return data \ No newline at end of file diff --git a/autogl/module/hpo/autone.py b/autogl/module/hpo/autone.py index f499eed..e56f985 100644 --- a/autogl/module/hpo/autone.py +++ b/autogl/module/hpo/autone.py @@ -3,20 +3,16 @@ HPO Module for tuning hyper parameters """ import time -import json -import math import numpy as np from tqdm import trange from . import register_hpo -from .suggestion.models import Study from .base import BaseHPOptimizer, TimeTooLimitedError from .autone_file import utils from torch_geometric.data import GraphSAINTRandomWalkSampler -from ..feature.subgraph.nx import NxSubgraph, NxLargeCliqueSize -from ..feature.subgraph import nx, SgNetLSD +from ..feature.graph import SgNetLSD from torch_geometric.data import InMemoryDataset diff --git a/autogl/solver/base.py b/autogl/solver/base.py index 4b020b9..75083d9 100644 --- a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -15,7 +15,7 @@ from ..module.model import MODEL_DICT from ..module.nas.algorithm import NAS_ALGO_DICT from ..module.nas.estimator import NAS_ESTIMATOR_DICT from ..module.nas.space import NAS_SPACE_DICT -from ..module import BaseFeatureAtom, BaseHPOptimizer, BaseTrainer +from ..module import BaseFeature, BaseHPOptimizer, BaseTrainer from .utils import LeaderBoard from ..utils import get_logger @@ -146,7 +146,7 @@ class BaseSolver: Parameters ---------- - feature_module: autogl.module.feature.BaseFeatureAtom or str or None + feature_module: autogl.module.feature.BaseFeature or str or None The (name of) auto feature engineer used to process the given dataset. Disable feature engineer by setting it to ``None``. @@ -158,7 +158,7 @@ class BaseSolver: # load feature engineer module def get_feature(feature_engineer): - if isinstance(feature_engineer, BaseFeatureAtom): + if isinstance(feature_engineer, BaseFeature): return feature_engineer if isinstance(feature_engineer, str): if feature_engineer in FEATURE_DICT: @@ -173,7 +173,7 @@ class BaseSolver: if feature_module is None: self.feature_module = None - elif isinstance(feature_module, (BaseFeatureAtom, str)): + elif isinstance(feature_module, (BaseFeature, str)): self.feature_module = get_feature(feature_module) elif isinstance(feature_module, list): self.feature_module = get_feature(feature_module[0]) diff --git a/docs/docfile/tutorial/t_fe.rst b/docs/docfile/tutorial/t_fe.rst index f93b698..df92809 100644 --- a/docs/docfile/tutorial/t_fe.rst +++ b/docs/docfile/tutorial/t_fe.rst @@ -3,7 +3,7 @@ AutoGL Feature Engineering ========================== -We provide a series of node and subgraph feature engineers for +We provide a series of node and graph feature engineers for you to compose within a feature engineering pipeline. An automatic feature engineering algorithm is also provided. @@ -16,12 +16,12 @@ Quick Start data = build_dataset_from_name('cora') # 2. 
Compose a feature engineering pipeline
-    from autogl.module.feature import BaseFeatureAtom,AutoFeatureEngineer
+    from autogl.module.feature import BaseFeature,AutoFeatureEngineer
     from autogl.module.feature.generators import GeEigen
     from autogl.module.feature.selectors import SeGBDT
-    from autogl.module.feature.subgraph import SgNetLSD
+    from autogl.module.feature.graph import SgNetLSD
-    # you may compose feature engineering atoms through BaseFeatureAtom.compose
-    fe = BaseFeatureAtom.compose([
+    # you may compose feature engineering bases through BaseFeature.compose
+    fe = BaseFeature.compose([
     GeEigen(size=32) ,
     SeGBDT(fixlen=100),
     SgNetLSD()
@@ -33,16 +33,16 @@ Quick Start
     fe.fit(data)
     data1=fe.transform(data,inplace=False)

-List of FE atom names
+List of FE base names
---------------------

-Now three kinds of feature engineering atoms are supported,namely ``generators``, ``selectors`` , ``subgraph``.You can import
-atoms from according module as is mentioned in the ``Quick Start`` part. Or you may want to just list names of atoms
+Now three kinds of feature engineering bases are supported, namely ``generators``, ``selectors`` and ``graph``. You can import
+bases from the corresponding module as is mentioned in the ``Quick Start`` part. Or you may want to just list names of bases
 in configurations or as arguments of the autogl solver (a sketch of the latter usage follows this diff).

 1. ``generators``

 +---------------------------+-------------------------------------------------+
-| Atom                      | Description                                     |
+| Base                      | Description                                     |
 +===========================+=================================================+
 | ``graphlet``              | concatenate local graphlet numbers as features. |
 +---------------------------+-------------------------------------------------+
@@ -62,31 +62,31 @@ in configurations or as arguments of the autogl solver.

 2. ``selectors``

 +----------------------+----------------------------------------------------------------------------------+
-| Atom                 | Description                                                                      |
+| Base                 | Description                                                                      |
 +======================+==================================================================================+
 | ``SeFilterConstant`` | delete all constant and one-hot encoding node features.                          |
 +----------------------+----------------------------------------------------------------------------------+
 | ``gbdt``             | select top-k important node features ranked by Gradient Boosting Decision Tree.  |
 +----------------------+----------------------------------------------------------------------------------+

-3. ``subgraph``
+3. ``graph``

-``netlsd`` is a subgraph feature generation method. please refer to the according document.
+``netlsd`` is a graph feature generation method. Please refer to the corresponding document.

-A set of subgraph feature extractors implemented in NetworkX are wrapped, please refer to NetworkX for details. (``NxLargeCliqueSize``, ``NxAverageClusteringApproximate``, ``NxDegreeAssortativityCoefficient``, ``NxDegreePearsonCorrelationCoefficient``, ``NxHasBridge``
+A set of graph feature extractors implemented in NetworkX are wrapped; please refer to NetworkX for details. (``NxLargeCliqueSize``, ``NxAverageClusteringApproximate``, ``NxDegreeAssortativityCoefficient``, ``NxDegreePearsonCorrelationCoefficient``, ``NxHasBridge``
 , ``NxGraphCliqueNumber``, ``NxGraphNumberOfCliques``, ``NxTransitivity``, ``NxAverageClustering``, ``NxIsConnected``, ``NxNumberConnectedComponents``, ``NxIsDistanceRegular``, ``NxLocalEfficiency``, ``NxGlobalEfficiency``, ``NxIsEulerian``)

-The taxonomy of atom types is based on the way of transforming features. ``generators`` concatenate the original features with ones newly generated
+The taxonomy of base types is based on the way of transforming features. ``generators`` concatenate the original features with ones newly generated
 or just overwrite the original ones. Instead of generating new features, ``selectors`` try to select useful features and keep learned selecting methods
-in the atom itself. The former two types of atoms can be exploited in node or edge level (modification upon each
-node or edge feature) ,while ``subgraph`` focuses on feature engineering in graph level (modification upon each graph feature).
+in the base itself. The former two types of bases can be exploited at node or edge level (modification upon each
+node or edge feature), while ``graph`` focuses on feature engineering at graph level (modification upon each graph feature).
 For your convenience in further development, you may want to design a new item by inheriting one of them.
-Of course, you can directly inherit the ``BaseFeatureAtom`` as well.
+Of course, you can directly inherit the ``BaseFeature`` as well.

 Create Your Own FE
 ------------------

-You can create your own feature engineering object by simply inheriting one of feature engineering atom types ,namely ``generators``, ``selectors`` , ``subgraph``,
+You can create your own feature engineering object by simply inheriting one of the feature engineering base types, namely ``generators``, ``selectors`` and ``graph``,
 and overloading methods ``_fit`` and ``_transform``.

 .. code-block :: python
@@ -108,4 +108,3 @@ and overloading methods ``_fit`` and ``_transform``.
         fe=np.eye(data.x.shape[0])
         data.x=np.concatenate([data.x,fe],axis=1)
         return data
-
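As noted above, the registered names can be handed straight to the solver instead of composed instances. A brief sketch of that path; the ``feature_module`` keyword follows the solver tutorial earlier in this series, and ``'deepgl'`` is the name registered by ``AutoFeatureEngineer`` in ``auto_feature.py``:

```python
from autogl.solver import AutoNodeClassifier

# Pass a registered feature-engineering name (see FEATURE_DICT) instead of
# an instance; 'deepgl' is registered by AutoFeatureEngineer above.
solver = AutoNodeClassifier(feature_module='deepgl', hpo_module='random', ensemble_module=None)
```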
diff --git a/examples/fe_gcl_test.py b/examples/fe_gcl_test.py
index b44128c..abec81e 100644
--- a/examples/fe_gcl_test.py
+++ b/examples/fe_gcl_test.py
@@ -1,9 +1,8 @@
 import sys
-
-from networkx.algorithms.reciprocity import reciprocity
 sys.path.append('../')
+
 from autogl.datasets import build_dataset_from_name
-from autogl.solver import AutoNodeClassifier,AutoGraphClassifier
+from autogl.solver import AutoGraphClassifier
 from autogl.module import Acc
 import yaml
 import random
@@ -12,22 +11,12 @@ import numpy as np

 import logging
 logging.basicConfig(level=logging.INFO)
-import sys
-from numpy.core.defchararray import index
-from torch.utils.data import dataset
-
-from yaml import compose, load
-sys.path.append('../')
 import random
 import numpy as np
 import torch
 import os
 import yaml
-import re
-from autogl.module.feature.base import BaseFeatureAtom
 from autogl.module.feature import FEATURE_DICT
-import pandas as pd
-import copy
 from argparse import ArgumentParser
 parser = ArgumentParser()
 # parser.add_argument('--device', default=0, type=int)
@@ -69,8 +58,6 @@ def run_gcl(dataset,configs,features,seed):
         cross_validation=True,
         cv_split=10,
     )
-    val = autoClassifier.get_model_by_performance(0)[0].get_valid_score()[0]
-
     # test
     predict_result = autoClassifier.predict_proba()
     acc=Acc.evaluate(predict_result, dataset.data.y[dataset.test_index].cpu().detach().numpy())
@@ -95,7 +82,7 @@ if __name__ == "__main__":
     feature_set=[
         '',
         'netlsd',
-        'NxSubgraph', 'NxLargeCliqueSize', 'NxAverageClusteringApproximate', 'NxDegreeAssortativityCoefficient', 'NxDegreePearsonCorrelationCoefficient', 'NxHasBridge', 'NxGraphCliqueNumber', 'NxGraphNumberOfCliques', 'NxTransitivity', 'NxAverageClustering', 'NxIsConnected', 'NxNumberConnectedComponents', 'NxIsDistanceRegular', 'NxLocalEfficiency', 'NxGlobalEfficiency', 'NxIsEulerian'
+        'NxGraph',
'NxLargeCliqueSize', 'NxAverageClusteringApproximate', 'NxDegreeAssortativityCoefficient', 'NxDegreePearsonCorrelationCoefficient', 'NxHasBridge', 'NxGraphCliqueNumber', 'NxGraphNumberOfCliques', 'NxTransitivity', 'NxAverageClustering', 'NxIsConnected', 'NxNumberConnectedComponents', 'NxIsDistanceRegular', 'NxLocalEfficiency', 'NxGlobalEfficiency', 'NxIsEulerian' ] datasets=[ 'mutag', @@ -113,7 +100,7 @@ if __name__ == "__main__": cnt+=1 if cnt<=0: continue - fs=['onlyconst',f] if f !='' else ['onlyconst','subgraph'] + fs=['onlyconst',f] if f !='' else ['onlyconst','graph'] try: # queue_configs.append([d,f'../configs/gcl_{m}.yaml',fs,seed]) acc=run_gcl(d,f'../configs/gcl_{m}.yaml',fs,seed) @@ -122,15 +109,3 @@ if __name__ == "__main__": acc=-1 record_file.write(f'{cnt},{acc},{m},{d},{f},{seed}\n') record_file.flush() - - - - - - - - - - - - diff --git a/examples/fe_ncl_test.py b/examples/fe_ncl_test.py index 11b61f9..1ea1117 100644 --- a/examples/fe_ncl_test.py +++ b/examples/fe_ncl_test.py @@ -1,7 +1,6 @@ import sys - -from networkx.algorithms.reciprocity import reciprocity sys.path.append('../') + from autogl.datasets import build_dataset_from_name from autogl.solver import AutoNodeClassifier from autogl.module import Acc @@ -13,21 +12,14 @@ import numpy as np import logging logging.basicConfig(level=logging.INFO) import sys -from numpy.core.defchararray import index -from torch.utils.data import dataset -from yaml import compose, load sys.path.append('../') import random import numpy as np import torch import os import yaml -import re -from autogl.module.feature.base import BaseFeatureAtom from autogl.module.feature import FEATURE_DICT -import pandas as pd -import copy from argparse import ArgumentParser parser = ArgumentParser() # parser.add_argument('--device', default=0, type=int) @@ -124,14 +116,3 @@ if __name__ == '__main__': acc=-1 record_file.write(f'{cnt},{acc},{m},{d},{f},{seed}\n') record_file.flush() - - - - - - - - - - - From 5db6c5a1ba9069d6323da742b466bfd6788180f9 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 11 Jul 2021 14:44:01 +0000 Subject: [PATCH 140/144] name subgraph to graph --- autogl/module/feature/graph/nx.py | 2 ++ autogl/module/hpo/autone.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/autogl/module/feature/graph/nx.py b/autogl/module/feature/graph/nx.py index 70042b4..9179b3b 100644 --- a/autogl/module/feature/graph/nx.py +++ b/autogl/module/feature/graph/nx.py @@ -48,11 +48,13 @@ class NxGraph(BaseGraph): def nxfunc(func): r"""A decorator for networkx Graph transforms. You may want to use it to quickly wrap a nx Graph feature function object. 
+ Examples -------- @register_nx @nxfunc(large_clique_size) class NxLargeCliqueSize(NxGraph):pass + """ def decorator_func(cls): diff --git a/autogl/module/hpo/autone.py b/autogl/module/hpo/autone.py index e56f985..bc6d6d8 100644 --- a/autogl/module/hpo/autone.py +++ b/autogl/module/hpo/autone.py @@ -82,7 +82,7 @@ class AutoNE(BaseHPOptimizer): def get_wne(graph): graph = func.fit_transform(graph) - # transform = nx.NxSubgraph.compose(map(lambda x: x(), nx.NX_EXTRACTORS)) + # transform = nx.NxGraph.compose(map(lambda x: x(), nx.NX_EXTRACTORS)) # print(type(graph)) # gf = transform.fit_transform(graph).data.gf gf = graph.data.gf From d094759a17febaa1cc1bd4b9021514f27d982c27 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 11 Jul 2021 15:21:01 +0000 Subject: [PATCH 141/144] remove uncessary files --- configs/nodeclf_nas_benchmark.yml | 40 -------- configs/nodeclf_nas_darts_benchmark.yml | 42 --------- configs/nodeclf_nas_enas_benchmark.yml | 42 --------- examples/fe_gcl_test.py | 111 ---------------------- examples/fe_ncl_test.py | 118 ------------------------ examples/graphnas.py | 6 +- examples/test_enas.py | 29 ------ examples/test_graph_nas.py | 39 -------- examples/test_graph_nas_rand.py | 71 -------------- examples/test_graph_nas_rl.py | 55 ----------- examples/test_graph_nas_space.py | 73 --------------- examples/test_nas.py | 22 ----- examples/test_nas_grah.py | 34 ------- 13 files changed, 4 insertions(+), 678 deletions(-) delete mode 100644 configs/nodeclf_nas_benchmark.yml delete mode 100644 configs/nodeclf_nas_darts_benchmark.yml delete mode 100644 configs/nodeclf_nas_enas_benchmark.yml delete mode 100644 examples/fe_gcl_test.py delete mode 100644 examples/fe_ncl_test.py delete mode 100644 examples/test_enas.py delete mode 100644 examples/test_graph_nas.py delete mode 100644 examples/test_graph_nas_rand.py delete mode 100644 examples/test_graph_nas_rl.py delete mode 100644 examples/test_graph_nas_space.py delete mode 100644 examples/test_nas.py delete mode 100644 examples/test_nas_grah.py diff --git a/configs/nodeclf_nas_benchmark.yml b/configs/nodeclf_nas_benchmark.yml deleted file mode 100644 index dd52a7f..0000000 --- a/configs/nodeclf_nas_benchmark.yml +++ /dev/null @@ -1,40 +0,0 @@ -ensemble: - name: null -feature: -- name: PYGNormalizeFeatures -hpo: - max_evals: 10 - name: random -nas: - space: - name: graphnas - hidden_dim: 64 - layer_number: 4 - algorithm: - name: graphnas - num_epochs: 200 - estimator: - name: scratch -models: [] -trainer: - hp_space: - - maxValue: 300 - minValue: 100 - parameterName: max_epoch - scalingType: LINEAR - type: INTEGER - - maxValue: 30 - minValue: 10 - parameterName: early_stopping_round - scalingType: LINEAR - type: INTEGER - - maxValue: 0.05 - minValue: 0.01 - parameterName: lr - scalingType: LOG - type: DOUBLE - - maxValue: 0.0005 - minValue: 5.0e-05 - parameterName: weight_decay - scalingType: LOG - type: DOUBLE diff --git a/configs/nodeclf_nas_darts_benchmark.yml b/configs/nodeclf_nas_darts_benchmark.yml deleted file mode 100644 index 154bffa..0000000 --- a/configs/nodeclf_nas_darts_benchmark.yml +++ /dev/null @@ -1,42 +0,0 @@ -ensemble: - name: null -feature: -- name: PYGNormalizeFeatures -hpo: - max_evals: 10 - name: random -nas: - space: - name: singlepath - hidden_dim: 64 - layer_number: 2 - dropout: 0.8 - ops: ['gcn', 'gat', 'linear'] - algorithm: - name: darts - num_epochs: 200 - estimator: - name: oneshot -models: [] -trainer: - hp_space: - - maxValue: 300 - minValue: 100 - parameterName: max_epoch - scalingType: LINEAR - 
type: INTEGER - - maxValue: 30 - minValue: 10 - parameterName: early_stopping_round - scalingType: LINEAR - type: INTEGER - - maxValue: 0.05 - minValue: 0.01 - parameterName: lr - scalingType: LOG - type: DOUBLE - - maxValue: 0.0005 - minValue: 5.0e-05 - parameterName: weight_decay - scalingType: LOG - type: DOUBLE diff --git a/configs/nodeclf_nas_enas_benchmark.yml b/configs/nodeclf_nas_enas_benchmark.yml deleted file mode 100644 index 59d0767..0000000 --- a/configs/nodeclf_nas_enas_benchmark.yml +++ /dev/null @@ -1,42 +0,0 @@ -ensemble: - name: null -feature: -- name: PYGNormalizeFeatures -hpo: - max_evals: 10 - name: random -nas: - space: - name: singlepath - hidden_dim: 64 - layer_number: 2 - dropout: 0.8 - ops: ['gcn', 'gat', 'linear'] - algorithm: - name: enas - num_epochs: 200 - estimator: - name: oneshot -models: [] -trainer: - hp_space: - - maxValue: 300 - minValue: 100 - parameterName: max_epoch - scalingType: LINEAR - type: INTEGER - - maxValue: 30 - minValue: 10 - parameterName: early_stopping_round - scalingType: LINEAR - type: INTEGER - - maxValue: 0.05 - minValue: 0.01 - parameterName: lr - scalingType: LOG - type: DOUBLE - - maxValue: 0.0005 - minValue: 5.0e-05 - parameterName: weight_decay - scalingType: LOG - type: DOUBLE diff --git a/examples/fe_gcl_test.py b/examples/fe_gcl_test.py deleted file mode 100644 index abec81e..0000000 --- a/examples/fe_gcl_test.py +++ /dev/null @@ -1,111 +0,0 @@ -import sys -sys.path.append('../') - -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoGraphClassifier -from autogl.module import Acc -import yaml -import random -import torch -import numpy as np - -import logging -logging.basicConfig(level=logging.INFO) -import random -import numpy as np -import torch -import os -import yaml -from autogl.module.feature import FEATURE_DICT -from argparse import ArgumentParser -parser = ArgumentParser() -# parser.add_argument('--device', default=0, type=int) -# parser.add_argument('--max_eval', default=10, type=int) -parser.add_argument('--sn',default=5,type=int) -parser.add_argument('--output',default='./record_gcl2.txt',type=str) -parser.add_argument('--clean',default=False,type=bool) -args=dict() -def setseed(seed): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - -def renew(record_file): - with open(record_file,'w') as file: - file.write('') - -def run_gcl(dataset,configs,features,seed): - - print(f'run {dataset} \t {configs} \t {features} \t {seed}') - setseed(seed) - dataset = build_dataset_from_name(dataset) - configs = yaml.load(open(configs, 'r').read(), Loader=yaml.FullLoader) - configs['features']=[] - for f in features: - configs['feature'].append({'name':f}) - - autoClassifier = AutoGraphClassifier.from_config(configs) - # train - autoClassifier.fit( - dataset, - time_limit=3600, - train_split=0.8, - val_split=0.1, - cross_validation=True, - cv_split=10, - ) - # test - predict_result = autoClassifier.predict_proba() - acc=Acc.evaluate(predict_result, dataset.data.y[dataset.test_index].cpu().detach().numpy()) - # print(acc) - return acc - -if __name__ == "__main__": - print(f"all FEs {FEATURE_DICT.keys()}") - args = parser.parse_args() - - record_file=args.output - if not os.path.exists(record_file): - renew(record_file) - print(f"record file {record_file}") - record_file=open(record_file,'a+') - - sn=args.sn # seeds num for each config - 
setseed(2021) - seeds=[random.randint(0,12345678) for x in range(sn)] - print('setting seeds ',seeds) - - feature_set=[ - '', - 'netlsd', - 'NxGraph', 'NxLargeCliqueSize', 'NxAverageClusteringApproximate', 'NxDegreeAssortativityCoefficient', 'NxDegreePearsonCorrelationCoefficient', 'NxHasBridge', 'NxGraphCliqueNumber', 'NxGraphNumberOfCliques', 'NxTransitivity', 'NxAverageClustering', 'NxIsConnected', 'NxNumberConnectedComponents', 'NxIsDistanceRegular', 'NxLocalEfficiency', 'NxGlobalEfficiency', 'NxIsEulerian' - ] - datasets=[ - 'mutag', - 'imdb-b', - 'imdb-m', - 'proteins', - 'collab' - ] - models=['gin'] - cnt=0 - for fi,f in enumerate(feature_set): - for mi,m in enumerate(models): - for di,d in enumerate(datasets): - for si,seed in enumerate(seeds): - cnt+=1 - if cnt<=0: - continue - fs=['onlyconst',f] if f !='' else ['onlyconst','graph'] - try: - # queue_configs.append([d,f'../configs/gcl_{m}.yaml',fs,seed]) - acc=run_gcl(d,f'../configs/gcl_{m}.yaml',fs,seed) - except Exception as e: - print(e) - acc=-1 - record_file.write(f'{cnt},{acc},{m},{d},{f},{seed}\n') - record_file.flush() diff --git a/examples/fe_ncl_test.py b/examples/fe_ncl_test.py deleted file mode 100644 index 1ea1117..0000000 --- a/examples/fe_ncl_test.py +++ /dev/null @@ -1,118 +0,0 @@ -import sys -sys.path.append('../') - -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoNodeClassifier -from autogl.module import Acc -import yaml -import random -import torch -import numpy as np - -import logging -logging.basicConfig(level=logging.INFO) -import sys - -sys.path.append('../') -import random -import numpy as np -import torch -import os -import yaml -from autogl.module.feature import FEATURE_DICT -from argparse import ArgumentParser -parser = ArgumentParser() -# parser.add_argument('--device', default=0, type=int) -# parser.add_argument('--max_eval', default=10, type=int) -parser.add_argument('--sn',default=5,type=int) -parser.add_argument('--output',default='./record.txt',type=str) -parser.add_argument('--clean',default=False,type=bool) -args=dict() -def setseed(seed): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - -def renew(record_file): - with open(record_file,'w') as file: - file.write('') - -def run_ncl(dataset,configs,features,seed): - print(f'run {dataset} \t {configs} \t {features} \t {seed}') - setseed(seed) - dataset = build_dataset_from_name(dataset) - configs = yaml.load(open(configs, 'r').read(), Loader=yaml.FullLoader) - configs['features']=[] - for f in features: - configs['feature'].append({'name':f}) - - autoClassifier = AutoNodeClassifier.from_config(configs) - # train - if dataset in ['cora', 'citeseer', 'pubmed']: - autoClassifier.fit(dataset, time_limit=3600, evaluation_method=[Acc]) - else: - autoClassifier.fit(dataset, time_limit=3600, evaluation_method=[Acc], seed=seed, train_split=20*dataset.num_classes, val_split=30*dataset.num_classes, balanced=False) - val = autoClassifier.get_model_by_performance(0)[0].get_valid_score()[0] - # print('val acc: ', val) - - # test - predict_result = autoClassifier.predict_proba(use_best=True, use_ensemble=False) - test_result=Acc.evaluate(predict_result, dataset.data.y[dataset.data.test_mask].numpy()) - # print('test acc: ', test_result) - return test_result - -if __name__ == '__main__': - print(f"all FEs {FEATURE_DICT.keys()}") - args = parser.parse_args() - - 
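# The sweep below mirrors the structure of the graph-level script above: a
# fixed master seed (2021) fans out into `sn` per-run seeds so every
# (feature, model, dataset, seed) cell is reproducible; each result is
# appended to the record file and flushed immediately, and the
# `cnt <= 100: continue` guard lets an interrupted sweep resume by skipping
# runs that were already recorded. Note that run_ncl above shares the
# configs['features'] / configs['feature'] key mismatch noted in the
# previous script.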
record_file=args.output - if not os.path.exists(record_file): - renew(record_file) - print(f"record file {record_file}") - record_file=open(record_file,'a+') - - sn=args.sn # seeds num for each config - setseed(2021) - seeds=[random.randint(0,12345678) for x in range(sn)] - print('setting seeds ',seeds) - - feature_set=[ - '', - 'onehot', - 'PYGOneHotDegree', - 'eigen', - 'pagerank', - 'PYGLocalDegreeProfile', - 'graphlet', - ] - datasets=[ - 'cora', - 'citeseer', - 'pubmed', - 'amazon_computers', - 'amazon_photo', - 'coauthor_cs', - 'coauthor_physics', - # 'reddit' - ] - models=['gcn','gat'] - cnt=0 - for fi,f in enumerate(feature_set): - for mi,m in enumerate(models): - for di,d in enumerate(datasets): - for si,seed in enumerate(seeds): - cnt+=1 - if cnt<=100: - continue - fs=['onlyconst',f] if f !='' else ['onlyconst'] - try: - acc=run_ncl(d,f'../configs/ncl_{m}.yaml',fs,seed) - except Exception as e: - print(e) - acc=-1 - record_file.write(f'{cnt},{acc},{m},{d},{f},{seed}\n') - record_file.flush() diff --git a/examples/graphnas.py b/examples/graphnas.py index d0028be..0211403 100644 --- a/examples/graphnas.py +++ b/examples/graphnas.py @@ -3,11 +3,13 @@ sys.path.append('../') from autogl.datasets import build_dataset_from_name from autogl.solver import AutoNodeClassifier from autogl.module.train import Acc +from autogl.solver.utils import set_seed import argparse if __name__ == '__main__': + set_seed(202106) parser = argparse.ArgumentParser() - parser.add_argument('--config', type=str, default='../configs/nodeclf_nas_benchmark.yml') + parser.add_argument('--config', type=str, default='../configs/nodeclf_nas_macro_benchmark.yml') parser.add_argument('--dataset', choices=['cora', 'citeseer', 'pubmed'], default='cora', type=str) args = parser.parse_args() @@ -17,4 +19,4 @@ if __name__ == '__main__': solver.fit(dataset) solver.get_leaderboard().show() out = solver.predict_proba() - print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) + print('acc on dataset', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) diff --git a/examples/test_enas.py b/examples/test_enas.py deleted file mode 100644 index ade6e50..0000000 --- a/examples/test_enas.py +++ /dev/null @@ -1,29 +0,0 @@ -from copy import deepcopy -import sys -from nni.nas.pytorch.fixed import apply_fixed_architecture -from torch_geometric.nn.conv.gat_conv import GATConv -from torch_geometric.nn.conv.gcn_conv import GCNConv -sys.path.append('../') -import torch -from autogl.solver import AutoNodeClassifier -from autogl.module.nas.nas import DartsNodeClfEstimator -from autogl.module.nas.space import GraphSpace -from autogl.datasets import build_dataset_from_name -from autogl.module.model import BaseModel -# from autogl.module.nas.darts import Darts -from autogl.utils import get_logger -from autogl.module.nas.enas import Enas -if __name__ == '__main__': - dataset = build_dataset_from_name('cora') - solver = AutoNodeClassifier( - feature_module=None, - graph_models=[], - hpo_module="random", - max_evals=10, - ensemble_module=None, - nas_algorithms=[Enas()], - nas_spaces=[GraphSpace(hidden_dim=64, ops=[GATConv, GCNConv])], - nas_estimators=[DartsNodeClfEstimator()] - ) - solver.fit(dataset) - out = solver.predict(dataset) \ No newline at end of file diff --git a/examples/test_graph_nas.py b/examples/test_graph_nas.py deleted file mode 100644 index 4f6740f..0000000 --- a/examples/test_graph_nas.py +++ /dev/null @@ -1,39 +0,0 @@ -import sys -sys.path.append('../') -from 
torch_geometric.nn import GCNConv -import torch -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoNodeClassifier -from autogl.module.train import NodeClassificationFullTrainer -from autogl.module.nas import Darts, OneShotEstimator -from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace -from autogl.module.train import Acc -from autogl.module.nas.algorithm.enas import Enas - -if __name__ == '__main__': - dataset = build_dataset_from_name('cora') - solver = AutoNodeClassifier( - feature_module='PYGNormalizeFeatures', - graph_models=[], - hpo_module=None, - ensemble_module=None, - default_trainer=NodeClassificationFullTrainer( - optimizer=torch.optim.Adam, - lr=0.01, - max_epoch=200, - early_stopping_round=200, - weight_decay=5e-4, - device="auto", - init=False, - feval=['acc'], - loss="nll_loss", - lr_scheduler_type=None,), - nas_algorithms=[Enas(num_epochs=400,n_warmup=250)], - #nas_algorithms=[Darts(num_epochs=200)], - nas_spaces=[GraphNasNodeClassificationSpace(hidden_dim=32,search_act_con=False,layer_number=2)], - nas_estimators=[OneShotEstimator()] - ) - solver.fit(dataset) - solver.get_leaderboard().show() - out = solver.predict_proba() - print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) diff --git a/examples/test_graph_nas_rand.py b/examples/test_graph_nas_rand.py deleted file mode 100644 index bf906de..0000000 --- a/examples/test_graph_nas_rand.py +++ /dev/null @@ -1,71 +0,0 @@ -import sys -sys.path.append('../') -from torch_geometric.nn import GCNConv -import torch -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoNodeClassifier -from autogl.module.train import NodeClassificationFullTrainer -from autogl.module.nas import Darts, OneShotEstimator -from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace -from autogl.module.nas.space.graph_nas_macro import GraphNasMacroNodeClassificationSpace -from autogl.module.train import Acc -from autogl.module.nas.algorithm.enas import Enas -from autogl.module.nas.algorithm.rl import RL,GraphNasRL -from autogl.module.nas.estimator.train_scratch import TrainEstimator -from autogl.module.nas.algorithm.random_search import RandomSearch -import numpy as np -import logging -def one_run(): - logging.getLogger().setLevel(logging.WARNING) - cora = build_dataset_from_name('cora') - - clf = AutoNodeClassifier( - feature_module='PYGNormalizeFeatures', - graph_models=[], - nas_algorithms=[Enas(num_epochs=10)], - nas_spaces=[GraphNasNodeClassificationSpace()], - nas_estimators=[OneShotEstimator()], - max_evals=2 - ) - - clf.fit(cora) - clf.predict(cora) - - return - - dataset = build_dataset_from_name('cora') - solver = AutoNodeClassifier( - feature_module='PYGNormalizeFeatures', - graph_models=[], - hpo_module=None, - ensemble_module=None, - default_trainer=NodeClassificationFullTrainer( - optimizer=torch.optim.Adam, - lr=0.005, - max_epoch=300, - early_stopping_round=20, - weight_decay=5e-4, - device="auto", - init=False, - feval=['acc'], - loss="nll_loss", - lr_scheduler_type=None,), - # nas_algorithms=[RL(num_epochs=400)], - nas_algorithms=[GraphNasRL(num_epochs=1)], - #nas_algorithms=[Darts(num_epochs=200)], - nas_spaces=[GraphNasMacroNodeClfSpace(hidden_dim=16,search_act_con=True,layer_number=2)], - nas_estimators=[TrainEstimator()] - ) - - solver.fit(dataset) - solver.get_leaderboard().show() - out = solver.predict_proba() - acc = Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy()) 
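# NOTE (editorial): the early `return` after clf.predict(cora) above makes
# this second solver block in one_run unreachable as written; it survives
# here only as a reference configuration. The block also instantiates
# GraphNasMacroNodeClfSpace, while the import at the top of the file provides
# GraphNasMacroNodeClassificationSpace; the longer name is presumably the one
# intended (the same short alias appears in test_graph_nas_rl.py below).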
- print('acc on cora', acc) - return acc - -if __name__ == '__main__': - acc_li = [] - for i in range(2): - acc_li.append(one_run()) - print("results:", np.mean(acc_li), np.std(acc_li)) \ No newline at end of file diff --git a/examples/test_graph_nas_rl.py b/examples/test_graph_nas_rl.py deleted file mode 100644 index 1170674..0000000 --- a/examples/test_graph_nas_rl.py +++ /dev/null @@ -1,55 +0,0 @@ -import sys -sys.path.append('../') -from torch_geometric.nn import GCNConv -import torch -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoNodeClassifier -from autogl.module.train import NodeClassificationFullTrainer -from autogl.module.nas import Darts, OneShotEstimator -from autogl.module.nas.space.graph_nas import GraphNasNodeClassificationSpace -from autogl.module.nas.space.graph_nas_macro import GraphNasMacroNodeClassificationSpace -from autogl.module.train import Acc -from autogl.module.nas.algorithm.enas import Enas -from autogl.module.nas.algorithm.rl import RL,GraphNasRL -from autogl.module.nas.estimator.train_scratch import TrainEstimator -from autogl.module.nas.algorithm.random_search import RandomSearch -import numpy as np -import logging -def one_run(): - logging.getLogger().setLevel(logging.WARNING) - dataset = build_dataset_from_name('cora') - solver = AutoNodeClassifier( - feature_module='PYGNormalizeFeatures', - graph_models=[], - hpo_module=None, - ensemble_module=None, - default_trainer=NodeClassificationFullTrainer( - optimizer=torch.optim.Adam, - lr=0.005, - max_epoch=300, - early_stopping_round=20, - weight_decay=5e-4, - device="auto", - init=False, - feval=['acc'], - loss="nll_loss", - lr_scheduler_type=None,), - # nas_algorithms=[RL(num_epochs=400)], - nas_algorithms=[GraphNasRL(num_epochs=1)], - #nas_algorithms=[Darts(num_epochs=200)], - nas_spaces=[GraphNasMacroNodeClfSpace(hidden_dim=16,search_act_con=True,layer_number=2)], - nas_estimators=[TrainEstimator()] - ) - - solver.fit(dataset) - solver.get_leaderboard().show() - out = solver.predict_proba() - acc = Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy()) - print('acc on cora', acc) - return acc - -if __name__ == '__main__': - acc_li = [] - for i in range(100): - acc_li.append(one_run()) - print("results:", np.mean(acc_li), np.std(acc_li)) \ No newline at end of file diff --git a/examples/test_graph_nas_space.py b/examples/test_graph_nas_space.py deleted file mode 100644 index 0d60f3c..0000000 --- a/examples/test_graph_nas_space.py +++ /dev/null @@ -1,73 +0,0 @@ -import sys -from nni.nas.pytorch.mutables import Mutable -sys.path.append('../') -from torch_geometric.nn import GCNConv -import torch -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoNodeClassifier -from autogl.module.train import NodeClassificationFullTrainer -from autogl.module.nas import Darts, OneShotEstimator -from autogl.module.nas.space.graph_nas import * -from autogl.module.train import Acc -from autogl.module.nas.algorithm.enas import Enas -from autogl.module.nas.algorithm.rl import * -from autogl.module.nas.estimator.one_shot import TrainEstimator -import logging -import numpy as np -from tqdm import tqdm -if __name__ == '__main__': - logging.getLogger().setLevel(logging.WARNING) - dataset = build_dataset_from_name('cora') - space=GraphNasNodeClassificationSpace(hidden_dim=16,search_act_con=True,layer_number=2) - space.instantiate(input_dim=dataset[0].x.shape[1], - output_dim=dataset.num_classes,) - estim=TrainEstimator() - # solver.fit(dataset) - # 
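# The Tmp wrapper below reproduces by hand what the one-shot NAS algorithms
# do internally: it replaces every LayerChoice/InputChoice in the space with
# a path-sampling module, `dfs` then enumerates every possible selection, and
# the estimator scores each exported architecture on the test split. The
# script is effectively a brute-force sweep of the whole search space, usable
# as an oracle against which the search algorithms can be compared.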
solver.get_leaderboard().show() - # out = solver.predict_proba() - - # print('acc on cora', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) - class Tmp: - def __init__(self,space): - self.model = space - self.nas_modules = [] - k2o = get_module_order(self.model) - replace_layer_choice(self.model, PathSamplingLayerChoice, self.nas_modules) - replace_input_choice(self.model, PathSamplingInputChoice, self.nas_modules) - self.nas_modules = sort_replaced_module(k2o, self.nas_modules) - - t=Tmp(space) - print(t.nas_modules) - nm=t.nas_modules - selection_range={} - for k,v in nm: - selection_range[k]=len(v) - ks=list(selection_range.keys()) - selections=[] - def dfs(selection,d): - if d>=len(ks): - selections.append(selection.copy()) - return - k=ks[d] - r=selection_range[k] - for i in range(r): - selection[k]=i - dfs(selection,d+1) - dfs({},0) - print(f'#selections {len(selections)}') - device=torch.device('cuda:0') - accs=[] - from datetime import datetime - timestamp=datetime.now().strftime('%m%d-%H-%M-%S') - log=open(f'acclog{timestamp}.txt','w') - with tqdm(selections) as bar: - for selection in bar: - arch=space.export(selection,device) - m,l=estim.infer(arch,dataset,'test') - bar.set_postfix(m=m,l=l.item()) - log.write(f'{arch}\n{selection}\n{m},{l}\n') - log.flush() - accs.append(m) - - np.save(f'space_acc{timestamp}',np.array(accs)) - print(f'max acc {np.max(accs)}') \ No newline at end of file diff --git a/examples/test_nas.py b/examples/test_nas.py deleted file mode 100644 index 0211403..0000000 --- a/examples/test_nas.py +++ /dev/null @@ -1,22 +0,0 @@ -import sys -sys.path.append('../') -from autogl.datasets import build_dataset_from_name -from autogl.solver import AutoNodeClassifier -from autogl.module.train import Acc -from autogl.solver.utils import set_seed -import argparse - -if __name__ == '__main__': - set_seed(202106) - parser = argparse.ArgumentParser() - parser.add_argument('--config', type=str, default='../configs/nodeclf_nas_macro_benchmark.yml') - parser.add_argument('--dataset', choices=['cora', 'citeseer', 'pubmed'], default='cora', type=str) - - args = parser.parse_args() - - dataset = build_dataset_from_name('cora') - solver = AutoNodeClassifier.from_config(args.config) - solver.fit(dataset) - solver.get_leaderboard().show() - out = solver.predict_proba() - print('acc on dataset', Acc.evaluate(out, dataset[0].y[dataset[0].test_mask].detach().numpy())) diff --git a/examples/test_nas_grah.py b/examples/test_nas_grah.py deleted file mode 100644 index 7a30a65..0000000 --- a/examples/test_nas_grah.py +++ /dev/null @@ -1,34 +0,0 @@ -import sys -sys.path.append('../') -from autogl.solver import AutoGraphClassifier -from autogl.module.hpo.nas import BaseNAS, BaseEstimator, GraphSpace -from autogl.datasets import build_dataset_from_name -from autogl.module.train import GraphClassificationFullTrainer -from torch_geometric.nn import GATConv, GCNConv - -class TestNASAlgorithm(BaseNAS): - model = None - def search(self, space, dset, trainer): - num_classes = dset.num_classes - num_features = dset.num_features - return GraphClassificationFullTrainer( - "gin", - num_features=num_features, - num_classes=num_classes, - device="auto" - ) - -if __name__ == '__main__': - dataset = build_dataset_from_name('mutag') - solver = AutoGraphClassifier( - feature_module=None, - graph_models=[], - hpo_module="random", - max_evals=10, - ensemble_module=None, - nas_algorithms=[TestNASAlgorithm()], - nas_spaces=[GraphSpace(dataset.num_features, 64, dataset.num_classes, 
[GATConv, GCNConv])], - nas_estimators=[BaseEstimator()] - ) - solver.fit(dataset, train_split=0.8, val_split=0.1) - out = solver.predict() \ No newline at end of file From be3c462b2a20558187ec0cfb3f98c8a56c37950e Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 11 Jul 2021 15:23:40 +0000 Subject: [PATCH 142/144] black format --- autogl/datasets/__init__.py | 18 +- autogl/datasets/pyg.py | 2 +- autogl/module/__init__.py | 9 +- autogl/module/feature/__init__.py | 8 +- autogl/module/feature/auto_feature.py | 2 +- autogl/module/feature/base.py | 2 +- autogl/module/feature/generators/__init__.py | 11 +- autogl/module/feature/generators/base.py | 2 +- autogl/module/feature/generators/pyg.py | 2 +- autogl/module/feature/graph/base.py | 2 +- autogl/module/feature/graph/netlsd.py | 2 +- autogl/module/feature/graph/nx.py | 2 +- autogl/module/feature/selectors/base.py | 2 +- autogl/module/hpo/base.py | 4 +- autogl/module/model/base.py | 8 +- autogl/module/model/gcn.py | 190 +++-- autogl/module/model/graph_saint.py | 232 +++--- autogl/module/model/graphsage.py | 106 ++- autogl/module/nas/__init__.py | 6 +- autogl/module/nas/algorithm/__init__.py | 8 +- autogl/module/nas/algorithm/darts.py | 13 +- autogl/module/nas/algorithm/enas.py | 129 ++-- autogl/module/nas/algorithm/random_search.py | 80 +- autogl/module/nas/algorithm/rl.py | 404 +++++++---- autogl/module/nas/estimator/__init__.py | 8 +- autogl/module/nas/estimator/base.py | 6 +- autogl/module/nas/estimator/one_shot.py | 7 +- autogl/module/nas/estimator/train_scratch.py | 33 +- autogl/module/nas/space/__init__.py | 11 +- autogl/module/nas/space/base.py | 70 +- autogl/module/nas/space/graph_nas.py | 106 ++- autogl/module/nas/space/graph_nas_macro.py | 328 ++++++--- autogl/module/nas/space/operation.py | 35 +- autogl/module/nas/space/single_path.py | 20 +- autogl/module/nas/utils.py | 8 +- autogl/module/train/evaluation.py | 46 +- .../module/train/graph_classification_full.py | 19 +- .../module/train/node_classification_full.py | 5 +- .../node_classification_sampled_trainer.py | 684 +++++++++++------- .../sampling/sampler/graphsaint_sampler.py | 52 +- .../layer_dependent_importance_sampler.py | 369 ++++++---- .../sampling/sampler/neighbor_sampler.py | 111 ++- .../sampler/target_dependant_sampler.py | 191 +++-- autogl/solver/base.py | 70 +- autogl/solver/classifier/graph_classifier.py | 6 +- autogl/solver/classifier/node_classifier.py | 42 +- autogl/solver/utils.py | 15 +- autogl/utils/device.py | 13 +- 48 files changed, 2279 insertions(+), 1220 deletions(-) diff --git a/autogl/datasets/__init__.py b/autogl/datasets/__init__.py index 363b5d1..df450d1 100644 --- a/autogl/datasets/__init__.py +++ b/autogl/datasets/__init__.py @@ -178,17 +178,17 @@ __all__ = [ "OGBLcitationDataset", "OGBLwikikgDataset", "OGBLbiokgDataset", - "GatneDataset", - "AmazonDataset", - "TwitterDataset", + "GatneDataset", + "AmazonDataset", + "TwitterDataset", "YouTubeDataset", - "GTNDataset", - "ACM_GTNDataset", - "DBLP_GTNDataset", + "GTNDataset", + "ACM_GTNDataset", + "DBLP_GTNDataset", "IMDB_GTNDataset", - "HANDataset", - "ACM_HANDataset", - "DBLP_HANDataset", + "HANDataset", + "ACM_HANDataset", + "DBLP_HANDataset", "IMDB_HANDataset", "MatlabMatrix", "BlogcatalogDataset", diff --git a/autogl/datasets/pyg.py b/autogl/datasets/pyg.py index df9f480..7a39d3d 100644 --- a/autogl/datasets/pyg.py +++ b/autogl/datasets/pyg.py @@ -10,7 +10,7 @@ from torch_geometric.datasets import ( QM9, Amazon, Coauthor, - Flickr + Flickr, ) from torch_geometric.utils import remove_self_loops 
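# The "black format" commit above appears to be a pure reformatting pass: the
# diffstat touches dozens of files, but the hunks change only line wrapping,
# whitespace, and trailing commas, with no behavioural edits. A sweep like
# the following (assumed invocation; this is black's standard CLI, not a
# command recorded in the patch) would reproduce and verify such a commit:
#
#     black autogl/
#     black --check autogl/   # exits non-zero if any file is left unformatted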
from . import register_dataset diff --git a/autogl/module/__init__.py b/autogl/module/__init__.py index 80f94da..02238e7 100644 --- a/autogl/module/__init__.py +++ b/autogl/module/__init__.py @@ -1,11 +1,4 @@ -from . import ( - feature, - model, - train, - hpo, - nas, - ensemble -) +from . import feature, model, train, hpo, nas, ensemble from .ensemble import * from .feature import * diff --git a/autogl/module/feature/__init__.py b/autogl/module/feature/__init__.py index 5af47c5..f2f9c2b 100644 --- a/autogl/module/feature/__init__.py +++ b/autogl/module/feature/__init__.py @@ -35,14 +35,10 @@ from .generators import ( PYGGenerator, PYGLocalDegreeProfile, PYGNormalizeFeatures, - PYGOneHotDegree + PYGOneHotDegree, ) -from .selectors import ( - BaseSelector, - SeFilterConstant, - SeGBDT -) +from .selectors import BaseSelector, SeFilterConstant, SeGBDT from .graph import ( BaseGraph, diff --git a/autogl/module/feature/auto_feature.py b/autogl/module/feature/auto_feature.py index d04c98c..beed033 100644 --- a/autogl/module/feature/auto_feature.py +++ b/autogl/module/feature/auto_feature.py @@ -232,4 +232,4 @@ class AutoFeatureEngineer(BaseFeatureEngineer): gx = gx[:, sel] x = np.concatenate([x, gx], axis=1) data.x = x - return data \ No newline at end of file + return data diff --git a/autogl/module/feature/base.py b/autogl/module/feature/base.py index 9d1e9cc..4b812ba 100644 --- a/autogl/module/feature/base.py +++ b/autogl/module/feature/base.py @@ -169,4 +169,4 @@ class TransformWrapper(BaseFeature): return self def _transform(self, data=None): - return self._func(data) \ No newline at end of file + return self._func(data) diff --git a/autogl/module/feature/generators/__init__.py b/autogl/module/feature/generators/__init__.py index c15a9b3..45fb044 100644 --- a/autogl/module/feature/generators/__init__.py +++ b/autogl/module/feature/generators/__init__.py @@ -2,7 +2,14 @@ from .base import BaseGenerator from .graphlet import GeGraphlet from .eigen import GeEigen from .page_rank import GePageRank -from .pyg import register_pyg, PYGGenerator, pygfunc, PYGLocalDegreeProfile, PYGNormalizeFeatures, PYGOneHotDegree +from .pyg import ( + register_pyg, + PYGGenerator, + pygfunc, + PYGLocalDegreeProfile, + PYGNormalizeFeatures, + PYGOneHotDegree, +) __all__ = [ "BaseGenerator", @@ -14,5 +21,5 @@ __all__ = [ "PYGGenerator", "PYGLocalDegreeProfile", "PYGNormalizeFeatures", - "PYGOneHotDegree" + "PYGOneHotDegree", ] diff --git a/autogl/module/feature/generators/base.py b/autogl/module/feature/generators/base.py index 0ab2024..194d539 100644 --- a/autogl/module/feature/generators/base.py +++ b/autogl/module/feature/generators/base.py @@ -15,4 +15,4 @@ class GeOnehot(BaseGenerator): def _transform(self, data): fe = np.eye(data.x.shape[0]) data.x = np.concatenate([data.x, fe], axis=1) - return data \ No newline at end of file + return data diff --git a/autogl/module/feature/generators/pyg.py b/autogl/module/feature/generators/pyg.py index c3c5e8e..82731b1 100644 --- a/autogl/module/feature/generators/pyg.py +++ b/autogl/module/feature/generators/pyg.py @@ -86,4 +86,4 @@ class PYGOneHotDegree(PYGGenerator): dsc = self.extract(data) data.x = torch.cat([data.x, dsc], dim=1) return data - """ \ No newline at end of file + """ diff --git a/autogl/module/feature/graph/base.py b/autogl/module/feature/graph/base.py index 1daace9..1f3ba21 100644 --- a/autogl/module/feature/graph/base.py +++ b/autogl/module/feature/graph/base.py @@ -16,4 +16,4 @@ class BaseGraph(BaseFeature): data.gf = torch.FloatTensor([[]]) def 
_postprocess(self, data): - pass \ No newline at end of file + pass diff --git a/autogl/module/feature/graph/netlsd.py b/autogl/module/feature/graph/netlsd.py index 22859e4..205cda1 100644 --- a/autogl/module/feature/graph/netlsd.py +++ b/autogl/module/feature/graph/netlsd.py @@ -25,4 +25,4 @@ class SgNetLSD(BaseGraph): def _transform(self, data): dsc = torch.FloatTensor([netlsd.heat(data.G, *self._args, **self._kwargs)]) data.gf = torch.cat([data.gf, dsc], dim=1) - return data \ No newline at end of file + return data diff --git a/autogl/module/feature/graph/nx.py b/autogl/module/feature/graph/nx.py index 9179b3b..e79f614 100644 --- a/autogl/module/feature/graph/nx.py +++ b/autogl/module/feature/graph/nx.py @@ -180,4 +180,4 @@ class NxIsEulerian(NxGraph): pass -# till algorithms.flows \ No newline at end of file +# till algorithms.flows diff --git a/autogl/module/feature/selectors/base.py b/autogl/module/feature/selectors/base.py index 2553836..7838ffc 100644 --- a/autogl/module/feature/selectors/base.py +++ b/autogl/module/feature/selectors/base.py @@ -12,4 +12,4 @@ class BaseSelector(BaseFeature): def _transform(self, data): if self._sel is not None: data.x = data.x[:, self._sel] - return data \ No newline at end of file + return data diff --git a/autogl/module/hpo/base.py b/autogl/module/hpo/base.py index fdef8a2..180efd6 100644 --- a/autogl/module/hpo/base.py +++ b/autogl/module/hpo/base.py @@ -30,7 +30,9 @@ class BaseHPOptimizer: raise WrongDependedParameterError("The depended parameter does not exist.") for para in config: - if para["type"] in ("NUMERICAL_LIST", "CATEGORICAL_LIST") and para.get("cutPara", None): + if para["type"] in ("NUMERICAL_LIST", "CATEGORICAL_LIST") and para.get( + "cutPara", None + ): self._depend_map[para["parameterName"]] = para if type(para["cutPara"]) == str: get_depended_para(para["cutPara"]) diff --git a/autogl/module/model/base.py b/autogl/module/model/base.py index e251468..99f2c2c 100644 --- a/autogl/module/model/base.py +++ b/autogl/module/model/base.py @@ -302,8 +302,11 @@ class ClassificationModel(_BaseModel): if "initialize" in kwargs: del kwargs["initialize"] super(ClassificationModel, self).__init__( - initialize=init, hyper_parameter_space=hyper_parameter_space, - hyper_parameter=hyper_parameter, device=device, **kwargs + initialize=init, + hyper_parameter_space=hyper_parameter_space, + hyper_parameter=hyper_parameter, + device=device, + **kwargs ) if num_classes != Ellipsis and type(num_classes) == int: self.__num_classes: int = num_classes if num_classes > 0 else 0 @@ -323,6 +326,7 @@ class ClassificationModel(_BaseModel): def __repr__(self) -> str: import yaml + return yaml.dump(self.hyper_parameter) @property diff --git a/autogl/module/model/gcn.py b/autogl/module/model/gcn.py index 6862036..af6671b 100644 --- a/autogl/module/model/gcn.py +++ b/autogl/module/model/gcn.py @@ -14,36 +14,40 @@ LOGGER = get_logger("GCNModel") class GCN(ClassificationSupportedSequentialModel): class _GCNLayer(torch.nn.Module): def __init__( - self, input_channels: int, output_channels: int, - add_self_loops: bool = True, normalize: bool = True, - activation_name: _typing.Optional[str] = ..., - dropout_probability: _typing.Optional[float] = ... 
+ self, + input_channels: int, + output_channels: int, + add_self_loops: bool = True, + normalize: bool = True, + activation_name: _typing.Optional[str] = ..., + dropout_probability: _typing.Optional[float] = ..., ): super().__init__() self._convolution: GCNConv = GCNConv( - input_channels, output_channels, + input_channels, + output_channels, add_self_loops=bool(add_self_loops), - normalize=bool(normalize) + normalize=bool(normalize), ) if ( - activation_name is not Ellipsis and - activation_name is not None and - type(activation_name) == str + activation_name is not Ellipsis + and activation_name is not None + and type(activation_name) == str ): self._activation_name: _typing.Optional[str] = activation_name else: self._activation_name: _typing.Optional[str] = None if ( - dropout_probability is not Ellipsis and - dropout_probability is not None and - type(dropout_probability) == float + dropout_probability is not Ellipsis + and dropout_probability is not None + and type(dropout_probability) == float ): if dropout_probability < 0: dropout_probability = 0 if dropout_probability > 1: dropout_probability = 1 - self._dropout: _typing.Optional[torch.nn.Dropout] = ( - torch.nn.Dropout(dropout_probability) + self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout( + dropout_probability ) else: self._dropout: _typing.Optional[torch.nn.Dropout] = None @@ -51,13 +55,15 @@ class GCN(ClassificationSupportedSequentialModel): def forward(self, data, enable_activation: bool = True) -> torch.Tensor: x: torch.Tensor = getattr(data, "x") edge_index: torch.LongTensor = getattr(data, "edge_index") - edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight", None) + edge_weight: _typing.Optional[torch.Tensor] = getattr( + data, "edge_weight", None + ) """ Validate the arguments """ if not type(x) == type(edge_index) == torch.Tensor: raise TypeError if edge_weight is not None and ( - type(edge_weight) != torch.Tensor or - edge_index.size() != (2, edge_weight.size(0)) + type(edge_weight) != torch.Tensor + or edge_index.size() != (2, edge_weight.size(0)) ): edge_weight: _typing.Optional[torch.Tensor] = None @@ -69,15 +75,16 @@ class GCN(ClassificationSupportedSequentialModel): return x def __init__( - self, - num_features: int, - num_classes: int, - hidden_features: _typing.Sequence[int], - activation_name: str, - dropout: _typing.Union[ - _typing.Optional[float], _typing.Sequence[_typing.Optional[float]] - ] = None, - add_self_loops: bool = True, normalize: bool = True + self, + num_features: int, + num_classes: int, + hidden_features: _typing.Sequence[int], + activation_name: str, + dropout: _typing.Union[ + _typing.Optional[float], _typing.Sequence[_typing.Optional[float]] + ] = None, + add_self_loops: bool = True, + normalize: bool = True, ): if isinstance(dropout, _typing.Sequence): if len(dropout) != len(hidden_features) + 1: @@ -97,9 +104,9 @@ class GCN(ClassificationSupportedSequentialModel): dropout = 0 if dropout > 1: dropout = 1 - dropout_list: _typing.Sequence[_typing.Optional[float]] = ( - [dropout for _ in range(len(hidden_features))] + [None] - ) + dropout_list: _typing.Sequence[_typing.Optional[float]] = [ + dropout for _ in range(len(hidden_features)) + ] + [None] elif dropout in (None, Ellipsis, ...): dropout_list: _typing.Sequence[_typing.Optional[float]] = [ None for _ in range(len(hidden_features) + 1) @@ -111,60 +118,86 @@ class GCN(ClassificationSupportedSequentialModel): ) super().__init__() if len(hidden_features) == 0: - self.__sequential_encoding_layers: 
torch.nn.ModuleList = torch.nn.ModuleList( - ( - self._GCNLayer( - num_features, num_classes, add_self_loops, normalize, - dropout_probability=dropout_list[0] - ), + self.__sequential_encoding_layers: torch.nn.ModuleList = ( + torch.nn.ModuleList( + ( + self._GCNLayer( + num_features, + num_classes, + add_self_loops, + normalize, + dropout_probability=dropout_list[0], + ), + ) ) ) else: - self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList() - self.__sequential_encoding_layers.append(self._GCNLayer( - num_features, hidden_features[0], add_self_loops, - normalize, activation_name, dropout_list[0] - )) + self.__sequential_encoding_layers: torch.nn.ModuleList = ( + torch.nn.ModuleList() + ) + self.__sequential_encoding_layers.append( + self._GCNLayer( + num_features, + hidden_features[0], + add_self_loops, + normalize, + activation_name, + dropout_list[0], + ) + ) for hidden_feature_index in range(len(hidden_features)): if hidden_feature_index + 1 < len(hidden_features): - self.__sequential_encoding_layers.append(self._GCNLayer( - hidden_features[hidden_feature_index], - hidden_features[hidden_feature_index + 1], - add_self_loops, normalize, activation_name, - dropout_list[hidden_feature_index + 1] - )) + self.__sequential_encoding_layers.append( + self._GCNLayer( + hidden_features[hidden_feature_index], + hidden_features[hidden_feature_index + 1], + add_self_loops, + normalize, + activation_name, + dropout_list[hidden_feature_index + 1], + ) + ) else: - self.__sequential_encoding_layers.append(self._GCNLayer( - hidden_features[hidden_feature_index], num_classes, - add_self_loops, normalize, - dropout_list[-1] - )) + self.__sequential_encoding_layers.append( + self._GCNLayer( + hidden_features[hidden_feature_index], + num_classes, + add_self_loops, + normalize, + dropout_list[-1], + ) + ) @property def sequential_encoding_layers(self) -> torch.nn.ModuleList: return self.__sequential_encoding_layers - def __extract_edge_indexes_and_weights(self, data) -> _typing.Union[ - _typing.Sequence[_typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]], - _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] + def __extract_edge_indexes_and_weights( + self, data + ) -> _typing.Union[ + _typing.Sequence[ + _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] + ], + _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]], ]: def __compose_edge_index_and_weight( - _edge_index: torch.LongTensor, - _edge_weight: _typing.Optional[torch.Tensor] = None + _edge_index: torch.LongTensor, + _edge_weight: _typing.Optional[torch.Tensor] = None, ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: if type(_edge_index) != torch.Tensor or _edge_index.dtype != torch.int64: raise TypeError if _edge_weight is not None and ( - type(_edge_weight) != torch.Tensor or - _edge_index.size() != (2, _edge_weight.size(0)) + type(_edge_weight) != torch.Tensor + or _edge_index.size() != (2, _edge_weight.size(0)) ): _edge_weight: _typing.Optional[torch.Tensor] = None return _edge_index, _edge_weight if not ( - hasattr(data, "edge_indexes") and - isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and - len(getattr(data, "edge_indexes")) == len(self.__sequential_encoding_layers) + hasattr(data, "edge_indexes") + and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) + and len(getattr(data, "edge_indexes")) + == len(self.__sequential_encoding_layers) ): return __compose_edge_index_and_weight( getattr(data, "edge_index"), getattr(data, 
"edge_weight", None) @@ -176,14 +209,16 @@ class GCN(ClassificationSupportedSequentialModel): ) if ( - hasattr(data, "edge_weights") and - isinstance(getattr(data, "edge_weights"), _typing.Sequence) and - len(getattr(data, "edge_weights")) == len(self.__sequential_encoding_layers) + hasattr(data, "edge_weights") + and isinstance(getattr(data, "edge_weights"), _typing.Sequence) + and len(getattr(data, "edge_weights")) + == len(self.__sequential_encoding_layers) ): return [ __compose_edge_index_and_weight(_edge_index, _edge_weight) - for _edge_index, _edge_weight - in zip(getattr(data, "edge_indexes"), getattr(data, "edge_weights")) + for _edge_index, _edge_weight in zip( + getattr(data, "edge_indexes"), getattr(data, "edge_weights") + ) ] else: return [ @@ -193,19 +228,22 @@ class GCN(ClassificationSupportedSequentialModel): def cls_encode(self, data) -> torch.Tensor: edge_indexes_and_weights: _typing.Union[ - _typing.Sequence[_typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]], - _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] + _typing.Sequence[ + _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] + ], + _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]], ] = self.__extract_edge_indexes_and_weights(data) - if ( - (not isinstance(edge_indexes_and_weights, tuple)) - and isinstance(edge_indexes_and_weights[0], tuple) + if (not isinstance(edge_indexes_and_weights, tuple)) and isinstance( + edge_indexes_and_weights[0], tuple ): """ edge_indexes_and_weights is sequence of (edge_index, edge_weight) """ - assert len(edge_indexes_and_weights) == len(self.__sequential_encoding_layers) + assert len(edge_indexes_and_weights) == len( + self.__sequential_encoding_layers + ) x: torch.Tensor = getattr(data, "x") for _edge_index_and_weight, gcn in zip( - edge_indexes_and_weights, self.__sequential_encoding_layers + edge_indexes_and_weights, self.__sequential_encoding_layers ): _temp_data = autogl.data.Data(x=x, edge_index=_edge_index_and_weight[0]) _temp_data.edge_weight = _edge_index_and_weight[1] @@ -215,7 +253,9 @@ class GCN(ClassificationSupportedSequentialModel): """ edge_indexes_and_weights is (edge_index, edge_weight) """ x = getattr(data, "x") for gcn in self.__sequential_encoding_layers: - _temp_data = autogl.data.Data(x=x, edge_index=edge_indexes_and_weights[0]) + _temp_data = autogl.data.Data( + x=x, edge_index=edge_indexes_and_weights[0] + ) _temp_data.edge_weight = edge_indexes_and_weights[1] x = gcn(_temp_data) return x @@ -364,5 +404,5 @@ class AutoGCN(BaseModel): self.hyperparams.get("act"), self.hyperparams.get("dropout", None), bool(self.hyperparams.get("add_self_loops", True)), - bool(self.hyperparams.get("normalize", True)) + bool(self.hyperparams.get("normalize", True)), ).to(self.device) diff --git a/autogl/module/model/graph_saint.py b/autogl/module/model/graph_saint.py index 9ebbb04..1b4bc7a 100644 --- a/autogl/module/model/graph_saint.py +++ b/autogl/module/model/graph_saint.py @@ -11,8 +11,12 @@ class _GraphSAINTAggregationLayers: class MultiOrderAggregationLayer(torch.nn.Module): class Order0Aggregator(torch.nn.Module): def __init__( - self, input_dimension: int, output_dimension: int, bias: bool = True, - activation: _typing.Optional[str] = "ReLU", batch_norm: bool = True + self, + input_dimension: int, + output_dimension: int, + bias: bool = True, + activation: _typing.Optional[str] = "ReLU", + batch_norm: bool = True, ): super().__init__() if not type(input_dimension) == type(output_dimension) == int: @@ -21,16 +25,17 @@ 
class _GraphSAINTAggregationLayers: raise ValueError if not type(bias) == bool: raise TypeError - self.__linear_transform = torch.nn.Linear(input_dimension, output_dimension, bias) + self.__linear_transform = torch.nn.Linear( + input_dimension, output_dimension, bias + ) self.__linear_transform.reset_parameters() if type(activation) == str: if activation.lower() == "ReLU".lower(): self.__activation = torch.nn.functional.relu elif activation.lower() == "elu": self.__activation = torch.nn.functional.elu - elif ( - hasattr(torch.nn.functional, activation) and - callable(getattr(torch.nn.functional, activation)) + elif hasattr(torch.nn.functional, activation) and callable( + getattr(torch.nn.functional, activation) ): self.__activation = getattr(torch.nn.functional, activation) else: @@ -40,30 +45,42 @@ class _GraphSAINTAggregationLayers: if type(batch_norm) != bool: raise TypeError else: - self.__optional_batch_normalization: _typing.Optional[torch.nn.BatchNorm1d] = ( + self.__optional_batch_normalization: _typing.Optional[ + torch.nn.BatchNorm1d + ] = ( torch.nn.BatchNorm1d(output_dimension, 1e-8) - if batch_norm else None + if batch_norm + else None ) def forward( - self, x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]], - _edge_index: torch.Tensor, _edge_weight: _typing.Optional[torch.Tensor] = None, - _size: _typing.Optional[_typing.Tuple[int, int]] = None + self, + x: _typing.Union[ + torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor] + ], + _edge_index: torch.Tensor, + _edge_weight: _typing.Optional[torch.Tensor] = None, + _size: _typing.Optional[_typing.Tuple[int, int]] = None, ) -> torch.Tensor: __output: torch.Tensor = self.__linear_transform(x) if self.__activation is not None and callable(self.__activation): __output: torch.Tensor = self.__activation(__output) - if ( - self.__optional_batch_normalization is not None and - isinstance(self.__optional_batch_normalization, torch.nn.BatchNorm1d) + if self.__optional_batch_normalization is not None and isinstance( + self.__optional_batch_normalization, torch.nn.BatchNorm1d ): - __output: torch.Tensor = self.__optional_batch_normalization(__output) + __output: torch.Tensor = self.__optional_batch_normalization( + __output + ) return __output class Order1Aggregator(MessagePassing): def __init__( - self, input_dimension: int, output_dimension: int, bias: bool = True, - activation: _typing.Optional[str] = "ReLU", batch_norm: bool = True + self, + input_dimension: int, + output_dimension: int, + bias: bool = True, + activation: _typing.Optional[str] = "ReLU", + batch_norm: bool = True, ): super().__init__(aggr="add") if not type(input_dimension) == type(output_dimension) == int: @@ -72,16 +89,17 @@ class _GraphSAINTAggregationLayers: raise ValueError if not type(bias) == bool: raise TypeError - self.__linear_transform = torch.nn.Linear(input_dimension, output_dimension, bias) + self.__linear_transform = torch.nn.Linear( + input_dimension, output_dimension, bias + ) self.__linear_transform.reset_parameters() if type(activation) == str: if activation.lower() == "ReLU".lower(): self.__activation = torch.nn.functional.relu elif activation.lower() == "elu": self.__activation = torch.nn.functional.elu - elif ( - hasattr(torch.nn.functional, activation) and - callable(getattr(torch.nn.functional, activation)) + elif hasattr(torch.nn.functional, activation) and callable( + getattr(torch.nn.functional, activation) ): self.__activation = getattr(torch.nn.functional, activation) else: @@ -91,15 +109,22 @@ class 
_GraphSAINTAggregationLayers: if type(batch_norm) != bool: raise TypeError else: - self.__optional_batch_normalization: _typing.Optional[torch.nn.BatchNorm1d] = ( + self.__optional_batch_normalization: _typing.Optional[ + torch.nn.BatchNorm1d + ] = ( torch.nn.BatchNorm1d(output_dimension, 1e-8) - if batch_norm else None + if batch_norm + else None ) def forward( - self, x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]], - _edge_index: torch.Tensor, _edge_weight: _typing.Optional[torch.Tensor] = None, - _size: _typing.Optional[_typing.Tuple[int, int]] = None + self, + x: _typing.Union[ + torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor] + ], + _edge_index: torch.Tensor, + _edge_weight: _typing.Optional[torch.Tensor] = None, + _size: _typing.Optional[_typing.Tuple[int, int]] = None, ) -> torch.Tensor: if type(x) == torch.Tensor: @@ -111,19 +136,25 @@ class _GraphSAINTAggregationLayers: __output: torch.Tensor = self.__linear_transform(__output) if self.__activation is not None and callable(self.__activation): __output: torch.Tensor = self.__activation(__output) - if ( - self.__optional_batch_normalization is not None and - isinstance(self.__optional_batch_normalization, torch.nn.BatchNorm1d) + if self.__optional_batch_normalization is not None and isinstance( + self.__optional_batch_normalization, torch.nn.BatchNorm1d ): - __output: torch.Tensor = self.__optional_batch_normalization(__output) + __output: torch.Tensor = self.__optional_batch_normalization( + __output + ) return __output - def message(self, x_j: torch.Tensor, edge_weight: _typing.Optional[torch.Tensor]) -> torch.Tensor: + def message( + self, x_j: torch.Tensor, edge_weight: _typing.Optional[torch.Tensor] + ) -> torch.Tensor: return x_j if edge_weight is None else edge_weight.view(-1, 1) * x_j def message_and_aggregate( - self, adj_t: SparseTensor, - x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]] + self, + adj_t: SparseTensor, + x: _typing.Union[ + torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor] + ], ) -> torch.Tensor: return matmul(adj_t, x[0], reduce=self.aggr) @@ -132,14 +163,19 @@ class _GraphSAINTAggregationLayers: return (self._order + 1) * self._each_order_output_dimension def __init__( - self, _input_dimension: int, _each_order_output_dimension: int, _order: int, - bias: bool = True, activation: _typing.Optional[str] = "ReLU", - batch_norm: bool = True, _dropout: _typing.Optional[float] = ... 
+ self, + _input_dimension: int, + _each_order_output_dimension: int, + _order: int, + bias: bool = True, + activation: _typing.Optional[str] = "ReLU", + batch_norm: bool = True, + _dropout: _typing.Optional[float] = ..., ): super().__init__() if not ( - type(_input_dimension) == type(_order) == int and - type(_each_order_output_dimension) == int + type(_input_dimension) == type(_order) == int + and type(_each_order_output_dimension) == int ): raise TypeError if _input_dimension <= 0 or _each_order_output_dimension <= 0: @@ -152,13 +188,19 @@ class _GraphSAINTAggregationLayers: if type(bias) != bool: raise TypeError self.__order0_transform = self.Order0Aggregator( - self._input_dimension, self._each_order_output_dimension, bias, - activation, batch_norm + self._input_dimension, + self._each_order_output_dimension, + bias, + activation, + batch_norm, ) if _order == 1: self.__order1_transform = self.Order1Aggregator( - self._input_dimension, self._each_order_output_dimension, bias, - activation, batch_norm + self._input_dimension, + self._each_order_output_dimension, + bias, + activation, + batch_norm, ) else: self.__order1_transform = None @@ -167,33 +209,35 @@ class _GraphSAINTAggregationLayers: _dropout = 0 if _dropout > 1: _dropout = 1 - self.__optional_dropout: _typing.Optional[torch.nn.Dropout] = ( - torch.nn.Dropout(_dropout) - ) + self.__optional_dropout: _typing.Optional[ + torch.nn.Dropout + ] = torch.nn.Dropout(_dropout) else: self.__optional_dropout: _typing.Optional[torch.nn.Dropout] = None def _forward( - self, x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]], - edge_index: torch.Tensor, edge_weight: _typing.Optional[torch.Tensor] = None, - size: _typing.Optional[_typing.Tuple[int, int]] = None + self, + x: _typing.Union[torch.Tensor, _typing.Tuple[torch.Tensor, torch.Tensor]], + edge_index: torch.Tensor, + edge_weight: _typing.Optional[torch.Tensor] = None, + size: _typing.Optional[_typing.Tuple[int, int]] = None, ) -> torch.Tensor: - if ( - self.__order1_transform is not None and - isinstance(self.__order1_transform, self.Order1Aggregator) + if self.__order1_transform is not None and isinstance( + self.__order1_transform, self.Order1Aggregator ): __output: torch.Tensor = torch.cat( [ self.__order0_transform(x, edge_index, edge_weight, size), - self.__order1_transform(x, edge_index, edge_weight, size) + self.__order1_transform(x, edge_index, edge_weight, size), ], - dim=1 + dim=1, ) else: - __output: torch.Tensor = self.__order0_transform(x, edge_index, edge_weight, size) - if ( - self.__optional_dropout is not None and - isinstance(self.__optional_dropout, torch.nn.Dropout) + __output: torch.Tensor = self.__order0_transform( + x, edge_index, edge_weight, size + ) + if self.__optional_dropout is not None and isinstance( + self.__optional_dropout, torch.nn.Dropout ): __output: torch.Tensor = self.__optional_dropout(__output) return __output @@ -205,7 +249,9 @@ class _GraphSAINTAggregationLayers: edge_index: torch.LongTensor = getattr(data, "edge_index") if type(edge_index) != torch.Tensor: raise TypeError - edge_weight: _typing.Optional[torch.Tensor] = getattr(data, "edge_weight", None) + edge_weight: _typing.Optional[torch.Tensor] = getattr( + data, "edge_weight", None + ) if edge_weight is not None and type(edge_weight) != torch.Tensor: raise TypeError return self._forward(x, edge_index, edge_weight) @@ -219,8 +265,8 @@ class _GraphSAINTAggregationLayers: if type(tenser_or_data) == torch.Tensor: return self.__dropout_module(tenser_or_data) elif ( - 
hasattr(tenser_or_data, "x") and - type(getattr(tenser_or_data, "x")) == torch.Tensor + hasattr(tenser_or_data, "x") + and type(getattr(tenser_or_data, "x")) == torch.Tensor ): return self.__dropout_module(getattr(tenser_or_data, "x")) else: @@ -229,14 +275,17 @@ class _GraphSAINTAggregationLayers: class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialModel): def __init__( - self, num_features: int, num_classes: int, - _output_dimension_for_each_order: int, - _layers_order_list: _typing.Sequence[int], - _pre_dropout: float, - _layers_dropout: _typing.Union[float, _typing.Sequence[float]], - activation: _typing.Optional[str] = "ReLU", - bias: bool = True, batch_norm: bool = True, - normalize: bool = True + self, + num_features: int, + num_classes: int, + _output_dimension_for_each_order: int, + _layers_order_list: _typing.Sequence[int], + _pre_dropout: float, + _layers_dropout: _typing.Union[float, _typing.Sequence[float]], + activation: _typing.Optional[str] = "ReLU", + bias: bool = True, + batch_norm: bool = True, + normalize: bool = True, ): super(GraphSAINTMultiOrderAggregationModel, self).__init__() if type(_output_dimension_for_each_order) != int: @@ -269,10 +318,17 @@ class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialMode _pre_dropout = 1 self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList( ( - _GraphSAINTAggregationLayers.WrappedDropout(torch.nn.Dropout(_pre_dropout)), + _GraphSAINTAggregationLayers.WrappedDropout( + torch.nn.Dropout(_pre_dropout) + ), _GraphSAINTAggregationLayers.MultiOrderAggregationLayer( - num_features, _output_dimension_for_each_order, _layers_order_list[0], bias, - activation, batch_norm, _layers_dropout[0] + num_features, + _output_dimension_for_each_order, + _layers_order_list[0], + bias, + activation, + batch_norm, + _layers_dropout[0], ), ) ) @@ -280,14 +336,19 @@ class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialMode self.__sequential_encoding_layers.append( _GraphSAINTAggregationLayers.MultiOrderAggregationLayer( self.__sequential_encoding_layers[-1].integral_output_dimension, - _output_dimension_for_each_order, _layers_order_list[_layer_index], bias, - activation, batch_norm, _layers_dropout[_layer_index] - + _output_dimension_for_each_order, + _layers_order_list[_layer_index], + bias, + activation, + batch_norm, + _layers_dropout[_layer_index], ) ) self.__apply_normalize: bool = normalize self.__linear_transform: torch.nn.Linear = torch.nn.Linear( - self.__sequential_encoding_layers[-1].integral_output_dimension, num_classes, bias + self.__sequential_encoding_layers[-1].integral_output_dimension, + num_classes, + bias, ) self.__linear_transform.reset_parameters() @@ -302,8 +363,8 @@ class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialMode if type(getattr(data, "edge_index")) != torch.Tensor: raise TypeError if ( - getattr(data, "edge_weight", None) is not None and - type(getattr(data, "edge_weight")) != torch.Tensor + getattr(data, "edge_weight", None) is not None + and type(getattr(data, "edge_weight")) != torch.Tensor ): raise TypeError for encoding_layer in self.__sequential_encoding_layers: @@ -318,12 +379,12 @@ class GraphSAINTMultiOrderAggregationModel(ClassificationSupportedSequentialMode @register_model("GraphSAINTAggregationModel") class GraphSAINTAggregationModel(ClassificationModel): def __init__( - self, - num_features: int = ..., - num_classes: int = ..., - device: _typing.Union[str, torch.device] = ..., - init: bool = 
False, - **kwargs + self, + num_features: int = ..., + num_classes: int = ..., + device: _typing.Union[str, torch.device] = ..., + init: bool = False, + **kwargs ): super(GraphSAINTAggregationModel, self).__init__( num_features, num_classes, device=device, init=init, **kwargs @@ -333,7 +394,8 @@ class GraphSAINTAggregationModel(ClassificationModel): def _initialize(self): """ Initialize model """ self.model = GraphSAINTMultiOrderAggregationModel( - self.num_features, self.num_classes, + self.num_features, + self.num_classes, self.hyper_parameter.get("output_dimension_for_each_order"), self.hyper_parameter.get("layers_order_list"), self.hyper_parameter.get("pre_dropout"), @@ -341,5 +403,5 @@ class GraphSAINTAggregationModel(ClassificationModel): self.hyper_parameter.get("activation", "ReLU"), bool(self.hyper_parameter.get("bias", True)), bool(self.hyper_parameter.get("batch_norm", True)), - bool(self.hyper_parameter.get("normalize", True)) + bool(self.hyper_parameter.get("normalize", True)), ).to(self.device) diff --git a/autogl/module/model/graphsage.py b/autogl/module/model/graphsage.py index 727b9d0..3b63baf 100644 --- a/autogl/module/model/graphsage.py +++ b/autogl/module/model/graphsage.py @@ -14,33 +14,36 @@ LOGGER = get_logger("SAGEModel") class GraphSAGE(ClassificationSupportedSequentialModel): class _SAGELayer(torch.nn.Module): def __init__( - self, input_channels: int, output_channels: int, aggr: str, - activation_name: _typing.Optional[str] = ..., - dropout_probability: _typing.Optional[float] = ... + self, + input_channels: int, + output_channels: int, + aggr: str, + activation_name: _typing.Optional[str] = ..., + dropout_probability: _typing.Optional[float] = ..., ): super().__init__() self._convolution: SAGEConv = SAGEConv( input_channels, output_channels, aggr=aggr ) if ( - activation_name is not Ellipsis and - activation_name is not None and - type(activation_name) == str + activation_name is not Ellipsis + and activation_name is not None + and type(activation_name) == str ): self._activation_name: _typing.Optional[str] = activation_name else: self._activation_name: _typing.Optional[str] = None if ( - dropout_probability is not Ellipsis and - dropout_probability is not None and - type(dropout_probability) == float + dropout_probability is not Ellipsis + and dropout_probability is not None + and type(dropout_probability) == float ): if dropout_probability < 0: dropout_probability = 0 if dropout_probability > 1: dropout_probability = 1 - self._dropout: _typing.Optional[torch.nn.Dropout] = ( - torch.nn.Dropout(dropout_probability) + self._dropout: _typing.Optional[torch.nn.Dropout] = torch.nn.Dropout( + dropout_probability ) else: self._dropout: _typing.Optional[torch.nn.Dropout] = None @@ -59,13 +62,15 @@ class GraphSAGE(ClassificationSupportedSequentialModel): return x def __init__( - self, num_features: int, num_classes: int, - hidden_features: _typing.Sequence[int], - activation_name: str, - layers_dropout: _typing.Union[ - _typing.Optional[float], _typing.Sequence[_typing.Optional[float]] - ] = None, - aggr: str = "mean" + self, + num_features: int, + num_classes: int, + hidden_features: _typing.Sequence[int], + activation_name: str, + layers_dropout: _typing.Union[ + _typing.Optional[float], _typing.Sequence[_typing.Optional[float]] + ] = None, + aggr: str = "mean", ): super().__init__() if not type(num_features) == type(num_classes) == int: @@ -85,9 +90,9 @@ class GraphSAGE(ClassificationSupportedSequentialModel): raise TypeError _layers_dropout: 
_typing.Sequence[_typing.Optional[float]] = layers_dropout elif layers_dropout is None or type(layers_dropout) == float: - _layers_dropout: _typing.Sequence[_typing.Optional[float]] = ( - [layers_dropout for _ in range(len(hidden_features))] + [None] - ) + _layers_dropout: _typing.Sequence[_typing.Optional[float]] = [ + layers_dropout for _ in range(len(hidden_features)) + ] + [None] else: raise TypeError if not type(activation_name) == type(aggr) == str: @@ -96,32 +101,51 @@ class GraphSAGE(ClassificationSupportedSequentialModel): aggr = "mean" if len(hidden_features) == 0: - self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList([ - self._SAGELayer( - num_features, num_classes, - aggr, activation_name, _layers_dropout[0] + self.__sequential_encoding_layers: torch.nn.ModuleList = ( + torch.nn.ModuleList( + [ + self._SAGELayer( + num_features, + num_classes, + aggr, + activation_name, + _layers_dropout[0], + ) + ] ) - ]) + ) else: - self.__sequential_encoding_layers: torch.nn.ModuleList = torch.nn.ModuleList([ - self._SAGELayer( - num_features, hidden_features[0], - aggr, activation_name, _layers_dropout[0] + self.__sequential_encoding_layers: torch.nn.ModuleList = ( + torch.nn.ModuleList( + [ + self._SAGELayer( + num_features, + hidden_features[0], + aggr, + activation_name, + _layers_dropout[0], + ) + ] ) - ]) + ) for i in range(len(hidden_features)): if i + 1 < len(hidden_features): self.__sequential_encoding_layers.append( self._SAGELayer( - hidden_features[i], hidden_features[i + 1], aggr, - activation_name, _layers_dropout[i + 1] + hidden_features[i], + hidden_features[i + 1], + aggr, + activation_name, + _layers_dropout[i + 1], ) ) else: self.__sequential_encoding_layers.append( self._SAGELayer( - hidden_features[i], num_classes, aggr, - _layers_dropout[i + 1] + hidden_features[i], + num_classes, + aggr, + _layers_dropout[i + 1], ) ) @@ -131,9 +155,10 @@ class GraphSAGE(ClassificationSupportedSequentialModel): def cls_encode(self, data) -> torch.Tensor: if ( - hasattr(data, "edge_indexes") and - isinstance(getattr(data, "edge_indexes"), _typing.Sequence) and - len(getattr(data, "edge_indexes")) == len(self.__sequential_encoding_layers) + hasattr(data, "edge_indexes") + and isinstance(getattr(data, "edge_indexes"), _typing.Sequence) + and len(getattr(data, "edge_indexes")) + == len(self.__sequential_encoding_layers) ): for __edge_index in getattr(data, "edge_indexes"): if type(__edge_index) != torch.Tensor: @@ -272,9 +297,10 @@ class AutoSAGE(BaseModel): return self.initialized = True self.model = GraphSAGE( - self.num_features, self.num_classes, + self.num_features, + self.num_classes, self.hyperparams.get("hidden"), self.hyperparams.get("act", "relu"), self.hyperparams.get("dropout", None), - self.hyperparams.get("agg", "mean") + self.hyperparams.get("agg", "mean"), ).to(self.device) diff --git a/autogl/module/nas/__init__.py b/autogl/module/nas/__init__.py index be2b683..dab357f 100644 --- a/autogl/module/nas/__init__.py +++ b/autogl/module/nas/__init__.py @@ -1,8 +1,4 @@ -from . import ( - algorithm, - estimator, - space -) +from . 
import algorithm, estimator, space from .algorithm import NAS_ALGO_DICT from .estimator import NAS_ESTIMATOR_DICT diff --git a/autogl/module/nas/algorithm/__init__.py b/autogl/module/nas/algorithm/__init__.py index eacc45b..5056d17 100644 --- a/autogl/module/nas/algorithm/__init__.py +++ b/autogl/module/nas/algorithm/__init__.py @@ -8,10 +8,13 @@ from .base import BaseNAS NAS_ALGO_DICT = {} + def register_nas_algo(name): def register_nas_algo_cls(cls): if name in NAS_ALGO_DICT: - raise ValueError("Cannot register duplicate NAS algorithm ({})".format(name)) + raise ValueError( + "Cannot register duplicate NAS algorithm ({})".format(name) + ) if not issubclass(cls, BaseNAS): raise ValueError( "Model ({}: {}) must extend NAS algorithm".format(name, cls.__name__) @@ -21,11 +24,13 @@ def register_nas_algo(name): return register_nas_algo_cls + from .darts import Darts from .enas import Enas from .random_search import RandomSearch from .rl import RL, GraphNasRL + def build_nas_algo_from_name(name: str) -> BaseNAS: """ Parameters @@ -46,4 +51,5 @@ def build_nas_algo_from_name(name: str) -> BaseNAS: assert name in NAS_ALGO_DICT, "HPO module do not have name " + name return NAS_ALGO_DICT[name]() + __all__ = ["BaseNAS", "Darts", "Enas", "RandomSearch", "RL", "GraphNasRL"] diff --git a/autogl/module/nas/algorithm/darts.py b/autogl/module/nas/algorithm/darts.py index 924ab7f..358b1a8 100644 --- a/autogl/module/nas/algorithm/darts.py +++ b/autogl/module/nas/algorithm/darts.py @@ -17,6 +17,7 @@ from nni.retiarii.oneshot.pytorch.darts import DartsLayerChoice, DartsInputChoic _logger = logging.getLogger(__name__) + @register_nas_algo("darts") class Darts(BaseNAS): """ @@ -42,7 +43,17 @@ class Darts(BaseNAS): The device of the whole process """ - def __init__(self, num_epochs=5, workers = 4, gradient_clip = 5.0, model_lr = 1e-3, model_wd = 5e-4, arch_lr = 3e-4, arch_wd = 1e-3, device="cuda"): + def __init__( + self, + num_epochs=5, + workers=4, + gradient_clip=5.0, + model_lr=1e-3, + model_wd=5e-4, + arch_lr=3e-4, + arch_wd=1e-3, + device="cuda", + ): super().__init__(device=device) self.num_epochs = num_epochs self.workers = workers diff --git a/autogl/module/nas/algorithm/enas.py b/autogl/module/nas/algorithm/enas.py index 7e09dfb..5259e07 100644 --- a/autogl/module/nas/algorithm/enas.py +++ b/autogl/module/nas/algorithm/enas.py @@ -8,13 +8,25 @@ import torch.nn.functional as F from . import register_nas_algo from .base import BaseNAS from ..space import BaseSpace -from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module +from ..utils import ( + AverageMeterGroup, + replace_layer_choice, + replace_input_choice, + get_module_order, + sort_replaced_module, +) from tqdm import tqdm, trange -from .rl import PathSamplingLayerChoice,PathSamplingInputChoice,ReinforceField,ReinforceController +from .rl import ( + PathSamplingLayerChoice, + PathSamplingInputChoice, + ReinforceField, + ReinforceController, +) from ....utils import get_logger LOGGER = get_logger("ENAS") + @register_nas_algo("enas") class Enas(BaseNAS): """ @@ -52,29 +64,44 @@ class Enas(BaseNAS): The device of the whole process, e.g. 
"cuda", torch.device("cpu") """ - def __init__(self, num_epochs = 5, n_warmup = 100, log_frequency=None, grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999, - ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,model_lr=5e-3,model_wd=5e-4, disable_progress = True, device="cuda"): + def __init__( + self, + num_epochs=5, + n_warmup=100, + log_frequency=None, + grad_clip=5.0, + entropy_weight=0.0001, + skip_weight=0.8, + baseline_decay=0.999, + ctrl_lr=0.00035, + ctrl_steps_aggregate=20, + ctrl_kwargs=None, + model_lr=5e-3, + model_wd=5e-4, + disable_progress=True, + device="cuda", + ): super().__init__(device) - self.device=device + self.device = device self.num_epochs = num_epochs self.log_frequency = log_frequency self.entropy_weight = entropy_weight self.skip_weight = skip_weight self.baseline_decay = baseline_decay - self.baseline = 0. + self.baseline = 0.0 self.ctrl_steps_aggregate = ctrl_steps_aggregate self.grad_clip = grad_clip - self.ctrl_kwargs=ctrl_kwargs - self.ctrl_lr=ctrl_lr - self.n_warmup=n_warmup + self.ctrl_kwargs = ctrl_kwargs + self.ctrl_lr = ctrl_lr + self.n_warmup = n_warmup self.model_lr = model_lr self.model_wd = model_wd self.disable_progress = disable_progress def search(self, space: BaseSpace, dset, estimator): self.model = space - self.dataset = dset#.to(self.device) - self.estimator = estimator + self.dataset = dset # .to(self.device) + self.estimator = estimator # replace choice self.nas_modules = [] @@ -89,80 +116,98 @@ class Enas(BaseNAS): self.model.parameters(), lr=self.model_lr, weight_decay=self.model_wd ) # fields - self.nas_fields = [ReinforceField(name, len(module), - isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1) - for name, module in self.nas_modules] - self.controller = ReinforceController(self.nas_fields, **(self.ctrl_kwargs or {})) - self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr) + self.nas_fields = [ + ReinforceField( + name, + len(module), + isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1, + ) + for name, module in self.nas_modules + ] + self.controller = ReinforceController( + self.nas_fields, **(self.ctrl_kwargs or {}) + ) + self.ctrl_optim = torch.optim.Adam( + self.controller.parameters(), lr=self.ctrl_lr + ) # warm up supernet with tqdm(range(self.n_warmup), disable=self.disable_progress) as bar: for i in bar: - acc,l1=self._train_model(i) + acc, l1 = self._train_model(i) with torch.no_grad(): - val_acc,val_loss=self._infer('val') - bar.set_postfix(loss=l1,acc=acc,val_acc=val_acc,val_loss=val_loss) + val_acc, val_loss = self._infer("val") + bar.set_postfix(loss=l1, acc=acc, val_acc=val_acc, val_loss=val_loss) # train with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar: for i in bar: try: - l1=self._train_model(i) - l2=self._train_controller(i) + l1 = self._train_model(i) + l2 = self._train_controller(i) except Exception as e: print(e) - nm=self.nas_modules + nm = self.nas_modules for i in range(len(nm)): print(nm[i][1].sampled) - bar.set_postfix(loss_model=l1,reward_controller=l2) - - selection=self.export() - #print(selection) - return space.parse_model(selection,self.device) - - def _train_model(self, epoch): + bar.set_postfix(loss_model=l1, reward_controller=l2) + + selection = self.export() + # print(selection) + return space.parse_model(selection, self.device) + + def _train_model(self, epoch): self.model.train() self.controller.eval() self.model_optim.zero_grad() self._resample() - metric,loss=self._infer() + 
metric, loss = self._infer()
         loss.backward()
         if self.grad_clip > 0:
             nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)
         self.model_optim.step()
-        return metric,loss.item()
+        return metric, loss.item()
 
     def _train_controller(self, epoch):
         self.model.eval()
         self.controller.train()
         self.ctrl_optim.zero_grad()
-        rewards=[]
+        rewards = []
         for ctrl_step in range(self.ctrl_steps_aggregate):
             self._resample()
             with torch.no_grad():
-                metric,loss=self._infer(mask='val')
-            reward =metric
+                metric, loss = self._infer(mask="val")
+            reward = metric
             rewards.append(reward)
             if self.entropy_weight:
                 reward += self.entropy_weight * self.controller.sample_entropy.item()
-            self.baseline = self.baseline * self.baseline_decay + reward * (1 - self.baseline_decay)
+            self.baseline = self.baseline * self.baseline_decay + reward * (
+                1 - self.baseline_decay
+            )
             loss = self.controller.sample_log_prob * (reward - self.baseline)
             if self.skip_weight:
                 loss += self.skip_weight * self.controller.sample_skip_penalty
             loss /= self.ctrl_steps_aggregate
             loss.backward()
-
+
             if (ctrl_step + 1) % self.ctrl_steps_aggregate == 0:
                 if self.grad_clip > 0:
-                    nn.utils.clip_grad_norm_(self.controller.parameters(), self.grad_clip)
+                    nn.utils.clip_grad_norm_(
+                        self.controller.parameters(), self.grad_clip
+                    )
                 self.ctrl_optim.step()
                 self.ctrl_optim.zero_grad()
 
             if self.log_frequency is not None and ctrl_step % self.log_frequency == 0:
-                LOGGER.info('RL Epoch [%d/%d] Step [%d/%d] %s', epoch + 1, self.num_epochs,
-                            ctrl_step + 1, self.ctrl_steps_aggregate)
-        return sum(rewards)/len(rewards)
+                LOGGER.info(
+                    "RL Epoch [%d/%d] Step [%d/%d]",
+                    epoch + 1,
+                    self.num_epochs,
+                    ctrl_step + 1,
+                    self.ctrl_steps_aggregate,
+                )
+        return sum(rewards) / len(rewards)
 
     def _resample(self):
         result = self.controller.resample()
@@ -174,6 +219,6 @@ class Enas(BaseNAS):
         with torch.no_grad():
             return self.controller.resample()
 
-    def _infer(self,mask='train'):
-        metric, loss = self.estimator.infer(self.model, self.dataset,mask=mask)
+    def _infer(self, mask="train"):
+        metric, loss = self.estimator.infer(self.model, self.dataset, mask=mask)
         return metric[0], loss
diff --git a/autogl/module/nas/algorithm/random_search.py b/autogl/module/nas/algorithm/random_search.py
index b3286ac..7d6bc90 100644
--- a/autogl/module/nas/algorithm/random_search.py
+++ b/autogl/module/nas/algorithm/random_search.py
@@ -5,17 +5,24 @@ import torch.nn.functional as F
 from . import register_nas_algo
 from .base import BaseNAS
 from ..space import BaseSpace
-from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module
+from ..utils import (
+    AverageMeterGroup,
+    replace_layer_choice,
+    replace_input_choice,
+    get_module_order,
+    sort_replaced_module,
+)
 from tqdm import tqdm
-from .rl import PathSamplingLayerChoice,PathSamplingInputChoice
+from .rl import PathSamplingLayerChoice, PathSamplingInputChoice
 import numpy as np
 from ....utils import get_logger
 
 LOGGER = get_logger("random_search_NAS")
 
+
 @register_nas_algo("random")
 class RandomSearch(BaseNAS):
-    '''
+    """
     Uniformly random architecture search
 
     Parameters
     ----------
     device: str or torch.device
         The device of the whole process
     num_epochs: int
         Number of epochs planned for training.
     disable_progress: boolean
         Controls whether to show the progress bar.
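The loop this class implements boils down to the following minimal sketch; `evaluate` is a hypothetical stand-in for one estimator call returning a validation metric:

    import numpy as np

    def random_search(selection_dict, evaluate, num_epochs=400, seed=0):
        # selection_dict maps each choice key to its number of candidate ops.
        rng = np.random.default_rng(seed)
        cache = {}
        for _ in range(num_epochs):
            selection = {k: int(rng.integers(v)) for k, v in selection_dict.items()}
            vec = tuple(selection.values())
            if vec not in cache:  # evaluate each distinct architecture once
                cache[vec] = evaluate(selection)
        best = max(cache, key=cache.get)
        return dict(zip(selection_dict, best))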
- ''' - def __init__(self, device='cuda', num_epochs=400, disable_progress=False): + """ + + def __init__(self, device="cuda", num_epochs=400, disable_progress=False): super().__init__(device) - self.num_epochs=num_epochs - self.disable_progress=disable_progress + self.num_epochs = num_epochs + self.disable_progress = disable_progress def search(self, space: BaseSpace, dset, estimator): - self.estimator=estimator - self.dataset=dset - self.space=space - + self.estimator = estimator + self.dataset = dset + self.space = space + self.nas_modules = [] k2o = get_module_order(self.space) replace_layer_choice(self.space, PathSamplingLayerChoice, self.nas_modules) replace_input_choice(self.space, PathSamplingInputChoice, self.nas_modules) - self.nas_modules = sort_replaced_module(k2o, self.nas_modules) - selection_range={} - for k,v in self.nas_modules: - selection_range[k]=len(v) - self.selection_dict=selection_range - - #space_size=np.prod(list(selection_range.values())) + self.nas_modules = sort_replaced_module(k2o, self.nas_modules) + selection_range = {} + for k, v in self.nas_modules: + selection_range[k] = len(v) + self.selection_dict = selection_range - arch_perfs=[] - cache={} - with tqdm(range(self.num_epochs),disable=self.disable_progress) as bar: + # space_size=np.prod(list(selection_range.values())) + + arch_perfs = [] + cache = {} + with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar: for i in bar: - selection=self.sample() - vec=tuple(list(selection.values())) + selection = self.sample() + vec = tuple(list(selection.values())) if vec not in cache: - self.arch=space.parse_model(selection,self.device) - metric,loss=self._infer(mask='val') - arch_perfs.append([metric,selection]) - cache[vec]=metric - bar.set_postfix(acc=metric,max_acc=max(cache.values())) - selection=arch_perfs[np.argmax([x[0] for x in arch_perfs])][1] - arch=space.parse_model(selection,self.device) - return arch - + self.arch = space.parse_model(selection, self.device) + metric, loss = self._infer(mask="val") + arch_perfs.append([metric, selection]) + cache[vec] = metric + bar.set_postfix(acc=metric, max_acc=max(cache.values())) + selection = arch_perfs[np.argmax([x[0] for x in arch_perfs])][1] + arch = space.parse_model(selection, self.device) + return arch + def sample(self): # uniformly sample - selection={} - for k,v in self.selection_dict.items(): - selection[k]=np.random.choice(range(v)) + selection = {} + for k, v in self.selection_dict.items(): + selection[k] = np.random.choice(range(v)) return selection - def _infer(self,mask='train'): + def _infer(self, mask="train"): metric, loss = self.estimator.infer(self.arch._model, self.dataset, mask=mask) return metric[0], loss diff --git a/autogl/module/nas/algorithm/rl.py b/autogl/module/nas/algorithm/rl.py index a8fdbcf..2f48ba4 100644 --- a/autogl/module/nas/algorithm/rl.py +++ b/autogl/module/nas/algorithm/rl.py @@ -6,7 +6,13 @@ import torch.nn.functional as F from . 
import register_nas_algo from .base import BaseNAS from ..space import BaseSpace -from ..utils import AverageMeterGroup, replace_layer_choice, replace_input_choice, get_module_order, sort_replaced_module +from ..utils import ( + AverageMeterGroup, + replace_layer_choice, + replace_input_choice, + get_module_order, + sort_replaced_module, +) from nni.nas.pytorch.fixed import apply_fixed_architecture from tqdm import tqdm from datetime import datetime @@ -14,10 +20,16 @@ import numpy as np from ....utils import get_logger LOGGER = get_logger("random_search_NAS") + + def _get_mask(sampled, total): - multihot = [i == sampled or (isinstance(sampled, list) and i in sampled) for i in range(total)] + multihot = [ + i == sampled or (isinstance(sampled, list) and i in sampled) + for i in range(total) + ] return torch.tensor(multihot, dtype=torch.bool) # pylint: disable=not-callable + class PathSamplingLayerChoice(nn.Module): """ Mixed module, in which fprop is decided by exactly one or multiple (sampled) module. @@ -37,15 +49,21 @@ class PathSamplingLayerChoice(nn.Module): for name, module in layer_choice.named_children(): self.add_module(name, module) self.op_names.append(name) - assert self.op_names, 'There has to be at least one op to choose from.' + assert self.op_names, "There has to be at least one op to choose from." self.sampled = None # sampled can be either a list of indices or an index def forward(self, *args, **kwargs): - assert self.sampled is not None, 'At least one path needs to be sampled before fprop.' + assert ( + self.sampled is not None + ), "At least one path needs to be sampled before fprop." if isinstance(self.sampled, list): - return sum([getattr(self, self.op_names[i])(*args, **kwargs) for i in self.sampled]) # pylint: disable=not-an-iterable + return sum( + [getattr(self, self.op_names[i])(*args, **kwargs) for i in self.sampled] + ) # pylint: disable=not-an-iterable else: - return getattr(self, self.op_names[self.sampled])(*args, **kwargs) # pylint: disable=invalid-sequence-index + return getattr(self, self.op_names[self.sampled])( + *args, **kwargs + ) # pylint: disable=invalid-sequence-index def __len__(self): return len(self.op_names) @@ -75,7 +93,9 @@ class PathSamplingInputChoice(nn.Module): def forward(self, input_tensors): if isinstance(self.sampled, list): - return sum([input_tensors[t] for t in self.sampled]) # pylint: disable=not-an-iterable + return sum( + [input_tensors[t] for t in self.sampled] + ) # pylint: disable=not-an-iterable else: return input_tensors[self.sampled] @@ -87,14 +107,16 @@ class PathSamplingInputChoice(nn.Module): return _get_mask(self.sampled, len(self)) def __repr__(self): - return f'PathSamplingInputChoice(n_candidates={self.n_candidates}, chosen={self.sampled})' + return f"PathSamplingInputChoice(n_candidates={self.n_candidates}, chosen={self.sampled})" + class StackedLSTMCell(nn.Module): def __init__(self, layers, size, bias): super().__init__() self.lstm_num_layers = layers - self.lstm_modules = nn.ModuleList([nn.LSTMCell(size, size, bias=bias) - for _ in range(self.lstm_num_layers)]) + self.lstm_modules = nn.ModuleList( + [nn.LSTMCell(size, size, bias=bias) for _ in range(self.lstm_num_layers)] + ) def forward(self, inputs, hidden): prev_h, prev_c = hidden @@ -108,6 +130,7 @@ class StackedLSTMCell(nn.Module): inputs = curr_h[-1].view(1, -1) return next_h, next_c + class ReinforceField: """ A field with ``name``, with ``total`` choices. 
``choose_one`` is true if one and only one is meant to be @@ -120,7 +143,8 @@ class ReinforceField: self.choose_one = choose_one def __repr__(self): - return f'ReinforceField(name={self.name}, total={self.total}, choose_one={self.choose_one})' + return f"ReinforceField(name={self.name}, total={self.total}, choose_one={self.choose_one})" + class ReinforceController(nn.Module): """ @@ -144,8 +168,16 @@ class ReinforceController(nn.Module): Can be one of ``sum`` and ``mean``. How the entropy of multi-input-choice is reduced. """ - def __init__(self, fields, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, - skip_target=0.4, temperature=None, entropy_reduction='sum'): + def __init__( + self, + fields, + lstm_size=64, + lstm_num_layers=1, + tanh_constant=1.5, + skip_target=0.4, + temperature=None, + entropy_reduction="sum", + ): super(ReinforceController, self).__init__() self.fields = fields self.lstm_size = lstm_size @@ -159,17 +191,27 @@ class ReinforceController(nn.Module): self.attn_query = nn.Linear(self.lstm_size, self.lstm_size, bias=False) self.v_attn = nn.Linear(self.lstm_size, 1, bias=False) self.g_emb = nn.Parameter(torch.randn(1, self.lstm_size) * 0.1) - self.skip_targets = nn.Parameter(torch.tensor([1.0 - self.skip_target, self.skip_target]), # pylint: disable=not-callable - requires_grad=False) - assert entropy_reduction in ['sum', 'mean'], 'Entropy reduction must be one of sum and mean.' - self.entropy_reduction = torch.sum if entropy_reduction == 'sum' else torch.mean - self.cross_entropy_loss = nn.CrossEntropyLoss(reduction='none') - self.soft = nn.ModuleDict({ - field.name: nn.Linear(self.lstm_size, field.total, bias=False) for field in fields - }) - self.embedding = nn.ModuleDict({ - field.name: nn.Embedding(field.total, self.lstm_size) for field in fields - }) + self.skip_targets = nn.Parameter( + torch.tensor( + [1.0 - self.skip_target, self.skip_target] + ), # pylint: disable=not-callable + requires_grad=False, + ) + assert entropy_reduction in [ + "sum", + "mean", + ], "Entropy reduction must be one of sum and mean." 
+        self.entropy_reduction = torch.sum if entropy_reduction == "sum" else torch.mean
+        self.cross_entropy_loss = nn.CrossEntropyLoss(reduction="none")
+        self.soft = nn.ModuleDict(
+            {
+                field.name: nn.Linear(self.lstm_size, field.total, bias=False)
+                for field in fields
+            }
+        )
+        self.embedding = nn.ModuleDict(
+            {field.name: nn.Embedding(field.total, self.lstm_size) for field in fields}
+        )
 
     def resample(self):
         self._initialize()
@@ -180,12 +222,22 @@
     def _initialize(self):
         self._inputs = self.g_emb.data
-        self._c = [torch.zeros((1, self.lstm_size),
-                               dtype=self._inputs.dtype,
-                               device=self._inputs.device) for _ in range(self.lstm_num_layers)]
-        self._h = [torch.zeros((1, self.lstm_size),
-                               dtype=self._inputs.dtype,
-                               device=self._inputs.device) for _ in range(self.lstm_num_layers)]
+        self._c = [
+            torch.zeros(
+                (1, self.lstm_size),
+                dtype=self._inputs.dtype,
+                device=self._inputs.device,
+            )
+            for _ in range(self.lstm_num_layers)
+        ]
+        self._h = [
+            torch.zeros(
+                (1, self.lstm_size),
+                dtype=self._inputs.dtype,
+                device=self._inputs.device,
+            )
+            for _ in range(self.lstm_num_layers)
+        ]
         self.sample_log_prob = 0
         self.sample_entropy = 0
         self.sample_skip_penalty = 0
@@ -206,7 +258,9 @@
             self._inputs = self.embedding[field.name](sampled)
         else:
             logit = logit.view(-1, 1)
-            logit = torch.cat([-logit, logit], 1)  # pylint: disable=invalid-unary-operand-type
+            logit = torch.cat(
+                [-logit, logit], 1
+            )  # pylint: disable=invalid-unary-operand-type
             sampled = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1)
             skip_prob = torch.sigmoid(logit)
             kl = torch.sum(skip_prob * torch.log(skip_prob / self.skip_targets))
@@ -214,18 +268,26 @@
             log_prob = self.cross_entropy_loss(logit, sampled)
             sampled = sampled.nonzero().view(-1)
             if sampled.sum().item():
-                self._inputs = (torch.sum(self.embedding[field.name](sampled.view(-1)), 0) / (1. + torch.sum(sampled))).unsqueeze(0)
+                self._inputs = (
+                    torch.sum(self.embedding[field.name](sampled.view(-1)), 0)
+                    / (1.0 + torch.sum(sampled))
+                ).unsqueeze(0)
             else:
-                self._inputs = torch.zeros(1, self.lstm_size, device=self.embedding[field.name].weight.device)
+                self._inputs = torch.zeros(
+                    1, self.lstm_size, device=self.embedding[field.name].weight.device
+                )
             sampled = sampled.detach().numpy().tolist()
 
         self.sample_log_prob += self.entropy_reduction(log_prob)
-        entropy = (log_prob * torch.exp(-log_prob)).detach()  # pylint: disable=invalid-unary-operand-type
+        entropy = (
+            log_prob * torch.exp(-log_prob)
+        ).detach()  # pylint: disable=invalid-unary-operand-type
         self.sample_entropy += self.entropy_reduction(entropy)
         if len(sampled) == 1:
             sampled = sampled[0]
         return sampled
 
+
 @register_nas_algo("rl")
 class RL(BaseNAS):
     """
@@ -265,30 +327,44 @@
         Controls whether to show the progress bar.
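The `_train_controller` step below is plain REINFORCE with an entropy bonus and an exponential-moving-average baseline; a minimal sketch of one update (names are illustrative, not this module's API):

    def reinforce_step(log_prob, entropy, reward, baseline,
                       entropy_weight=1e-4, baseline_decay=0.999):
        # Entropy bonus encourages the controller to keep exploring.
        reward = reward + entropy_weight * entropy
        # Moving-average baseline lowers the variance of the gradient estimate.
        baseline = baseline * baseline_decay + reward * (1 - baseline_decay)
        loss = log_prob * (reward - baseline)  # REINFORCE surrogate loss
        return loss, baseline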
""" - def __init__(self, num_epochs = 5, device='cuda', log_frequency=None, - grad_clip=5., entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999, - ctrl_lr=0.00035, ctrl_steps_aggregate=20, ctrl_kwargs=None,n_warmup=100,model_lr=5e-3,model_wd=5e-4, disable_progress=True): + def __init__( + self, + num_epochs=5, + device="cuda", + log_frequency=None, + grad_clip=5.0, + entropy_weight=0.0001, + skip_weight=0.8, + baseline_decay=0.999, + ctrl_lr=0.00035, + ctrl_steps_aggregate=20, + ctrl_kwargs=None, + n_warmup=100, + model_lr=5e-3, + model_wd=5e-4, + disable_progress=True, + ): super().__init__(device) - self.device=device + self.device = device self.num_epochs = num_epochs self.log_frequency = log_frequency self.entropy_weight = entropy_weight self.skip_weight = skip_weight self.baseline_decay = baseline_decay - self.baseline = 0. + self.baseline = 0.0 self.ctrl_steps_aggregate = ctrl_steps_aggregate self.grad_clip = grad_clip - self.ctrl_kwargs=ctrl_kwargs - self.ctrl_lr=ctrl_lr - self.n_warmup=n_warmup + self.ctrl_kwargs = ctrl_kwargs + self.ctrl_lr = ctrl_lr + self.n_warmup = n_warmup self.model_lr = model_lr self.model_wd = model_wd - self.disable_progress=disable_progress + self.disable_progress = disable_progress def search(self, space: BaseSpace, dset, estimator): self.model = space - self.dataset = dset#.to(self.device) - self.estimator = estimator + self.dataset = dset # .to(self.device) + self.estimator = estimator # replace choice self.nas_modules = [] @@ -300,69 +376,95 @@ class RL(BaseNAS): # to device self.model = self.model.to(self.device) # fields - self.nas_fields = [ReinforceField(name, len(module), - isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1) - for name, module in self.nas_modules] - self.controller = ReinforceController(self.nas_fields, **(self.ctrl_kwargs or {})) - self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr) + self.nas_fields = [ + ReinforceField( + name, + len(module), + isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1, + ) + for name, module in self.nas_modules + ] + self.controller = ReinforceController( + self.nas_fields, **(self.ctrl_kwargs or {}) + ) + self.ctrl_optim = torch.optim.Adam( + self.controller.parameters(), lr=self.ctrl_lr + ) # train - with tqdm(range(self.num_epochs), disable = self.disable_progress) as bar: + with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar: for i in bar: - l2=self._train_controller(i) + l2 = self._train_controller(i) bar.set_postfix(reward_controller=l2) - - selection=self.export() - arch=space.parse_model(selection,self.device) - #print(selection,arch) + + selection = self.export() + arch = space.parse_model(selection, self.device) + # print(selection,arch) return arch - + def _train_controller(self, epoch): self.model.eval() self.controller.train() self.ctrl_optim.zero_grad() - rewards=[] - with tqdm(range(self.ctrl_steps_aggregate), disable=self.disable_progress) as bar: + rewards = [] + with tqdm( + range(self.ctrl_steps_aggregate), disable=self.disable_progress + ) as bar: for ctrl_step in bar: self._resample() - metric,loss=self._infer(mask='val') - bar.set_postfix(acc=metric,loss=loss.item()) - LOGGER.info(f'{self.arch}\n{self.selection}\n{metric},{loss}') - reward =metric + metric, loss = self._infer(mask="val") + bar.set_postfix(acc=metric, loss=loss.item()) + LOGGER.info(f"{self.arch}\n{self.selection}\n{metric},{loss}") + reward = metric rewards.append(reward) if self.entropy_weight: - reward += 
self.entropy_weight * self.controller.sample_entropy.item()
-                self.baseline = self.baseline * self.baseline_decay + reward * (1 - self.baseline_decay)
+                    reward += (
+                        self.entropy_weight * self.controller.sample_entropy.item()
+                    )
+                self.baseline = self.baseline * self.baseline_decay + reward * (
+                    1 - self.baseline_decay
+                )
                 loss = self.controller.sample_log_prob * (reward - self.baseline)
                 if self.skip_weight:
                     loss += self.skip_weight * self.controller.sample_skip_penalty
                 loss /= self.ctrl_steps_aggregate
                 loss.backward()
-
+
                 if (ctrl_step + 1) % self.ctrl_steps_aggregate == 0:
                     if self.grad_clip > 0:
-                        nn.utils.clip_grad_norm_(self.controller.parameters(), self.grad_clip)
+                        nn.utils.clip_grad_norm_(
+                            self.controller.parameters(), self.grad_clip
+                        )
                     self.ctrl_optim.step()
                     self.ctrl_optim.zero_grad()
 
-                if self.log_frequency is not None and ctrl_step % self.log_frequency == 0:
-                    LOGGER.info('RL Epoch [%d/%d] Step [%d/%d] %s', epoch + 1, self.num_epochs,
-                                ctrl_step + 1, self.ctrl_steps_aggregate)
-        return sum(rewards)/len(rewards)
+                if (
+                    self.log_frequency is not None
+                    and ctrl_step % self.log_frequency == 0
+                ):
+                    LOGGER.info(
+                        "RL Epoch [%d/%d] Step [%d/%d]",
+                        epoch + 1,
+                        self.num_epochs,
+                        ctrl_step + 1,
+                        self.ctrl_steps_aggregate,
+                    )
+        return sum(rewards) / len(rewards)
 
     def _resample(self):
         result = self.controller.resample()
-        self.arch=self.model.parse_model(result,device=self.device)
-        self.selection=result
+        self.arch = self.model.parse_model(result, device=self.device)
+        self.selection = result
 
     def export(self):
         self.controller.eval()
         with torch.no_grad():
             return self.controller.resample()
 
-    def _infer(self,mask='train'):
-        metric, loss = self.estimator.infer(self.arch._model, self.dataset,mask=mask)
+    def _infer(self, mask="train"):
+        metric, loss = self.estimator.infer(self.arch._model, self.dataset, mask=mask)
         return metric[0], loss
 
+
 @register_nas_algo("graphnas")
 class GraphNasRL(BaseNAS):
     """
@@ -404,11 +506,26 @@
         Controls whether to show the progress bar.
""" - def __init__(self, device='cuda', num_epochs=10, log_frequency=None, - grad_clip=5., entropy_weight=0.0001, skip_weight=0, baseline_decay=0.95, - ctrl_lr=0.00035, ctrl_steps_aggregate=100, ctrl_kwargs=None, n_warmup=100, model_lr=5e-3, model_wd=5e-4, topk=5, disable_progress = True): + def __init__( + self, + device="cuda", + num_epochs=10, + log_frequency=None, + grad_clip=5.0, + entropy_weight=0.0001, + skip_weight=0, + baseline_decay=0.95, + ctrl_lr=0.00035, + ctrl_steps_aggregate=100, + ctrl_kwargs=None, + n_warmup=100, + model_lr=5e-3, + model_wd=5e-4, + topk=5, + disable_progress=True, + ): super().__init__(device) - self.device=device + self.device = device self.num_epochs = num_epochs self.log_frequency = log_frequency self.entropy_weight = entropy_weight @@ -416,19 +533,19 @@ class GraphNasRL(BaseNAS): self.baseline_decay = baseline_decay self.ctrl_steps_aggregate = ctrl_steps_aggregate self.grad_clip = grad_clip - self.ctrl_kwargs=ctrl_kwargs - self.ctrl_lr=ctrl_lr - self.n_warmup=n_warmup + self.ctrl_kwargs = ctrl_kwargs + self.ctrl_lr = ctrl_lr + self.n_warmup = n_warmup self.model_lr = model_lr self.model_wd = model_wd - self.hist=[] - self.topk=topk - self.disable_progress=disable_progress + self.hist = [] + self.topk = topk + self.disable_progress = disable_progress def search(self, space: BaseSpace, dset, estimator): self.model = space - self.dataset = dset#.to(self.device) - self.estimator = estimator + self.dataset = dset # .to(self.device) + self.estimator = estimator # replace choice self.nas_modules = [] @@ -440,91 +557,114 @@ class GraphNasRL(BaseNAS): # to device self.model = self.model.to(self.device) # fields - self.nas_fields = [ReinforceField(name, len(module), - isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1) - for name, module in self.nas_modules] - self.controller = ReinforceController(self.nas_fields,lstm_size=100,temperature=5.0,tanh_constant=2.5, **(self.ctrl_kwargs or {})) - self.ctrl_optim = torch.optim.Adam(self.controller.parameters(), lr=self.ctrl_lr) + self.nas_fields = [ + ReinforceField( + name, + len(module), + isinstance(module, PathSamplingLayerChoice) or module.n_chosen == 1, + ) + for name, module in self.nas_modules + ] + self.controller = ReinforceController( + self.nas_fields, + lstm_size=100, + temperature=5.0, + tanh_constant=2.5, + **(self.ctrl_kwargs or {}), + ) + self.ctrl_optim = torch.optim.Adam( + self.controller.parameters(), lr=self.ctrl_lr + ) # train with tqdm(range(self.num_epochs), disable=self.disable_progress) as bar: for i in bar: - l2=self._train_controller(i) + l2 = self._train_controller(i) bar.set_postfix(reward_controller=l2) - + # selection=self.export() - - selections=[x[1] for x in self.hist] - candidiate_accs=[-x[0] for x in self.hist] - #print('candidiate accuracies',candidiate_accs) - selection=self._choose_best(selections) - arch=space.parse_model(selection,self.device) - #print(selection,arch) + + selections = [x[1] for x in self.hist] + candidiate_accs = [-x[0] for x in self.hist] + # print('candidiate accuracies',candidiate_accs) + selection = self._choose_best(selections) + arch = space.parse_model(selection, self.device) + # print(selection,arch) return arch - def _choose_best(self,selections): + def _choose_best(self, selections): # graphnas use top 5 models, can evaluate 20 times epoch and choose the best. 
-        results=[]
+        results = []
         for selection in selections:
-            accs=[]
+            accs = []
             for i in tqdm(range(20), disable=self.disable_progress):
-                self.arch=self.model.parse_model(selection,device=self.device)
-                metric,loss=self._infer(mask='val')
+                self.arch = self.model.parse_model(selection, device=self.device)
+                metric, loss = self._infer(mask="val")
                 accs.append(metric)
-            result=np.mean(accs)
-            LOGGER.info('selection {} \n acc {:.4f} +- {:.4f}'.format(selection,np.mean(accs),np.std(accs)/np.sqrt(20)))
+            result = np.mean(accs)
+            LOGGER.info(
+                "selection {} \n acc {:.4f} +- {:.4f}".format(
+                    selection, np.mean(accs), np.std(accs) / np.sqrt(20)
+                )
+            )
             results.append(result)
-        best_selection=selections[np.argmax(results)]
+        best_selection = selections[np.argmax(results)]
         return best_selection
-
+
     def _train_controller(self, epoch):
         self.model.eval()
         self.controller.train()
         self.ctrl_optim.zero_grad()
-        rewards=[]
-        baseline=None
+        rewards = []
+        baseline = None
        # diff: GraphNAS trains 100 and derives 100 per epoch (10 epochs); we just train 100 (20 epochs). The total number of samples is the same (2000).
-        with tqdm(range(self.ctrl_steps_aggregate), disable=self.disable_progress) as bar:
+        with tqdm(
+            range(self.ctrl_steps_aggregate), disable=self.disable_progress
+        ) as bar:
             for ctrl_step in bar:
                 self._resample()
-                metric,loss=self._infer(mask='val')
+                metric, loss = self._infer(mask="val")
                 # bar.set_postfix(acc=metric,loss=loss.item())
-                LOGGER.debug(f'{self.arch}\n{self.selection}\n{metric},{loss}')
+                LOGGER.debug(f"{self.arch}\n{self.selection}\n{metric},{loss}")
                 # diff: no reward shaping, unlike the GraphNAS code
-                reward =metric
-                self.hist.append([-metric,self.selection])
-                if len(self.hist)>self.topk:
-                    self.hist.sort(key=lambda x:x[0])
+                reward = metric
+                self.hist.append([-metric, self.selection])
+                if len(self.hist) > self.topk:
+                    self.hist.sort(key=lambda x: x[0])
                     self.hist.pop()
                 rewards.append(reward)
-
+
                 if self.entropy_weight:
-                    reward += self.entropy_weight * self.controller.sample_entropy.item()
+                    reward += (
+                        self.entropy_weight * self.controller.sample_entropy.item()
+                    )
                 if not baseline:
-                    baseline= reward
+                    baseline = reward
                 else:
-                    baseline = baseline * self.baseline_decay + reward * (1 - self.baseline_decay)
+                    baseline = baseline * self.baseline_decay + reward * (
+                        1 - self.baseline_decay
+                    )
                 loss = self.controller.sample_log_prob * (reward - baseline)
                 self.ctrl_optim.zero_grad()
                 loss.backward()
-
+
                 self.ctrl_optim.step()
-
-                bar.set_postfix(acc=metric,max_acc=max(rewards))
-        return sum(rewards)/len(rewards)
+
+                bar.set_postfix(acc=metric, max_acc=max(rewards))
+        return sum(rewards) / len(rewards)
 
     def _resample(self):
         result = self.controller.resample()
-        self.arch=self.model.parse_model(result,device=self.device)
-        self.selection=result
+        self.arch = self.model.parse_model(result, device=self.device)
+        self.selection = result
 
     def export(self):
         self.controller.eval()
         with torch.no_grad():
             return self.controller.resample()
 
-    def _infer(self,mask='train'):
-        metric, loss = self.estimator.infer(self.arch._model, self.dataset,mask=mask)
-        return metric[0], loss
\ No newline at end of file
+    def _infer(self, mask="train"):
+        metric, loss = self.estimator.infer(self.arch._model, self.dataset, mask=mask)
+        return metric[0], loss
diff --git a/autogl/module/nas/estimator/__init__.py b/autogl/module/nas/estimator/__init__.py
index e768aa2..88b9115 100644
--- a/autogl/module/nas/estimator/__init__.py
+++ b/autogl/module/nas/estimator/__init__.py
@@ -4,10 +4,13 @@ from .base import
BaseEstimator NAS_ESTIMATOR_DICT = {} + def register_nas_estimator(name): def register_nas_estimator_cls(cls): if name in NAS_ESTIMATOR_DICT: - raise ValueError("Cannot register duplicate NAS estimator ({})".format(name)) + raise ValueError( + "Cannot register duplicate NAS estimator ({})".format(name) + ) if not issubclass(cls, BaseEstimator): raise ValueError( "Model ({}: {}) must extend NAS estimator".format(name, cls.__name__) @@ -17,9 +20,11 @@ def register_nas_estimator(name): return register_nas_estimator_cls + from .one_shot import OneShotEstimator from .train_scratch import TrainEstimator + def build_nas_estimator_from_name(name: str) -> BaseEstimator: """ Parameters @@ -40,4 +45,5 @@ def build_nas_estimator_from_name(name: str) -> BaseEstimator: assert name in NAS_ESTIMATOR_DICT, "HPO module do not have name " + name return NAS_ESTIMATOR_DICT[name]() + __all__ = ["BaseEstimator", "OneShotEstimator", "TrainEstimator"] diff --git a/autogl/module/nas/estimator/base.py b/autogl/module/nas/estimator/base.py index b846dc1..0505253 100644 --- a/autogl/module/nas/estimator/base.py +++ b/autogl/module/nas/estimator/base.py @@ -9,6 +9,7 @@ from ...train.evaluation import Evaluation, Acc import torch.nn.functional as F import torch + class BaseEstimator: """ The estimator of NAS model. @@ -21,13 +22,14 @@ class BaseEstimator: evaluation: list of autogl.module.train.evaluation.Evaluation Default evaluation metric """ - def __init__(self, loss_f: str = 'nll_loss', evaluation = [Acc()]): + + def __init__(self, loss_f: str = "nll_loss", evaluation=[Acc()]): self.loss_f = loss_f self.evaluation = evaluation def setLossFunction(self, loss_f: str): self.loss_f = loss_f - + def setEvaluation(self, evaluation): self.evaluation = evaluation diff --git a/autogl/module/nas/estimator/one_shot.py b/autogl/module/nas/estimator/one_shot.py index 5a49dca..f719e4f 100644 --- a/autogl/module/nas/estimator/one_shot.py +++ b/autogl/module/nas/estimator/one_shot.py @@ -5,6 +5,7 @@ from . 
import register_nas_estimator
 from ..space import BaseSpace
 from .base import BaseEstimator
 
+
 @register_nas_estimator("oneshot")
 class OneShotEstimator(BaseEstimator):
     """
@@ -17,10 +18,10 @@
         device = next(model.parameters()).device
         dset = dataset[0].to(device)
         pred = model(dset)[getattr(dset, f"{mask}_mask")]
-        y = dset.y[getattr(dset, f'{mask}_mask')]
+        y = dset.y[getattr(dset, f"{mask}_mask")]
         loss = getattr(F, self.loss_f)(pred, y)
-        #acc=sum(pred.max(1)[1]==y).item()/y.size(0)
-        probs = F.softmax(pred, dim = 1).detach().cpu().numpy()
+        # acc=sum(pred.max(1)[1]==y).item()/y.size(0)
+        probs = F.softmax(pred, dim=1).detach().cpu().numpy()
         y = y.cpu()
         metrics = [eva.evaluate(probs, y) for eva in self.evaluation]
         return metrics, loss
diff --git a/autogl/module/nas/estimator/train_scratch.py b/autogl/module/nas/estimator/train_scratch.py
index 2077b7a..38036cc 100644
--- a/autogl/module/nas/estimator/train_scratch.py
+++ b/autogl/module/nas/estimator/train_scratch.py
@@ -9,32 +9,35 @@ import torch
 
 from autogl.module.train import NodeClassificationFullTrainer, Acc
 
+
 @register_nas_estimator("scratch")
 class TrainEstimator(BaseEstimator):
     """
     An estimator which trains each sampled architecture from scratch
     """
-    def __init__(self, loss_f = "nll_loss", evaluation = [Acc()]):
+
+    def __init__(self, loss_f="nll_loss", evaluation=[Acc()]):
         super().__init__(loss_f, evaluation)
         self.evaluation = evaluation
-        self.estimator=OneShotEstimator(self.loss_f, self.evaluation)
+        self.estimator = OneShotEstimator(self.loss_f, self.evaluation)
 
     def infer(self, model: BaseSpace, dataset, mask="train"):
         # self.trainer.model=model
         # self.trainer.device=model.device
         boxmodel = model.wrap()
-        self.trainer=NodeClassificationFullTrainer(
-            model=boxmodel,
-            optimizer=torch.optim.Adam,
-            lr=0.005,
-            max_epoch=300,
-            early_stopping_round=30,
-            weight_decay=5e-4,
-            device="auto",
-            init=False,
-            feval=self.evaluation,
-            loss=self.loss_f,
-            lr_scheduler_type=None)
+        self.trainer = NodeClassificationFullTrainer(
+            model=boxmodel,
+            optimizer=torch.optim.Adam,
+            lr=0.005,
+            max_epoch=300,
+            early_stopping_round=30,
+            weight_decay=5e-4,
+            device="auto",
+            init=False,
+            feval=self.evaluation,
+            loss=self.loss_f,
+            lr_scheduler_type=None,
+        )
         try:
             self.trainer.train(dataset)
             with torch.no_grad():
@@ -42,7 +45,7 @@
         except RuntimeError as e:
             if "cuda" in str(e) or "CUDA" in str(e):
                 INF = 100
-                fin = [-INF if eva.is_higher_better else INF for eva in self.evaluation]
+                fin = [-INF if eva.is_higher_better else INF for eva in self.evaluation]
                 return fin, 0
             else:
                 raise e
diff --git a/autogl/module/nas/space/__init__.py b/autogl/module/nas/space/__init__.py
index 60c1054..6d1e8f1 100644
--- a/autogl/module/nas/space/__init__.py
+++ b/autogl/module/nas/space/__init__.py
@@ -4,6 +4,7 @@ from .base import BaseSpace
 
 NAS_SPACE_DICT = {}
 
+
 def register_nas_space(name):
     def register_nas_space_cls(cls):
         if name in NAS_SPACE_DICT:
@@ -17,10 +18,12 @@
 
     return register_nas_space_cls
 
+
 from .graph_nas_macro import GraphNasMacroNodeClassificationSpace
 from .graph_nas import GraphNasNodeClassificationSpace
 from .single_path import SinglePathNodeClassificationSpace
 
+
 def build_nas_space_from_name(name: str) -> BaseSpace:
     """
     Parameters
@@ -41,4 +44,10 @@
     assert name in NAS_SPACE_DICT, "HPO module do not have name " + name
     return NAS_SPACE_DICT[name]()
 
-__all__ = ["BaseSpace", "GraphNasMacroNodeClassificationSpace",
"GraphNasNodeClassificationSpace", "SinglePathNodeClassificationSpace"] + +__all__ = [ + "BaseSpace", + "GraphNasMacroNodeClassificationSpace", + "GraphNasNodeClassificationSpace", + "SinglePathNodeClassificationSpace", +] diff --git a/autogl/module/nas/space/base.py b/autogl/module/nas/space/base.py index 433157d..02d4ca6 100644 --- a/autogl/module/nas/space/base.py +++ b/autogl/module/nas/space/base.py @@ -11,7 +11,8 @@ from ....utils import get_logger from ...model import AutoGCN -class OrderedMutable(): + +class OrderedMutable: """ An abstract class with order, enabling to sort mutables with a certain rank. @@ -20,20 +21,35 @@ class OrderedMutable(): order : int The order of the mutable """ + def __init__(self, order): self.order = order + class OrderedLayerChoice(OrderedMutable, mutables.LayerChoice): - def __init__(self, order, op_candidates, reduction="sum", return_mask=False, key=None): + def __init__( + self, order, op_candidates, reduction="sum", return_mask=False, key=None + ): OrderedMutable.__init__(self, order) mutables.LayerChoice.__init__(self, op_candidates, reduction, return_mask, key) + class OrderedInputChoice(OrderedMutable, mutables.InputChoice): - def __init__(self, order, n_candidates=None, choose_from=None, n_chosen=None, - reduction="sum", return_mask=False, key=None): + def __init__( + self, + order, + n_candidates=None, + choose_from=None, + n_chosen=None, + reduction="sum", + return_mask=False, + key=None, + ): OrderedMutable.__init__(self, order) - mutables.InputChoice.__init__(self, n_candidates, choose_from, n_chosen, - reduction, return_mask, key) + mutables.InputChoice.__init__( + self, n_candidates, choose_from, n_chosen, reduction, return_mask, key + ) + class StrModule(nn.Module): """ @@ -45,15 +61,17 @@ class StrModule(nn.Module): name : anything the name of module, can be any type """ + def __init__(self, name): super().__init__() self.str = name - def forward(self, *args,**kwargs): - return self.str + def forward(self, *args, **kwargs): + return self.str def __repr__(self): - return '{}({})'.format(self.__class__.__name__,self.str) + return "{}({})".format(self.__class__.__name__, self.str) + def map_nn(names): """ @@ -66,6 +84,7 @@ def map_nn(names): """ return [StrModule(x) for x in names] + class BoxModel(BaseModel): """ The box wrapping a space, can be passed to later procedure or trainer @@ -77,6 +96,7 @@ class BoxModel(BaseModel): device : str or torch.device The device to place the model """ + _logger = get_logger("space model") def __init__(self, space_model, device=torch.device("cuda")): @@ -93,7 +113,7 @@ class BoxModel(BaseModel): def fix(self, selection): """ - To fix self._model with a selection + To fix self._model with a selection Parameters ---------- @@ -129,6 +149,7 @@ class BoxModel(BaseModel): def model(self): return self._model + class BaseSpace(nn.Module): """ Base space class of NAS module. Defining space containing all models. 
@@ -187,7 +208,9 @@ class BaseSpace(nn.Module): if not self._initialized: self._initialized = True - def setLayerChoice(self, order, op_candidates, reduction="sum", return_mask=False, key=None): + def setLayerChoice( + self, order, op_candidates, reduction="sum", return_mask=False, key=None + ): """ Give a unique key if not given """ @@ -199,8 +222,16 @@ class BaseSpace(nn.Module): layer = OrderedLayerChoice(order, op_candidates, reduction, return_mask, orikey) return layer - def setInputChoice(self, order, n_candidates=None, choose_from=None, n_chosen=None, - reduction="sum", return_mask=False, key=None): + def setInputChoice( + self, + order, + n_candidates=None, + choose_from=None, + n_chosen=None, + reduction="sum", + return_mask=False, + key=None, + ): """ Give a unique key if not given """ @@ -209,8 +240,9 @@ class BaseSpace(nn.Module): key = f"default_key_{self._default_key}" self._default_key += 1 orikey = key - layer = OrderedInputChoice(order, n_candidates, choose_from, n_chosen, - reduction, return_mask, orikey) + layer = OrderedInputChoice( + order, n_candidates, choose_from, n_chosen, reduction, return_mask, orikey + ) return layer def wrap(self, device="cuda"): @@ -218,8 +250,9 @@ class BaseSpace(nn.Module): Return a BoxModel which wrap self as a model Used to pass to trainer To use this function, must contain `input_dim` and `output_dim` - """ - return BoxModel(self, device) + """ + return BoxModel(self, device) + class FixedInputChoice(nn.Module): """ @@ -230,6 +263,7 @@ class FixedInputChoice(nn.Module): mask : list The mask indicating which input to choose """ + def __init__(self, mask): self.mask_len = len(mask) for i in range(self.mask_len): @@ -242,6 +276,7 @@ class FixedInputChoice(nn.Module): if len(optional_inputs) == self.mask_len: return optional_inputs[self.selected] + class CleanFixedArchitecture(FixedArchitecture): """ Fixed architecture mutator that always selects a certain graph, allowing deepcopy @@ -295,6 +330,7 @@ class CleanFixedArchitecture(FixedArchitecture): else: self.replace_all_choice(mutable, global_name) + def apply_fixed_architecture(model, fixed_arc, verbose=True): """ Load architecture from `fixed_arc` and apply to model. 
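Taken together, the space, algorithm, and estimator registries are meant to compose as in this sketch; `dataset` stands for any PyG-style node-classification dataset, and the builder names are the ones defined above:

    from autogl.module.nas.algorithm import build_nas_algo_from_name
    from autogl.module.nas.estimator import build_nas_estimator_from_name
    from autogl.module.nas.space import build_nas_space_from_name

    space = build_nas_space_from_name("graphnas")
    space.instantiate(
        input_dim=dataset.num_node_features, output_dim=dataset.num_classes
    )
    algo = build_nas_algo_from_name("random")
    estimator = build_nas_estimator_from_name("oneshot")
    model = algo.search(space, dataset, estimator)  # a wrapped BaseModel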
diff --git a/autogl/module/nas/space/graph_nas.py b/autogl/module/nas/space/graph_nas.py index 42ce952..57efedb 100644 --- a/autogl/module/nas/space/graph_nas.py +++ b/autogl/module/nas/space/graph_nas.py @@ -30,9 +30,14 @@ GRAPHNAS_DEFAULT_GNN_OPS = [ GRAPHNAS_DEFAULT_ACT_OPS = [ # "sigmoid", "tanh", "relu", "linear", # "softplus", "leaky_relu", "relu6", "elu" - "sigmoid", "tanh", "relu", "linear", "elu" + "sigmoid", + "tanh", + "relu", + "linear", + "elu", ] + class LambdaModule(nn.Module): def __init__(self, lambd): super().__init__() @@ -40,27 +45,31 @@ class LambdaModule(nn.Module): def forward(self, x): return self.lambd(x) - + def __repr__(self): - return '{}({})'.format(self.__class__.__name__,self.lambd) + return "{}({})".format(self.__class__.__name__, self.lambd) + class StrModule(nn.Module): def __init__(self, lambd): super().__init__() self.str = lambd - def forward(self, *args,**kwargs): - return self.str + def forward(self, *args, **kwargs): + return self.str def __repr__(self): - return '{}({})'.format(self.__class__.__name__,self.str) + return "{}({})".format(self.__class__.__name__, self.str) + def act_map_nn(act): return LambdaModule(act_map(act)) + def map_nn(l): return [StrModule(x) for x in l] + @register_nas_space("graphnas") class GraphNasNodeClassificationSpace(BaseSpace): def __init__( @@ -71,7 +80,7 @@ class GraphNasNodeClassificationSpace(BaseSpace): input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, gnn_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = GRAPHNAS_DEFAULT_GNN_OPS, - act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = GRAPHNAS_DEFAULT_ACT_OPS + act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = GRAPHNAS_DEFAULT_ACT_OPS, ): super().__init__() self.layer_number = layer_number @@ -81,7 +90,7 @@ class GraphNasNodeClassificationSpace(BaseSpace): self.gnn_ops = gnn_ops self.act_ops = act_ops self.dropout = dropout - + def instantiate( self, hidden_dim: _typ.Optional[int] = None, @@ -90,7 +99,7 @@ class GraphNasNodeClassificationSpace(BaseSpace): input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, gnn_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None, - act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None + act_ops: _typ.Sequence[_typ.Union[str, _typ.Any]] = None, ): super().instantiate() self.dropout = dropout or self.dropout @@ -103,45 +112,80 @@ class GraphNasNodeClassificationSpace(BaseSpace): self.preproc0 = nn.Linear(self.input_dim, self.hidden_dim) self.preproc1 = nn.Linear(self.input_dim, self.hidden_dim) node_labels = [mutables.InputChoice.NO_KEY, mutables.InputChoice.NO_KEY] - for layer in range(2,self.layer_number+2): + for layer in range(2, self.layer_number + 2): node_labels.append(f"op_{layer}") - setattr(self,f"in_{layer}",self.setInputChoice(layer,choose_from=node_labels[:-1], n_chosen=1, return_mask=False,key=f"in_{layer}")) - setattr(self,f"op_{layer}",self.setLayerChoice(layer,[gnn_map(op,self.hidden_dim,self.hidden_dim)for op in self.gnn_ops],key=f"op_{layer}")) - setattr(self,"act",self.setLayerChoice(2*layer,[act_map_nn(a)for a in self.act_ops],key="act")) - setattr(self,"concat",self.setLayerChoice(2*layer+1,map_nn(["add", "product", "concat"]) ,key="concat")) + setattr( + self, + f"in_{layer}", + self.setInputChoice( + layer, + choose_from=node_labels[:-1], + n_chosen=1, + return_mask=False, + key=f"in_{layer}", + ), + ) + setattr( + self, + f"op_{layer}", + self.setLayerChoice( + layer, + [ + gnn_map(op, self.hidden_dim, self.hidden_dim) + for op in self.gnn_ops + ], + 
key=f"op_{layer}", + ), + ) + setattr( + self, + "act", + self.setLayerChoice( + 2 * layer, [act_map_nn(a) for a in self.act_ops], key="act" + ), + ) + setattr( + self, + "concat", + self.setLayerChoice( + 2 * layer + 1, map_nn(["add", "product", "concat"]), key="concat" + ), + ) self._initialized = True - self.classifier1 = nn.Linear(self.hidden_dim*self.layer_number, self.output_dim) + self.classifier1 = nn.Linear( + self.hidden_dim * self.layer_number, self.output_dim + ) self.classifier2 = nn.Linear(self.hidden_dim, self.output_dim) def forward(self, data): - x, edges = data.x, data.edge_index # x [2708,1433] ,[2, 10556] - x = F.dropout(x, p=self.dropout, training = self.training) + x, edges = data.x, data.edge_index # x [2708,1433] ,[2, 10556] + x = F.dropout(x, p=self.dropout, training=self.training) pprev_, prev_ = self.preproc0(x), self.preproc1(x) - prev_nodes_out = [pprev_,prev_] - for layer in range(2,self.layer_number+2): + prev_nodes_out = [pprev_, prev_] + for layer in range(2, self.layer_number + 2): node_in = getattr(self, f"in_{layer}")(prev_nodes_out) - node_out= getattr(self, f"op_{layer}")(node_in,edges) + node_out = getattr(self, f"op_{layer}")(node_in, edges) prev_nodes_out.append(node_out) - act=getattr(self, "act") - con=getattr(self, "concat")() - states=prev_nodes_out + act = getattr(self, "act") + con = getattr(self, "concat")() + states = prev_nodes_out if con == "concat": - x=torch.cat(states[2:], dim=1) + x = torch.cat(states[2:], dim=1) else: tmp = states[2] - for i in range(2,len(states)): + for i in range(2, len(states)): if con == "add": tmp = torch.add(tmp, states[i]) elif con == "product": tmp = torch.mul(tmp, states[i]) - x=tmp + x = tmp x = act(x) - if con=='concat': - x=self.classifier1(x) + if con == "concat": + x = self.classifier1(x) else: - x=self.classifier2(x) + x = self.classifier2(x) return F.log_softmax(x, dim=1) def parse_model(self, selection, device) -> BaseModel: - #return AutoGCN(self.input_dim, self.output_dim, device) - return self.wrap(device).fix(selection) \ No newline at end of file + # return AutoGCN(self.input_dim, self.output_dim, device) + return self.wrap(device).fix(selection) diff --git a/autogl/module/nas/space/graph_nas_macro.py b/autogl/module/nas/space/graph_nas_macro.py index ddca04a..4929399 100644 --- a/autogl/module/nas/space/graph_nas_macro.py +++ b/autogl/module/nas/space/graph_nas_macro.py @@ -10,7 +10,12 @@ from .operation import act_map from torch.nn import Parameter from torch_geometric.nn.inits import glorot, zeros -from torch_geometric.utils import remove_self_loops, add_self_loops, add_remaining_self_loops, softmax +from torch_geometric.utils import ( + remove_self_loops, + add_self_loops, + add_remaining_self_loops, + softmax, +) from torch_scatter import scatter_add import torch_scatter @@ -18,14 +23,22 @@ import inspect import sys special_args = [ - 'edge_index', 'edge_index_i', 'edge_index_j', 'size', 'size_i', 'size_j' + "edge_index", + "edge_index_i", + "edge_index_j", + "size", + "size_i", + "size_j", ] -__size_error_msg__ = ('All tensors which should get mapped to the same source ' - 'or target nodes must be of same size in dimension 0.') +__size_error_msg__ = ( + "All tensors which should get mapped to the same source " + "or target nodes must be of same size in dimension 0." 
+) is_python2 = sys.version_info[0] < 3 getargspec = inspect.getargspec if is_python2 else inspect.getfullargspec + def scatter_(name, src, index, dim_size=None): r"""Aggregates all values from the :attr:`src` tensor at the indices specified in the :attr:`index` tensor along the first dimension. @@ -45,35 +58,37 @@ def scatter_(name, src, index, dim_size=None): :rtype: :class:`Tensor` """ - assert name in ['add', 'mean', 'max'] + assert name in ["add", "mean", "max"] - op = getattr(torch_scatter, 'scatter_{}'.format(name)) - fill_value = -1e9 if name == 'max' else 0 + op = getattr(torch_scatter, "scatter_{}".format(name)) + fill_value = -1e9 if name == "max" else 0 out = op(src, index, 0, None, dim_size) if isinstance(out, tuple): out = out[0] - if name == 'max': + if name == "max": out[out == fill_value] = 0 return out -class MessagePassing(torch.nn.Module): - def __init__(self, aggr='add', flow='source_to_target'): +class MessagePassing(torch.nn.Module): + def __init__(self, aggr="add", flow="source_to_target"): super(MessagePassing, self).__init__() self.aggr = aggr - assert self.aggr in ['add', 'mean', 'max'] + assert self.aggr in ["add", "mean", "max"] self.flow = flow - assert self.flow in ['source_to_target', 'target_to_source'] + assert self.flow in ["source_to_target", "target_to_source"] self.__message_args__ = getargspec(self.message)[0][1:] - self.__special_args__ = [(i, arg) - for i, arg in enumerate(self.__message_args__) - if arg in special_args] + self.__special_args__ = [ + (i, arg) + for i, arg in enumerate(self.__message_args__) + if arg in special_args + ] self.__message_args__ = [ arg for arg in self.__message_args__ if arg not in special_args ] @@ -96,7 +111,7 @@ class MessagePassing(torch.nn.Module): size = [None, None] if size is None else list(size) assert len(size) == 2 - i, j = (0, 1) if self.flow == 'target_to_source' else (1, 0) + i, j = (0, 1) if self.flow == "target_to_source" else (1, 0) ij = {"_i": i, "_j": j} message_args = [] @@ -129,8 +144,8 @@ class MessagePassing(torch.nn.Module): size[0] = size[1] if size[0] is None else size[0] size[1] = size[0] if size[1] is None else size[1] - kwargs['edge_index'] = edge_index - kwargs['size'] = size + kwargs["edge_index"] = edge_index + kwargs["size"] = size for (idx, arg) in self.__special_args__: if arg[-2:] in ij.keys(): @@ -168,21 +183,23 @@ class MessagePassing(torch.nn.Module): return aggr_out -class GeoLayer(MessagePassing): - def __init__(self, - in_channels, - out_channels, - heads=1, - concat=True, - negative_slope=0.2, - dropout=0, - bias=True, - att_type="gat", - agg_type="sum", - pool_dim=0): +class GeoLayer(MessagePassing): + def __init__( + self, + in_channels, + out_channels, + heads=1, + concat=True, + negative_slope=0.2, + dropout=0, + bias=True, + att_type="gat", + agg_type="sum", + pool_dim=0, + ): if agg_type in ["sum", "mlp"]: - super(GeoLayer, self).__init__('add') + super(GeoLayer, self).__init__("add") elif agg_type in ["mean", "max"]: super(GeoLayer, self).__init__(agg_type) self.in_channels = in_channels @@ -197,8 +214,7 @@ class GeoLayer(MessagePassing): # GCN weight self.gcn_weight = None - self.weight = Parameter( - torch.Tensor(in_channels, heads * out_channels)) + self.weight = Parameter(torch.Tensor(in_channels, heads * out_channels)) self.att = Parameter(torch.Tensor(1, heads, 2 * out_channels)) if bias and concat: @@ -206,7 +222,7 @@ class GeoLayer(MessagePassing): elif bias and not concat: self.bias = Parameter(torch.Tensor(out_channels)) else: - self.register_parameter('bias', 
None)
+            self.register_parameter("bias", None)
 
         if self.att_type in ["generalized_linear"]:
             self.general_att_layer = torch.nn.Linear(out_channels, 1, bias=False)
@@ -226,18 +242,19 @@
     @staticmethod
     def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
         if edge_weight is None:
-            edge_weight = torch.ones((edge_index.size(1), ),
-                                     dtype=dtype,
-                                     device=edge_index.device)
+            edge_weight = torch.ones(
+                (edge_index.size(1),), dtype=dtype, device=edge_index.device
+            )
 
         fill_value = 1 if not improved else 2
         edge_index, edge_weight = add_remaining_self_loops(
-            edge_index, edge_weight, fill_value, num_nodes)
+            edge_index, edge_weight, fill_value, num_nodes
+        )
 
         row, col = edge_index
         deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
         deg_inv_sqrt = deg.pow(-0.5)
-        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
+        deg_inv_sqrt[deg_inv_sqrt == float("inf")] = 0
 
         return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
 
@@ -269,14 +286,16 @@
             x_j = F.dropout(x_j, p=self.dropout, training=True)
             neighbor = x_j
         elif self.att_type == "gcn":
-            if self.gcn_weight is None or self.gcn_weight.size(0) != x_j.size(0):  # gcn_weight needs to be recomputed for different graphs
+            if self.gcn_weight is None or self.gcn_weight.size(0) != x_j.size(
+                0
+            ):  # gcn_weight needs to be recomputed for different graphs
                 _, norm = self.norm(edge_index, num_nodes, None)
                 self.gcn_weight = norm
             neighbor = self.gcn_weight.view(-1, 1, 1) * x_j
         else:
             # Compute attention coefficients.
             alpha = self.apply_attention(edge_index, num_nodes, x_i, x_j)
-            alpha = softmax(alpha, edge_index[0], num_nodes = num_nodes)
+            alpha = softmax(alpha, edge_index[0], num_nodes=num_nodes)
             # Sample attention coefficients stochastically.
             if self.training and self.dropout > 0:
                 alpha = F.dropout(alpha, p=self.dropout, training=True)
@@ -293,28 +312,30 @@
             alpha = F.leaky_relu(alpha, self.negative_slope)
 
         elif self.att_type == "gat_sym":
-            wl = self.att[:, :, :self.out_channels]  # weight left
-            wr = self.att[:, :, self.out_channels:]  # weight right
+            wl = self.att[:, :, : self.out_channels]  # weight left
+            wr = self.att[:, :, self.out_channels :]  # weight right
             alpha = (x_i * wl).sum(dim=-1) + (x_j * wr).sum(dim=-1)
             alpha_2 = (x_j * wl).sum(dim=-1) + (x_i * wr).sum(dim=-1)
-            alpha = F.leaky_relu(alpha, self.negative_slope) + F.leaky_relu(alpha_2, self.negative_slope)
+            alpha = F.leaky_relu(alpha, self.negative_slope) + F.leaky_relu(
+                alpha_2, self.negative_slope
+            )
 
         elif self.att_type == "linear":
-            wl = self.att[:, :, :self.out_channels]  # weight left
-            wr = self.att[:, :, self.out_channels:]  # weight right
+            wl = self.att[:, :, : self.out_channels]  # weight left
+            wr = self.att[:, :, self.out_channels :]  # weight right
             al = x_j * wl
             ar = x_j * wr
             alpha = al.sum(dim=-1) + ar.sum(dim=-1)
             alpha = torch.tanh(alpha)
         elif self.att_type == "cos":
-            wl = self.att[:, :, :self.out_channels]  # weight left
-            wr = self.att[:, :, self.out_channels:]  # weight right
+            wl = self.att[:, :, : self.out_channels]  # weight left
+            wr = self.att[:, :, self.out_channels :]  # weight right
             alpha = x_i * wl * x_j * wr
             alpha = alpha.sum(dim=-1)
 
         elif self.att_type == "generalized_linear":
-            wl = self.att[:, :, :self.out_channels]  # weight left
-            wr = self.att[:, :, self.out_channels:]  # weight right
+            wl = self.att[:, :, : self.out_channels]  # weight left
+            wr = self.att[:, :, self.out_channels :]  # weight right
             al = x_i * wl
             ar = x_j * wr
             alpha = al + ar
@@ -335,9 +356,9 @@
         return aggr_out
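For reference, the "gat_sym" branch above symmetrizes the GAT edge score; a toy recomputation with illustrative shapes, independent of this class:

    import torch
    import torch.nn.functional as F

    E, C = 5, 8  # edges, out_channels (one attention head)
    x_i, x_j = torch.randn(E, 1, C), torch.randn(E, 1, C)
    att = torch.randn(1, 1, 2 * C)
    wl, wr = att[:, :, :C], att[:, :, C:]  # left/right weight slices

    alpha = (x_i * wl).sum(dim=-1) + (x_j * wr).sum(dim=-1)
    alpha_2 = (x_j * wl).sum(dim=-1) + (x_i * wr).sum(dim=-1)
    gat_sym = F.leaky_relu(alpha, 0.2) + F.leaky_relu(alpha_2, 0.2)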
def __repr__(self): - return '{}({}, {}, heads={})'.format(self.__class__.__name__, - self.in_channels, - self.out_channels, self.heads) + return "{}({}, {}, heads={})".format( + self.__class__.__name__, self.in_channels, self.out_channels, self.heads + ) def get_param_dict(self): params = {} @@ -374,6 +395,7 @@ class GeoLayer(MessagePassing): if agg_key in params and hasattr(self, "pool_layer"): self.pool_layer.load_state_dict(params[agg_key]) + @register_nas_space("graphnasmacro") class GraphNasMacroNodeClassificationSpace(BaseSpace): def __init__( @@ -384,7 +406,7 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace): input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, ops: _typ.Tuple = None, - search_act_con=False + search_act_con=False, ): super().__init__() self.layer_number = layer_number @@ -393,7 +415,7 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace): self.output_dim = output_dim self.ops = ops self.dropout = dropout - self.search_act_con=search_act_con + self.search_act_con = search_act_con def instantiate( self, @@ -402,7 +424,7 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace): input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, ops: _typ.Tuple = None, - dropout = None + dropout=None, ): super().instantiate() self.hidden_dim = hidden_dim or self.hidden_dim @@ -421,32 +443,145 @@ class GraphNasMacroNodeClassificationSpace(BaseSpace): # build hidden layer for i in range(layer_nums): # extract layer information - setattr(self,f"attention_{i}",self.setLayerChoice(i * state_num + 0, map_nn(["gat", "gcn", "cos", "const", "gat_sym", 'linear', 'generalized_linear']), key = f"attention_{i}")) - setattr(self,f"aggregator_{i}",self.setLayerChoice(i * state_num + 1, map_nn(["sum", "mean", "max", "mlp", ]), key = f"aggregator_{i}")) - setattr(self,f"act_{i}",self.setLayerChoice(i * state_num + 0, map_nn(["sigmoid", "tanh", "relu", "linear", - "softplus", "leaky_relu", "relu6", "elu"]), key=f"act_{i}")) - setattr(self,f"head_{i}",self.setLayerChoice(i * state_num + 0, map_nn([1, 2, 4, 6, 8, 16]), key= f"head_{i}")) + setattr( + self, + f"attention_{i}", + self.setLayerChoice( + i * state_num + 0, + map_nn( + [ + "gat", + "gcn", + "cos", + "const", + "gat_sym", + "linear", + "generalized_linear", + ] + ), + key=f"attention_{i}", + ), + ) + setattr( + self, + f"aggregator_{i}", + self.setLayerChoice( + i * state_num + 1, + map_nn( + [ + "sum", + "mean", + "max", + "mlp", + ] + ), + key=f"aggregator_{i}", + ), + ) + setattr( + self, + f"act_{i}", + self.setLayerChoice( + i * state_num + 0, + map_nn( + [ + "sigmoid", + "tanh", + "relu", + "linear", + "softplus", + "leaky_relu", + "relu6", + "elu", + ] + ), + key=f"act_{i}", + ), + ) + setattr( + self, + f"head_{i}", + self.setLayerChoice( + i * state_num + 0, map_nn([1, 2, 4, 6, 8, 16]), key=f"head_{i}" + ), + ) if i < layer_nums - 1: - setattr(self,f"out_channels_{i}",self.setLayerChoice(i * state_num + 0, map_nn([4, 8, 16, 32, 64, 128, 256]), key=f"out_channels_{i}")) + setattr( + self, + f"out_channels_{i}", + self.setLayerChoice( + i * state_num + 0, + map_nn([4, 8, 16, 32, 64, 128, 256]), + key=f"out_channels_{i}", + ), + ) def parse_model(self, selection, device) -> BaseModel: sel_list = [] for i in range(self.layer_number): - sel_list.append(["gat", "gcn", "cos", "const", "gat_sym", 'linear', 'generalized_linear'][selection[f"attention_{i}"]]) - sel_list.append(["sum", "mean", "max", "mlp", ][selection[f"aggregator_{i}"]]) - sel_list.append(["sigmoid", "tanh", "relu", 
"linear","softplus", "leaky_relu", "relu6", "elu"][selection[f"act_{i}"]]) + sel_list.append( + [ + "gat", + "gcn", + "cos", + "const", + "gat_sym", + "linear", + "generalized_linear", + ][selection[f"attention_{i}"]] + ) + sel_list.append( + [ + "sum", + "mean", + "max", + "mlp", + ][selection[f"aggregator_{i}"]] + ) + sel_list.append( + [ + "sigmoid", + "tanh", + "relu", + "linear", + "softplus", + "leaky_relu", + "relu6", + "elu", + ][selection[f"act_{i}"]] + ) sel_list.append([1, 2, 4, 6, 8, 16][selection[f"head_{i}"]]) if i < self.layer_number - 1: - sel_list.append([4, 8, 16, 32, 64, 128, 256][selection[f"out_channels_{i}"]]) + sel_list.append( + [4, 8, 16, 32, 64, 128, 256][selection[f"out_channels_{i}"]] + ) sel_list.append(self.output_dim) - #sel_list = ['const', 'sum', 'relu6', 2, 128, 'gat', 'sum', 'linear', 2, 7] - model = GraphNet(sel_list, self.input_dim, self.output_dim, self.dropout, multi_label=False, batch_normal=False, layers = self.layer_number).wrap(device) + # sel_list = ['const', 'sum', 'relu6', 2, 128, 'gat', 'sum', 'linear', 2, 7] + model = GraphNet( + sel_list, + self.input_dim, + self.output_dim, + self.dropout, + multi_label=False, + batch_normal=False, + layers=self.layer_number, + ).wrap(device) return model -class GraphNet(BaseSpace): - def __init__(self, actions, num_feat, num_label, drop_out=0.6, multi_label=False, batch_normal=True, state_num=5, - residual=False, layers = 2): +class GraphNet(BaseSpace): + def __init__( + self, + actions, + num_feat, + num_label, + drop_out=0.6, + multi_label=False, + batch_normal=True, + state_num=5, + residual=False, + layers=2, + ): self.residual = residual self.batch_normal = batch_normal self.layer_nums = layers @@ -456,11 +591,15 @@ class GraphNet(BaseSpace): self.input_dim = num_feat self.output_dim = num_label self.dropout = drop_out - + super().__init__() - self.build_model(actions, batch_normal, drop_out, num_feat, num_label, state_num) + self.build_model( + actions, batch_normal, drop_out, num_feat, num_label, state_num + ) - def build_model(self, actions, batch_normal, drop_out, num_feat, num_label, state_num): + def build_model( + self, actions, batch_normal, drop_out, num_feat, num_label, state_num + ): if self.residual: self.fcs = torch.nn.ModuleList() if self.batch_normal: @@ -468,9 +607,26 @@ class GraphNet(BaseSpace): self.layers = torch.nn.ModuleList() self.acts = [] self.gates = torch.nn.ModuleList() - self.build_hidden_layers(actions, batch_normal, drop_out, self.layer_nums, num_feat, num_label, state_num) - - def build_hidden_layers(self, actions, batch_normal, drop_out, layer_nums, num_feat, num_label, state_num=6): + self.build_hidden_layers( + actions, + batch_normal, + drop_out, + self.layer_nums, + num_feat, + num_label, + state_num, + ) + + def build_hidden_layers( + self, + actions, + batch_normal, + drop_out, + layer_nums, + num_feat, + num_label, + state_num=6, + ): # build hidden layer for i in range(layer_nums): @@ -492,17 +648,27 @@ class GraphNet(BaseSpace): if self.batch_normal: self.bns.append(torch.nn.BatchNorm1d(in_channels, momentum=0.5)) self.layers.append( - GeoLayer(in_channels, out_channels, head_num, concat, dropout=self.dropout, - att_type=attention_type, agg_type=aggregator_type, )) + GeoLayer( + in_channels, + out_channels, + head_num, + concat, + dropout=self.dropout, + att_type=attention_type, + agg_type=aggregator_type, + ) + ) self.acts.append(act_map(act)) if self.residual: if concat: - self.fcs.append(torch.nn.Linear(in_channels, out_channels * head_num)) + 
self.fcs.append( + torch.nn.Linear(in_channels, out_channels * head_num) + ) else: self.fcs.append(torch.nn.Linear(in_channels, out_channels)) def forward(self, data): - output, edge_index_all = data.x, data.edge_index # x [2708,1433] ,[2, 10556] + output, edge_index_all = data.x, data.edge_index # x [2708,1433] ,[2, 10556] if self.residual: for i, (act, layer, fc) in enumerate(zip(self.acts, self.layers, self.fcs)): output = F.dropout(output, p=self.dropout, training=self.training) diff --git a/autogl/module/nas/space/operation.py b/autogl/module/nas/space/operation.py index e642928..1690305 100644 --- a/autogl/module/nas/space/operation.py +++ b/autogl/module/nas/space/operation.py @@ -14,6 +14,7 @@ import torch from torch import nn import torch.nn.functional as F + class LinearConv(nn.Module): def __init__(self, in_channels, out_channels, bias=True): super(LinearConv, self).__init__() @@ -26,24 +27,28 @@ class LinearConv(nn.Module): return self.linear(x) def __repr__(self): - return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, - self.out_channels) + return "{}({}, {})".format( + self.__class__.__name__, self.in_channels, self.out_channels + ) -class ZeroConv(nn.Module): +class ZeroConv(nn.Module): def forward(self, x, edge_index, edge_weight=None): out = torch.zeros_like(x) out.requires_grad = True return out def __repr__(self): - return 'ZeroConv()' + return "ZeroConv()" + class Identity(nn.Module): def forward(self, x, edge_index, edge_weight=None): return x + def __repr__(self): - return 'Identity()' + return "Identity()" + def act_map(act): if act == "linear": @@ -65,15 +70,16 @@ def act_map(act): else: raise Exception("wrong activate function") + def gnn_map(gnn_name, in_dim, out_dim, concat=False, bias=True) -> nn.Module: - ''' + """ :param gnn_name: :param in_dim: :param out_dim: :param concat: for gat, concat multi-head output or not :return: GNN model - ''' + """ if gnn_name == "gat_8": return GATConv(in_dim, out_dim, 8, concat=concat, bias=bias) elif gnn_name == "gat_6": @@ -100,12 +106,21 @@ def gnn_map(gnn_name, in_dim, out_dim, concat=False, bias=True) -> nn.Module: return LinearConv(in_dim, out_dim, bias=bias) elif gnn_name == "zero": return ZeroConv() - elif gnn_name == 'identity': + elif gnn_name == "identity": return Identity() elif hasattr(torch_geometric.nn, gnn_name): cls = getattr(torch_geometric.nn, gnn_name) assert isinstance(cls, type), "Only support modules, get %s" % (gnn_name) - kwargs = {'in_channels': in_dim, 'out_channels': out_dim, 'concat': concat, 'bias': bias} - kwargs = {key: kwargs[key] for key in cls.__init__.__code__.co_varnames if key in kwargs} + kwargs = { + "in_channels": in_dim, + "out_channels": out_dim, + "concat": concat, + "bias": bias, + } + kwargs = { + key: kwargs[key] + for key in cls.__init__.__code__.co_varnames + if key in kwargs + } return cls(**kwargs) raise KeyError("Cannot parse key %s" % (gnn_name)) diff --git a/autogl/module/nas/space/single_path.py b/autogl/module/nas/space/single_path.py index 3475b42..eadff77 100644 --- a/autogl/module/nas/space/single_path.py +++ b/autogl/module/nas/space/single_path.py @@ -12,6 +12,7 @@ from ....utils import get_logger from ...model import AutoGCN + @register_nas_space("singlepath") class SinglePathNodeClassificationSpace(BaseSpace): def __init__( @@ -21,7 +22,7 @@ class SinglePathNodeClassificationSpace(BaseSpace): dropout: _typ.Optional[float] = 0.2, input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, - ops: _typ.Tuple = ['GCNConv', 'GATConv'], 
+ ops: _typ.Tuple = ["GCNConv", "GATConv"], ): super().__init__() self.layer_number = layer_number @@ -38,7 +39,7 @@ class SinglePathNodeClassificationSpace(BaseSpace): input_dim: _typ.Optional[int] = None, output_dim: _typ.Optional[int] = None, ops: _typ.Tuple = None, - dropout = None + dropout=None, ): super().instantiate() self.hidden_dim = hidden_dim or self.hidden_dim @@ -59,8 +60,15 @@ class SinglePathNodeClassificationSpace(BaseSpace): self.output_dim if layer == self.layer_number - 1 else self.hidden_dim, - ) if isinstance(op, type) else gnn_map(op, self.input_dim if layer == 0 else self.hidden_dim, - self.output_dim if layer == self.layer_number - 1 else self.hidden_dim) + ) + if isinstance(op, type) + else gnn_map( + op, + self.input_dim if layer == 0 else self.hidden_dim, + self.output_dim + if layer == self.layer_number - 1 + else self.hidden_dim, + ) for op in self.ops ], ), @@ -73,9 +81,9 @@ class SinglePathNodeClassificationSpace(BaseSpace): x = getattr(self, f"op_{layer}")(x, edges) if layer != self.layer_number - 1: x = F.leaky_relu(x) - x = F.dropout(x, p=self.dropout, training = self.training) + x = F.dropout(x, p=self.dropout, training=self.training) return F.log_softmax(x, dim=1) def parse_model(self, selection, device) -> BaseModel: - #return AutoGCN(self.input_dim, self.output_dim, device) + # return AutoGCN(self.input_dim, self.output_dim, device) return self.wrap(device).fix(selection) diff --git a/autogl/module/nas/utils.py b/autogl/module/nas/utils.py index 2504cfc..9ca8a05 100644 --- a/autogl/module/nas/utils.py +++ b/autogl/module/nas/utils.py @@ -123,22 +123,26 @@ class AverageMeter: fmtstr = "{name}: {avg" + self.fmt + "}" return fmtstr.format(**self.__dict__) + def get_module_order(root_module): key2order = {} + def apply(m): for name, child in m.named_children(): if isinstance(child, Mutable): - key2order[child.key] = child.order + key2order[child.key] = child.order else: apply(child) apply(root_module) return key2order + def sort_replaced_module(k2o, modules): - modules = sorted(modules, key = lambda x:k2o[x[0]]) + modules = sorted(modules, key=lambda x: k2o[x[0]]) return modules + def _replace_module_with_type(root_module, init_fn, type_name, modules): if modules is None: modules = [] diff --git a/autogl/module/train/evaluation.py b/autogl/module/train/evaluation.py index d2a390f..595df12 100644 --- a/autogl/module/train/evaluation.py +++ b/autogl/module/train/evaluation.py @@ -28,6 +28,7 @@ class Evaluation: class EvaluatorUtility: """ Auxiliary utilities for evaluation """ + class PredictionBatchCumulativeBuilder: """ Batch-cumulative builder for prediction @@ -37,22 +38,22 @@ class EvaluatorUtility: a batch-cumulative prediction collector `PredictionBatchCumulativeBuilder` is implemented for prediction in mini-batch manner. 
""" + def __init__(self): self.__indexes_in_integral_data: _typing.Optional[np.ndarray] = None self.__prediction: _typing.Optional[np.ndarray] = None def clear_batches( - self, *__args, **__kwargs - ) -> 'EvaluatorUtility.PredictionBatchCumulativeBuilder': + self, *__args, **__kwargs + ) -> "EvaluatorUtility.PredictionBatchCumulativeBuilder": self.__indexes_in_integral_data = None self.__prediction = None return self def add_batch( - self, indexes_in_integral_data: np.ndarray, - batch_prediction: np.ndarray - ) -> 'EvaluatorUtility.PredictionBatchCumulativeBuilder': - if not( + self, indexes_in_integral_data: np.ndarray, batch_prediction: np.ndarray + ) -> "EvaluatorUtility.PredictionBatchCumulativeBuilder": + if not ( isinstance(indexes_in_integral_data, np.ndarray) and isinstance(batch_prediction, np.ndarray) and len(indexes_in_integral_data.shape) == 1 @@ -62,33 +63,50 @@ class EvaluatorUtility: raise ValueError if self.__indexes_in_integral_data is None: - if indexes_in_integral_data.shape != np.unique(indexes_in_integral_data).shape: + if ( + indexes_in_integral_data.shape + != np.unique(indexes_in_integral_data).shape + ): raise ValueError( f"There exists duplicate index " f"in the argument indexes_in_integral_data {indexes_in_integral_data}" ) else: - self.__indexes_in_integral_data: np.ndarray = np.unique(indexes_in_integral_data) + self.__indexes_in_integral_data: np.ndarray = np.unique( + indexes_in_integral_data + ) else: __indexes_in_integral_data = np.concatenate( (self.__indexes_in_integral_data, indexes_in_integral_data) ) - if __indexes_in_integral_data.shape != np.unique(__indexes_in_integral_data).shape: + if ( + __indexes_in_integral_data.shape + != np.unique(__indexes_in_integral_data).shape + ): raise ValueError else: - self.__indexes_in_integral_data: np.ndarray = __indexes_in_integral_data + self.__indexes_in_integral_data: np.ndarray = ( + __indexes_in_integral_data + ) if self.__prediction is None: self.__prediction: np.ndarray = batch_prediction else: - self.__prediction: np.ndarray = np.concatenate((self.__prediction, batch_prediction)) + self.__prediction: np.ndarray = np.concatenate( + (self.__prediction, batch_prediction) + ) return self - def compose(self, __sorted: bool = True, **__kwargs) -> _typing.Tuple[np.ndarray, np.ndarray]: + def compose( + self, __sorted: bool = True, **__kwargs + ) -> _typing.Tuple[np.ndarray, np.ndarray]: if __sorted: sorted_index = np.argsort(self.__indexes_in_integral_data) - return self.__indexes_in_integral_data[sorted_index], self.__prediction[sorted_index] + return ( + self.__indexes_in_integral_data[sorted_index], + self.__prediction[sorted_index], + ) else: return self.__indexes_in_integral_data, self.__prediction @@ -260,4 +278,4 @@ class MicroF1(Evaluation): @staticmethod def evaluate(predict, label) -> float: - return f1_score(label, np.argmax(predict, axis=1), average='micro') + return f1_score(label, np.argmax(predict, axis=1), average="micro") diff --git a/autogl/module/train/graph_classification_full.py b/autogl/module/train/graph_classification_full.py index 7504da8..8b12a05 100644 --- a/autogl/module/train/graph_classification_full.py +++ b/autogl/module/train/graph_classification_full.py @@ -399,14 +399,17 @@ class GraphClassificationFullTrainer(BaseGraphClassificationTrainer): def __repr__(self) -> str: import yaml - return yaml.dump({ - "trainer_name": self.__class__.__name__, - "optimizer": self.optimizer, - "learning_rate": self.lr, - "max_epoch": self.max_epoch, - "early_stopping_round": 
self.early_stopping_round, - "model": repr(self.model) - }) + + return yaml.dump( + { + "trainer_name": self.__class__.__name__, + "optimizer": self.optimizer, + "learning_rate": self.lr, + "max_epoch": self.max_epoch, + "early_stopping_round": self.early_stopping_round, + "model": repr(self.model), + } + ) def evaluate(self, dataset, mask="val", feval=None): """ diff --git a/autogl/module/train/node_classification_full.py b/autogl/module/train/node_classification_full.py index 65c1f6f..e9ec45d 100644 --- a/autogl/module/train/node_classification_full.py +++ b/autogl/module/train/node_classification_full.py @@ -57,7 +57,7 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): def __init__( self, - model: Union[BaseModel, str]=None, + model: Union[BaseModel, str] = None, num_features=None, num_classes=None, optimizer=None, @@ -375,6 +375,7 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): def __repr__(self) -> str: import yaml + return yaml.dump( { "trainer_name": self.__class__.__name__, @@ -382,7 +383,7 @@ class NodeClassificationFullTrainer(BaseNodeClassificationTrainer): "learning_rate": self.lr, "max_epoch": self.max_epoch, "early_stopping_round": self.early_stopping_round, - "model": repr(self.model) + "model": repr(self.model), } ) diff --git a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py index 415e9bd..110a6c4 100644 --- a/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py +++ b/autogl/module/train/node_classification_trainer/node_classification_sampled_trainer.py @@ -13,7 +13,7 @@ from ..sampling.sampler.target_dependant_sampler import TargetDependantSampledDa from ..sampling.sampler.neighbor_sampler import NeighborSampler from ..sampling.sampler.graphsaint_sampler import * from ..sampling.sampler.layer_dependent_importance_sampler import ( - LayerDependentImportanceSampler + LayerDependentImportanceSampler, ) from ...model import BaseModel from ...model.base import ClassificationSupportedSequentialModel @@ -28,14 +28,18 @@ class _DeterministicNeighborSamplerStore: ] = [] @classmethod - def __is_target_node_indexes_equal(cls, a: torch.LongTensor, b: torch.LongTensor) -> bool: + def __is_target_node_indexes_equal( + cls, a: torch.LongTensor, b: torch.LongTensor + ) -> bool: if not a.dtype == b.dtype == torch.int64: return False if a.size() != b.size(): return False return torch.where(a != b)[0].size(0) == 0 - def __setitem__(self, target_nodes: torch.Tensor, neighbor_sampler: NeighborSampler): + def __setitem__( + self, target_nodes: torch.Tensor, neighbor_sampler: NeighborSampler + ): target_nodes: _typing.Any = target_nodes.cpu() if type(target_nodes) != torch.Tensor or target_nodes.dtype != torch.int64: raise TypeError @@ -43,18 +47,25 @@ class _DeterministicNeighborSamplerStore: raise TypeError for i in range(len(self.__neighbor_sampler_mapping)): if self.__is_target_node_indexes_equal( - target_nodes, self.__neighbor_sampler_mapping[i][0] + target_nodes, self.__neighbor_sampler_mapping[i][0] ): self.__neighbor_sampler_mapping[i] = (target_nodes, neighbor_sampler) return self.__neighbor_sampler_mapping.append((target_nodes, neighbor_sampler)) - def __getitem__(self, target_nodes: torch.Tensor) -> _typing.Optional[NeighborSampler]: + def __getitem__( + self, target_nodes: torch.Tensor + ) -> _typing.Optional[NeighborSampler]: target_nodes: _typing.Any = target_nodes.cpu() if 
type(target_nodes) != torch.Tensor or target_nodes.dtype != torch.int64: raise TypeError - for __current_target_nodes, __neighbor_sampler in self.__neighbor_sampler_mapping: - if self.__is_target_node_indexes_equal(target_nodes, __current_target_nodes): + for ( + __current_target_nodes, + __neighbor_sampler, + ) in self.__neighbor_sampler_mapping: + if self.__is_target_node_indexes_equal( + target_nodes, __current_target_nodes + ): return __neighbor_sampler return None @@ -89,24 +100,25 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): feval: ``str``. The evaluation method adopted in this function. """ + def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = (MicroF1,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs, + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (MicroF1,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, ): if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer @@ -137,10 +149,12 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 self._early_stopping = EarlyStopping( patience=early_stopping_round if early_stopping_round > 0 else 1e2, - verbose=False + verbose=False, ) """ Assign an empty initial hyper parameter space """ - self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] + self._hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) @@ -200,13 +214,13 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): # self.__training_sampler_num_workers: int = kwargs.get( # "training_sampler_num_workers", _cpu_count() # ) - + # if not 0 <= self.__training_sampler_num_workers <= _cpu_count(): # self.__training_sampler_num_workers: int = _cpu_count() # force to be 0 to be compactible with current pyg solution. 
self.__training_sampler_num_workers: int = 0 - + super(NodeClassificationGraphSAINTTrainer, self).__init__( model, num_features, num_classes, device, init, feval, loss ) @@ -238,7 +252,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.model.parameters(), lr=self._learning_rate, - weight_decay=self._weight_decay + weight_decay=self._weight_decay, ) if type(self._lr_scheduler_type) == str: if self._lr_scheduler_type.lower() == "step" + "lr": @@ -269,42 +283,51 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): ) setattr( - integral_data, "edge_weight", - self.__compute_normalized_edge_weight(getattr(integral_data, "edge_index")) + integral_data, + "edge_weight", + self.__compute_normalized_edge_weight(getattr(integral_data, "edge_index")), ) " Generate Sampler " if self.__sampler_type.lower() == "edge": _sampler: torch_geometric.data.GraphSAINTEdgeSampler = ( GraphSAINTSamplerFactory.create_edge_sampler( - integral_data, self.__num_graphs_per_epoch, self.__sampled_budget, + integral_data, + self.__num_graphs_per_epoch, + self.__sampled_budget, self.__sample_coverage_factor, - num_workers=self.__training_sampler_num_workers + num_workers=self.__training_sampler_num_workers, ) ) elif self.__sampler_type.lower() == "node": _sampler: torch_geometric.data.GraphSAINTNodeSampler = ( GraphSAINTSamplerFactory.create_node_sampler( - integral_data, self.__num_graphs_per_epoch, self.__sampled_budget, + integral_data, + self.__num_graphs_per_epoch, + self.__sampled_budget, self.__sample_coverage_factor, - num_workers=self.__training_sampler_num_workers + num_workers=self.__training_sampler_num_workers, ) ) elif self.__sampler_type.lower() == "rw": _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = ( GraphSAINTSamplerFactory.create_random_walk_sampler( - integral_data, self.__num_graphs_per_epoch, - self.__sampled_budget, self.__walk_length, + integral_data, + self.__num_graphs_per_epoch, + self.__sampled_budget, + self.__walk_length, self.__sample_coverage_factor, - num_workers=self.__training_sampler_num_workers + num_workers=self.__training_sampler_num_workers, ) ) else: _sampler: torch_geometric.data.GraphSAINTRandomWalkSampler = ( GraphSAINTSamplerFactory.create_random_walk_sampler( - integral_data, self.__num_graphs_per_epoch, - self.__sampled_budget, self.__walk_length, + integral_data, + self.__num_graphs_per_epoch, + self.__sampled_budget, + self.__walk_length, self.__sample_coverage_factor, - num_workers=self.__training_sampler_num_workers + num_workers=self.__training_sampler_num_workers, ) ) @@ -315,31 +338,35 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): for sampled_data in _sampler: sampled_data = sampled_data.to(self.device) setattr( - sampled_data, "edge_weight", - getattr(sampled_data, "edge_norm") * getattr(sampled_data, "edge_weight") + sampled_data, + "edge_weight", + getattr(sampled_data, "edge_norm") + * getattr(sampled_data, "edge_weight"), ) optimizer.zero_grad() if isinstance(self.model.model, ClassificationSupportedSequentialModel): - prediction: torch.Tensor = self.model.model.cls_forward(sampled_data) + prediction: torch.Tensor = self.model.model.cls_forward( + sampled_data + ) else: prediction: torch.Tensor = self.model.model(sampled_data) if not hasattr(torch.nn.functional, self.loss): - raise TypeError( - f"PyTorch does not support loss type {self.loss}" - ) + raise TypeError(f"PyTorch does not support loss type 
{self.loss}") loss_function = getattr(torch.nn.functional, self.loss) loss_value: torch.Tensor = loss_function( - prediction, getattr(sampled_data, "y"), reduction='none' + prediction, getattr(sampled_data, "y"), reduction="none" ) - loss_value = (loss_value * getattr(sampled_data, "node_norm"))[sampled_data.train_mask].sum() + loss_value = (loss_value * getattr(sampled_data, "node_norm"))[ + sampled_data.train_mask + ].sum() loss_value.backward() optimizer.step() lr_scheduler.step() if ( - hasattr(integral_data, "val_mask") and - getattr(integral_data, "val_mask") is not None and - type(getattr(integral_data, "val_mask")) == torch.Tensor + hasattr(integral_data, "val_mask") + and getattr(integral_data, "val_mask") is not None + and type(getattr(integral_data, "val_mask")) == torch.Tensor ): validation_results: _typing.Sequence[float] = self.evaluate( (integral_data,), "val", [self.feval[0]] @@ -353,17 +380,16 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): LOGGER.debug("Early stopping at %d", current_epoch) break if ( - hasattr(integral_data, "val_mask") and - getattr(integral_data, "val_mask") is not None and - type(getattr(integral_data, "val_mask")) == torch.Tensor + hasattr(integral_data, "val_mask") + and getattr(integral_data, "val_mask") is not None + and type(getattr(integral_data, "val_mask")) == torch.Tensor ): self._early_stopping.load_checkpoint(self.model.model) def __predict_only( - self, integral_data, - mask_or_target_nodes_indexes: _typing.Union[ - torch.BoolTensor, torch.LongTensor - ] + self, + integral_data, + mask_or_target_nodes_indexes: _typing.Union[torch.BoolTensor, torch.LongTensor], ) -> torch.Tensor: """ The function of predicting on the given data. @@ -372,11 +398,13 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): :return: the result of prediction on the given dataset """ import copy + integral_data = copy.copy(integral_data) self.model.model.eval() setattr( - integral_data, "edge_weight", - self.__compute_normalized_edge_weight(getattr(integral_data, "edge_index")) + integral_data, + "edge_weight", + self.__compute_normalized_edge_weight(getattr(integral_data, "edge_index")), ) integral_data = integral_data.to(self.device) with torch.no_grad(): @@ -387,8 +415,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return prediction[mask_or_target_nodes_indexes] def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, - in_log_format: bool = False + self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False ): """ The function of predicting the probability on the given dataset. @@ -443,12 +470,12 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] def evaluate( - self, - dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = None, + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: """ The function of training on the given dataset and keeping valid result. 
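# [Editor's note] The loss hunk above is the heart of GraphSAINT's bias
# correction: losses are computed per node with reduction="none", re-weighted by
# the sampler's node_norm, and only then summed over the training nodes. A
# hedged sketch of that reduction; prediction, y, node_norm and train_mask below
# are synthetic stand-ins, not the trainer's real attributes.
import torch
import torch.nn.functional as F

prediction = torch.randn(4, 3).log_softmax(dim=-1)    # log-probs for 4 sampled nodes
y = torch.tensor([0, 2, 1, 0])                        # their labels
node_norm = torch.tensor([0.9, 1.2, 1.0, 0.7])        # GraphSAINT importance weights
train_mask = torch.tensor([True, True, False, True])

per_node_loss = F.nll_loss(prediction, y, reduction="none")
loss = (per_node_loss * node_norm)[train_mask].sum()  # mirrors the trainer's reduction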
@@ -492,13 +519,15 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): f.evaluate( prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy(), - ) for f in _feval + ) + for f in _feval ] @classmethod def __compute_normalized_edge_weight( - cls, edge_index: torch.LongTensor, - original_edge_weight: _typing.Optional[torch.Tensor] = ... + cls, + edge_index: torch.LongTensor, + original_edge_weight: _typing.Optional[torch.Tensor] = ..., ) -> torch.Tensor: if type(edge_index) != torch.Tensor: raise TypeError @@ -511,10 +540,8 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): elif original_edge_weight.size() != (edge_index.size(1),): original_edge_weight = original_edge_weight.resize(edge_index.size(1)) - __out_degree: torch.Tensor = \ - torch_geometric.utils.degree(edge_index[0]) - __in_degree: torch.Tensor = \ - torch_geometric.utils.degree(edge_index[1]) + __out_degree: torch.Tensor = torch_geometric.utils.degree(edge_index[0]) + __in_degree: torch.Tensor = torch_geometric.utils.degree(edge_index[1]) temp_tensor: torch.Tensor = torch.stack( [__out_degree[edge_index[0]], __in_degree[edge_index[1]]] ) @@ -539,6 +566,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): None """ import gc + gc.collect() data = dataset[0].to(torch.device("cpu")) self.__train_only(data) @@ -555,10 +583,10 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): return self._valid_result_prob def get_valid_score( - self, return_major: bool = True + self, return_major: bool = True ) -> _typing.Union[ _typing.Tuple[float, bool], - _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] + _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]], ]: """ The function of getting the valid score. @@ -584,7 +612,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): @hyper_parameter_space.setter def hyper_parameter_space( - self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] ) -> None: if not isinstance(hp_space, _typing.Sequence): raise TypeError @@ -592,6 +620,7 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): def __repr__(self) -> str: import yaml + __repr: dict = { "trainer_name": self.__class__.__name__, "learning_rate": self._learning_rate, @@ -599,16 +628,16 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): "max_epoch": self._max_epoch, "early_stopping_round": self._early_stopping.patience, "sampler_type": self.__sampler_type, - "sampled_budget": self.__sampled_budget + "sampled_budget": self.__sampled_budget, } if self.__sampler_type == "rw": __repr.update({"walk_length": self.__walk_length}) return yaml.dump(__repr) def duplicate_from_hyper_parameter( - self, - hp: _typing.Dict[str, _typing.Any], - model: _typing.Optional[BaseModel] = None, + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None, ) -> "NodeClassificationGraphSAINTTrainer": """ The function of duplicating a new instance from the given hyper-parameter. 
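# [Editor's note] __compute_normalized_edge_weight above gathers, for every edge,
# the out-degree of its source and the in-degree of its target before combining
# them; the combination itself falls outside the hunk, so it is deliberately not
# reproduced here. A sketch of only the visible part, on a hypothetical toy graph:
import torch
import torch_geometric.utils

edge_index = torch.tensor([[0, 0, 1, 2], [1, 2, 2, 0]])
out_degree = torch_geometric.utils.degree(edge_index[0])  # out-degree per node
in_degree = torch_geometric.utils.degree(edge_index[1])   # in-degree per node
# Row 0: the source's out-degree; row 1: the target's in-degree, per edge.
temp_tensor = torch.stack([out_degree[edge_index[0]], in_degree[edge_index[1]]])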
@@ -646,12 +675,14 @@ class NodeClassificationGraphSAINTTrainer(BaseNodeClassificationTrainer): feval=self.feval, loss=self.loss, lr_scheduler_type=self._lr_scheduler_type, - **hp + **hp, ) @register_trainer("NodeClassificationLayerDependentImportanceSamplingTrainer") -class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassificationTrainer): +class NodeClassificationLayerDependentImportanceSamplingTrainer( + BaseNodeClassificationTrainer +): """ The node classification trainer utilizing Layer dependent importance sampling technique. @@ -680,24 +711,25 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi feval: ``str``. The evaluation method adopted in this function. """ + def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = (MicroF1,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs, + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (MicroF1,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, ) -> None: if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer @@ -728,17 +760,21 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 self._early_stopping = EarlyStopping( patience=early_stopping_round if early_stopping_round > 0 else 1e2, - verbose=False + verbose=False, ) """ Assign an empty initial hyper parameter space """ - self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] + self._hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) self._valid_score: _typing.Sequence[float] = () """ Set hyper parameters """ - self.__sampled_node_sizes: _typing.Sequence[int] = kwargs.get("sampled_node_sizes") + self.__sampled_node_sizes: _typing.Sequence[int] = kwargs.get( + "sampled_node_sizes" + ) self.__training_batch_size: int = kwargs.get("training_batch_size", 1024) if not self.__training_batch_size > 0: @@ -795,7 +831,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.model.parameters(), lr=self._learning_rate, - weight_decay=self._weight_decay + weight_decay=self._weight_decay, ) if type(self._lr_scheduler_type) == str: @@ -828,9 +864,11 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi __layer_dependent_importance_sampler: LayerDependentImportanceSampler = ( 
LayerDependentImportanceSampler(
-                integral_data.edge_index, torch.where(integral_data.train_mask)[0].unique(),
-                self.__sampled_node_sizes, batch_size=self.__training_batch_size,
-                num_workers=self.__training_sampler_num_workers
+                integral_data.edge_index,
+                torch.where(integral_data.train_mask)[0].unique(),
+                self.__sampled_node_sizes,
+                batch_size=self.__training_batch_size,
+                num_workers=self.__training_sampler_num_workers,
             )
         )
         for current_epoch in range(self._max_epoch):
@@ -844,7 +882,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi
             # Since the current Model design accepts a Data object, we have to assemble a sampled Data instance as the argument
             sampled_graph: autogl.data.Data = autogl.data.Data(
                 x=integral_data.x[sampled_data.all_sampled_nodes_indexes],
-                y=integral_data.y[sampled_data.all_sampled_nodes_indexes]
+                y=integral_data.y[sampled_data.all_sampled_nodes_indexes],
             )
             sampled_graph.to(self.device)
             sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [
@@ -856,17 +894,21 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi
                 for current_layer in sampled_data.sampled_edges_for_layers
             ]
             if isinstance(self.model.model, ClassificationSupportedSequentialModel):
-                prediction: torch.Tensor = self.model.model.cls_forward(sampled_graph)
+                prediction: torch.Tensor = self.model.model.cls_forward(
+                    sampled_graph
+                )
             else:
                 prediction: torch.Tensor = self.model.model(sampled_graph)
             if not hasattr(torch.nn.functional, self.loss):
-                raise TypeError(
-                    f"PyTorch does not support loss type {self.loss}"
-                )
+                raise TypeError(f"PyTorch does not support loss type {self.loss}")
             loss_function = getattr(torch.nn.functional, self.loss)
             loss_value: torch.Tensor = loss_function(
-                prediction[sampled_data.target_nodes_indexes.indexes_in_sampled_graph],
-                sampled_graph.y[sampled_data.target_nodes_indexes.indexes_in_sampled_graph]
+                prediction[
+                    sampled_data.target_nodes_indexes.indexes_in_sampled_graph
+                ],
+                sampled_graph.y[
+                    sampled_data.target_nodes_indexes.indexes_in_sampled_graph
+                ],
             )
             loss_value.backward()
             optimizer.step()
@@ -875,9 +917,9 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi
             lr_scheduler.step()
 
             if (
-                hasattr(integral_data, "val_mask") and
-                getattr(integral_data, "val_mask") is not None and
-                type(getattr(integral_data, "val_mask")) == torch.Tensor
+                hasattr(integral_data, "val_mask")
+                and getattr(integral_data, "val_mask") is not None
+                and type(getattr(integral_data, "val_mask")) == torch.Tensor
             ):
                 validation_results: _typing.Sequence[float] = self.evaluate(
                     (integral_data,), "val", [self.feval[0]]
@@ -891,17 +933,16 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi
                 LOGGER.debug("Early stopping at %d", current_epoch)
                 break
         if (
-            hasattr(integral_data, "val_mask") and
-            getattr(integral_data, "val_mask") is not None and
-            type(getattr(integral_data, "val_mask")) == torch.Tensor
+            hasattr(integral_data, "val_mask")
+            and getattr(integral_data, "val_mask") is not None
+            and type(getattr(integral_data, "val_mask")) == torch.Tensor
         ):
             self._early_stopping.load_checkpoint(self.model.model)
 
     def __predict_only(
-        self, integral_data,
-        mask_or_target_nodes_indexes: _typing.Union[
-            torch.BoolTensor, torch.LongTensor
-        ]
+        self,
+        integral_data,
+        mask_or_target_nodes_indexes: _typing.Union[torch.BoolTensor, torch.LongTensor],
     ) -> torch.Tensor:
         """
         The function of predicting on the given data.
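# [Editor's note] The training hunks above juggle two index spaces: global node
# ids select features/labels out of the integral graph, while the loss is taken
# at the target nodes' *local* positions inside the sampled subgraph. A hedged
# sketch of that bookkeeping; every tensor below is a synthetic stand-in for the
# sampler's outputs.
import torch
import torch.nn.functional as F

x = torch.randn(10, 4)                           # integral-graph features
y = torch.randint(0, 3, (10,))                   # integral-graph labels
all_sampled_nodes = torch.tensor([2, 5, 7, 9])   # global ids of sampled nodes
targets_in_sampled = torch.tensor([0, 2])        # local positions of target nodes

sampled_x, sampled_y = x[all_sampled_nodes], y[all_sampled_nodes]
logits = torch.randn(all_sampled_nodes.size(0), 3).log_softmax(-1)  # GNN stand-in
loss = F.nll_loss(logits[targets_in_sampled], sampled_y[targets_in_sampled])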
@@ -911,30 +952,41 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi """ self.model.model.eval() integral_data = integral_data.to(torch.device("cpu")) - mask_or_target_nodes_indexes = mask_or_target_nodes_indexes.to(torch.device("cpu")) + mask_or_target_nodes_indexes = mask_or_target_nodes_indexes.to( + torch.device("cpu") + ) if isinstance(self.model.model, ClassificationSupportedSequentialModel): - sequential_gnn_model: ClassificationSupportedSequentialModel = self.model.model + sequential_gnn_model: ClassificationSupportedSequentialModel = ( + self.model.model + ) __num_layers: int = len(self.__sampled_node_sizes) x: torch.Tensor = getattr(integral_data, "x") for _current_layer_index in range(__num_layers - 1): __next_x: _typing.Optional[torch.Tensor] = None - _optional_neighbor_sampler: _typing.Optional[NeighborSampler] = ( - self.__neighbor_sampler_store[torch.arange(x.size(0))] - ) + _optional_neighbor_sampler: _typing.Optional[ + NeighborSampler + ] = self.__neighbor_sampler_store[torch.arange(x.size(0))] if ( - _optional_neighbor_sampler is not None and - type(_optional_neighbor_sampler) == NeighborSampler + _optional_neighbor_sampler is not None + and type(_optional_neighbor_sampler) == NeighborSampler ): - current_neighbor_sampler: NeighborSampler = _optional_neighbor_sampler + current_neighbor_sampler: NeighborSampler = ( + _optional_neighbor_sampler + ) else: current_neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, torch.arange(x.size(0)).unique(), - [-1], batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, shuffle=False + integral_data.edge_index, + torch.arange(x.size(0)).unique(), + [-1], + batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, + shuffle=False, ) - self.__neighbor_sampler_store[torch.arange(x.size(0))] = current_neighbor_sampler + self.__neighbor_sampler_store[ + torch.arange(x.size(0)) + ] = current_neighbor_sampler for _target_dependant_sampled_data in current_neighbor_sampler: _target_dependant_sampled_data: TargetDependantSampledData = ( @@ -943,50 +995,63 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi _sampled_graph: autogl.data.Data = autogl.data.Data( x=x[_target_dependant_sampled_data.all_sampled_nodes_indexes], edge_index=( - _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_index_for_sampled_graph - ) + _target_dependant_sampled_data.sampled_edges_for_layers[ + 0 + ].edge_index_for_sampled_graph + ), ) _sampled_graph.edge_weight: torch.Tensor = ( - _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_weight + _target_dependant_sampled_data.sampled_edges_for_layers[ + 0 + ].edge_weight ) _sampled_graph: autogl.data.Data = _sampled_graph.to(self.device) with torch.no_grad(): __sampled_graph_inferences: torch.Tensor = ( - sequential_gnn_model.sequential_encoding_layers[_current_layer_index](_sampled_graph) + sequential_gnn_model.sequential_encoding_layers[ + _current_layer_index + ](_sampled_graph) ) _sampled_target_nodes_inferences: torch.Tensor = __sampled_graph_inferences[ _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph ].cpu() if __next_x is None: - __next_x: torch.Tensor = torch.zeros(x.size(0), __sampled_graph_inferences.size(1)) - __next_x[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph] = ( - _sampled_target_nodes_inferences - ) + __next_x: torch.Tensor = torch.zeros( + x.size(0), 
__sampled_graph_inferences.size(1) + ) + __next_x[ + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph + ] = _sampled_target_nodes_inferences x: torch.Tensor = __next_x " The following procedures are for the top layer " if mask_or_target_nodes_indexes.dtype == torch.bool: - target_nodes_indexes: _typing.Any = ( - torch.where(mask_or_target_nodes_indexes)[0] - ) + target_nodes_indexes: _typing.Any = torch.where( + mask_or_target_nodes_indexes + )[0] else: target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() - _optional_neighbor_sampler: _typing.Optional[NeighborSampler] = ( - self.__neighbor_sampler_store[target_nodes_indexes] - ) + _optional_neighbor_sampler: _typing.Optional[ + NeighborSampler + ] = self.__neighbor_sampler_store[target_nodes_indexes] if ( - _optional_neighbor_sampler is not None and - type(_optional_neighbor_sampler) == NeighborSampler + _optional_neighbor_sampler is not None + and type(_optional_neighbor_sampler) == NeighborSampler ): current_neighbor_sampler: NeighborSampler = _optional_neighbor_sampler else: current_neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, target_nodes_indexes, - [-1], batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, shuffle=False + integral_data.edge_index, + target_nodes_indexes, + [-1], + batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, + shuffle=False, ) - self.__neighbor_sampler_store[target_nodes_indexes] = current_neighbor_sampler + self.__neighbor_sampler_store[ + target_nodes_indexes + ] = current_neighbor_sampler prediction_batch_cumulative_builder = ( EvaluatorUtility.PredictionBatchCumulativeBuilder() @@ -994,33 +1059,46 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi for _target_dependant_sampled_data in current_neighbor_sampler: _sampled_graph: autogl.data.Data = autogl.data.Data( x[_target_dependant_sampled_data.all_sampled_nodes_indexes], - _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_index_for_sampled_graph + _target_dependant_sampled_data.sampled_edges_for_layers[ + 0 + ].edge_index_for_sampled_graph, ) _sampled_graph.edge_weight: torch.Tensor = ( - _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_weight + _target_dependant_sampled_data.sampled_edges_for_layers[ + 0 + ].edge_weight ) _sampled_graph: autogl.data.Data = _sampled_graph.to(self.device) with torch.no_grad(): prediction_batch_cumulative_builder.add_batch( _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), sequential_gnn_model.cls_decode( - sequential_gnn_model.sequential_encoding_layers[-1](_sampled_graph) - )[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph].cpu().numpy() + sequential_gnn_model.sequential_encoding_layers[-1]( + _sampled_graph + ) + )[ + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ] + .cpu() + .numpy(), ) return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) else: if mask_or_target_nodes_indexes.dtype == torch.bool: - target_nodes_indexes: _typing.Any = ( - torch.where(mask_or_target_nodes_indexes)[0] - ) + target_nodes_indexes: _typing.Any = torch.where( + mask_or_target_nodes_indexes + )[0] else: target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() neighbor_sampler: NeighborSampler = NeighborSampler( - 
torch_geometric.utils.add_remaining_self_loops(integral_data.edge_index)[0], - target_nodes_indexes, [-1 for _ in self.__sampled_node_sizes], + torch_geometric.utils.add_remaining_self_loops( + integral_data.edge_index + )[0], + target_nodes_indexes, + [-1 for _ in self.__sampled_node_sizes], batch_size=self.__predicting_batch_size, num_workers=self.__predicting_sampler_num_workers, - shuffle=False + shuffle=False, ) prediction_batch_cumulative_builder = ( EvaluatorUtility.PredictionBatchCumulativeBuilder() @@ -1030,7 +1108,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi sampled_data: TargetDependantSampledData = sampled_data sampled_graph: autogl.data.Data = autogl.data.Data( integral_data.x[sampled_data.all_sampled_nodes_indexes], - integral_data.y[sampled_data.all_sampled_nodes_indexes] + integral_data.y[sampled_data.all_sampled_nodes_indexes], ) sampled_graph.to(self.device) sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ @@ -1046,13 +1124,14 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), self.model.model(sampled_graph)[ sampled_data.target_nodes_indexes.indexes_in_sampled_graph - ].cpu().numpy() + ] + .cpu() + .numpy(), ) return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) def predict_proba( - self, dataset, mask: _typing.Optional[str] = None, - in_log_format: bool = False + self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False ): """ The function of predicting the probability on the given dataset. @@ -1107,12 +1186,12 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1] def evaluate( - self, - dataset, - mask: _typing.Optional[str] = None, - feval: _typing.Union[ - None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = None, + self, + dataset, + mask: _typing.Optional[str] = None, + feval: _typing.Union[ + None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = None, ) -> _typing.Sequence[float]: """ The function of training on the given dataset and keeping valid result. @@ -1156,7 +1235,8 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi f.evaluate( prediction_probability.cpu().numpy(), y_ground_truth.cpu().numpy(), - ) for f in _feval + ) + for f in _feval ] def train(self, dataset, keep_valid_result: bool = True): @@ -1176,6 +1256,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi None """ import gc + gc.collect() data = dataset[0] self.__train_only(data) @@ -1193,10 +1274,10 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi return self._valid_result_prob def get_valid_score( - self, return_major: bool = True + self, return_major: bool = True ) -> _typing.Union[ _typing.Tuple[float, bool], - _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]] + _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]], ]: """ The function of getting the valid score. 
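# [Editor's note] Both sampled predictors above stitch per-batch outputs back
# into integral-graph order through EvaluatorUtility.PredictionBatchCumulativeBuilder,
# introduced earlier in this patch. A usage sketch with two synthetic batches;
# the import path assumes the module layout shown in the evaluation.py diff.
import numpy as np
from autogl.module.train.evaluation import EvaluatorUtility

builder = EvaluatorUtility.PredictionBatchCumulativeBuilder()
builder.add_batch(np.array([3, 1]), np.array([[0.2, 0.8], [0.9, 0.1]]))
builder.add_batch(np.array([0, 2]), np.array([[0.5, 0.5], [0.4, 0.6]]))
indexes, prediction = builder.compose()  # rows sorted back to ids 0, 1, 2, 3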
@@ -1222,7 +1303,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi @hyper_parameter_space.setter def hyper_parameter_space( - self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] + self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] ) -> None: if not isinstance(hp_space, _typing.Sequence): raise TypeError @@ -1230,6 +1311,7 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi def __repr__(self) -> str: import yaml + return yaml.dump( { "trainer_name": self.__class__.__name__, @@ -1238,14 +1320,14 @@ class NodeClassificationLayerDependentImportanceSamplingTrainer(BaseNodeClassifi "max_epoch": self._max_epoch, "early_stopping_round": self._early_stopping.patience, "sampling_sizes": self.__sampled_node_sizes, - "model": repr(self.model) + "model": repr(self.model), } ) def duplicate_from_hyper_parameter( - self, - hp: _typing.Dict[str, _typing.Any], - model: _typing.Optional[BaseModel] = None, + self, + hp: _typing.Dict[str, _typing.Any], + model: _typing.Optional[BaseModel] = None, ) -> "NodeClassificationLayerDependentImportanceSamplingTrainer": """ The function of duplicating a new instance from the given hyper-parameter. @@ -1317,24 +1399,25 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): feval: ``str``. The evaluation method adopted in this function. """ + def __init__( - self, - model: _typing.Union[BaseModel, str], - num_features: int, - num_classes: int, - optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., - lr: float = 1e-4, - max_epoch: int = 100, - early_stopping_round: int = 100, - weight_decay: float = 1e-4, - device: _typing.Optional[torch.device] = None, - init: bool = True, - feval: _typing.Union[ - _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] - ] = (Logloss,), - loss: str = "nll_loss", - lr_scheduler_type: _typing.Optional[str] = None, - **kwargs, + self, + model: _typing.Union[BaseModel, str], + num_features: int, + num_classes: int, + optimizer: _typing.Union[_typing.Type[torch.optim.Optimizer], str, None] = ..., + lr: float = 1e-4, + max_epoch: int = 100, + early_stopping_round: int = 100, + weight_decay: float = 1e-4, + device: _typing.Optional[torch.device] = None, + init: bool = True, + feval: _typing.Union[ + _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]] + ] = (Logloss,), + loss: str = "nll_loss", + lr_scheduler_type: _typing.Optional[str] = None, + **kwargs, ): if isinstance(optimizer, type) and issubclass(optimizer, torch.optim.Optimizer): self._optimizer_class: _typing.Type[torch.optim.Optimizer] = optimizer @@ -1365,10 +1448,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): self._weight_decay: float = weight_decay if weight_decay > 0 else 1e-4 self._early_stopping = EarlyStopping( patience=early_stopping_round if early_stopping_round > 0 else 1e2, - verbose=False + verbose=False, ) """ Assign an empty initial hyper parameter space """ - self._hyper_parameter_space: _typing.Sequence[_typing.Dict[str, _typing.Any]] = [] + self._hyper_parameter_space: _typing.Sequence[ + _typing.Dict[str, _typing.Any] + ] = [] self._valid_result: torch.Tensor = torch.zeros(0) self._valid_result_prob: torch.Tensor = torch.zeros(0) @@ -1432,7 +1517,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): optimizer: torch.optim.Optimizer = self._optimizer_class( self.model.model.parameters(), lr=self._learning_rate, - 
weight_decay=self._weight_decay + weight_decay=self._weight_decay, ) if type(self._lr_scheduler_type) == str: @@ -1464,9 +1549,11 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): ) neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, torch.where(integral_data.train_mask)[0].unique(), - self.__sampling_sizes, batch_size=self.__training_batch_size, - num_workers=self.__training_sampler_num_workers + integral_data.edge_index, + torch.where(integral_data.train_mask)[0].unique(), + self.__sampling_sizes, + batch_size=self.__training_batch_size, + num_workers=self.__training_sampler_num_workers, ) for current_epoch in range(self._max_epoch): self.model.model.train() @@ -1478,7 +1565,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): sampled_data: TargetDependantSampledData = sampled_data sampled_graph: autogl.data.Data = autogl.data.Data( x=integral_data.x[sampled_data.all_sampled_nodes_indexes], - y=integral_data.y[sampled_data.all_sampled_nodes_indexes] + y=integral_data.y[sampled_data.all_sampled_nodes_indexes], ) sampled_graph.to(self.device) sampled_graph.edge_indexes: _typing.Sequence[torch.LongTensor] = [ @@ -1486,17 +1573,21 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): for current_layer in sampled_data.sampled_edges_for_layers ] if isinstance(self.model.model, ClassificationSupportedSequentialModel): - prediction: torch.Tensor = self.model.model.cls_forward(sampled_graph) + prediction: torch.Tensor = self.model.model.cls_forward( + sampled_graph + ) else: prediction: torch.Tensor = self.model.model(sampled_graph) if not hasattr(torch.nn.functional, self.loss): - raise TypeError( - f"PyTorch does not support loss type {self.loss}" - ) + raise TypeError(f"PyTorch does not support loss type {self.loss}") loss_function = getattr(torch.nn.functional, self.loss) loss_value: torch.Tensor = loss_function( - prediction[sampled_data.target_nodes_indexes.indexes_in_sampled_graph], - sampled_graph.y[sampled_data.target_nodes_indexes.indexes_in_sampled_graph] + prediction[ + sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ], + sampled_graph.y[ + sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ], ) loss_value.backward() optimizer.step() @@ -1505,9 +1596,9 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): lr_scheduler.step() if ( - hasattr(integral_data, "val_mask") and - getattr(integral_data, "val_mask") is not None and - type(getattr(integral_data, "val_mask")) == torch.Tensor + hasattr(integral_data, "val_mask") + and getattr(integral_data, "val_mask") is not None + and type(getattr(integral_data, "val_mask")) == torch.Tensor ): validation_results: _typing.Sequence[float] = self.evaluate( (integral_data,), "val", [self.feval[0]] @@ -1521,17 +1612,16 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): LOGGER.debug("Early stopping at %d", current_epoch) break if ( - hasattr(integral_data, "val_mask") and - getattr(integral_data, "val_mask") is not None and - type(getattr(integral_data, "val_mask")) == torch.Tensor + hasattr(integral_data, "val_mask") + and getattr(integral_data, "val_mask") is not None + and type(getattr(integral_data, "val_mask")) == torch.Tensor ): self._early_stopping.load_checkpoint(self.model.model) def __predict_only( - self, integral_data, - mask_or_target_nodes_indexes: _typing.Union[ - torch.BoolTensor, torch.LongTensor - ] + self, + integral_data, + 
mask_or_target_nodes_indexes: _typing.Union[torch.BoolTensor, torch.LongTensor], ) -> torch.Tensor: """ The function of predicting on the given data. @@ -1541,28 +1631,37 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): """ self.model.model.eval() integral_data = integral_data.to(torch.device("cpu")) - mask_or_target_nodes_indexes = mask_or_target_nodes_indexes.to(torch.device("cpu")) + mask_or_target_nodes_indexes = mask_or_target_nodes_indexes.to( + torch.device("cpu") + ) if isinstance(self.model.model, ClassificationSupportedSequentialModel): - sequential_gnn_model: ClassificationSupportedSequentialModel = self.model.model + sequential_gnn_model: ClassificationSupportedSequentialModel = ( + self.model.model + ) __num_layers: int = len(self.__sampling_sizes) x: torch.Tensor = getattr(integral_data, "x") for _current_layer_index in range(__num_layers - 1): __next_x: _typing.Optional[torch.Tensor] = None - _optional_neighbor_sampler: _typing.Optional[NeighborSampler] = ( - self.__neighbor_sampler_store[torch.arange(x.size(0)).unique()] - ) + _optional_neighbor_sampler: _typing.Optional[ + NeighborSampler + ] = self.__neighbor_sampler_store[torch.arange(x.size(0)).unique()] if ( - _optional_neighbor_sampler is not None and - type(_optional_neighbor_sampler) == NeighborSampler + _optional_neighbor_sampler is not None + and type(_optional_neighbor_sampler) == NeighborSampler ): - current_neighbor_sampler: NeighborSampler = _optional_neighbor_sampler + current_neighbor_sampler: NeighborSampler = ( + _optional_neighbor_sampler + ) else: current_neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, torch.arange(x.size(0)).unique(), - [-1], batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, shuffle=False + integral_data.edge_index, + torch.arange(x.size(0)).unique(), + [-1], + batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, + shuffle=False, ) __temp: _typing.Any = torch.arange(x.size(0)) self.__neighbor_sampler_store[__temp] = current_neighbor_sampler @@ -1574,47 +1673,58 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): _sampled_graph: autogl.data.Data = autogl.data.Data( x=x[_target_dependant_sampled_data.all_sampled_nodes_indexes], edge_index=( - _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_index_for_sampled_graph - ) + _target_dependant_sampled_data.sampled_edges_for_layers[ + 0 + ].edge_index_for_sampled_graph + ), ) _sampled_graph: autogl.data.Data = _sampled_graph.to(self.device) with torch.no_grad(): __sampled_graph_inferences: torch.Tensor = ( - sequential_gnn_model.sequential_encoding_layers[_current_layer_index](_sampled_graph) + sequential_gnn_model.sequential_encoding_layers[ + _current_layer_index + ](_sampled_graph) ) _sampled_target_nodes_inferences: torch.Tensor = __sampled_graph_inferences[ _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph ].cpu() if __next_x is None: - __next_x: torch.Tensor = torch.zeros(x.size(0), __sampled_graph_inferences.size(1)) - __next_x[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph] = ( - _sampled_target_nodes_inferences - ) + __next_x: torch.Tensor = torch.zeros( + x.size(0), __sampled_graph_inferences.size(1) + ) + __next_x[ + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph + ] = _sampled_target_nodes_inferences x: torch.Tensor = __next_x # The following 
procedures are for the top layer if mask_or_target_nodes_indexes.dtype == torch.bool: - target_nodes_indexes: _typing.Any = ( - torch.where(mask_or_target_nodes_indexes)[0] - ) + target_nodes_indexes: _typing.Any = torch.where( + mask_or_target_nodes_indexes + )[0] else: target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() - _optional_neighbor_sampler: _typing.Optional[NeighborSampler] = ( - self.__neighbor_sampler_store[target_nodes_indexes] - ) + _optional_neighbor_sampler: _typing.Optional[ + NeighborSampler + ] = self.__neighbor_sampler_store[target_nodes_indexes] if ( - _optional_neighbor_sampler is not None and - type(_optional_neighbor_sampler) == NeighborSampler + _optional_neighbor_sampler is not None + and type(_optional_neighbor_sampler) == NeighborSampler ): current_neighbor_sampler: NeighborSampler = _optional_neighbor_sampler else: current_neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, target_nodes_indexes, - [-1], batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, shuffle=False + integral_data.edge_index, + target_nodes_indexes, + [-1], + batch_size=self.__predicting_batch_size, + num_workers=self.__predicting_sampler_num_workers, + shuffle=False, ) - self.__neighbor_sampler_store[target_nodes_indexes] = current_neighbor_sampler + self.__neighbor_sampler_store[ + target_nodes_indexes + ] = current_neighbor_sampler prediction_batch_cumulative_builder = ( EvaluatorUtility.PredictionBatchCumulativeBuilder() @@ -1622,29 +1732,40 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer): for _target_dependant_sampled_data in current_neighbor_sampler: _sampled_graph: autogl.data.Data = autogl.data.Data( x[_target_dependant_sampled_data.all_sampled_nodes_indexes], - _target_dependant_sampled_data.sampled_edges_for_layers[0].edge_index_for_sampled_graph + _target_dependant_sampled_data.sampled_edges_for_layers[ + 0 + ].edge_index_for_sampled_graph, ) _sampled_graph: autogl.data.Data = _sampled_graph.to(self.device) with torch.no_grad(): prediction_batch_cumulative_builder.add_batch( _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(), sequential_gnn_model.cls_decode( - sequential_gnn_model.sequential_encoding_layers[-1](_sampled_graph) - )[_target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph].cpu().numpy() + sequential_gnn_model.sequential_encoding_layers[-1]( + _sampled_graph + ) + )[ + _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph + ] + .cpu() + .numpy(), ) return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1]) else: if mask_or_target_nodes_indexes.dtype == torch.bool: - target_nodes_indexes: _typing.Any = ( - torch.where(mask_or_target_nodes_indexes)[0] - ) + target_nodes_indexes: _typing.Any = torch.where( + mask_or_target_nodes_indexes + )[0] else: target_nodes_indexes: _typing.Any = mask_or_target_nodes_indexes.long() neighbor_sampler: NeighborSampler = NeighborSampler( - integral_data.edge_index, target_nodes_indexes, [-1 for _ in self.__sampling_sizes], + integral_data.edge_index, + target_nodes_indexes, + [-1 for _ in self.__sampling_sizes], batch_size=self.__predicting_batch_size, - num_workers=self.__predicting_sampler_num_workers, shuffle=False + num_workers=self.__predicting_sampler_num_workers, + shuffle=False, ) prediction_batch_cumulative_builder = ( @@ -1667,13 +1788,14 @@ class 
NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer):
                     _target_dependant_sampled_data.target_nodes_indexes.indexes_in_integral_graph.cpu().numpy(),
                     self.model.model(_sampled_graph)[
                         _target_dependant_sampled_data.target_nodes_indexes.indexes_in_sampled_graph
-                    ].cpu().numpy()
+                    ]
+                    .cpu()
+                    .numpy(),
                 )
         return torch.from_numpy(prediction_batch_cumulative_builder.compose()[1])

     def predict_proba(
-            self, dataset, mask: _typing.Optional[str] = None,
-            in_log_format: bool = False
+        self, dataset, mask: _typing.Optional[str] = None, in_log_format: bool = False
     ):
         """
         The function of predicting the probability on the given dataset.
@@ -1728,12 +1850,12 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer):
         return self.predict_proba(dataset, mask, in_log_format=True).max(1)[1]

     def evaluate(
-            self,
-            dataset,
-            mask: _typing.Optional[str] = None,
-            feval: _typing.Union[
-                None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
-            ] = None,
+        self,
+        dataset,
+        mask: _typing.Optional[str] = None,
+        feval: _typing.Union[
+            None, _typing.Sequence[str], _typing.Sequence[_typing.Type[Evaluation]]
+        ] = None,
     ) -> _typing.Sequence[float]:
         """
         The function of evaluating the model on the given dataset with the specified metrics.
@@ -1800,6 +1922,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer):
         None
         """
         import gc
+
         gc.collect()
         data = dataset[0]
         self.__train_only(data)
@@ -1817,10 +1940,10 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer):
         return self._valid_result_prob

     def get_valid_score(
-            self, return_major: bool = True
+        self, return_major: bool = True
     ) -> _typing.Union[
         _typing.Tuple[float, bool],
-        _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]]
+        _typing.Tuple[_typing.Sequence[float], _typing.Sequence[bool]],
     ]:
         """
         The function of getting the valid score.
@@ -1846,7 +1969,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer):

     @hyper_parameter_space.setter
     def hyper_parameter_space(
-            self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]]
+        self, hp_space: _typing.Sequence[_typing.Dict[str, _typing.Any]]
     ) -> None:
         if not isinstance(hp_space, _typing.Sequence):
             raise TypeError
@@ -1854,6 +1977,7 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer):

     def __repr__(self) -> str:
         import yaml
+
         return yaml.dump(
             {
                 "trainer_name": self.__class__.__name__,
@@ -1862,14 +1986,14 @@ class NodeClassificationNeighborSamplingTrainer(BaseNodeClassificationTrainer):
                 "max_epoch": self._max_epoch,
                 "early_stopping_round": self._early_stopping.patience,
                 "sampling_sizes": self.__sampling_sizes,
-                "model": repr(self.model)
+                "model": repr(self.model),
             }
         )

     def duplicate_from_hyper_parameter(
-            self,
-            hp: _typing.Dict[str, _typing.Any],
-            model: _typing.Optional[BaseModel] = None,
+        self,
+        hp: _typing.Dict[str, _typing.Any],
+        model: _typing.Optional[BaseModel] = None,
     ) -> "NodeClassificationNeighborSamplingTrainer":
         """
         The function of duplicating a new instance from the given hyper-parameter.
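The `duplicate_from_hyper_parameter` hook reformatted above is the entry point an HPO search uses to spawn a fresh trainer per trial. A minimal sketch of that interaction follows; the `base_trainer` and `dataset` names, the loop itself, and the hyper-parameter keys/values are illustrative assumptions, not code from this patch series:

```python
# Hedged sketch: driving duplicate_from_hyper_parameter from a toy search loop.
# The hp dictionaries below are assumptions for illustration only.
candidate_hps = [
    {"lr": 1e-3, "max_epoch": 200, "early_stopping_round": 30, "weight_decay": 1e-4},
    {"lr": 5e-4, "max_epoch": 300, "early_stopping_round": 50, "weight_decay": 5e-4},
]
best_trainer, best_score = None, None
for hp in candidate_hps:
    trial = base_trainer.duplicate_from_hyper_parameter(hp)  # fresh, untrained copy
    trial.train(dataset)
    # With return_major=True the trainer yields (score, higher_better).
    score, higher_better = trial.get_valid_score(return_major=True)
    if best_score is None or (score > best_score) == higher_better:
        best_trainer, best_score = trial, score
```

Because each trial is duplicated rather than retrained in place, the original trainer's weights and early-stopping state stay untouched between trials.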
diff --git a/autogl/module/train/sampling/sampler/graphsaint_sampler.py b/autogl/module/train/sampling/sampler/graphsaint_sampler.py
index 8948990..00dbded 100644
--- a/autogl/module/train/sampling/sampler/graphsaint_sampler.py
+++ b/autogl/module/train/sampling/sampler/graphsaint_sampler.py
@@ -19,10 +19,15 @@ class GraphSAINTSamplerFactory:
     With the aim of abstracting a unified sampling module for representative mainstream varieties
     of Node-wise Sampling, Layer-wise Sampling, and Subgraph-wise Sampling.
     """
+
     @classmethod
     def create_node_sampler(
-            cls, data, num_graphs_per_epoch: int, node_budget: int,
-            sample_coverage_factor: int = 50, **kwargs
+        cls,
+        data,
+        num_graphs_per_epoch: int,
+        node_budget: int,
+        sample_coverage_factor: int = 50,
+        **kwargs
     ) -> torch_geometric.data.GraphSAINTNodeSampler:
         """
         A simple static method for instantiating :class:`torch_geometric.data.GraphSAINTNodeSampler`
@@ -48,14 +53,22 @@ class GraphSAINTSamplerFactory:
         Instance of :class:`torch_geometric.data.GraphSAINTNodeSampler`.
         """
         return torch_geometric.data.GraphSAINTNodeSampler(
-            data, node_budget,
-            num_graphs_per_epoch, sample_coverage_factor, log=False, **kwargs
+            data,
+            node_budget,
+            num_graphs_per_epoch,
+            sample_coverage_factor,
+            log=False,
+            **kwargs
         )

     @classmethod
     def create_edge_sampler(
-            cls, data, num_graphs_per_epoch: int, edge_budget: int,
-            sample_coverage_factor: int = 50, **kwargs
+        cls,
+        data,
+        num_graphs_per_epoch: int,
+        edge_budget: int,
+        sample_coverage_factor: int = 50,
+        **kwargs
     ) -> torch_geometric.data.GraphSAINTEdgeSampler:
         """
         A simple static method for instantiating :class:`torch_geometric.data.GraphSAINTEdgeSampler`
@@ -81,15 +94,23 @@ class GraphSAINTSamplerFactory:
         Instance of :class:`torch_geometric.data.GraphSAINTEdgeSampler`.
         """
         return torch_geometric.data.GraphSAINTEdgeSampler(
-            data, edge_budget,
-            num_graphs_per_epoch, sample_coverage_factor, log=False, **kwargs
+            data,
+            edge_budget,
+            num_graphs_per_epoch,
+            sample_coverage_factor,
+            log=False,
+            **kwargs
         )

     @classmethod
     def create_random_walk_sampler(
-            cls, data, num_graphs_per_epoch: int,
-            num_walks: int, walk_length: int,
-            sample_coverage_factor: int = 50, **kwargs
+        cls,
+        data,
+        num_graphs_per_epoch: int,
+        num_walks: int,
+        walk_length: int,
+        sample_coverage_factor: int = 50,
+        **kwargs
     ) -> torch_geometric.data.GraphSAINTRandomWalkSampler:
         """
         A simple static method for instantiating :class:`torch_geometric.data.GraphSAINTRandomWalkSampler`
@@ -117,6 +138,11 @@ class GraphSAINTSamplerFactory:
         Instance of :class:`torch_geometric.data.GraphSAINTRandomWalkSampler`.
         """
         return torch_geometric.data.GraphSAINTRandomWalkSampler(
-            data, num_walks, walk_length,
-            num_graphs_per_epoch, sample_coverage_factor, log=False, **kwargs
+            data,
+            num_walks,
+            walk_length,
+            num_graphs_per_epoch,
+            sample_coverage_factor,
+            log=False,
+            **kwargs
         )
diff --git a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py
index ffb7df5..bc66bdf 100644
--- a/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py
+++ b/autogl/module/train/sampling/sampler/layer_dependent_importance_sampler.py
@@ -7,22 +7,29 @@
 import torch_geometric
 from .
import target_dependant_sampler -class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTargetDependantSampler): +class _LayerDependentImportanceSampler( + target_dependant_sampler.BasicLayerWiseTargetDependantSampler +): """ Obsolete implementation, unused """ + class _Utility: @classmethod - def compute_edge_weights(cls, __all_edge_index_with_self_loops: torch.Tensor) -> torch.Tensor: - __out_degree: torch.Tensor = \ - torch_geometric.utils.degree(__all_edge_index_with_self_loops[0]) - __in_degree: torch.Tensor = \ - torch_geometric.utils.degree(__all_edge_index_with_self_loops[1]) + def compute_edge_weights( + cls, __all_edge_index_with_self_loops: torch.Tensor + ) -> torch.Tensor: + __out_degree: torch.Tensor = torch_geometric.utils.degree( + __all_edge_index_with_self_loops[0] + ) + __in_degree: torch.Tensor = torch_geometric.utils.degree( + __all_edge_index_with_self_loops[1] + ) temp_tensor: torch.Tensor = torch.stack( [ __out_degree[__all_edge_index_with_self_loops[0]], - __in_degree[__all_edge_index_with_self_loops[1]] + __in_degree[__all_edge_index_with_self_loops[1]], ] ) temp_tensor: torch.Tensor = torch.pow(temp_tensor, -0.5) @@ -31,9 +38,10 @@ class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTa @classmethod def get_candidate_source_nodes_probabilities( - cls, all_candidate_edge_indexes: torch.LongTensor, - all_edge_index_with_self_loops: torch.Tensor, - all_edge_weights: torch.Tensor + cls, + all_candidate_edge_indexes: torch.LongTensor, + all_edge_index_with_self_loops: torch.Tensor, + all_edge_weights: torch.Tensor, ) -> _typing.Tuple[torch.LongTensor, torch.Tensor]: """ :param all_candidate_edge_indexes: @@ -41,8 +49,12 @@ class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTa :param all_edge_weights: :return: (all_source_nodes_indexes, all_source_nodes_probabilities) """ - all_candidate_edge_indexes: torch.LongTensor = all_candidate_edge_indexes.unique() - _all_candidate_edges_weights: torch.Tensor = all_edge_weights[all_candidate_edge_indexes] + all_candidate_edge_indexes: torch.LongTensor = ( + all_candidate_edge_indexes.unique() + ) + _all_candidate_edges_weights: torch.Tensor = all_edge_weights[ + all_candidate_edge_indexes + ] all_candidate_source_nodes_indexes: torch.LongTensor = ( all_edge_index_with_self_loops[0, all_candidate_edge_indexes].unique() ) @@ -50,23 +62,31 @@ class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTa [ torch.sum( _all_candidate_edges_weights[ - all_edge_index_with_self_loops[0, all_candidate_edge_indexes] == _current_source_node_index + all_edge_index_with_self_loops[ + 0, all_candidate_edge_indexes ] - ).item() / torch.sum(_all_candidate_edges_weights).item() + == _current_source_node_index + ] + ).item() + / torch.sum(_all_candidate_edges_weights).item() for _current_source_node_index in all_candidate_source_nodes_indexes.tolist() ] ) assert ( - all_candidate_source_nodes_indexes.size() == - all_candidate_source_nodes_probabilities.size() + all_candidate_source_nodes_indexes.size() + == all_candidate_source_nodes_probabilities.size() + ) + return ( + all_candidate_source_nodes_indexes, + all_candidate_source_nodes_probabilities, ) - return all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities @classmethod def filter_selected_edges_by_source_nodes_and_target_nodes( - cls, all_edges_with_self_loops: torch.Tensor, - selected_source_node_indexes: torch.LongTensor, - selected_target_node_indexes: torch.LongTensor + 
cls, + all_edges_with_self_loops: torch.Tensor, + selected_source_node_indexes: torch.LongTensor, + selected_target_node_indexes: torch.LongTensor, ) -> torch.Tensor: """ :param all_edges_with_self_loops: all edges with self loops @@ -78,41 +98,65 @@ class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTa all_edges_with_self_loops.size(1), dtype=torch.bool ) selected_edges_mask_for_source_nodes[ - torch.cat([ - torch.where(all_edges_with_self_loops[0] == __current_selected_source_node_index)[0] - for __current_selected_source_node_index in selected_source_node_indexes.unique().tolist() - ]).unique() + torch.cat( + [ + torch.where( + all_edges_with_self_loops[0] + == __current_selected_source_node_index + )[0] + for __current_selected_source_node_index in selected_source_node_indexes.unique().tolist() + ] + ).unique() ] = True selected_edges_mask_for_target_nodes: torch.Tensor = torch.zeros( all_edges_with_self_loops.size(1), dtype=torch.bool ) selected_edges_mask_for_target_nodes[ - torch.cat([ - torch.where(all_edges_with_self_loops[1] == __current_selected_target_node_index)[0] - for __current_selected_target_node_index in selected_target_node_indexes.unique().tolist() - ]) + torch.cat( + [ + torch.where( + all_edges_with_self_loops[1] + == __current_selected_target_node_index + )[0] + for __current_selected_target_node_index in selected_target_node_indexes.unique().tolist() + ] + ) ] = True return torch.where( - selected_edges_mask_for_source_nodes & selected_edges_mask_for_target_nodes + selected_edges_mask_for_source_nodes + & selected_edges_mask_for_target_nodes )[0] def __init__( - self, edge_index: torch.LongTensor, - target_nodes_indexes: torch.LongTensor, - layer_wise_arguments: _typing.Sequence, - batch_size: _typing.Optional[int] = 1, num_workers: int = 0, - shuffle: bool = True, **kwargs + self, + edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: _typing.Optional[int] = 1, + num_workers: int = 0, + shuffle: bool = True, + **kwargs ): super().__init__( torch_geometric.utils.add_remaining_self_loops(edge_index)[0], - target_nodes_indexes, layer_wise_arguments, batch_size, num_workers, shuffle, **kwargs + target_nodes_indexes, + layer_wise_arguments, + batch_size, + num_workers, + shuffle, + **kwargs + ) + self.__all_edge_weights: torch.Tensor = self._Utility.compute_edge_weights( + self._edge_index ) - self.__all_edge_weights: torch.Tensor = self._Utility.compute_edge_weights(self._edge_index) def _sample_edges_for_layer( - self, __current_layer_target_nodes_indexes: torch.LongTensor, - __top_layer_target_nodes_indexes: torch.LongTensor, - layer_argument: _typing.Any, *args, **kwargs + self, + __current_layer_target_nodes_indexes: torch.LongTensor, + __top_layer_target_nodes_indexes: torch.LongTensor, + layer_argument: _typing.Any, + *args, + **kwargs ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: """ Sample edges for one layer @@ -136,71 +180,90 @@ class _LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTa for current_target_node_index in __current_layer_target_nodes_indexes.unique().tolist() ] ).unique() - __all_candidate_source_nodes_indexes, all_candidate_source_nodes_probabilities = \ - self._Utility.get_candidate_source_nodes_probabilities( - all_candidate_edge_indexes, self._edge_index, - self.__all_edge_weights * self.__all_edge_weights - ) - assert __all_candidate_source_nodes_indexes.size() == 
all_candidate_source_nodes_probabilities.size() + ( + __all_candidate_source_nodes_indexes, + all_candidate_source_nodes_probabilities, + ) = self._Utility.get_candidate_source_nodes_probabilities( + all_candidate_edge_indexes, + self._edge_index, + self.__all_edge_weights * self.__all_edge_weights, + ) + assert ( + __all_candidate_source_nodes_indexes.size() + == all_candidate_source_nodes_probabilities.size() + ) """ Sampling """ if sampled_node_size_budget < __all_candidate_source_nodes_indexes.numel(): - selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes[ - torch.from_numpy( - np.unique(np.random.choice( - np.arange(__all_candidate_source_nodes_indexes.numel()), sampled_node_size_budget, - p=all_candidate_source_nodes_probabilities.numpy(), replace=False - )) - ).unique() - ].unique() + selected_source_node_indexes: torch.LongTensor = ( + __all_candidate_source_nodes_indexes[ + torch.from_numpy( + np.unique( + np.random.choice( + np.arange(__all_candidate_source_nodes_indexes.numel()), + sampled_node_size_budget, + p=all_candidate_source_nodes_probabilities.numpy(), + replace=False, + ) + ) + ).unique() + ].unique() + ) else: - selected_source_node_indexes: torch.LongTensor = __all_candidate_source_nodes_indexes + selected_source_node_indexes: torch.LongTensor = ( + __all_candidate_source_nodes_indexes + ) selected_source_node_indexes: torch.LongTensor = torch.cat( [selected_source_node_indexes, __top_layer_target_nodes_indexes] ).unique() __selected_edges_indexes: torch.LongTensor = ( self._Utility.filter_selected_edges_by_source_nodes_and_target_nodes( - self._edge_index, selected_source_node_indexes, __current_layer_target_nodes_indexes + self._edge_index, + selected_source_node_indexes, + __current_layer_target_nodes_indexes, ).unique() ) - non_normalized_selected_edges_weight: torch.Tensor = ( - self.__all_edge_weights[__selected_edges_indexes] / - torch.tensor( - [ - all_candidate_source_nodes_probabilities[ - __all_candidate_source_nodes_indexes == current_source_node_index - ].item() - for current_source_node_index - in self._edge_index[0, __selected_edges_indexes].tolist() - ] - ) + non_normalized_selected_edges_weight: torch.Tensor = self.__all_edge_weights[ + __selected_edges_indexes + ] / torch.tensor( + [ + all_candidate_source_nodes_probabilities[ + __all_candidate_source_nodes_indexes == current_source_node_index + ].item() + for current_source_node_index in self._edge_index[ + 0, __selected_edges_indexes + ].tolist() + ] ) def __normalize_edges_weight_by_target_nodes( - __edge_index: torch.Tensor, __edge_weight: torch.Tensor + __edge_index: torch.Tensor, __edge_weight: torch.Tensor ) -> torch.Tensor: if __edge_index.size(1) != __edge_weight.numel(): raise ValueError for current_target_node_index in __edge_index[1].unique().tolist(): __current_mask_for_edges: torch.BoolTensor = ( - __edge_index[1] == current_target_node_index - ) - __edge_weight[__current_mask_for_edges] = ( - __edge_weight[__current_mask_for_edges] / - torch.sum(__edge_weight[__current_mask_for_edges]) + __edge_index[1] == current_target_node_index ) + __edge_weight[__current_mask_for_edges] = __edge_weight[ + __current_mask_for_edges + ] / torch.sum(__edge_weight[__current_mask_for_edges]) return __edge_weight - normalized_selected_edges_weight: torch.Tensor = __normalize_edges_weight_by_target_nodes( - self._edge_index[:, __selected_edges_indexes], - non_normalized_selected_edges_weight + normalized_selected_edges_weight: torch.Tensor = ( + 
__normalize_edges_weight_by_target_nodes( + self._edge_index[:, __selected_edges_indexes], + non_normalized_selected_edges_weight, + ) ) return __selected_edges_indexes, normalized_selected_edges_weight -class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTargetDependantSampler): +class LayerDependentImportanceSampler( + target_dependant_sampler.BasicLayerWiseTargetDependantSampler +): """ The layer-dependent importance sampler from the `"Layer-Dependent Importance Sampling for Training Deep and Large Graph Convolutional Networks" @@ -236,38 +299,54 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar @classmethod def __compute_edge_weight(cls, edge_index: torch.Tensor) -> torch.Tensor: __num_nodes: int = max(int(edge_index[0].max()), int(edge_index[1].max())) + 1 - _temp_tensor: torch.Tensor = torch.stack([ - torch_geometric.utils.degree(edge_index[0], __num_nodes)[edge_index[0]], - torch_geometric.utils.degree(edge_index[1], __num_nodes)[edge_index[1]] - ]) + _temp_tensor: torch.Tensor = torch.stack( + [ + torch_geometric.utils.degree(edge_index[0], __num_nodes)[edge_index[0]], + torch_geometric.utils.degree(edge_index[1], __num_nodes)[edge_index[1]], + ] + ) _temp_tensor: torch.Tensor = torch.pow(_temp_tensor, -0.5) _temp_tensor[torch.isinf(_temp_tensor)] = 0 return _temp_tensor[0] * _temp_tensor[1] def __init__( - self, edge_index: torch.LongTensor, - target_nodes_indexes: torch.LongTensor, - layer_wise_arguments: _typing.Sequence, - batch_size: _typing.Optional[int] = 1, num_workers: int = 0, - shuffle: bool = True, **kwargs + self, + edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: _typing.Optional[int] = 1, + num_workers: int = 0, + shuffle: bool = True, + **kwargs ): super(LayerDependentImportanceSampler, self).__init__( torch_geometric.utils.add_remaining_self_loops(edge_index)[0], - target_nodes_indexes, layer_wise_arguments, batch_size, num_workers, shuffle, **kwargs + target_nodes_indexes, + layer_wise_arguments, + batch_size, + num_workers, + shuffle, + **kwargs ) self.__edge_weight: torch.Tensor = self.__compute_edge_weight(self._edge_index) - self.__integral_normalized_l_matrix: sp.csr_matrix = sp.csr_matrix(( - self.__edge_weight.numpy(), - (self._edge_index[1].numpy(), self._edge_index[0].numpy()) - )) - self.__integral_edges_indexes_sparse_matrix: sp.csr_matrix = sp.csr_matrix(( - np.arange(self._edge_index.size(1)), - (self._edge_index[1].numpy(), self._edge_index[0].numpy()) - )) + self.__integral_normalized_l_matrix: sp.csr_matrix = sp.csr_matrix( + ( + self.__edge_weight.numpy(), + (self._edge_index[1].numpy(), self._edge_index[0].numpy()), + ) + ) + self.__integral_edges_indexes_sparse_matrix: sp.csr_matrix = sp.csr_matrix( + ( + np.arange(self._edge_index.size(1)), + (self._edge_index[1].numpy(), self._edge_index[0].numpy()), + ) + ) def __sample_edges( - self, __current_layer_target_nodes_indexes: np.ndarray, - __top_layer_target_nodes_indexes: np.ndarray, sampled_source_nodes_budget: int + self, + __current_layer_target_nodes_indexes: np.ndarray, + __top_layer_target_nodes_indexes: np.ndarray, + sampled_source_nodes_budget: int, ) -> _typing.Tuple[np.ndarray, np.ndarray, np.ndarray]: """ @@ -280,33 +359,48 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar corresponding probabilities for sampled_source_nodes_indexes ) """ - partial_l_matrix: sp.csr_matrix = ( - 
self.__integral_normalized_l_matrix[__current_layer_target_nodes_indexes, :] - ) - p: np.ndarray = np.array(np.sum(partial_l_matrix.multiply(partial_l_matrix), axis=0))[0] + partial_l_matrix: sp.csr_matrix = self.__integral_normalized_l_matrix[ + __current_layer_target_nodes_indexes, : + ] + p: np.ndarray = np.array( + np.sum(partial_l_matrix.multiply(partial_l_matrix), axis=0) + )[0] p: np.ndarray = p / np.sum(p) - _number_of_nodes_to_sample = np.min([np.sum(p > 0), sampled_source_nodes_budget]) - _selected_source_nodes: np.ndarray = np.unique(np.concatenate([ - np.random.choice( - p.size, _number_of_nodes_to_sample, replace=False, p=p - ), - __top_layer_target_nodes_indexes - ])) + _number_of_nodes_to_sample = np.min( + [np.sum(p > 0), sampled_source_nodes_budget] + ) + _selected_source_nodes: np.ndarray = np.unique( + np.concatenate( + [ + np.random.choice( + p.size, _number_of_nodes_to_sample, replace=False, p=p + ), + __top_layer_target_nodes_indexes, + ] + ) + ) _sampled_edges_indexes_sparse_matrix: sp.csr_matrix = ( - self.__integral_edges_indexes_sparse_matrix[__current_layer_target_nodes_indexes, :] + self.__integral_edges_indexes_sparse_matrix[ + __current_layer_target_nodes_indexes, : + ] ) _sampled_edges_indexes_sparse_matrix: sp.csc_matrix = ( _sampled_edges_indexes_sparse_matrix.tocsc()[:, _selected_source_nodes] ) - _sampled_edges_indexes: np.ndarray = np.unique(_sampled_edges_indexes_sparse_matrix.data) + _sampled_edges_indexes: np.ndarray = np.unique( + _sampled_edges_indexes_sparse_matrix.data + ) return _sampled_edges_indexes, _selected_source_nodes, p[_selected_source_nodes] def _sample_edges_for_layer( - self, __current_layer_target_nodes_indexes: torch.LongTensor, - __top_layer_target_nodes_indexes: torch.LongTensor, - layer_argument: _typing.Any, *args, **kwargs + self, + __current_layer_target_nodes_indexes: torch.LongTensor, + __top_layer_target_nodes_indexes: torch.LongTensor, + layer_argument: _typing.Any, + *args, + **kwargs ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: """ Sample edges for one specific layer, expected to be implemented in subclass. 
@@ -331,34 +425,55 @@ class LayerDependentImportanceSampler(target_dependant_sampler.BasicLayerWiseTar edge_weight: the optional `edge_weight` for aggregation """ - __wrapped_result: _typing.Tuple[np.ndarray, np.ndarray, np.ndarray] = self.__sample_edges( + __wrapped_result: _typing.Tuple[ + np.ndarray, np.ndarray, np.ndarray + ] = self.__sample_edges( __current_layer_target_nodes_indexes.numpy(), __top_layer_target_nodes_indexes.numpy(), - layer_argument + layer_argument, ) _sampled_edges_indexes: torch.Tensor = torch.from_numpy(__wrapped_result[0]) _selected_source_nodes: torch.Tensor = torch.from_numpy(__wrapped_result[1]) - _selected_source_nodes_probabilities: torch.Tensor = torch.from_numpy(__wrapped_result[2]) + _selected_source_nodes_probabilities: torch.Tensor = torch.from_numpy( + __wrapped_result[2] + ) """ Multiply corresponding discount weights """ __selected_source_node_probability_mapping: _typing.Dict[int, float] = dict( - zip(_selected_source_nodes.tolist(), _selected_source_nodes_probabilities.tolist()) + zip( + _selected_source_nodes.tolist(), + _selected_source_nodes_probabilities.tolist(), + ) + ) + _selected_edges_weight: torch.Tensor = self.__edge_weight[ + _sampled_edges_indexes + ] + _selected_edges_weight: torch.Tensor = _selected_edges_weight / torch.tensor( + [ + __selected_source_node_probability_mapping.get( + _current_source_node_index + ) + for _current_source_node_index in self._edge_index[ + 0, _sampled_edges_indexes + ].tolist() + ] ) - _selected_edges_weight: torch.Tensor = self.__edge_weight[_sampled_edges_indexes] - _selected_edges_weight: torch.Tensor = _selected_edges_weight / torch.tensor([ - __selected_source_node_probability_mapping.get(_current_source_node_index) - for _current_source_node_index in self._edge_index[0, _sampled_edges_indexes].tolist() - ]) """ Normalize edge weight for selected edges by target nodes """ - for _current_target_node_index in self._edge_index[1, _sampled_edges_indexes].unique().tolist(): + for _current_target_node_index in ( + self._edge_index[1, _sampled_edges_indexes].unique().tolist() + ): _current_mask_for_selected_edges: torch.BoolTensor = ( - self._edge_index[1, _sampled_edges_indexes] == _current_target_node_index + self._edge_index[1, _sampled_edges_indexes] + == _current_target_node_index ) - _selected_edges_weight[_current_mask_for_selected_edges] = ( - _selected_edges_weight[_current_mask_for_selected_edges] / - torch.sum(_selected_edges_weight[_current_mask_for_selected_edges]) + _selected_edges_weight[ + _current_mask_for_selected_edges + ] = _selected_edges_weight[_current_mask_for_selected_edges] / torch.sum( + _selected_edges_weight[_current_mask_for_selected_edges] ) - _sampled_edges_indexes: _typing.Union[torch.LongTensor, torch.Tensor] = _sampled_edges_indexes + _sampled_edges_indexes: _typing.Union[ + torch.LongTensor, torch.Tensor + ] = _sampled_edges_indexes return _sampled_edges_indexes, _selected_edges_weight diff --git a/autogl/module/train/sampling/sampler/neighbor_sampler.py b/autogl/module/train/sampling/sampler/neighbor_sampler.py index 9d98b88..a9a7337 100644 --- a/autogl/module/train/sampling/sampler/neighbor_sampler.py +++ b/autogl/module/train/sampling/sampler/neighbor_sampler.py @@ -33,6 +33,7 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): shuffle: whether to shuffle target nodes for mini-batches. 
""" + class _SequenceDataset(torch.utils.data.Dataset): def __init__(self, sequence): self.__sequence = sequence @@ -60,11 +61,14 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): return temp_tensor[0] * temp_tensor[1] def __init__( - self, edge_index: torch.LongTensor, - target_nodes_indexes: torch.LongTensor, - sampling_sizes: _typing.Sequence[int], - batch_size: int = 1, num_workers: int = 0, - shuffle: bool = True, **kwargs + self, + edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + sampling_sizes: _typing.Sequence[int], + batch_size: int = 1, + num_workers: int = 0, + shuffle: bool = True, + **kwargs ): def is_deterministic(__cached: bool = bool(kwargs.get("cached", True))) -> bool: if not __cached: @@ -72,17 +76,25 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): _deterministic: bool = True for _sampling_size in sampling_sizes: if type(_sampling_size) != int: - raise TypeError("The sampling_sizes argument must be a sequence of integer") + raise TypeError( + "The sampling_sizes argument must be a sequence of integer" + ) if _sampling_size >= 0: _deterministic = False break return _deterministic + self.__edge_weight: torch.Tensor = self.__compute_edge_weight(edge_index) self.__pyg_neighbor_sampler: torch_geometric.data.NeighborSampler = ( torch_geometric.data.NeighborSampler( - edge_index, list(sampling_sizes[::-1]), target_nodes_indexes, - transform=self._transform, batch_size=batch_size, - num_workers=num_workers, shuffle=shuffle, **kwargs + edge_index, + list(sampling_sizes[::-1]), + target_nodes_indexes, + transform=self._transform, + batch_size=batch_size, + num_workers=num_workers, + shuffle=shuffle, + **kwargs ) ) @@ -97,54 +109,78 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): ] = None def _transform( - self, batch_size: int, n_id: torch.LongTensor, + self, + batch_size: int, + n_id: torch.LongTensor, adj_or_adj_list: _typing.Union[ _typing.Sequence[ - _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]] + _typing.Tuple[ + torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int] + ] ], - _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]] - ] + _typing.Tuple[torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int]], + ], ) -> TargetDependantSampledData: if ( - isinstance(adj_or_adj_list[0], _typing.Tuple) and - isinstance(adj_or_adj_list, _typing.Sequence) and - not isinstance(adj_or_adj_list, _typing.Tuple) + isinstance(adj_or_adj_list[0], _typing.Tuple) + and isinstance(adj_or_adj_list, _typing.Sequence) + and not isinstance(adj_or_adj_list, _typing.Tuple) ): return TargetDependantSampledData( [ - (current_layer[0], current_layer[1], self.__edge_weight[current_layer[1]]) + ( + current_layer[0], + current_layer[1], + self.__edge_weight[current_layer[1]], + ) for current_layer in adj_or_adj_list ], - (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]), n_id + (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]), + n_id, ) - elif isinstance(adj_or_adj_list, _typing.Tuple) and type(adj_or_adj_list[0]) == torch.Tensor: + elif ( + isinstance(adj_or_adj_list, _typing.Tuple) + and type(adj_or_adj_list[0]) == torch.Tensor + ): adj_or_adj_list: _typing.Tuple[ torch.LongTensor, torch.LongTensor, _typing.Tuple[int, int] ] = adj_or_adj_list return TargetDependantSampledData( - [(adj_or_adj_list[0], adj_or_adj_list[1], self.__edge_weight[adj_or_adj_list[1]])], - (torch.arange(batch_size, dtype=torch.long).long(), 
n_id[:batch_size]), n_id + [ + ( + adj_or_adj_list[0], + adj_or_adj_list[1], + self.__edge_weight[adj_or_adj_list[1]], + ) + ], + (torch.arange(batch_size, dtype=torch.long).long(), n_id[:batch_size]), + n_id, ) def __iter__(self): - if ( - self.__cached_sampled_data_list is not None and - isinstance(self.__cached_sampled_data_list, _typing.Sequence) + if self.__cached_sampled_data_list is not None and isinstance( + self.__cached_sampled_data_list, _typing.Sequence ): - return iter(torch.utils.data.DataLoader( - self._SequenceDataset(self.__cached_sampled_data_list), - collate_fn=lambda x: x[0] - )) + return iter( + torch.utils.data.DataLoader( + self._SequenceDataset(self.__cached_sampled_data_list), + collate_fn=lambda x: x[0], + ) + ) else: return iter(self.__pyg_neighbor_sampler) @classmethod def create_basic_sampler( - cls, edge_index: torch.LongTensor, - target_nodes_indexes: torch.LongTensor, - layer_wise_arguments: _typing.Sequence, - batch_size: int = 1, num_workers: int = 1, - shuffle: bool = True, *args, **kwargs + cls, + edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: int = 1, + num_workers: int = 1, + shuffle: bool = True, + *args, + **kwargs ) -> TargetDependantSampler: """ A static factory method to create instance of :class:`NeighborSampler` @@ -172,6 +208,11 @@ class NeighborSampler(TargetDependantSampler, _typing.Iterable): whether to shuffle target nodes for mini-batches. """ return cls( - edge_index, target_nodes_indexes, layer_wise_arguments, - batch_size, num_workers, shuffle, **kwargs + edge_index, + target_nodes_indexes, + layer_wise_arguments, + batch_size, + num_workers, + shuffle, + **kwargs ) diff --git a/autogl/module/train/sampling/sampler/target_dependant_sampler.py b/autogl/module/train/sampling/sampler/target_dependant_sampler.py index f6b3547..679d544 100644 --- a/autogl/module/train/sampling/sampler/target_dependant_sampler.py +++ b/autogl/module/train/sampling/sampler/target_dependant_sampler.py @@ -38,18 +38,18 @@ class TargetDependantSampledData: The stored sequence of tuple `( edge_index_for_sampled_graph, edge_id_in_integral_graph, (optional)edge_weight )`. 
""" + class _LayerSampledEdgeData: def __init__( - self, edge_index_for_sampled_graph: torch.Tensor, - edge_id_in_integral_graph: torch.Tensor, - edge_weight: _typing.Optional[torch.Tensor] + self, + edge_index_for_sampled_graph: torch.Tensor, + edge_id_in_integral_graph: torch.Tensor, + edge_weight: _typing.Optional[torch.Tensor], ): self.__edge_index_for_sampled_graph: torch.Tensor = ( edge_index_for_sampled_graph ) - self.__edge_id_in_integral_graph: torch.Tensor = ( - edge_id_in_integral_graph - ) + self.__edge_id_in_integral_graph: torch.Tensor = edge_id_in_integral_graph self.__edge_weight: _typing.Optional[torch.Tensor] = edge_weight @property @@ -61,9 +61,7 @@ class TargetDependantSampledData: @property def edge_id_in_integral_graph(self) -> torch.LongTensor: - edge_id_in_integral_graph: _typing.Any = ( - self.__edge_id_in_integral_graph - ) + edge_id_in_integral_graph: _typing.Any = self.__edge_id_in_integral_graph return edge_id_in_integral_graph @property @@ -82,9 +80,9 @@ class TargetDependantSampledData: return indexes_in_integral_graph def __init__( - self, - indexes_in_sampled_graph: torch.Tensor, - indexes_in_integral_graph: torch.Tensor, + self, + indexes_in_sampled_graph: torch.Tensor, + indexes_in_integral_graph: torch.Tensor, ): self.__indexes_in_sampled_graph: torch.Tensor = indexes_in_sampled_graph self.__indexes_in_integral_graph: torch.Tensor = indexes_in_integral_graph @@ -105,12 +103,12 @@ class TargetDependantSampledData: return self.__sampled_edges_for_layers def __init__( - self, - sampled_edges_for_layers: _typing.Sequence[ - _typing.Tuple[torch.Tensor, torch.Tensor, _typing.Optional[torch.Tensor]] - ], - target_nodes_indexes: _typing.Tuple[torch.Tensor, torch.Tensor], - all_sampled_nodes_indexes: torch.Tensor + self, + sampled_edges_for_layers: _typing.Sequence[ + _typing.Tuple[torch.Tensor, torch.Tensor, _typing.Optional[torch.Tensor]] + ], + target_nodes_indexes: _typing.Tuple[torch.Tensor, torch.Tensor], + all_sampled_nodes_indexes: torch.Tensor, ): self.__sampled_edges_for_layers: _typing.Sequence[ TargetDependantSampledData._LayerSampledEdgeData @@ -128,13 +126,18 @@ class TargetDependantSampler(torch.utils.data.DataLoader, _typing.Iterable): """ An abstract base class for various target-dependent sampler """ + @classmethod def create_basic_sampler( - cls, edge_index: torch.LongTensor, - target_nodes_indexes: torch.LongTensor, - layer_wise_arguments: _typing.Sequence, - batch_size: int = 1, num_workers: int = 0, - shuffle: bool = True, *args, **kwargs + cls, + edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: int = 1, + num_workers: int = 0, + shuffle: bool = True, + *args, + **kwargs ) -> "TargetDependantSampler": """ :param edge_index: edge index of integral graph @@ -148,7 +151,7 @@ class TargetDependantSampler(torch.utils.data.DataLoader, _typing.Iterable): :return: instance of TargetDependantSampler """ raise NotImplementedError - + def __iter__(self): return super(TargetDependantSampler, self).__iter__() @@ -175,12 +178,16 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): kwargs: remaining keyword arguments """ + def __init__( - self, edge_index: torch.LongTensor, - target_nodes_indexes: torch.LongTensor, - layer_wise_arguments: _typing.Sequence, - batch_size: _typing.Optional[int] = 1, num_workers: int = 0, - shuffle: bool = True, **kwargs + self, + edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: 
_typing.Sequence, + batch_size: _typing.Optional[int] = 1, + num_workers: int = 0, + shuffle: bool = True, + **kwargs ): self._edge_index: torch.LongTensor = edge_index self.__layer_wise_arguments: _typing.Sequence = layer_wise_arguments @@ -188,17 +195,24 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): del kwargs["collate_fn"] super(BasicLayerWiseTargetDependantSampler, self).__init__( target_nodes_indexes.unique().numpy(), - batch_size, shuffle, num_workers=num_workers, - collate_fn=self._collate_fn, **kwargs + batch_size, + shuffle, + num_workers=num_workers, + collate_fn=self._collate_fn, + **kwargs ) @classmethod def create_basic_sampler( - cls, edge_index: torch.LongTensor, - target_nodes_indexes: torch.LongTensor, - layer_wise_arguments: _typing.Sequence, - batch_size: int = 1, num_workers: int = 0, - shuffle: bool = True, *args, **kwargs + cls, + edge_index: torch.LongTensor, + target_nodes_indexes: torch.LongTensor, + layer_wise_arguments: _typing.Sequence, + batch_size: int = 1, + num_workers: int = 0, + shuffle: bool = True, + *args, + **kwargs ) -> TargetDependantSampler: """ :param edge_index: edge index of integral graph @@ -212,14 +226,22 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): :return: instance of TargetDependantSampler """ return BasicLayerWiseTargetDependantSampler( - edge_index, target_nodes_indexes, layer_wise_arguments, - batch_size, num_workers, shuffle, **kwargs + edge_index, + target_nodes_indexes, + layer_wise_arguments, + batch_size, + num_workers, + shuffle, + **kwargs ) def _sample_edges_for_layer( - self, __current_layer_target_nodes_indexes: torch.LongTensor, - __top_layer_target_nodes_indexes: torch.LongTensor, - layer_argument: _typing.Any, *args, **kwargs + self, + __current_layer_target_nodes_indexes: torch.LongTensor, + __top_layer_target_nodes_indexes: torch.LongTensor, + layer_argument: _typing.Any, + *args, + **kwargs ) -> _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]]: """ Sample edges for one specific layer, expected to be implemented in subclass. 
@@ -247,17 +269,21 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): raise NotImplementedError def _collate_fn( - self, top_layer_target_nodes_indexes_list: _typing.List[int] + self, top_layer_target_nodes_indexes_list: _typing.List[int] ) -> TargetDependantSampledData: - return self.__sample_layers(torch.tensor(top_layer_target_nodes_indexes_list).unique()) + return self.__sample_layers( + torch.tensor(top_layer_target_nodes_indexes_list).unique() + ) def __sample_layers( - self, __top_layer_target_nodes_indexes: torch.LongTensor + self, __top_layer_target_nodes_indexes: torch.LongTensor ) -> TargetDependantSampledData: sampled_edges_for_layers: _typing.List[ _typing.Tuple[torch.LongTensor, _typing.Optional[torch.Tensor]] ] = list() - __current_layer_target_nodes_indexes: torch.LongTensor = __top_layer_target_nodes_indexes + __current_layer_target_nodes_indexes: torch.LongTensor = ( + __top_layer_target_nodes_indexes + ) " Reverse self.__layer_wise_arguments from bottom-up to top-down " for layer_argument in self.__layer_wise_arguments[::-1]: current_layer_result: _typing.Tuple[ @@ -265,11 +291,11 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): ] = self._sample_edges_for_layer( __current_layer_target_nodes_indexes, __top_layer_target_nodes_indexes, - layer_argument - ) - __source_nodes_indexes_for_current_layer: torch.Tensor = ( - self._edge_index[0, current_layer_result[0]] + layer_argument, ) + __source_nodes_indexes_for_current_layer: torch.Tensor = self._edge_index[ + 0, current_layer_result[0] + ] __current_layer_target_nodes_indexes: torch.LongTensor = ( __source_nodes_indexes_for_current_layer.unique() ) @@ -285,49 +311,66 @@ class BasicLayerWiseTargetDependantSampler(TargetDependantSampler): for current_layer_result in sampled_edges_for_layers ] ).unique() - __sampled_nodes_in_sub_graph_mapping: _typing.Dict[int, int] = dict(list(zip( - sampled_nodes_in_sub_graph.tolist(), - range(sampled_nodes_in_sub_graph.size(0)) - ))) + __sampled_nodes_in_sub_graph_mapping: _typing.Dict[int, int] = dict( + list( + zip( + sampled_nodes_in_sub_graph.tolist(), + range(sampled_nodes_in_sub_graph.size(0)), + ) + ) + ) __sampled_edge_index_for_layers_in_sub_graph: _typing.Sequence[torch.Tensor] = [ - torch.stack([ - torch.tensor( - [ - __sampled_nodes_in_sub_graph_mapping.get(node_index) - for node_index in self._edge_index[0, current_layer_result[0]].tolist() - ] - ), - torch.tensor( - [ - __sampled_nodes_in_sub_graph_mapping.get(node_index) - for node_index in self._edge_index[1, current_layer_result[0]].tolist() - ] - ), - ]) + torch.stack( + [ + torch.tensor( + [ + __sampled_nodes_in_sub_graph_mapping.get(node_index) + for node_index in self._edge_index[ + 0, current_layer_result[0] + ].tolist() + ] + ), + torch.tensor( + [ + __sampled_nodes_in_sub_graph_mapping.get(node_index) + for node_index in self._edge_index[ + 1, current_layer_result[0] + ].tolist() + ] + ), + ] + ) for current_layer_result in sampled_edges_for_layers ] return TargetDependantSampledData( [ - (temp_tuple[0], temp_tuple[1][0], temp_tuple[1][1]) for temp_tuple - in zip(__sampled_edge_index_for_layers_in_sub_graph, sampled_edges_for_layers) + (temp_tuple[0], temp_tuple[1][0], temp_tuple[1][1]) + for temp_tuple in zip( + __sampled_edge_index_for_layers_in_sub_graph, + sampled_edges_for_layers, + ) ], ( torch.tensor( [ - __sampled_nodes_in_sub_graph_mapping.get(current_target_node_index_in_integral_data) + __sampled_nodes_in_sub_graph_mapping.get( + 
current_target_node_index_in_integral_data + ) for current_target_node_index_in_integral_data in __top_layer_target_nodes_indexes.tolist() - if current_target_node_index_in_integral_data in __sampled_nodes_in_sub_graph_mapping + if current_target_node_index_in_integral_data + in __sampled_nodes_in_sub_graph_mapping ] ).long(), # Remap torch.tensor( [ current_target_node_index_in_integral_data for current_target_node_index_in_integral_data in __top_layer_target_nodes_indexes.tolist() - if current_target_node_index_in_integral_data in __sampled_nodes_in_sub_graph_mapping + if current_target_node_index_in_integral_data + in __sampled_nodes_in_sub_graph_mapping ] - ).long() + ).long(), ), - sampled_nodes_in_sub_graph + sampled_nodes_in_sub_graph, ) diff --git a/autogl/solver/base.py b/autogl/solver/base.py index 75083d9..f25f4bc 100644 --- a/autogl/solver/base.py +++ b/autogl/solver/base.py @@ -298,23 +298,65 @@ class BaseSolver: if nas_algorithms is None and nas_estimators is None and nas_spaces is None: self.nas_algorithms = self.nas_estimators = self.nas_spaces = None return - assert None not in [nas_algorithms, nas_estimators, nas_spaces], "The algorithms, estimators and spaces should all be set" - - nas_algorithms = nas_algorithms if isinstance(nas_algorithms, (list, tuple)) else [nas_algorithms] - nas_spaces = nas_spaces if isinstance(nas_spaces, (list, tuple)) else [nas_spaces] - nas_estimators = nas_estimators if isinstance(nas_estimators, (list, tuple)) else [nas_estimators] + assert None not in [ + nas_algorithms, + nas_estimators, + nas_spaces, + ], "The algorithms, estimators and spaces should all be set" + + nas_algorithms = ( + nas_algorithms + if isinstance(nas_algorithms, (list, tuple)) + else [nas_algorithms] + ) + nas_spaces = ( + nas_spaces if isinstance(nas_spaces, (list, tuple)) else [nas_spaces] + ) + nas_estimators = ( + nas_estimators + if isinstance(nas_estimators, (list, tuple)) + else [nas_estimators] + ) # parse all str elements - nas_algorithms = [algo if not isinstance(algo, str) else NAS_ALGO_DICT[algo]() for algo in nas_algorithms] - nas_spaces = [space if not isinstance(space, str) else NAS_SPACE_DICT[space]() for space in nas_spaces] - nas_estimators = [estimator if not isinstance(estimator, str) else NAS_ESTIMATOR_DICT[estimator]() for estimator in nas_estimators] - - max_number = max([len(x) for x in [nas_algorithms, nas_spaces, nas_estimators]]) - assert all([len(x) in [1, max_number] for x in [nas_algorithms, nas_spaces, nas_estimators]]), "lengths of algorithms/spaces/estimators do not match!" 
+ nas_algorithms = [ + algo if not isinstance(algo, str) else NAS_ALGO_DICT[algo]() + for algo in nas_algorithms + ] + nas_spaces = [ + space if not isinstance(space, str) else NAS_SPACE_DICT[space]() + for space in nas_spaces + ] + nas_estimators = [ + estimator + if not isinstance(estimator, str) + else NAS_ESTIMATOR_DICT[estimator]() + for estimator in nas_estimators + ] - self.nas_algorithms = [deepcopy(nas_algorithms) for _ in range(max_number)] if len(nas_algorithms) == 1 and max_number > 1 else nas_algorithms - self.nas_spaces = [deepcopy(nas_spaces) for _ in range(max_number)] if len(nas_spaces) == 1 and max_number > 1 else nas_spaces - self.nas_estimators = [deepcopy(nas_estimators) for _ in range(max_number)] if len(nas_estimators) == 1 and max_number > 1 else nas_estimators + max_number = max([len(x) for x in [nas_algorithms, nas_spaces, nas_estimators]]) + assert all( + [ + len(x) in [1, max_number] + for x in [nas_algorithms, nas_spaces, nas_estimators] + ] + ), "lengths of algorithms/spaces/estimators do not match!" + + self.nas_algorithms = ( + [deepcopy(nas_algorithms) for _ in range(max_number)] + if len(nas_algorithms) == 1 and max_number > 1 + else nas_algorithms + ) + self.nas_spaces = ( + [deepcopy(nas_spaces) for _ in range(max_number)] + if len(nas_spaces) == 1 and max_number > 1 + else nas_spaces + ) + self.nas_estimators = ( + [deepcopy(nas_estimators) for _ in range(max_number)] + if len(nas_estimators) == 1 and max_number > 1 + else nas_estimators + ) return self diff --git a/autogl/solver/classifier/graph_classifier.py b/autogl/solver/classifier/graph_classifier.py index e77d00d..f7751a5 100644 --- a/autogl/solver/classifier/graph_classifier.py +++ b/autogl/solver/classifier/graph_classifier.py @@ -90,9 +90,9 @@ class AutoGraphClassifier(BaseClassifier): super().__init__( feature_module=feature_module, graph_models=graph_models, - nas_algorithms=None, #nas_algorithms, - nas_spaces=None, #nas_spaces, - nas_estimators=None, #nas_estimators, + nas_algorithms=None, # nas_algorithms, + nas_spaces=None, # nas_spaces, + nas_estimators=None, # nas_estimators, hpo_module=hpo_module, ensemble_module=ensemble_module, max_evals=max_evals, diff --git a/autogl/solver/classifier/node_classifier.py b/autogl/solver/classifier/node_classifier.py index da676bc..cc8c2c1 100644 --- a/autogl/solver/classifier/node_classifier.py +++ b/autogl/solver/classifier/node_classifier.py @@ -43,13 +43,13 @@ class AutoNodeClassifier(BaseClassifier): graph_models: list of autogl.module.model.BaseModel or list of str The (name of) models to be optimized as backbone. Default ``['gat', 'gcn']``. - + nas_algorithms: (list of) autogl.module.nas.algorithm.BaseNAS or str (Optional) The (name of) nas algorithms used. Default ``None``. - + nas_spaces: (list of) autogl.module.nas.space.BaseSpace or str (Optional) The (name of) nas spaces used. Default ``None``. - + nas_estimators: (list of) autogl.module.nas.estimator.BaseEstimator or str (Optional) The (name of) nas estimators used. Default ``None``. 
@@ -167,7 +167,6 @@ class AutoNodeClassifier(BaseClassifier): loss=loss, feval=feval, device=device, - ) self.graph_model_list.append(model) else: @@ -365,7 +364,11 @@ class AutoNodeClassifier(BaseClassifier): loss="nll_loss" if not hasattr(dataset, "loss") else dataset.loss, ) - assert not isinstance(self._default_trainer, list) or len(self.nas_algorithms) == len(self._default_trainer) - len(self.graph_model_list), "length of default trainer should match total graph models and nas models passed" + assert not isinstance(self._default_trainer, list) or len( + self.nas_algorithms + ) == len(self._default_trainer) - len( + self.graph_model_list + ), "length of default trainer should match total graph models and nas models passed" # perform nas and add them to model list idx_trainer = len(self.graph_model_list) @@ -384,7 +387,9 @@ class AutoNodeClassifier(BaseClassifier): model=model, num_features=self.dataset[0].x.shape[1], num_classes=self.dataset.num_classes, - loss="nll_loss" if not hasattr(dataset, "loss") else dataset.loss, + loss="nll_loss" + if not hasattr(dataset, "loss") + else dataset.loss, feval=evaluator_list, device=self.runtime_device, init=False, @@ -395,10 +400,11 @@ class AutoNodeClassifier(BaseClassifier): trainer.update_parameters( num_classes=self.dataset.num_classes, num_features=self.dataset[0].x.shape[1], - loss="nll_loss" if not hasattr(dataset, "loss") else dataset.loss, + loss="nll_loss" + if not hasattr(dataset, "loss") + else dataset.loss, feval=evaluator_list, device=self.runtime_device, - ) self.graph_model_list.append(trainer) @@ -814,25 +820,29 @@ class AutoNodeClassifier(BaseClassifier): if ensemble_dict is not None: name = ensemble_dict.pop("name") solver.set_ensemble_module(name, **ensemble_dict) - + nas_dict = path_or_dict.pop("nas", None) if nas_dict is not None: keys: set = set(nas_dict.keys()) - needed = {'space', 'algorithm', 'estimator'} + needed = {"space", "algorithm", "estimator"} if keys != needed: - LOGGER.error('Key mismatch, we need %s, you give %s', needed, keys) - raise KeyError('Key mismatch, we need %s, you give %s' % (needed, keys)) + LOGGER.error("Key mismatch, we need %s, you give %s", needed, keys) + raise KeyError("Key mismatch, we need %s, you give %s" % (needed, keys)) spaces, algorithms, estimators = [], [], [] - for container, indexer, k in zip([spaces, algorithms, estimators], [NAS_SPACE_DICT, NAS_ALGO_DICT, NAS_ESTIMATOR_DICT], ['space', 'algorithm', 'estimator']): + for container, indexer, k in zip( + [spaces, algorithms, estimators], + [NAS_SPACE_DICT, NAS_ALGO_DICT, NAS_ESTIMATOR_DICT], + ["space", "algorithm", "estimator"], + ): configs = nas_dict[k] if isinstance(configs, list): for item in configs: - container.append(indexer[item.pop('name')](**item)) + container.append(indexer[item.pop("name")](**item)) else: - container.append(indexer[configs.pop('name')](**configs)) - + container.append(indexer[configs.pop("name")](**configs)) + solver.set_nas_module(algorithms, spaces, estimators) return solver diff --git a/autogl/solver/utils.py b/autogl/solver/utils.py index 03fc36b..86843e5 100644 --- a/autogl/solver/utils.py +++ b/autogl/solver/utils.py @@ -52,7 +52,9 @@ class LeaderBoard: if field in self.keys and not field == "name": self.major_field = field else: - LOGGER.warning(f"Field [{field}] NOT found in the current LeaderBoard, will ignore.") + LOGGER.warning( + f"Field [{field}] NOT found in the current LeaderBoard, will ignore." 
+            )
 
     def insert_model_performance(self, name, performance) -> None:
         """
@@ -144,10 +146,10 @@ class LeaderBoard:
         """
         top_k: int = top_k if top_k > 0 else len(self.perform_dict)
-        '''
+        """
         reindex self.__performance_data_frame to ensure the columns of name and
         representation are on the left side of the data frame
-        '''
+        """
         _columns = self.perform_dict.columns.tolist()
         maxcolwidths: _typing.List[_typing.Optional[int]] = []
         if "name" in _columns:
@@ -157,18 +159,19 @@ class LeaderBoard:
         self.perform_dict = self.perform_dict[_columns]
 
         sorted_performance_df: pd.DataFrame = self.perform_dict.sort_values(
-            self.major_field,
-            ascending=not self.is_higher_better[self.major_field]
+            self.major_field, ascending=not self.is_higher_better[self.major_field]
         )
         sorted_performance_df = sorted_performance_df.head(top_k)
 
         from tabulate import tabulate
+
         _columns = sorted_performance_df.columns.tolist()
         maxcolwidths.extend([None for _ in range(len(_columns) - len(maxcolwidths))])
         print(
             tabulate(
                 list(zip(*[sorted_performance_df[column] for column in _columns])),
-                headers=_columns, tablefmt="grid"
+                headers=_columns,
+                tablefmt="grid",
             )
         )
 
diff --git a/autogl/utils/device.py b/autogl/utils/device.py
index b331ace..05299e4 100644
--- a/autogl/utils/device.py
+++ b/autogl/utils/device.py
@@ -1,24 +1,27 @@
 import torch
 from typing import Union
 
+
 def get_device(device: Union[str, torch.device]):
     """
     Get device of passed argument.
 
     Will return a torch.device based on passed arguments. Can parse auto, cpu,
     cuda, cpu:x, cuda:x, etc. If auto is given, will automatically find available
     devices.
 
-
     Parameters
     ----------
     device: ``str`` or ``torch.device``
         The device to parse. If ``auto`` is given, will determine automatically.
 
-
     Returns
     -------
     device: ``torch.device``
         The parsed device.
""" - assert isinstance(device, (str, torch.device)), "Only support device of str or torch.device, get {} instead".format(device) - if device == 'auto': - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + assert isinstance( + device, (str, torch.device) + ), "Only support device of str or torch.device, get {} instead".format(device) + if device == "auto": + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") return torch.device(device) From cb16552ad7b98c67dd7e72e241ce668477264151 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 11 Jul 2021 15:37:39 +0000 Subject: [PATCH 143/144] change version to 0.2.0-pre --- autogl/__init__.py | 2 +- setup.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/autogl/__init__.py b/autogl/__init__.py index 97b5085..fd5d1af 100644 --- a/autogl/__init__.py +++ b/autogl/__init__.py @@ -15,4 +15,4 @@ from .module import ( train, ) -__version__ = "0.2.0" +__version__ = "0.2.0-pre" diff --git a/setup.py b/setup.py index 9d9458f..bd53a23 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ with open("README.md", 'r') as fh: ''' https://setuptools.readthedocs.io/en/latest/ ''' setup( name='autogl', - version='0.2.0', + version='0.2.0-pre', author='THUMNLab/aglteam', maintainer='THUMNLab/aglteam', author_email='autogl@tsinghua.edu.cn', @@ -80,6 +80,7 @@ setup( 'torch-geometric', 'torch-scatter', 'torch-sparse', - 'tqdm' + 'tqdm', + 'nni' ] ) \ No newline at end of file From 4a455d002e92f6a5fb63d4d33b0cca6cc5256608 Mon Sep 17 00:00:00 2001 From: Frozenmad Date: Sun, 11 Jul 2021 15:50:40 +0000 Subject: [PATCH 144/144] add new version slogan --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index dd22169..d5af745 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Feel free to open issues ## News! +- 2021.07.11 New version! v0.2.0-pre is here! In new version, AutoGL support neural architecture search to customize the architectures for the given datasets and tasks. AutoGL also support sampling now to perform tasks on large datasets, including node-wise sampling, layer-wise sampling and sub-graph sampling. Link prediction task is now also supported! Learn more in our [tutorial](https://autogl.readthedocs.io/en/latest/index.html). - 2021.04.10 Our paper [__AutoGL: A Library for Automated Graph Learning__](https://arxiv.org/abs/2104.04987) are accepted in _ICLR 2021 Workshop on Geometrical and Topological Representation Learning_! You can cite our paper following methods [here](#Cite). ## Introduction