|
- """
- Test file for nas on node classification
-
- AUTOGL_BACKEND=pyg python test/nas/node_classification.py
- AUTOGL_BACKEND=dgl python test/nas/node_classification.py
-
- TODO: make it a unit test file to test all the possible combinations
- """
-
- import os
- import logging
-
- logging.basicConfig(level=logging.INFO)
-
- from autogl.backend import DependentBackend
-
- if DependentBackend.is_dgl():
- from autogl.module.model.dgl import BaseAutoModel
- from dgl.data import CoraGraphDataset
- elif DependentBackend.is_pyg():
- from torch_geometric.datasets import Planetoid
- from autogl.module.model.pyg import BaseAutoModel
- from autogl.datasets import build_dataset_from_name
- import torch
- from torch import nn
- import torch.nn.functional as F
- #from autogl.module.nas.algorithm.agnn_rl import AGNNRL
- from autogl.module.nas.backend import bk_feat, bk_label
- from autogl.module.nas.algorithm import Darts, RL, GraphNasRL, Enas, RandomSearch,Spos
- from autogl.module.nas.estimator import BaseEstimator
- from autogl.module.train.evaluation import Acc
- import numpy as np
- from autogl.solver.utils import set_seed
- from autogl.module.nas.space import BaseSpace
- import typing as _typ
- from nas_bench_graph import light_read, gnn_list, gnn_list_proteins, Arch
- import pandas as pd
- import argparse
- import os
- import os.path as osp
-
- # Define the search space in NAS-bench-graph
class StrModule(nn.Module):
    """Wrap a plain value (an operation name) as an ``nn.Module``.

    This lets a bare string participate in the search space as a layer
    choice; invoking the module simply yields the stored name.
    """

    def __init__(self, lambd):
        super().__init__()
        # The wrapped value; exposed as `.name` for the space's `forward`.
        self.name = lambd

    def forward(self, *args, **kwargs):
        # All inputs are ignored — the "layer" only carries its identifier.
        return self.name

    def __repr__(self):
        return "{}({})".format(type(self).__name__, self.name)
-
class BenchSpace(BaseSpace):
    """Search space whose architectures are scored via NAS-bench-graph.

    An architecture is four (input-link, operation) choices; ``forward``
    hashes the current selection and returns the benchmarked performance,
    so no model is ever trained.
    """

    def __init__(
        self,
        hidden_dim: _typ.Optional[int] = 64,
        layer_number: _typ.Optional[int] = 2,
        dropout: _typ.Optional[float] = 0.9,
        input_dim: _typ.Optional[int] = None,
        output_dim: _typ.Optional[int] = None,
        ops_type=0,
    ):
        # hidden_dim / layer_number / dropout are kept for compatibility
        # with the BaseSpace interface; the benchmark lookup ignores them.
        super().__init__()
        self.layer_number = layer_number
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dropout = dropout
        self.ops_type = ops_type

    def instantiate(
        self,
        hidden_dim: _typ.Optional[int] = None,
        layer_number: _typ.Optional[int] = None,
        dropout: _typ.Optional[float] = None,
        input_dim: _typ.Optional[int] = None,
        output_dim: _typ.Optional[int] = None,
        ops_type=None,
    ):
        """Build the four input/operation choices; ``None`` keeps a current value.

        FIX: the original used ``x or self.x``, which silently discarded
        legitimate falsy overrides (e.g. ``dropout=0.0``). Explicit
        ``is not None`` checks accept those while leaving every existing
        call's behavior unchanged.
        """
        super().instantiate()
        self.dropout = dropout if dropout is not None else self.dropout
        self.hidden_dim = hidden_dim if hidden_dim is not None else self.hidden_dim
        self.layer_number = layer_number if layer_number is not None else self.layer_number
        self.input_dim = input_dim if input_dim is not None else self.input_dim
        self.output_dim = output_dim if output_dim is not None else self.output_dim
        self.ops_type = ops_type if ops_type is not None else self.ops_type
        # ops_type selects the operation list: 0 -> gnn_list (default),
        # 1 -> gnn_list_proteins ('proteins' has its own op set).
        self.ops = [gnn_list, gnn_list_proteins][self.ops_type]
        for layer in range(4):
            # in{i}: which previous node feeds layer i (i+1 candidates);
            # op{i}: which operation layer i applies, wrapped in StrModule
            # so a plain string can act as a choice module.
            setattr(
                self,
                f"in{layer}",
                self.setInputChoice(
                    layer,
                    n_candidates=layer + 1,
                    n_chosen=1,
                    return_mask=False,
                    key=f"in{layer}",
                ),
            )
            setattr(
                self,
                f"op{layer}",
                self.setLayerChoice(
                    layer, list(map(lambda x: StrModule(x), self.ops)), key=f"op{layer}"
                ),
            )
        # Dummy layer so the space owns at least one trainable parameter.
        self.dummy = nn.Linear(1, 1)

    def forward(self, bench):
        """Return the benchmark performance of the currently selected arch.

        ``bench`` maps architecture hashes to records with a 'perf' entry;
        the invalid-architecture hash (88888, string or int) scores 0.
        """
        lks = [getattr(self, "in" + str(i)).selected for i in range(4)]
        ops = [getattr(self, "op" + str(i)).name for i in range(4)]
        arch = Arch(lks, ops)
        h = arch.valid_hash()
        if h == "88888" or h == 88888:
            return 0
        return bench[h]['perf']

    def parse_model(self, selection, device) -> BaseAutoModel:
        # Delegate to BaseSpace: wrap the space and fix the given selection
        # (project API — no training happens here either).
        return self.wrap().fix(selection)
-
- # Define a new estimator which directly get performance from NAS-bench-graph instead of training the model
# Define a new estimator which directly gets performance from NAS-bench-graph
# instead of training the model.
class BenchEstimator(BaseEstimator):
    """Estimator backed by a NAS-bench-graph lookup table.

    ``infer`` asks the space to hash its selected architecture and returns
    the benchmarked performance — no dataset and no training involved.
    """

    def __init__(self, data_name, loss_f="nll_loss", evaluation=None):
        # FIX: the original default `evaluation=[Acc()]` was a shared
        # mutable default argument; use a None sentinel instead. Behavior
        # is unchanged for callers relying on the default.
        if evaluation is None:
            evaluation = [Acc()]
        super().__init__(loss_f, evaluation)
        self.evaluation = evaluation
        # Load the benchmark table for the given dataset name.
        self.bench = light_read(data_name)

    def infer(self, model: BaseSpace, dataset, mask="train"):
        """Return ([perf], 0); `dataset` and `mask` are ignored (lookup only)."""
        perf = model(self.bench)
        return [perf], 0
-
- # Run NAS with NAS-bench-graph
def run(data_name='cora', algo='graphnas', num_epochs=50, ctrl_steps_aggregate=20, log_dir='./logs/tmp'):
    """Run one NAS search over NAS-bench-graph and write logs under *log_dir*.

    Parameters
    ----------
    data_name : benchmark dataset name (e.g. 'cora', 'proteins').
    algo : 'graphnas' or 'agnn'.
    num_epochs, ctrl_steps_aggregate : RL search budget.
    log_dir : directory receiving log.txt, archs.json and idx.txt.

    Returns
    -------
    Benchmark performance of the best architecture found.

    Raises
    ------
    ValueError
        If *algo* is not one of the supported names.
    """
    print("Testing backend: {}".format("dgl" if DependentBackend.is_dgl() else "pyg"))
    # NOTE: the original imported a backend-specific `convert_dataset` here
    # but never used it (the estimator is a pure benchmark lookup); the dead
    # import has been dropped.

    # Dummy dims and no dataset — the benchmark estimator never reads them.
    di = 2
    do = 2
    dataset = None

    # 'proteins' uses its own operation list in NAS-bench-graph.
    ops_type = data_name == 'proteins'

    space = BenchSpace().cuda()
    space.instantiate(input_dim=di, output_dim=do, ops_type=ops_type)
    esti = BenchEstimator(data_name)
    if algo == 'graphnas':
        algo = GraphNasRL(num_epochs=num_epochs, ctrl_steps_aggregate=ctrl_steps_aggregate)
    elif algo == 'agnn':
        # FIX: AGNNRL's module-level import is commented out at the top of
        # the file, so this branch raised NameError in the original. Import
        # lazily so the 'graphnas' path works even if the module is missing.
        from autogl.module.nas.algorithm.agnn_rl import AGNNRL
        algo = AGNNRL(guide_type=1, num_epochs=num_epochs, ctrl_steps_aggregate=ctrl_steps_aggregate)
    else:
        # Explicit error instead of `assert False` (asserts vanish under -O).
        raise ValueError(f'Not implemented algo {algo}')
    model = algo.search(space, dataset, esti)
    result = esti.infer(model._model, None)[0][0]

    os.makedirs(log_dir, exist_ok=True)
    with open(osp.join(log_dir, 'log.txt'), 'w') as f:
        f.write(str(result))

    import json
    # allhist holds (score, architecture) pairs for every sampled arch.
    archs = algo.allhist
    # FIX: the original passed an unclosed `open(...)` to json.dump,
    # leaking the file handle; use a context manager.
    with open(osp.join(log_dir, 'archs.json'), 'w') as f:
        json.dump(archs, f)

    arch_strs = [str(x[1]) for x in archs]
    print(f'number of archs: {len(arch_strs)} ; number of unique archs : {len(set(arch_strs))}')

    scores = [-x[0] for x in archs]  # accs (allhist presumably stores negated scores — TODO confirm)
    idxs = np.argsort(scores)  # increasing order
    with open(osp.join(log_dir, 'idx.txt'), 'w') as f:
        f.write(str(idxs))
    return result
-
- # Run NAS with NAS-bench-graph for all provided datasets
def run_all():
    """Sweep every benchmark dataset with each search algorithm.

    Returns a list of ``[dataset, algorithm, metric]`` rows, suitable for
    building a pandas pivot table.
    """
    dataset_names = 'arxiv citeseer computers cora cs photo physics proteins pubmed'.split()
    algorithms = 'graphnas agnn'.split()
    results = []
    for name in dataset_names:
        # 'proteins' has a far smaller benchmark, so use a reduced budget.
        budget = (8, 5) if name == 'proteins' else (50, 10)
        for algorithm in algorithms:
            print(f'data {name} algo {algorithm}')
            metric = run(name, algorithm, *budget)
            results.append([name, algorithm, metric])
    return results
-
- if __name__ == "__main__":
- # results=run_all()
- # df=pd.DataFrame(results,columns='data algo v'.split()).pivot_table(values='v',index='algo',columns='data')
- # print(df.to_string())
-
- parser = argparse.ArgumentParser()
- parser.add_argument('--data', type=str, default='cora', help='datasets')
- parser.add_argument('--algo', type=str, default='graphnas')
- parser.add_argument('--log_dir', type=str, default='./logs/')
-
- args = parser.parse_args()
- dname=args.data
- algo=args.algo
- log_dir= os.path.join(args.log_dir,f'{dname,algo}')
- if dname=='proteins':
- # 40 archs in total
- num_epochs=8
- ctrl_steps_aggregate=5
- else:
- # 500 archs in total
- num_epochs=50
- ctrl_steps_aggregate=10
- result=run(dname,algo,num_epochs,ctrl_steps_aggregate,log_dir)
|