From f839edd5ef7d26272045ccda071a9cf43fa47fdb Mon Sep 17 00:00:00 2001 From: wondergo2017 Date: Tue, 19 Jul 2022 20:43:28 +0800 Subject: [PATCH] add GCNJaccard/GCNSVD --- .../_structure_engineer.py | 95 ++++++++++++++++++- test/preprocessing/fe.py | 18 ---- test/preprocessing/structure.py | 11 +++ 3 files changed, 102 insertions(+), 22 deletions(-) delete mode 100644 test/preprocessing/fe.py create mode 100644 test/preprocessing/structure.py diff --git a/autogl/module/preprocessing/structure_engineering/_structure_engineer.py b/autogl/module/preprocessing/structure_engineering/_structure_engineer.py index 67fe5fe..9d11206 100644 --- a/autogl/module/preprocessing/structure_engineering/_structure_engineer.py +++ b/autogl/module/preprocessing/structure_engineering/_structure_engineer.py @@ -5,17 +5,104 @@ class StructureEngineer(_data_preprocessor.DataPreprocessor): ... + +import torch +from ....utils import get_logger +LOGGER = get_logger("Structure") + +from torch_geometric.utils import to_dense_adj +def get_feature(data): + """return features : numpy.ndarray + """ + for fk in 'x feat'.split(): + if fk in data.nodes.data: + features=data.nodes.data[fk].numpy() + return features + +def get_edges(data): + return data.edges.connections + +def set_edges(data,adj): + data.data["edge_index"]=adj + +def to_adjacency_matrix(adj): + """ + adj : torch.Tensor [2,E] + return Tensor [N,N] + """ + adj = to_dense_adj(adj)[0].long() # adjacency matrix + return adj +def to_adjacency_list(adj): + """ + adj : Tensor [N,N] + return Tensor [2,E] + """ + adj = torch.stack(adj.nonzero(as_tuple=True)).long() # edge list + return adj + from .._data_preprocessor_registry import DataPreprocessorUniversalRegistry from deeprobust.graph.defense.gcn_preprocess import GCNJaccard as Jaccard @DataPreprocessorUniversalRegistry.register_data_preprocessor("gcnjaccard") class GCNJaccard(StructureEngineer): + """ + GCNJaccard preprocesses input graph via dropping dissimilar + edges. 
See more details in + Adversarial Examples on Graph Data: Deep Insights into Attack and Defense, + https://arxiv.org/pdf/1903.01610.pdf. + """ def __init__(self, threshold=0.01, *args, **kwargs): + """ drop dissimilar edges with similarity smaller than the given threshold + + Parameters + ---------- + threshold : float + similarity threshold for dropping edges. If two connected nodes have a similarity smaller than the threshold, the edge between them will be removed. + """ super(GCNJaccard, self).__init__(*args, **kwargs) self.engine=Jaccard(2,2,2) self.engine.threshold=threshold def _transform(self,data): - features=data.x - adj=data.edge_index - modified_adj = self.engine.drop_dissimilar_edges(features, adj) - data.edge_index=modified_adj + features = get_feature(data) + adj = get_edges(data) # edge list + LOGGER.info(f'before modified: {adj.shape}') + adj = to_adjacency_matrix(adj).numpy() # adjacency matrix + modified_adj = self.engine.drop_dissimilar_edges(features, adj).toarray() # adjacency matrix + modified_adj = to_adjacency_list(torch.Tensor(modified_adj)) # edge list + LOGGER.info(f'after modified: {modified_adj.shape}' ) + set_edges(data,modified_adj) + return data + +from deeprobust.graph.defense.gcn_preprocess import GCNSVD as SVD +@DataPreprocessorUniversalRegistry.register_data_preprocessor("gcnsvd") +class GCNSVD(StructureEngineer): + """GCNSVD uses Truncated SVD as preprocessing. See more details in All You Need Is Low (Rank): Defending + Against Adversarial Attacks on Graphs, + https://dl.acm.org/doi/abs/10.1145/3336191.3371789. + """ + def __init__(self, k=50, threshold=0.05, *args, **kwargs): + """perform rank-k approximation of adjacency matrix via + truncated SVD + + Parameters + ---------- + k : int + number of singular values and vectors to compute. + + threshold : float + edges with scores larger than threshold will be kept. 
+ """ + super(GCNSVD, self).__init__(*args, **kwargs) + self.engine=SVD(2,2,2) + self.k=k + self.threshold=threshold + + def _transform(self,data): + adj = get_edges(data) # edge list + LOGGER.info(f'before modified: {adj.shape}') + adj = to_adjacency_matrix(adj).numpy() # adjacency matrix + modified_adj = self.engine.truncatedSVD(adj,self.k) # adjacency matrix + modified_adj = (modified_adj> self.threshold).astype(int) + modified_adj = to_adjacency_list(torch.Tensor(modified_adj)) # edge list + LOGGER.info(f'after modified: {modified_adj.shape}' ) + set_edges(data,modified_adj) return data \ No newline at end of file diff --git a/test/preprocessing/fe.py b/test/preprocessing/fe.py deleted file mode 100644 index 9fb6df3..0000000 --- a/test/preprocessing/fe.py +++ /dev/null @@ -1,18 +0,0 @@ -from autogl.datasets import build_dataset_from_name -data = build_dataset_from_name('cora') - -# 2. Compose a feature engineering pipeline -from autogl.module.preprocessing.feature_engineering import EigenFeatureGenerator - -# you may compose feature engineering bases through autogl.module.feature._base_feature_engineer -from autogl.module.preprocessing.structure_engineering._structure_engineer import * -# fe = EigenFeatureGenerator(size=32) -fe=GCNJaccard() - -# 3. Fit and transform the data -data1=fe.fit_transform(data,inplace=False) - -# from autogl.data.graph import GeneralStaticGraph -# print(isinstance(data, GeneralStaticGraph)) -# print(data.nodes) - diff --git a/test/preprocessing/structure.py b/test/preprocessing/structure.py new file mode 100644 index 0000000..96e635f --- /dev/null +++ b/test/preprocessing/structure.py @@ -0,0 +1,11 @@ +from autogl.datasets import build_dataset_from_name +data = build_dataset_from_name('cora') +from autogl.module.preprocessing.structure_engineering._structure_engineer import * + +fes=[GCNJaccard,GCNSVD] +for fe in fes: + print(f'Doing {fe}') + fe = fe() + data=fe.fit_transform(data,inplace=False) + +