|
|
|
@@ -5,17 +5,104 @@ class StructureEngineer(_data_preprocessor.DataPreprocessor): |
|
|
|
... |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch |
|
|
|
from ....utils import get_logger |
|
|
|
LOGGER = get_logger("Structure") |
|
|
|
|
|
|
|
from torch_geometric.utils import to_dense_adj |
|
|
|
def get_feature(data):
    """Return the node feature matrix of ``data`` as a numpy array.

    The node data container ``data.nodes.data`` is probed for the keys
    ``'x'`` and ``'feat'`` (in that order). If both are present the value
    stored under ``'feat'`` is returned, because the later key overrides
    the earlier one.

    Parameters
    ----------
    data :
        graph object exposing ``data.nodes.data``, a mapping that supports
        ``in`` and item access and whose values provide ``.numpy()``.

    Returns
    -------
    numpy.ndarray
        the result of calling ``.numpy()`` on the stored feature tensor.

    Raises
    ------
    KeyError
        if neither ``'x'`` nor ``'feat'`` is present in ``data.nodes.data``.
    """
    candidate_keys = ("x", "feat")
    node_data = data.nodes.data
    features = None
    for key in candidate_keys:
        if key in node_data:
            features = node_data[key].numpy()
    if features is None:
        # Fail with an explicit message instead of an UnboundLocalError
        # that would hide the actual cause (missing node features).
        raise KeyError(
            f"no node features found; expected one of {candidate_keys} in data.nodes.data"
        )
    return features
|
|
|
|
|
|
|
def get_edges(data):
    """Return the edge connections stored on ``data``.

    Parameters
    ----------
    data :
        graph object exposing ``data.edges.connections``.

    Returns
    -------
    The object stored in ``data.edges.connections``, returned as-is
    (no copy, no conversion).
    """
    edge_container = data.edges
    connections = edge_container.connections
    return connections
|
|
|
|
|
|
|
def set_edges(data, adj):
    """Store ``adj`` as the edge index of ``data``.

    Parameters
    ----------
    data :
        graph object exposing a mutable mapping ``data.data``.
    adj :
        value written under the ``"edge_index"`` key; stored as-is.

    Returns
    -------
    None. ``data`` is modified in place.
    """
    storage = data.data
    storage["edge_index"] = adj
|
|
|
|
|
|
|
def to_adjacency_matrix(adj, num_nodes=None):
    """Convert an edge list into a dense adjacency matrix.

    Parameters
    ----------
    adj : torch.Tensor [2,E]
        edge list.
    num_nodes : int, optional
        forwarded to ``to_dense_adj`` as ``max_num_nodes``. With the default
        ``None`` the matrix size is left for ``to_dense_adj`` to infer from
        ``adj``, so nodes that appear in no edge and have an index above the
        largest one in ``adj`` are not represented. Pass the real node count
        to keep them.

    Returns
    -------
    torch.Tensor [N,N]
        adjacency matrix cast to ``long``.
    """
    dense = to_dense_adj(adj, max_num_nodes=num_nodes)
    # Index 0 selects the first entry along the leading dimension of the
    # to_dense_adj result (a single graph is converted here).
    return dense[0].long()
|
|
|
def to_adjacency_list(adj):
    """Convert a dense adjacency matrix into an edge list.

    Parameters
    ----------
    adj : torch.Tensor [N,N]
        adjacency matrix; every non-zero entry is treated as an edge.

    Returns
    -------
    torch.Tensor [2,E]
        ``long`` tensor whose rows are the stacked index tensors of the
        non-zero entries of ``adj``.
    """
    # One index tensor per dimension of ``adj``.
    index_tensors = torch.nonzero(adj, as_tuple=True)
    edge_list = torch.stack(index_tensors)
    return edge_list.long()
|
|
|
|
|
|
|
from .._data_preprocessor_registry import DataPreprocessorUniversalRegistry |
|
|
|
from deeprobust.graph.defense.gcn_preprocess import GCNJaccard as Jaccard |
|
|
|
@DataPreprocessorUniversalRegistry.register_data_preprocessor("gcnjaccard")
class GCNJaccard(StructureEngineer):
    """
    GCNJaccard preprocesses the input graph by dropping dissimilar edges.

    See more details in
    Adversarial Examples on Graph Data: Deep Insights into Attack and Defense,
    https://arxiv.org/pdf/1903.01610.pdf.
    """

    def __init__(self, threshold=0.01, *args, **kwargs):
        """Drop dissimilar edges with similarity smaller than given threshold.

        Parameters
        ----------
        threshold : float
            similarity threshold for dropping edges. If two connected nodes
            have a similarity smaller than the threshold, the edge between
            them will be removed.
        *args, **kwargs
            forwarded unchanged to the parent class constructor.
        """
        super(GCNJaccard, self).__init__(*args, **kwargs)
        # Only ``drop_dissimilar_edges`` of the engine is used by this class.
        # NOTE(review): the three positional arguments look like placeholder
        # model dimensions required by the constructor — confirm against the
        # deeprobust GCNJaccard signature.
        self.engine = Jaccard(2, 2, 2)
        self.engine.threshold = threshold

    def _transform(self, data):
        """Remove dissimilar edges from ``data`` and return it.

        The graph is accessed only through ``get_feature`` / ``get_edges`` /
        ``set_edges`` so the engine always receives a numpy feature matrix
        and a dense adjacency matrix.

        Parameters
        ----------
        data :
            graph object accepted by ``get_feature``, ``get_edges`` and
            ``set_edges``.

        Returns
        -------
        The same ``data`` object, with its edge index replaced by the
        filtered edge list (Tensor [2,E]).
        """
        features = get_feature(data)
        edge_list = get_edges(data)  # edge list
        LOGGER.info(f'before modified: {edge_list.shape}')
        dense_adj = to_adjacency_matrix(edge_list).numpy()  # adjacency matrix
        # The engine result is converted with ``toarray()`` to obtain a dense
        # adjacency matrix again.
        modified_adj = self.engine.drop_dissimilar_edges(features, dense_adj).toarray()
        modified_adj = to_adjacency_list(torch.Tensor(modified_adj))  # edge list
        LOGGER.info(f'after modified: {modified_adj.shape}')
        set_edges(data, modified_adj)
        return data
|
|
|
|
|
|
|
from deeprobust.graph.defense.gcn_preprocess import GCNSVD as SVD |
|
|
|
@DataPreprocessorUniversalRegistry.register_data_preprocessor("gcnsvd")
class GCNSVD(StructureEngineer):
    """Structure preprocessor that applies truncated SVD to the adjacency matrix.

    See more details in
    All You Need Is Low (Rank): Defending Against Adversarial Attacks on Graphs,
    https://dl.acm.org/doi/abs/10.1145/3336191.3371789.
    """

    def __init__(self, k=50, threshold=0.05, *args, **kwargs):
        """Configure the rank-k approximation of the adjacency matrix.

        Parameters
        ----------
        k : int
            number of singular values and vectors to compute.
        threshold : float
            edges with scores larger than threshold will be kept.
        *args, **kwargs
            forwarded unchanged to the parent class constructor.
        """
        super(GCNSVD, self).__init__(*args, **kwargs)
        # Only ``truncatedSVD`` of the engine is used by this class.
        # NOTE(review): the three positional arguments look like placeholder
        # model dimensions required by the constructor — confirm.
        self.engine = SVD(2, 2, 2)
        self.k = k
        self.threshold = threshold

    def _transform(self, data):
        """Replace the edges of ``data`` by those surviving the SVD filter.

        The edge list is turned into a dense adjacency matrix, passed
        through ``self.engine.truncatedSVD`` with ``self.k``, binarised by
        comparing against ``self.threshold`` and converted back into an
        edge list that is written onto ``data``.

        Returns
        -------
        The same ``data`` object with its edge index updated.
        """
        edge_list = get_edges(data)  # edge list
        LOGGER.info(f'before modified: {edge_list.shape}')

        dense_adj = to_adjacency_matrix(edge_list).numpy()  # adjacency matrix
        low_rank_scores = self.engine.truncatedSVD(dense_adj, self.k)
        # Keep only entries whose score is strictly above the threshold.
        kept_entries = (low_rank_scores > self.threshold).astype(int)
        new_edge_list = to_adjacency_list(torch.Tensor(kept_entries))  # edge list

        LOGGER.info(f'after modified: {new_edge_list.shape}')
        set_edges(data, new_edge_list)
        return data