2. Correct an error in the common walk kernel. DO NOT use the old one. 3. Improve the method to construct fully-labeled direct product graphs; it is much faster for sparse graphs. tags/v0.1
| @@ -0,0 +1 @@ | |||||
| ljia@ljia-Precision-7520.4716:1530265749 | |||||
| @@ -24,7 +24,7 @@ def commonwalkkernel(*args, | |||||
| edge_label='bond_type', | edge_label='bond_type', | ||||
| n=None, | n=None, | ||||
| weight=1, | weight=1, | ||||
| compute_method='exp'): | |||||
| compute_method=None): | |||||
| """Calculate common walk graph kernels up to depth d between graphs. | """Calculate common walk graph kernels up to depth d between graphs. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| @@ -40,10 +40,11 @@ def commonwalkkernel(*args, | |||||
| n : integer | n : integer | ||||
| Longest length of walks. | Longest length of walks. | ||||
| weight: integer | weight: integer | ||||
| Weight coefficient of different lengths of walks. | |||||
| Weight coefficient of different lengths of walks, which represents beta in 'exp' method and gamma in 'geo'. | |||||
| compute_method : string | compute_method : string | ||||
| Method used to compute walk kernel. The Following choices are available: | Method used to compute walk kernel. The Following choices are available: | ||||
| 'direct' : direct product graph method, as shown in reference [1]. The time complexity is O(n^6) for unlabeled graphs with n vertices. | |||||
| 'exp' : exponential serial method applied on the direct product graph, as shown in reference [1]. The time complexity is O(n^6) for graphs with n vertices. | |||||
| 'geo' : geometric serial method applied on the direct product graph, as shown in reference [1]. The time complexity is O(n^6) for graphs with n vertices. | |||||
| 'brute' : brute force, simply search for all walks and compare them. | 'brute' : brute force, simply search for all walks and compare them. | ||||
| Return | Return | ||||
| @@ -66,6 +67,8 @@ def commonwalkkernel(*args, | |||||
| if not ds_attrs['edge_labeled']: | if not ds_attrs['edge_labeled']: | ||||
| for G in Gn: | for G in Gn: | ||||
| nx.set_edge_attributes(G, '0', 'bond_type') | nx.set_edge_attributes(G, '0', 'bond_type') | ||||
| if not ds_attrs['is_directed']: | |||||
| Gn = [G.to_directed() for G in Gn] | |||||
| start_time = time.time() | start_time = time.time() | ||||
| @@ -77,7 +80,7 @@ def commonwalkkernel(*args, | |||||
| file=sys.stdout) | file=sys.stdout) | ||||
| for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
| for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
| Kmatrix[i][j] = _untilnwalkkernel_exp(Gn[i], Gn[j], node_label, | |||||
| Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label, | |||||
| edge_label, weight) | edge_label, weight) | ||||
| Kmatrix[j][i] = Kmatrix[i][j] | Kmatrix[j][i] = Kmatrix[i][j] | ||||
| pbar.update(1) | pbar.update(1) | ||||
| @@ -90,7 +93,7 @@ def commonwalkkernel(*args, | |||||
| file=sys.stdout) | file=sys.stdout) | ||||
| for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
| for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
| Kmatrix[i][j] = _untilnwalkkernel_geo(Gn[i], Gn[j], node_label, | |||||
| Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label, | |||||
| edge_label, weight) | edge_label, weight) | ||||
| Kmatrix[j][i] = Kmatrix[i][j] | Kmatrix[j][i] = Kmatrix[i][j] | ||||
| pbar.update(1) | pbar.update(1) | ||||
| @@ -106,7 +109,7 @@ def commonwalkkernel(*args, | |||||
| for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
| for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
| Kmatrix[i][j] = _untilnwalkkernel_brute( | |||||
| Kmatrix[i][j] = _commonwalkkernel_brute( | |||||
| all_walks[i], | all_walks[i], | ||||
| all_walks[j], | all_walks[j], | ||||
| node_label=node_label, | node_label=node_label, | ||||
| @@ -122,7 +125,7 @@ def commonwalkkernel(*args, | |||||
| return Kmatrix, run_time | return Kmatrix, run_time | ||||
| def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta): | |||||
| def _commonwalkkernel_exp(G1, G2, node_label, edge_label, beta): | |||||
| """Calculate walk graph kernels up to n between 2 graphs using exponential series. | """Calculate walk graph kernels up to n between 2 graphs using exponential series. | ||||
| Parameters | Parameters | ||||
| @@ -168,7 +171,7 @@ def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta): | |||||
| D = np.zeros((len(ew), len(ew))) | D = np.zeros((len(ew), len(ew))) | ||||
| for i in range(len(ew)): | for i in range(len(ew)): | ||||
| D[i][i] = np.exp(beta * ew[i]) | D[i][i] = np.exp(beta * ew[i]) | ||||
| # print('D: ', D) | |||||
| # print('D: ', D) | |||||
| # print('hshs: ', T.I * D * T) | # print('hshs: ', T.I * D * T) | ||||
| # print(np.exp(-2)) | # print(np.exp(-2)) | ||||
| @@ -176,16 +179,16 @@ def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta): | |||||
| # print(np.exp(weight * D)) | # print(np.exp(weight * D)) | ||||
| # print(ev) | # print(ev) | ||||
| # print(np.linalg.inv(ev)) | # print(np.linalg.inv(ev)) | ||||
| exp_D = ev * D * ev.I | |||||
| exp_D = ev * D * ev.T | |||||
| # print(exp_D) | # print(exp_D) | ||||
| # print(np.exp(weight * A)) | # print(np.exp(weight * A)) | ||||
| # print('-------') | # print('-------') | ||||
| return np.sum(exp_D.diagonal()) | |||||
| return exp_D.sum() | |||||
| def _untilnwalkkernel_geo(G1, G2, node_label, edge_label, gamma): | |||||
| """Calculate walk graph kernels up to n between 2 graphs using geometric series. | |||||
| def _commonwalkkernel_geo(G1, G2, node_label, edge_label, gamma): | |||||
| """Calculate common walk graph kernels up to n between 2 graphs using geometric series. | |||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| @@ -207,46 +210,14 @@ def _untilnwalkkernel_geo(G1, G2, node_label, edge_label, gamma): | |||||
| # get tensor product / direct product | # get tensor product / direct product | ||||
| gp = direct_product(G1, G2, node_label, edge_label) | gp = direct_product(G1, G2, node_label, edge_label) | ||||
| A = nx.adjacency_matrix(gp).todense() | A = nx.adjacency_matrix(gp).todense() | ||||
| # print(A) | |||||
| # from matplotlib import pyplot as plt | |||||
| # nx.draw_networkx(G1) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(G2) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gp) | |||||
| # plt.show() | |||||
| # print(G1.nodes(data=True)) | |||||
| # print(G2.nodes(data=True)) | |||||
| # print(gp.nodes(data=True)) | |||||
| # print(gp.edges(data=True)) | |||||
| ew, ev = np.linalg.eig(A) | |||||
| # print('ew: ', ew) | |||||
| # print(ev) | |||||
| # T = np.matrix(ev) | |||||
| # print('T: ', T) | |||||
| # T = ev.I | |||||
| D = np.zeros((len(ew), len(ew))) | |||||
| for i in range(len(ew)): | |||||
| D[i][i] = np.exp(beta * ew[i]) | |||||
| # print('D: ', D) | |||||
| # print('hshs: ', T.I * D * T) | |||||
| # print(np.exp(-2)) | |||||
| # print(D) | |||||
| # print(np.exp(weight * D)) | |||||
| # print(ev) | |||||
| # print(np.linalg.inv(ev)) | |||||
| exp_D = ev * D * ev.I | |||||
| # print(exp_D) | |||||
| # print(np.exp(weight * A)) | |||||
| # print('-------') | |||||
| return np.sum(exp_D.diagonal()) | |||||
| mat = np.identity(len(A)) - gamma * A | |||||
| try: | |||||
| return mat.I.sum() | |||||
| except np.linalg.LinAlgError: | |||||
| return np.nan | |||||
| def _untilnwalkkernel_brute(walks1, | |||||
| def _commonwalkkernel_brute(walks1, | |||||
| walks2, | walks2, | ||||
| node_label='atom', | node_label='atom', | ||||
| edge_label='bond_type', | edge_label='bond_type', | ||||
| @@ -19,7 +19,11 @@ from pygraph.utils.graphdataset import get_dataset_attributes | |||||
| def randomwalkkernel(*args, | def randomwalkkernel(*args, | ||||
| node_label='atom', | node_label='atom', | ||||
| edge_label='bond_type', | edge_label='bond_type', | ||||
| edge_weight=None, | |||||
| h=10, | h=10, | ||||
| p=None, | |||||
| q=None, | |||||
| weight=None, | |||||
| compute_method=''): | compute_method=''): | ||||
| """Calculate random walk graph kernels. | """Calculate random walk graph kernels. | ||||
| Parameters | Parameters | ||||
| @@ -33,7 +37,7 @@ def randomwalkkernel(*args, | |||||
| node attribute used as label. The default node label is atom. | node attribute used as label. The default node label is atom. | ||||
| edge_label : string | edge_label : string | ||||
| edge attribute used as label. The default edge label is bond_type. | edge attribute used as label. The default edge label is bond_type. | ||||
| n : integer | |||||
| h : integer | |||||
| Longest length of walks. | Longest length of walks. | ||||
| method : string | method : string | ||||
| Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'. | Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'. | ||||
| @@ -46,7 +50,25 @@ def randomwalkkernel(*args, | |||||
| compute_method = compute_method.lower() | compute_method = compute_method.lower() | ||||
| h = int(h) | h = int(h) | ||||
| Gn = args[0] if len(args) == 1 else [args[0], args[1]] | Gn = args[0] if len(args) == 1 else [args[0], args[1]] | ||||
| Kmatrix = np.zeros((len(Gn), len(Gn))) | |||||
| eweight = None | |||||
| if edge_weight == None: | |||||
| print('\n None edge weight specified. Set all weight to 1.\n') | |||||
| else: | |||||
| try: | |||||
| some_weight = list( | |||||
| nx.get_edge_attributes(Gn[0], edge_weight).values())[0] | |||||
| if isinstance(some_weight, float) or isinstance(some_weight, int): | |||||
| eweight = edge_weight | |||||
| else: | |||||
| print( | |||||
| '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' | |||||
| % edge_weight) | |||||
| except: | |||||
| print( | |||||
| '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' | |||||
| % edge_weight) | |||||
| ds_attrs = get_dataset_attributes( | ds_attrs = get_dataset_attributes( | ||||
| Gn, | Gn, | ||||
| attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | ||||
| @@ -71,76 +93,224 @@ def randomwalkkernel(*args, | |||||
| # labeled=labeled) for i in range(0, len(Gn)) | # labeled=labeled) for i in range(0, len(Gn)) | ||||
| # ] | # ] | ||||
| pbar = tqdm( | |||||
| total=(1 + len(Gn)) * len(Gn) / 2, | |||||
| desc='calculating kernels', | |||||
| file=sys.stdout) | |||||
| if compute_method == 'sylvester': | if compute_method == 'sylvester': | ||||
| import warnings | import warnings | ||||
| warnings.warn( | warnings.warn( | ||||
| 'The Sylvester equation (rather than generalized Sylvester equation) is used; only walks of length 1 is considered.' | |||||
| 'The Sylvester equation (rather than generalized Sylvester equation) is used; edge label number has to smaller than 3.' | |||||
| ) | ) | ||||
| from control import dlyap | |||||
| Kmatrix = _randomwalkkernel_sylvester(Gn, weight, p, q, node_label, | |||||
| edge_label, eweight) | |||||
| elif compute_method == 'conjugate': | |||||
| for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
| for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
| Kmatrix[i][j] = _randomwalkkernel_sylvester( | |||||
| all_walks[i], | |||||
| all_walks[j], | |||||
| node_label=node_label, | |||||
| edge_label=edge_label) | |||||
| Kmatrix[i][j] = _randomwalkkernel_conjugate( | |||||
| Gn[i], Gn[j], node_label, edge_label) | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | Kmatrix[j][i] = Kmatrix[i][j] | ||||
| pbar.update(1) | pbar.update(1) | ||||
| elif compute_method == 'conjugate': | |||||
| pass | |||||
| elif compute_method == 'fp': | elif compute_method == 'fp': | ||||
| pass | |||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| Kmatrix[i][j] = _randomwalkkernel_fp(Gn[i], Gn[j], node_label, | |||||
| edge_label) | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| elif compute_method == 'spectral': | elif compute_method == 'spectral': | ||||
| pass | |||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| Kmatrix[i][j] = _randomwalkkernel_spectral( | |||||
| Gn[i], Gn[j], node_label, edge_label) | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| elif compute_method == 'kron': | elif compute_method == 'kron': | ||||
| pass | |||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j], | |||||
| node_label, edge_label) | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| else: | else: | ||||
| raise Exception( | raise Exception( | ||||
| 'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".' | 'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".' | ||||
| ) | ) | ||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| Kmatrix[i][j] = _randomwalkkernel_do( | |||||
| all_walks[i], | |||||
| all_walks[j], | |||||
| node_label=node_label, | |||||
| edge_label=edge_label, | |||||
| labeled=labeled) | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| # for i in range(0, len(Gn)): | |||||
| # for j in range(i, len(Gn)): | |||||
| # Kmatrix[i][j] = _randomwalkkernel_do( | |||||
| # all_walks[i], | |||||
| # all_walks[j], | |||||
| # node_label=node_label, | |||||
| # edge_label=edge_label, | |||||
| # labeled=labeled) | |||||
| # Kmatrix[j][i] = Kmatrix[i][j] | |||||
| run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
| print( | print( | ||||
| "\n --- kernel matrix of walk kernel up to %d of size %d built in %s seconds ---" | |||||
| % (n, len(Gn), run_time)) | |||||
| "\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---" | |||||
| % (len(Gn), run_time)) | |||||
| return Kmatrix, run_time | return Kmatrix, run_time | ||||
| def _randomwalkkernel_sylvester(walks1, | |||||
| walks2, | |||||
| node_label='atom', | |||||
| edge_label='bond_type'): | |||||
| def _randomwalkkernel_sylvester(Gn, lmda, p, q, node_label, edge_label, | |||||
| eweight): | |||||
| """Calculate walk graph kernels up to n between 2 graphs using Sylvester method. | """Calculate walk graph kernels up to n between 2 graphs using Sylvester method. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| walks1, walks2 : list | |||||
| List of walks in 2 graphs, where for unlabeled graphs, each walk is represented by a list of nodes; while for labeled graphs, each walk is represented by a string consists of labels of nodes and edges on that walk. | |||||
| G1, G2 : NetworkX graph | |||||
| Graphs between which the kernel is calculated. | |||||
| node_label : string | node_label : string | ||||
| node attribute used as label. The default node label is atom. | |||||
| node attribute used as label. | |||||
| edge_label : string | edge_label : string | ||||
| edge attribute used as label. The default edge label is bond_type. | |||||
| edge attribute used as label. | |||||
| Return | |||||
| ------ | |||||
| kernel : float | |||||
| Kernel between 2 graphs. | |||||
| """ | |||||
| from control import dlyap | |||||
| Kmatrix = np.zeros((len(Gn), len(Gn))) | |||||
| if q == None: | |||||
| # don't normalize adjacency matrices if q is a uniform vector. | |||||
| A_list = [ | |||||
| nx.adjacency_matrix(G, eweight).todense() for G in tqdm( | |||||
| Gn, desc='compute adjacency matrices', file=sys.stdout) | |||||
| ] | |||||
| if p == None: | |||||
| pbar = tqdm( | |||||
| total=(1 + len(Gn)) * len(Gn) / 2, | |||||
| desc='calculating kernels', | |||||
| file=sys.stdout) | |||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| A = lmda * A_list[j] | |||||
| Q = A_list[i] | |||||
| # use uniform distribution if there is no prior knowledge. | |||||
| nb_pd = len(A_list[i]) * len(A_list[j]) | |||||
| pd_uni = 1 / nb_pd | |||||
| C = np.full((len(A_list[j]), len(A_list[i])), pd_uni) | |||||
| try: | |||||
| X = dlyap(A, Q, C) | |||||
| X = np.reshape(X, (-1, 1), order='F') | |||||
| # use uniform distribution if there is no prior knowledge. | |||||
| q_direct = np.full((1, nb_pd), pd_uni) | |||||
| Kmatrix[i][j] = np.dot(q_direct, X) | |||||
| except TypeError: | |||||
| # print('sth wrong.') | |||||
| Kmatrix[i][j] = np.nan | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| # A_list = [] | |||||
| # for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout): | |||||
| # A_tilde = nx.adjacency_matrix(G, weight=None).todense() | |||||
| # # normalized adjacency matrices | |||||
| # # A_list.append(A_tilde / A_tilde.sum(axis=0)) | |||||
| # A_list.append(A_tilde) | |||||
| return Kmatrix | |||||
| def _randomwalkkernel_conjugate(G1, G2, node_label, edge_label): | |||||
| """Calculate walk graph kernels up to n between 2 graphs using conjugate method. | |||||
| Parameters | |||||
| ---------- | |||||
| G1, G2 : NetworkX graph | |||||
| Graphs between which the kernel is calculated. | |||||
| node_label : string | |||||
| node attribute used as label. | |||||
| edge_label : string | |||||
| edge attribute used as label. | |||||
| Return | |||||
| ------ | |||||
| kernel : float | |||||
| Kernel between 2 graphs. | |||||
| """ | |||||
| dpg = nx.tensor_product(G1, G2) # direct product graph | |||||
| import matplotlib.pyplot as plt | |||||
| nx.draw_networkx(G1) | |||||
| plt.show() | |||||
| nx.draw_networkx(G2) | |||||
| plt.show() | |||||
| nx.draw_networkx(dpg) | |||||
| plt.show() | |||||
| X = dlyap(A, Q, C) | |||||
| return kernel | |||||
| def _randomwalkkernel_fp(G1, G2, node_label, edge_label): | |||||
| """Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method. | |||||
| Parameters | |||||
| ---------- | |||||
| G1, G2 : NetworkX graph | |||||
| Graphs between which the kernel is calculated. | |||||
| node_label : string | |||||
| node attribute used as label. | |||||
| edge_label : string | |||||
| edge attribute used as label. | |||||
| Return | |||||
| ------ | |||||
| kernel : float | |||||
| Kernel between 2 graphs. | |||||
| """ | |||||
| dpg = nx.tensor_product(G1, G2) # direct product graph | |||||
| X = dlyap(A, Q, C) | |||||
| return kernel | |||||
| def _randomwalkkernel_spectral(G1, G2, node_label, edge_label): | |||||
| """Calculate walk graph kernels up to n between 2 graphs using spectral decomposition method. | |||||
| Parameters | |||||
| ---------- | |||||
| G1, G2 : NetworkX graph | |||||
| Graphs between which the kernel is calculated. | |||||
| node_label : string | |||||
| node attribute used as label. | |||||
| edge_label : string | |||||
| edge attribute used as label. | |||||
| Return | |||||
| ------ | |||||
| kernel : float | |||||
| Kernel between 2 graphs. | |||||
| """ | |||||
| dpg = nx.tensor_product(G1, G2) # direct product graph | |||||
| X = dlyap(A, Q, C) | |||||
| return kernel | |||||
| def _randomwalkkernel_kron(G1, G2, node_label, edge_label): | |||||
| """Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method. | |||||
| Parameters | |||||
| ---------- | |||||
| G1, G2 : NetworkX graph | |||||
| Graphs between which the kernel is calculated. | |||||
| node_label : string | |||||
| node attribute used as label. | |||||
| edge_label : string | |||||
| edge attribute used as label. | |||||
| Return | Return | ||||
| ------ | ------ | ||||
| kernel : float | kernel : float | ||||
| Treelet Kernel between 2 graphs. | |||||
| Kernel between 2 graphs. | |||||
| """ | """ | ||||
| dpg = nx.tensor_product(G1, G2) # direct product graph | dpg = nx.tensor_product(G1, G2) # direct product graph | ||||
| @@ -8,6 +8,7 @@ import pathlib | |||||
| sys.path.insert(0, "../") | sys.path.insert(0, "../") | ||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| import time | import time | ||||
| from itertools import combinations_with_replacement, product | |||||
| import networkx as nx | import networkx as nx | ||||
| import numpy as np | import numpy as np | ||||
| @@ -39,8 +40,6 @@ def spkernel(*args, node_label='atom', edge_weight=None, node_kernels=None): | |||||
| # pre-process | # pre-process | ||||
| Gn = args[0] if len(args) == 1 else [args[0], args[1]] | Gn = args[0] if len(args) == 1 else [args[0], args[1]] | ||||
| Gn = [nx.to_directed(G) for G in Gn] | |||||
| weight = None | weight = None | ||||
| if edge_weight == None: | if edge_weight == None: | ||||
| print('\n None edge weight specified. Set all weight to 1.\n') | print('\n None edge weight specified. Set all weight to 1.\n') | ||||
| @@ -89,174 +88,158 @@ def spkernel(*args, node_label='atom', edge_weight=None, node_kernels=None): | |||||
| # node symb and non-synb labeled | # node symb and non-synb labeled | ||||
| if ds_attrs['node_attr_dim'] > 0: | if ds_attrs['node_attr_dim'] > 0: | ||||
| if ds_attrs['is_directed']: | if ds_attrs['is_directed']: | ||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| for e1 in Gn[i].edges(data=True): | |||||
| for e2 in Gn[j].edges(data=True): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['mix'] | |||||
| try: | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
| 0]], Gn[i].nodes[e1[1]], Gn[ | |||||
| j].nodes[e2[0]], Gn[j].nodes[ | |||||
| e2[1]] | |||||
| kn1 = kn(n11[node_label], n21[ | |||||
| node_label], [n11['attributes']], | |||||
| [n21['attributes']]) * kn( | |||||
| n12[node_label], | |||||
| n22[node_label], | |||||
| [n12['attributes']], | |||||
| [n22['attributes']]) | |||||
| Kmatrix[i][j] += kn1 | |||||
| except KeyError: # missing labels or attributes | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| for i, j in combinations_with_replacement( | |||||
| range(0, len(Gn)), 2): | |||||
| for e1, e2 in product( | |||||
| Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['mix'] | |||||
| try: | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
| i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
| j].nodes[e2[1]] | |||||
| kn1 = kn(n11[node_label], n21[node_label], [ | |||||
| n11['attributes'] | |||||
| ], [n21['attributes']]) * kn( | |||||
| n12[node_label], n22[node_label], | |||||
| [n12['attributes']], [n22['attributes']]) | |||||
| Kmatrix[i][j] += kn1 | |||||
| except KeyError: # missing labels or attributes | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| else: | else: | ||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| for e1 in Gn[i].edges(data=True): | |||||
| for e2 in Gn[j].edges(data=True): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['mix'] | |||||
| try: | |||||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
| 0]], Gn[i].nodes[e1[1]], Gn[ | |||||
| j].nodes[e2[0]], Gn[j].nodes[ | |||||
| e2[1]] | |||||
| kn1 = kn(n11[node_label], n21[ | |||||
| node_label], [n11['attributes']], | |||||
| [n21['attributes']]) * kn( | |||||
| n12[node_label], | |||||
| n22[node_label], | |||||
| [n12['attributes']], | |||||
| [n22['attributes']]) | |||||
| kn2 = kn(n11[node_label], n22[ | |||||
| node_label], [n11['attributes']], | |||||
| [n22['attributes']]) * kn( | |||||
| n12[node_label], | |||||
| n21[node_label], | |||||
| [n12['attributes']], | |||||
| [n21['attributes']]) | |||||
| Kmatrix[i][j] += kn1 + kn2 | |||||
| except KeyError: # missing labels or attributes | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| for i, j in combinations_with_replacement( | |||||
| range(0, len(Gn)), 2): | |||||
| for e1, e2 in product( | |||||
| Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['mix'] | |||||
| try: | |||||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
| i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
| j].nodes[e2[1]] | |||||
| kn1 = kn(n11[node_label], n21[node_label], [ | |||||
| n11['attributes'] | |||||
| ], [n21['attributes']]) * kn( | |||||
| n12[node_label], n22[node_label], | |||||
| [n12['attributes']], [n22['attributes']]) | |||||
| kn2 = kn(n11[node_label], n22[node_label], [ | |||||
| n11['attributes'] | |||||
| ], [n22['attributes']]) * kn( | |||||
| n12[node_label], n21[node_label], | |||||
| [n12['attributes']], [n21['attributes']]) | |||||
| Kmatrix[i][j] += kn1 + kn2 | |||||
| except KeyError: # missing labels or attributes | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| # node symb labeled | # node symb labeled | ||||
| else: | else: | ||||
| if ds_attrs['is_directed']: | if ds_attrs['is_directed']: | ||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| for e1 in Gn[i].edges(data=True): | |||||
| for e2 in Gn[j].edges(data=True): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['symb'] | |||||
| try: | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
| 0]], Gn[i].nodes[e1[1]], Gn[ | |||||
| j].nodes[e2[0]], Gn[j].nodes[ | |||||
| e2[1]] | |||||
| kn1 = kn(n11[node_label], | |||||
| n21[node_label]) * kn( | |||||
| n12[node_label], | |||||
| n22[node_label]) | |||||
| Kmatrix[i][j] += kn1 | |||||
| except KeyError: # missing labels | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| for i, j in combinations_with_replacement( | |||||
| range(0, len(Gn)), 2): | |||||
| for e1, e2 in product( | |||||
| Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['symb'] | |||||
| try: | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
| i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
| j].nodes[e2[1]] | |||||
| kn1 = kn(n11[node_label], | |||||
| n21[node_label]) * kn( | |||||
| n12[node_label], n22[node_label]) | |||||
| Kmatrix[i][j] += kn1 | |||||
| except KeyError: # missing labels | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| else: | else: | ||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| for e1 in Gn[i].edges(data=True): | |||||
| for e2 in Gn[j].edges(data=True): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['symb'] | |||||
| try: | |||||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
| 0]], Gn[i].nodes[e1[1]], Gn[ | |||||
| j].nodes[e2[0]], Gn[j].nodes[ | |||||
| e2[1]] | |||||
| kn1 = kn(n11[node_label], | |||||
| n21[node_label]) * kn( | |||||
| n12[node_label], | |||||
| n22[node_label]) | |||||
| kn2 = kn(n11[node_label], | |||||
| n22[node_label]) * kn( | |||||
| n12[node_label], | |||||
| n21[node_label]) | |||||
| Kmatrix[i][j] += kn1 + kn2 | |||||
| except KeyError: # missing labels | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| for i, j in combinations_with_replacement( | |||||
| range(0, len(Gn)), 2): | |||||
| for e1, e2 in product( | |||||
| Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['symb'] | |||||
| try: | |||||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
| i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
| j].nodes[e2[1]] | |||||
| kn1 = kn(n11[node_label], | |||||
| n21[node_label]) * kn( | |||||
| n12[node_label], n22[node_label]) | |||||
| kn2 = kn(n11[node_label], | |||||
| n22[node_label]) * kn( | |||||
| n12[node_label], n21[node_label]) | |||||
| Kmatrix[i][j] += kn1 + kn2 | |||||
| except KeyError: # missing labels | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| else: | else: | ||||
| # node non-synb labeled | # node non-synb labeled | ||||
| if ds_attrs['node_attr_dim'] > 0: | if ds_attrs['node_attr_dim'] > 0: | ||||
| if ds_attrs['is_directed']: | if ds_attrs['is_directed']: | ||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| for e1 in Gn[i].edges(data=True): | |||||
| for e2 in Gn[j].edges(data=True): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['nsymb'] | |||||
| try: | |||||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
| 0]], Gn[i].nodes[e1[1]], Gn[ | |||||
| j].nodes[e2[0]], Gn[j].nodes[ | |||||
| e2[1]] | |||||
| kn1 = kn([n11['attributes']], | |||||
| [n21['attributes']]) * kn( | |||||
| [n12['attributes']], | |||||
| [n22['attributes']]) | |||||
| Kmatrix[i][j] += kn1 | |||||
| except KeyError: # missing attributes | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| for i, j in combinations_with_replacement( | |||||
| range(0, len(Gn)), 2): | |||||
| for e1, e2 in product( | |||||
| Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['nsymb'] | |||||
| try: | |||||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
| i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
| j].nodes[e2[1]] | |||||
| kn1 = kn([n11['attributes']], | |||||
| [n21['attributes']]) * kn( | |||||
| [n12['attributes']], | |||||
| [n22['attributes']]) | |||||
| Kmatrix[i][j] += kn1 | |||||
| except KeyError: # missing attributes | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| else: | else: | ||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| for e1 in Gn[i].edges(data=True): | |||||
| for e2 in Gn[j].edges(data=True): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['nsymb'] | |||||
| try: | |||||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
| 0]], Gn[i].nodes[e1[1]], Gn[ | |||||
| j].nodes[e2[0]], Gn[j].nodes[ | |||||
| e2[1]] | |||||
| kn1 = kn([n11['attributes']], | |||||
| [n21['attributes']]) * kn( | |||||
| [n12['attributes']], | |||||
| [n22['attributes']]) | |||||
| kn2 = kn([n11['attributes']], | |||||
| [n22['attributes']]) * kn( | |||||
| [n12['attributes']], | |||||
| [n21['attributes']]) | |||||
| Kmatrix[i][j] += kn1 + kn2 | |||||
| except KeyError: # missing attributes | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| for i, j in combinations_with_replacement( | |||||
| range(0, len(Gn)), 2): | |||||
| for e1, e2 in product( | |||||
| Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| kn = node_kernels['nsymb'] | |||||
| try: | |||||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||||
| n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
| i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
| j].nodes[e2[1]] | |||||
| kn1 = kn([n11['attributes']], | |||||
| [n21['attributes']]) * kn( | |||||
| [n12['attributes']], | |||||
| [n22['attributes']]) | |||||
| kn2 = kn([n11['attributes']], | |||||
| [n22['attributes']]) * kn( | |||||
| [n12['attributes']], | |||||
| [n21['attributes']]) | |||||
| Kmatrix[i][j] += kn1 + kn2 | |||||
| except KeyError: # missing attributes | |||||
| pass | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| # node unlabeled | # node unlabeled | ||||
| else: | else: | ||||
| for i in range(0, len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| for e1 in Gn[i].edges(data=True): | |||||
| for e2 in Gn[j].edges(data=True): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| Kmatrix[i][j] += 1 | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| for i, j in combinations_with_replacement(range(0, len(Gn)), 2): | |||||
| for e1, e2 in product( | |||||
| Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
| if e1[2]['cost'] == e2[2]['cost']: | |||||
| Kmatrix[i][j] += 1 | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| pbar.update(1) | |||||
| run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
| print( | print( | ||||
| @@ -119,7 +119,7 @@ def untotterTransformation(G, node_label, edge_label): | |||||
| def direct_product(G1, G2, node_label, edge_label): | def direct_product(G1, G2, node_label, edge_label): | ||||
| """Return the direct/tensor product of G1 and G2. | |||||
| """Return the direct/tensor product of directed graphs G1 and G2. | |||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| @@ -137,7 +137,7 @@ def direct_product(G1, G2, node_label, edge_label): | |||||
| Notes | Notes | ||||
| ----- | ----- | ||||
| This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to direct product graph. | |||||
| This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph. | |||||
| References | References | ||||
| ---------- | ---------- | ||||
| @@ -147,25 +147,37 @@ def direct_product(G1, G2, node_label, edge_label): | |||||
| from itertools import product | from itertools import product | ||||
| # G = G.to_directed() | # G = G.to_directed() | ||||
| gt = nx.Graph() | |||||
| gt = nx.DiGraph() | |||||
| # add nodes | # add nodes | ||||
| for u, v in product(G1, G2): | for u, v in product(G1, G2): | ||||
| if G1.nodes[u][node_label] == G2.nodes[v][node_label]: | if G1.nodes[u][node_label] == G2.nodes[v][node_label]: | ||||
| gt.add_node((u, v)) | gt.add_node((u, v)) | ||||
| gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]}) | gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]}) | ||||
| # add edges | |||||
| for u, v in product(gt, gt): | |||||
| if (u[0], v[0]) in G1.edges and ( | |||||
| u[1], v[1] | |||||
| ) in G2.edges and G1.edges[u[0], | |||||
| v[0]][edge_label] == G2.edges[u[1], | |||||
| v[1]][edge_label]: | |||||
| gt.add_edge((u[0], u[1]), (v[0], v[1])) | |||||
| gt.edges[(u[0], u[1]), (v[0], v[1])].update({ | |||||
| # add edges; faster for sparse graphs (not so many edges), which is the most common case for now. | |||||
| for (u1, v1), (u2, v2) in product(G1.edges, G2.edges): | |||||
| if (u1, u2) in gt and ( | |||||
| v1, v2 | |||||
| ) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2, | |||||
| v2][edge_label]: | |||||
| gt.add_edge((u1, u2), (v1, v2)) | |||||
| gt.edges[(u1, u2), (v1, v2)].update({ | |||||
| edge_label: | edge_label: | ||||
| G1.edges[u[0], v[0]][edge_label] | |||||
| G1.edges[u1, v1][edge_label] | |||||
| }) | }) | ||||
| # # add edges; faster for dense graphs (many edges; a complete graph is the best case). | |||||
| # for u, v in product(gt, gt): | |||||
| # if (u[0], v[0]) in G1.edges and ( | |||||
| # u[1], v[1] | |||||
| # ) in G2.edges and G1.edges[u[0], | |||||
| # v[0]][edge_label] == G2.edges[u[1], | |||||
| # v[1]][edge_label]: | |||||
| # gt.add_edge((u[0], u[1]), (v[0], v[1])) | |||||
| # gt.edges[(u[0], u[1]), (v[0], v[1])].update({ | |||||
| # edge_label: | |||||
| # G1.edges[u[0], v[0]][edge_label] | |||||
| # }) | |||||
| # relabel nodes using consecutive integers for convenience of kernel calculation. | # relabel nodes using consecutive integers for convenience of kernel calculation. | ||||
| # gt = nx.convert_node_labels_to_integers( | # gt = nx.convert_node_labels_to_integers( | ||||
| # gt, first_label=0, label_attribute='label_orignal') | # gt, first_label=0, label_attribute='label_orignal') | ||||